just string and regex literals left barring me fucking up implementation of integer literals

This commit is contained in:
Pagwin 2026-01-02 20:49:57 -05:00
parent 1f89316fdf
commit 11d2228362
No known key found for this signature in database
GPG key ID: 81137023740CA260
4 changed files with 75 additions and 34 deletions

View file

@ -4,7 +4,6 @@
- JS - JS
- Doing our own tokenizer lol - Doing our own tokenizer lol
- [ ] swap from using `draft` to using `date` for determining draft status, lack of date = draft - [ ] swap from using `draft` to using `date` for determining draft status, lack of date = draft
- [ ] Fix timestamp(s) shown to use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2
- [ ] setup fingerprinting in file names for css and js - [ ] setup fingerprinting in file names for css and js
- setup lambdas via: https://hackage-content.haskell.org/package/mustache-2.4.3.1/docs/Text-Mustache.html#v:overText - setup lambdas via: https://hackage-content.haskell.org/package/mustache-2.4.3.1/docs/Text-Mustache.html#v:overText
- This may require a refactor of how we handle templates to use `object` instead of just using aeson integration from the mustache crate - This may require a refactor of how we handle templates to use `object` instead of just using aeson integration from the mustache crate
@ -27,4 +26,5 @@
- [ ] Make a function which takes IR and spits out some kind of table of contents - [ ] Make a function which takes IR and spits out some kind of table of contents
- [ ] Add rst or org support and convert markdown handling to custom parser instead of pandoc - [ ] Add rst or org support and convert markdown handling to custom parser instead of pandoc
- [ ] Add in functionality for footnotes - [ ] Add in functionality for footnotes
- [ ] Fix time via timestamps potentially meaning something (via preshim?) and use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2
- [ ] see if performance can be improved, it shouldn't be necessary but if I'm looking at doing something for this and everything above this got checked off then this is a sensible next thing - [ ] see if performance can be improved, it shouldn't be necessary but if I'm looking at doing something for this and everything above this got checked off then this is a sensible next thing

View file

@ -97,13 +97,8 @@ js_resources =
map (outputDir </>) jsGlobs |%> \target -> do map (outputDir </>) jsGlobs |%> \target -> do
let src_file = FP.dropDirectory1 target let src_file = FP.dropDirectory1 target
src <- Shake.readFile' $ src_file src <- Shake.readFile' $ src_file
-- TODO: write to fingerprinted location as well
let tokenization = JS.toTokens src_file src Shake.writeFileChanged target $ JS.minify src
case tokenization of
Left e -> error $ "Attempt to tokenize javascript file failed with: " <> errorBundlePretty e
Right tokens ->
-- TODO: write to fingerprinted location as well
Shake.writeFileChanged target $ foldMap JS.displayToken $ JS.minify $ tokens
-- there's probably a better way of doing this that allows for the target's origin file extension to get passed in but for now we're doing brute force -- there's probably a better way of doing this that allows for the target's origin file extension to get passed in but for now we're doing brute force
postsRule :: Rules () postsRule :: Rules ()

View file

@ -3,31 +3,38 @@
module Utilities.Javascript module Utilities.Javascript
( minify, ( minify,
minifyTokens,
toTokens, toTokens,
displayToken, displayToken,
) )
where where
import Control.Applicative (Alternative (many), optional, (<|>)) import Control.Applicative (Alternative (many, some), empty, optional, (<|>))
import Control.Monad.Trans.Class (MonadTrans (lift)) import Control.Monad.Trans.Class (MonadTrans (lift))
import Control.Monad.Trans.State (StateT, evalStateT, put) import Control.Monad.Trans.State (StateT, evalStateT, get, put)
import Data.Data (Proxy (Proxy)) import Data.Data (Proxy (Proxy))
import Data.Functor (void, (<&>)) import Data.Functor (void, (<&>))
import Data.Functor.Identity (Identity (Identity, runIdentity))
import Data.Maybe (maybeToList) import Data.Maybe (maybeToList)
import Data.String (IsString (fromString)) import Data.String (IsString (fromString))
import Data.Void (Void) import Data.Void (Void)
import Logger import Logger
import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, ParsecT, Stream (tokensToChunk), anySingle, choice, parse, runParserT) import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, ParsecT, Stream (tokensToChunk), anySingle, choice, parse, runParserT)
import qualified Text.Megaparsec as MP import qualified Text.Megaparsec as MP
import Text.Megaparsec.Char (char, digitChar, eol, hspace, letterChar, newline, string) import Text.Megaparsec.Char (binDigitChar, char, digitChar, eol, hexDigitChar, hspace, letterChar, newline, octDigitChar, string)
import Utilities.Parsing (Characters, ToChar (fromChar), ToText (fromText, toString, toText)) import Utilities.Parsing (Characters, ToChar (fromChar), ToText (fromText, toString, toText))
data Possibility = ExprAllowed | ExprNotAllowed deriving (Eq) data Possibility = ExprAllowed | ExprNotAllowed deriving (Eq)
type Parser s m = ParsecT Void s (StateT Possibility m) type Parser s m = ParsecT Void s (StateT Possibility m)
minify :: (Characters s) => [Token s] -> [Token s] minify :: forall s. (Characters s, MP.VisualStream s, MP.TraversableStream s) => s -> s
minify = reduce_identifiers . remove_redundants minify src = foldMap displayToken $ minifyTokens $ case runIdentity ((toTokens "" src) :: Identity (Either (ParseErrorBundle s Void) [Token s])) of
Left e -> error $ "Attempt to tokenize javascript file failed with: " <> MP.errorBundlePretty e
Right v -> v
minifyTokens :: (Characters s) => [Token s] -> [Token s]
minifyTokens = reduce_identifiers . remove_redundants
where where
-- need to figure out how to add State into this -- need to figure out how to add State into this
reduce_identifiers = map $ \token -> case token of reduce_identifiers = map $ \token -> case token of
@ -201,10 +208,11 @@ exprNoop :: (Stream s, Monad m) => String -> Parser s m ()
-- string arg is just as a comment -- string arg is just as a comment
exprNoop _ = pure () exprNoop _ = pure ()
-- TODO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts -- INFO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts
-- and https://github.com/acornjs/acorn/blob/master/acorn/src/tokenize.js -- and https://github.com/acornjs/acorn/blob/master/acorn/src/tokenize.js
-- specific logic at https://github.com/acornjs/acorn/blob/54097dcf8c08733695df7168692d0faac3a2f768/acorn/src/tokencontext.js#L92 -- specific logic at https://github.com/acornjs/acorn/blob/54097dcf8c08733695df7168692d0faac3a2f768/acorn/src/tokencontext.js#L92
-- https://astexplorer.net/ -- https://astexplorer.net/
-- atm this is guesswork
token :: (Logger m, Characters s) => Parser s m (Token s) token :: (Logger m, Characters s) => Parser s m (Token s)
token = token =
choice choice
@ -335,7 +343,7 @@ identifier = do
rem_char :: Parser s m (MP.Token s) rem_char :: Parser s m (MP.Token s)
rem_char = start_char <|> digitChar rem_char = start_char <|> digitChar
private_identifier :: (Logger m, Characters s) => Parser s m (Token s) private_identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
private_identifier = private_identifier =
char '#' char '#'
*> identifier *> identifier
@ -375,20 +383,55 @@ literal =
char '`' char '`'
pure $ TemplateTail $ fromText $ mconcat $ map toText $ contents pure $ TemplateTail $ fromText $ mconcat $ map toText $ contents
template_char :: Parser s m s template_char :: Parser s m s
template_char = fromText . toText <$> choice [try (string "$" <* (notFollowedBy $ char '{')), try (char '\\' *> ((try template_escape_seq) <|> not_escape_seq)), try ((optional $ char '\\') *> (eol)), (notFollowedBy (choice $ linebreak : (map (fromChar <$> char) "`\\$"))) *> source_char] template_char =
source_char = error "TODO" fromText . toText
template_escape_seq = error "TODO: TemplateEscapeSequence, prepend backslash" <$> choice
not_escape_seq = error "TODO: NotEscapeSequence, prepend backslash" [ try (string "$" <* (notFollowedBy $ char '{')),
try escape_seq,
try ((optional $ char '\\') *> (eol)),
-- I'm sure this is doable without do but do makes it much easier
do
notFollowedBy (choice [void linebreak, void $ char '`', void $ char '\\', void $ char '$'])
c <- source_char
pure $ fromString $ c : []
]
source_char = anySingle
escape_seq = do
char '\\'
ret <- anySingle
pure $ fromString ('\\' : [ret])
-- Integer literal tokenizers. Every parser returns the literal's complete
-- source text (prefix, digits, separators and BigInt suffix) so the minifier
-- re-emits exactly what was written and the number keeps its value.
--
-- Order matters: the radix-prefixed forms and legacy octal are tried before
-- the decimal forms, otherwise the decimal parser would consume the leading
-- '0' of e.g. "0x1F" and leave "x1F" behind to be tokenized as an identifier.
num_lit = Number <$> (choice [try hex_int, try oct_int, try bin_int, try legacy_oct, try decimal_bigint, try decimal_literal, zero])
-- Lone "0"; kept as a final fallback, decimal_literal normally matches it first.
zero = char '0' *> pure "0"
-- Decimal digits with optional '_' separators. The first character must be a
-- digit so that something like "_1" is never mistaken for a number.
decimal_literal = fromString <$> ((:) <$> digitChar <*> many (digitChar <|> char '_'))
-- Decimal BigInt: decimal digits immediately followed by the 'n' suffix.
decimal_bigint = do
  most <- decimal_literal
  void $ char 'n'
  pure $ fromText $ toText most <> "n"
-- Legacy octal ("017"). The leading '0' is part of the literal and is kept.
-- If a further decimal digit follows (e.g. "078") this is really a decimal
-- literal, so fail and let decimal_literal take the whole run of digits.
legacy_oct = do
  void $ char '0'
  digits <- some (octDigitChar <|> char '_')
  notFollowedBy digitChar
  pure $ fromString ('0' : digits)
-- "0o17" / "0O17", keeping the prefix as written plus an optional BigInt 'n'.
oct_int = do
  void $ char '0'
  marker <- char 'o' <|> char 'O'
  digits <- some (octDigitChar <|> char '_')
  suffix <- optional (char 'n')
  pure $ fromString ('0' : marker : digits ++ maybeToList suffix)
-- "0x1F" / "0X1F", keeping the prefix as written plus an optional BigInt 'n'.
hex_int = do
  void $ char '0'
  marker <- char 'x' <|> char 'X'
  digits <- some (hexDigitChar <|> char '_')
  suffix <- optional (char 'n')
  pure $ fromString ('0' : marker : digits ++ maybeToList suffix)
-- "0b101" / "0B101", keeping the prefix as written plus an optional BigInt 'n'.
bin_int = do
  void $ char '0'
  marker <- char 'b' <|> char 'B'
  digits <- some (binDigitChar <|> char '_')
  suffix <- optional (char 'n')
  pure $ fromString ('0' : marker : digits ++ maybeToList suffix)
string_lit = String <$> error "TODO" string_lit = String <$> error "TODO"
num_lit = Number <$> (choice [try decimal_literal, try decimal_bigint, try plain_bigint, try normal_integer, octal_int])
decimal_literal = error "TODO"
decimal_bigint = error "TODO"
plain_bigint = error "TODO"
normal_integer = error "TODO"
octal_int = error "TODO"
fslash_handler :: (Logger m, Characters s) => Parser s m (Token s) fslash_handler :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
fslash_handler = error "TODO: Regex literal, division and division assignment" fslash_handler = do
allowed <- lift $ get
let re = case allowed of
ExprNotAllowed -> empty
ExprAllowed -> regex_literal
choice [try re, try division_assign, division]
where
regex_literal :: Parser s m (Token s)
regex_literal = do
char '/'
error "TODO"
pure $ Literal $ Regex {}
division_assign :: Parser s m (Token s)
division_assign = (string "/=") *> (pure $ Punc $ DivAssign :: Parser s m (Token s))
division :: Parser s m (Token s)
division = char '/' *> (pure $ Punc $ Div :: Parser s m (Token s))
punctuator :: (Logger m, Characters s) => Parser s m (Token s) punctuator :: (Logger m, Characters s) => Parser s m (Token s)
punctuator = punctuator =
@ -405,8 +448,10 @@ punctuator =
try $ string "&&=" *> pure LogicalAndAssign <* exprAllowed, try $ string "&&=" *> pure LogicalAndAssign <* exprAllowed,
try $ string "||=" *> pure LogicalOrAssign <* exprAllowed, try $ string "||=" *> pure LogicalOrAssign <* exprAllowed,
try $ string "??=" *> pure NullishAssign <* exprAllowed, try $ string "??=" *> pure NullishAssign <* exprAllowed,
try $ string "++" *> pure Inc <* error "TODO: Ambiguous precrement vs postcrement", -- best effort guess based on my usage that it'll always be postcrement
try $ string "--" *> pure Dec <* error "TODO: Ambiguous precrement postcrement", -- Shouldn't come up in my use case though
try $ string "++" *> pure Inc <* exprNotAllowed,
try $ string "--" *> pure Dec <* exprNotAllowed,
try $ string "?." *> (notFollowedBy digitChar) *> pure OptionalChain <* exprNotAllowed, try $ string "?." *> (notFollowedBy digitChar) *> pure OptionalChain <* exprNotAllowed,
try $ string "**" *> pure Exp <* exprAllowed, try $ string "**" *> pure Exp <* exprAllowed,
try $ string "<=" *> pure LTEQ <* exprAllowed, try $ string "<=" *> pure LTEQ <* exprAllowed,
@ -439,11 +484,12 @@ punctuator =
char ';' *> pure Semicolon <* exprAllowed, char ';' *> pure Semicolon <* exprAllowed,
char ',' *> pure Comma <* exprAllowed, char ',' *> pure Comma <* exprAllowed,
char '!' *> pure LogicalNot <* exprAllowed, char '!' *> pure LogicalNot <* exprAllowed,
-- HERE -- Note: parens and curlies are unambiguously ambiguous
char '(' *> pure LParen <* exprNotAllowed, -- Opening ones will generally allow an expression and closing ones will generally not allow an expression
char '(' *> pure LParen <* exprAllowed,
char ')' *> pure RParen <* exprNotAllowed, char ')' *> pure RParen <* exprNotAllowed,
char '{' *> pure LCurly <* error "TODO: Ambiguous", char '{' *> pure LCurly <* exprAllowed,
char '}' *> pure RCurly <* error "TODO: Ambiguous", char '}' *> pure RCurly <* exprNotAllowed,
char '[' *> pure LSquare <* exprNotAllowed, char '[' *> pure LSquare <* exprNotAllowed,
char ']' *> pure RSquare <* exprNotAllowed, char ']' *> pure RSquare <* exprNotAllowed,
char '.' *> pure Dot <* exprNotAllowed char '.' *> pure Dot <* exprNotAllowed

View file

@ -10,7 +10,7 @@ import Text.Megaparsec (ParsecT, Stream, Token, Tokens)
type Parser = ParsecT Void type Parser = ParsecT Void
class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s) => Characters s class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s, Monoid s) => Characters s
class ToText t where class ToText t where
toText :: t -> Text toText :: t -> Text