From 11d22283628737be1c35a8daa4085ae8abecd158 Mon Sep 17 00:00:00 2001 From: Pagwin Date: Fri, 2 Jan 2026 20:49:57 -0500 Subject: [PATCH] just string and regex literals left barring me fucking up implementation of integer literals --- TODO.md | 2 +- src/Psb/Main.hs | 9 +--- src/Utilities/Javascript.hs | 96 +++++++++++++++++++++++++++---------- src/Utilities/Parsing.hs | 2 +- 4 files changed, 75 insertions(+), 34 deletions(-) diff --git a/TODO.md b/TODO.md index 23533a6..a2edef1 100644 --- a/TODO.md +++ b/TODO.md @@ -4,7 +4,6 @@ - JS - Doing our own tokenizer lol - [ ] swap from using `draft` to using `date` for determing draft status, lack of date = draft -- [ ] Fix timestamp(s) shown to use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2 - [ ] setup fingerprinting in file names for css and js - setup lambdas via: https://hackage-content.haskell.org/package/mustache-2.4.3.1/docs/Text-Mustache.html#v:overText - This may require a refactor of how we handle templates to use `object` instead of just using aeson integration from the mustache crate @@ -27,4 +26,5 @@ - [ ] Make a function which takes IR and spits out some kind of table of contents - [ ] Add rst or org support and convert markdown handling to custom parser instead of pandoc - [ ] Add in functionality for footnotes +- [ ] Fix time via timestamps potentially meaning something (via preshim?) and use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2 - [ ] see if performance can be improved, it shouldn't be necessary but if I'm looking at doing something for this and everything above this got checked off then this is a sensible next thing diff --git a/src/Psb/Main.hs b/src/Psb/Main.hs index 24b23cf..c96c6c9 100644 --- a/src/Psb/Main.hs +++ b/src/Psb/Main.hs @@ -97,13 +97,8 @@ js_resources = map (outputDir ) jsGlobs |%> \target -> do let src_file = FP.dropDirectory1 target src <- Shake.readFile' $ src_file - - let tokenization = JS.toTokens src_file src - case tokenization of - Left e -> error $ "Attempt to tokenize javascript file failed with: " <> errorBundlePretty e - Right tokens -> - -- TODO: write to fingerprinted location as well - Shake.writeFileChanged target $ foldMap JS.displayToken $ JS.minify $ tokens + -- TODO: write to fingerprinted location as well + Shake.writeFileChanged target $ JS.minify src -- there's probably a better way of doing this that allows for the target's origin file extension to get passed in but for now we're doing brute force postsRule :: Rules () diff --git a/src/Utilities/Javascript.hs b/src/Utilities/Javascript.hs index 3cd58c9..a3b6ce8 100644 --- a/src/Utilities/Javascript.hs +++ b/src/Utilities/Javascript.hs @@ -3,31 +3,38 @@ module Utilities.Javascript ( minify, + minifyTokens, toTokens, displayToken, ) where -import Control.Applicative (Alternative (many), optional, (<|>)) +import Control.Applicative (Alternative (many, some), empty, optional, (<|>)) import Control.Monad.Trans.Class (MonadTrans (lift)) -import Control.Monad.Trans.State (StateT, evalStateT, put) +import Control.Monad.Trans.State (StateT, evalStateT, get, put) import Data.Data (Proxy (Proxy)) import Data.Functor (void, (<&>)) +import Data.Functor.Identity (Identity (Identity, runIdentity)) import Data.Maybe (maybeToList) import Data.String (IsString (fromString)) import Data.Void (Void) import Logger import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, ParsecT, Stream (tokensToChunk), anySingle, choice, parse, runParserT) import qualified Text.Megaparsec as MP -import Text.Megaparsec.Char (char, digitChar, eol, hspace, letterChar, newline, string) +import Text.Megaparsec.Char (binDigitChar, char, digitChar, eol, hexDigitChar, hspace, letterChar, newline, octDigitChar, string) import Utilities.Parsing (Characters, ToChar (fromChar), ToText (fromText, toString, toText)) data Possibility = ExprAllowed | ExprNotAllowed deriving (Eq) type Parser s m = ParsecT Void s (StateT Possibility m) -minify :: (Characters s) => [Token s] -> [Token s] -minify = reduce_identifiers . remove_redundants +minify :: forall s. (Characters s, MP.VisualStream s, MP.TraversableStream s) => s -> s +minify src = foldMap displayToken $ minifyTokens $ case runIdentity ((toTokens "" src) :: Identity (Either (ParseErrorBundle s Void) [Token s])) of + Left e -> error $ "Attempt to tokenize javascript file failed with: " <> MP.errorBundlePretty e + Right v -> v + +minifyTokens :: (Characters s) => [Token s] -> [Token s] +minifyTokens = reduce_identifiers . remove_redundants where -- need to figure out how to add State into this reduce_identifiers = map $ \token -> case token of @@ -201,10 +208,11 @@ exprNoop :: (Stream s, Monad m) => String -> Parser s m () -- string arg is just as a comment exprNoop _ = pure () --- TODO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts +-- INFO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts -- and https://github.com/acornjs/acorn/blob/master/acorn/src/tokenize.js -- specific logic at https://github.com/acornjs/acorn/blob/54097dcf8c08733695df7168692d0faac3a2f768/acorn/src/tokencontext.js#L92 -- https://astexplorer.net/ +-- atm this is guesswork token :: (Logger m, Characters s) => Parser s m (Token s) token = choice @@ -335,7 +343,7 @@ identifier = do rem_char :: Parser s m (MP.Token s) rem_char = start_char <|> digitChar -private_identifier :: (Logger m, Characters s) => Parser s m (Token s) +private_identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s) private_identifier = char '#' *> identifier @@ -375,20 +383,55 @@ literal = char '`' pure $ TemplateTail $ fromText $ mconcat $ map toText $ contents template_char :: Parser s m s - template_char = fromText . toText <$> choice [try (string "$" <* (notFollowedBy $ char '{')), try (char '\\' *> ((try template_escape_seq) <|> not_escape_seq)), try ((optional $ char '\\') *> (eol)), (notFollowedBy (choice $ linebreak : (map (fromChar <$> char) "`\\$"))) *> source_char] - source_char = error "TODO" - template_escape_seq = error "TODO: TemplateEscapeSequence, prepend backslash" - not_escape_seq = error "TODO: NotEscapeSequence, prepend backslash" + template_char = + fromText . toText + <$> choice + [ try (string "$" <* (notFollowedBy $ char '{')), + try escape_seq, + try ((optional $ char '\\') *> (eol)), + -- I'm sure this is doable without do but do makes it much easier + do + notFollowedBy (choice [void linebreak, void $ char '`', void $ char '\\', void $ char '$']) + c <- source_char + pure $ fromString $ c : [] + ] + source_char = anySingle + escape_seq = do + char '\\' + ret <- anySingle + pure $ fromString ('\\' : [ret]) + num_lit = Number <$> (choice [try legacy_oct, try decimal_bigint, try decimal_literal, try hex_int, try oct_int, try bin_int, zero]) + zero = char '0' *> pure "0" + decimal_literal = fromString <$> some (digitChar <|> char '_') + decimal_bigint = do + most <- decimal_literal + char 'n' + pure $ fromText $ toText most <> "n" + legacy_oct = char '0' *> (fromString <$> some (octDigitChar <|> char '_')) + oct_int = char '0' *> (char 'o' <|> char 'O') *> (fromString <$> some (octDigitChar <|> char '_')) + hex_int = char '0' *> (char 'x' <|> char 'X') *> (fromString <$> some (hexDigitChar <|> char '_')) + bin_int = char '0' *> (char 'b' <|> char 'B') *> (fromString <$> some (binDigitChar <|> char '_')) string_lit = String <$> error "TODO" - num_lit = Number <$> (choice [try decimal_literal, try decimal_bigint, try plain_bigint, try normal_integer, octal_int]) - decimal_literal = error "TODO" - decimal_bigint = error "TODO" - plain_bigint = error "TODO" - normal_integer = error "TODO" - octal_int = error "TODO" -fslash_handler :: (Logger m, Characters s) => Parser s m (Token s) -fslash_handler = error "TODO: Regex literal, division and division assignment" +fslash_handler :: forall s m. (Logger m, Characters s) => Parser s m (Token s) +fslash_handler = do + allowed <- lift $ get + let re = case allowed of + ExprNotAllowed -> empty + ExprAllowed -> regex_literal + + choice [try re, try division_assign, division] + where + regex_literal :: Parser s m (Token s) + regex_literal = do + char '/' + error "TODO" + pure $ Literal $ Regex {} + division_assign :: Parser s m (Token s) + division_assign = (string "/=") *> (pure $ Punc $ DivAssign :: Parser s m (Token s)) + + division :: Parser s m (Token s) + division = char '/' *> (pure $ Punc $ Div :: Parser s m (Token s)) punctuator :: (Logger m, Characters s) => Parser s m (Token s) punctuator = @@ -405,8 +448,10 @@ punctuator = try $ string "&&=" *> pure LogicalAndAssign <* exprAllowed, try $ string "||=" *> pure LogicalOrAssign <* exprAllowed, try $ string "??=" *> pure NullishAssign <* exprAllowed, - try $ string "++" *> pure Inc <* error "TODO: Ambiguous precrement vs postcrement", - try $ string "--" *> pure Dec <* error "TODO: Ambiguous precrement postcrement", + -- best effort guess based on my usage that it'll always be postcrement + -- Shouldn't come up in my use case though + try $ string "++" *> pure Inc <* exprNotAllowed, + try $ string "--" *> pure Dec <* exprNotAllowed, try $ string "?." *> (notFollowedBy digitChar) *> pure OptionalChain <* exprNotAllowed, try $ string "**" *> pure Exp <* exprAllowed, try $ string "<=" *> pure LTEQ <* exprAllowed, @@ -439,11 +484,12 @@ punctuator = char ';' *> pure Semicolon <* exprAllowed, char ',' *> pure Comma <* exprAllowed, char '!' *> pure LogicalNot <* exprAllowed, - -- HERE - char '(' *> pure LParen <* exprNotAllowed, + -- Note: parens and curlies are unambiguously ambiguous + -- Opening ones will generally allow an expression and closing ones will generally not allow an expression + char '(' *> pure LParen <* exprAllowed, char ')' *> pure RParen <* exprNotAllowed, - char '{' *> pure LCurly <* error "TODO: Ambiguous", - char '}' *> pure RCurly <* error "TODO: Ambiguous", + char '{' *> pure LCurly <* exprAllowed, + char '}' *> pure RCurly <* exprNotAllowed, char '[' *> pure LSquare <* exprNotAllowed, char ']' *> pure RSquare <* exprNotAllowed, char '.' *> pure Dot <* exprNotAllowed diff --git a/src/Utilities/Parsing.hs b/src/Utilities/Parsing.hs index d8f9127..61247a6 100644 --- a/src/Utilities/Parsing.hs +++ b/src/Utilities/Parsing.hs @@ -10,7 +10,7 @@ import Text.Megaparsec (ParsecT, Stream, Token, Tokens) type Parser = ParsecT Void -class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s) => Characters s +class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s, Monoid s) => Characters s class ToText t where toText :: t -> Text