just string and regex literals left barring me fucking up implementation of integer literals

2026-01-02 20:49:57 -05:00 · 2026-01-02 20:49:57 -05:00 · 11d2228362
commit 11d2228362
parent 1f89316fdf
4 changed files with 75 additions and 34 deletions
--- a/TODO.md
+++ b/TODO.md
@ -4,7 +4,6 @@
    - JS
        - Doing our own tokenizer lol
 - [ ] swap from using `draft` to using `date` for determining draft status, lack of date = draft
 - [ ] Fix timestamp(s) shown to use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2
 - [ ] setup fingerprinting in file names for css and js
    - setup lambdas via: https://hackage-content.haskell.org/package/mustache-2.4.3.1/docs/Text-Mustache.html#v:overText
    - This may require a refactor of how we handle templates to use `object` instead of just using aeson integration from the mustache crate
@ -27,4 +26,5 @@
 - [ ] Make a function which takes IR and spits out some kind of table of contents
 - [ ] Add rst or org support and convert markdown handling to custom parser instead of pandoc
    - [ ] Add in functionality for footnotes
 - [ ] Fix time via timestamps potentially meaning something (via preshim?) and use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2
 -  [ ] see if performance can be improved, it shouldn't be necessary but if I'm looking at doing something for this and everything above this got checked off then this is a sensible next thing
--- a/src/Psb/Main.hs
+++ b/src/Psb/Main.hs
@ -97,13 +97,8 @@ js_resources =
  map (outputDir </>) jsGlobs |%> \target -> do
    let src_file = FP.dropDirectory1 target
    src <- Shake.readFile' $ src_file
-
+    -- TODO: write to fingerprinted location as well
-    let tokenization = JS.toTokens src_file src
+    Shake.writeFileChanged target $ JS.minify src
    case tokenization of
      Left e -> error $ "Attempt to tokenize javascript file failed with: " <> errorBundlePretty e
      Right tokens ->
        -- TODO: write to fingerprinted location as well
        Shake.writeFileChanged target $ foldMap JS.displayToken $ JS.minify $ tokens
 -- there's probably a better way of doing this that allows for the target's origin file extension to get passed in but for now we're doing brute force
 postsRule :: Rules ()
--- a/src/Utilities/Javascript.hs
+++ b/src/Utilities/Javascript.hs
@ -3,31 +3,38 @@
 module Utilities.Javascript
  ( minify,
    minifyTokens,
    toTokens,
    displayToken,
  )
 where
-import Control.Applicative (Alternative (many), optional, (<|>))
+import Control.Applicative (Alternative (many, some), empty, optional, (<|>))
 import Control.Monad.Trans.Class (MonadTrans (lift))
-import Control.Monad.Trans.State (StateT, evalStateT, put)
+import Control.Monad.Trans.State (StateT, evalStateT, get, put)
 import Data.Data (Proxy (Proxy))
 import Data.Functor (void, (<&>))
 import Data.Functor.Identity (Identity (Identity, runIdentity))
 import Data.Maybe (maybeToList)
 import Data.String (IsString (fromString))
 import Data.Void (Void)
 import Logger
 import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, ParsecT, Stream (tokensToChunk), anySingle, choice, parse, runParserT)
 import qualified Text.Megaparsec as MP
-import Text.Megaparsec.Char (char, digitChar, eol, hspace, letterChar, newline, string)
+import Text.Megaparsec.Char (binDigitChar, char, digitChar, eol, hexDigitChar, hspace, letterChar, newline, octDigitChar, string)
 import Utilities.Parsing (Characters, ToChar (fromChar), ToText (fromText, toString, toText))
 data Possibility = ExprAllowed | ExprNotAllowed deriving (Eq)
 type Parser s m = ParsecT Void s (StateT Possibility m)
-minify :: (Characters s) => [Token s] -> [Token s]
+minify :: forall s. (Characters s, MP.VisualStream s, MP.TraversableStream s) => s -> s
-minify = reduce_identifiers . remove_redundants
+minify src = foldMap displayToken $ minifyTokens $ case runIdentity ((toTokens "" src) :: Identity (Either (ParseErrorBundle s Void) [Token s])) of
  Left e -> error $ "Attempt to tokenize javascript file failed with: " <> MP.errorBundlePretty e
  Right v -> v
 minifyTokens :: (Characters s) => [Token s] -> [Token s]
 minifyTokens = reduce_identifiers . remove_redundants
  where
    -- need to figure out how to add State into this
    reduce_identifiers = map $ \token -> case token of
@ -201,10 +208,11 @@ exprNoop :: (Stream s, Monad m) => String -> Parser s m ()
 -- string arg is just as a comment
 exprNoop _ = pure ()
-- TODO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts
+-- INFO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts
 -- and https://github.com/acornjs/acorn/blob/master/acorn/src/tokenize.js
 -- specific logic at https://github.com/acornjs/acorn/blob/54097dcf8c08733695df7168692d0faac3a2f768/acorn/src/tokencontext.js#L92
 -- https://astexplorer.net/
 -- atm this is guesswork
 token :: (Logger m, Characters s) => Parser s m (Token s)
 token =
  choice
@ -335,7 +343,7 @@ identifier = do
    rem_char :: Parser s m (MP.Token s)
    rem_char = start_char <|> digitChar
-private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
+private_identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
 private_identifier =
  char '#'
    *> identifier
@ -375,20 +383,55 @@ literal =
      char '`'
      pure $ TemplateTail $ fromText $ mconcat $ map toText $ contents
    template_char :: Parser s m s
-    template_char = fromText . toText <$> choice [try (string "$" <* (notFollowedBy $ char '{')), try (char '\\' *> ((try template_escape_seq) <|> not_escape_seq)), try ((optional $ char '\\') *> (eol)), (notFollowedBy (choice $ linebreak : (map (fromChar <$> char) "`\\$"))) *> source_char]
+    template_char =
-    source_char = error "TODO"
+      fromText . toText
-    template_escape_seq = error "TODO: TemplateEscapeSequence, prepend backslash"
+        <$> choice
-    not_escape_seq = error "TODO: NotEscapeSequence, prepend backslash"
+          [ try (string "$" <* (notFollowedBy $ char '{')),
            try escape_seq,
            try ((optional $ char '\\') *> (eol)),
            -- I'm sure this is doable without do but do makes it much easier
            do
              notFollowedBy (choice [void linebreak, void $ char '`', void $ char '\\', void $ char '$'])
              c <- source_char
              pure $ fromString $ c : []
          ]
    source_char = anySingle
    escape_seq = do
      char '\\'
      ret <- anySingle
      pure $ fromString ('\\' : [ret])
    num_lit = Number <$> (choice [try legacy_oct, try decimal_bigint, try decimal_literal, try hex_int, try oct_int, try bin_int, zero])
    zero = char '0' *> pure "0"
    decimal_literal = fromString <$> some (digitChar <|> char '_')
    decimal_bigint = do
      most <- decimal_literal
      char 'n'
      pure $ fromText $ toText most <> "n"
    legacy_oct = char '0' *> (fromString <$> some (octDigitChar <|> char '_'))
    oct_int = char '0' *> (char 'o' <|> char 'O') *> (fromString <$> some (octDigitChar <|> char '_'))
    hex_int = char '0' *> (char 'x' <|> char 'X') *> (fromString <$> some (hexDigitChar <|> char '_'))
    bin_int = char '0' *> (char 'b' <|> char 'B') *> (fromString <$> some (binDigitChar <|> char '_'))
    string_lit = String <$> error "TODO"
    num_lit = Number <$> (choice [try decimal_literal, try decimal_bigint, try plain_bigint, try normal_integer, octal_int])
    decimal_literal = error "TODO"
    decimal_bigint = error "TODO"
    plain_bigint = error "TODO"
    normal_integer = error "TODO"
    octal_int = error "TODO"
-fslash_handler :: (Logger m, Characters s) => Parser s m (Token s)
+fslash_handler :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
-fslash_handler = error "TODO: Regex literal, division and division assignment"
+fslash_handler = do
  allowed <- lift $ get
  let re = case allowed of
        ExprNotAllowed -> empty
        ExprAllowed -> regex_literal
  choice [try re, try division_assign, division]
  where
    regex_literal :: Parser s m (Token s)
    regex_literal = do
      char '/'
      error "TODO"
      pure $ Literal $ Regex {}
    division_assign :: Parser s m (Token s)
    division_assign = (string "/=") *> (pure $ Punc $ DivAssign :: Parser s m (Token s))
    division :: Parser s m (Token s)
    division = char '/' *> (pure $ Punc $ Div :: Parser s m (Token s))
 punctuator :: (Logger m, Characters s) => Parser s m (Token s)
 punctuator =
@ -405,8 +448,10 @@ punctuator =
              try $ string "&&=" *> pure LogicalAndAssign <* exprAllowed,
              try $ string "||=" *> pure LogicalOrAssign <* exprAllowed,
              try $ string "??=" *> pure NullishAssign <* exprAllowed,
-              try $ string "++" *> pure Inc <* error "TODO: Ambiguous precrement vs postcrement",
+              -- best effort guess based on my usage that it'll always be postcrement
-              try $ string "--" *> pure Dec <* error "TODO: Ambiguous precrement postcrement",
+              -- Shouldn't come up in my use case though
              try $ string "++" *> pure Inc <* exprNotAllowed,
              try $ string "--" *> pure Dec <* exprNotAllowed,
              try $ string "?." *> (notFollowedBy digitChar) *> pure OptionalChain <* exprNotAllowed,
              try $ string "**" *> pure Exp <* exprAllowed,
              try $ string "<=" *> pure LTEQ <* exprAllowed,
@ -439,11 +484,12 @@ punctuator =
              char ';' *> pure Semicolon <* exprAllowed,
              char ',' *> pure Comma <* exprAllowed,
              char '!' *> pure LogicalNot <* exprAllowed,
-              -- HERE
+              -- Note: parens and curlies are unambiguously ambiguous
-              char '(' *> pure LParen <* exprNotAllowed,
+              -- Opening ones will generally allow an expression and closing ones will generally not allow an expression
              char '(' *> pure LParen <* exprAllowed,
              char ')' *> pure RParen <* exprNotAllowed,
-              char '{' *> pure LCurly <* error "TODO: Ambiguous",
+              char '{' *> pure LCurly <* exprAllowed,
-              char '}' *> pure RCurly <* error "TODO: Ambiguous",
+              char '}' *> pure RCurly <* exprNotAllowed,
              char '[' *> pure LSquare <* exprNotAllowed,
              char ']' *> pure RSquare <* exprNotAllowed,
              char '.' *> pure Dot <* exprNotAllowed
--- a/src/Utilities/Parsing.hs
+++ b/src/Utilities/Parsing.hs
@ -10,7 +10,7 @@ import Text.Megaparsec (ParsecT, Stream, Token, Tokens)
 type Parser = ParsecT Void
-class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s) => Characters s
+class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s, Monoid s) => Characters s
 class ToText t where
  toText :: t -> Text