From 7df40c237cd348b382697111521e9482954ebc16 Mon Sep 17 00:00:00 2001 From: Pagwin Date: Sun, 28 Dec 2025 20:38:13 -0500 Subject: [PATCH] more js tokenization work --- src/Utilities/Javascript.hs | 114 ++++++++++++++++++++++++++++++------ 1 file changed, 97 insertions(+), 17 deletions(-) diff --git a/src/Utilities/Javascript.hs b/src/Utilities/Javascript.hs index b2027e4..e2ac161 100644 --- a/src/Utilities/Javascript.hs +++ b/src/Utilities/Javascript.hs @@ -10,12 +10,14 @@ where import Control.Applicative (Alternative (many), optional, (<|>)) import Data.Data (Proxy (Proxy)) +import Data.Functor ((<&>)) import Data.Maybe (maybeToList) import Data.String (IsString (fromString)) import Data.Void (Void) import Logger import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, Stream (tokensToChunk), anySingle, choice, parse) -import Text.Megaparsec.Char (hspace, newline, string) +import qualified Text.Megaparsec as MP +import Text.Megaparsec.Char (char, digitChar, hspace, letterChar, newline, string) import Utilities.Parsing minify :: (Characters s) => [Token s] -> [Token s] @@ -30,8 +32,8 @@ minify = reduce_identifiers . 
remove_redundants WhiteSpace -> False _ -> True -toTokens :: (Characters s) => s -> Either (ParseErrorBundle s Void) [Token s] -toTokens = parse tokens "" +toTokens :: (Characters s) => String -> s -> Either (ParseErrorBundle s Void) [Token s] {- the String argument is passed to megaparsec parse as the source name shown in parse errors -} +toTokens = parse tokens displayToken :: (ToText s) => Token s -> s displayToken _ = error "TODO" @@ -50,14 +52,15 @@ data Token s | ReservedWord Reserved | Literal (Literal s) | Punc Punctuator + deriving (Eq) -data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield +data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield deriving (Eq) -data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s) +data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s) deriving (Eq) -data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s +data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s deriving (Eq) -data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? 
-} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain +data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain deriving (Eq) tokens :: (Logger m, Characters s) => Parser s m [Token s] tokens = do @@ -138,26 +141,103 @@ reserved_word = choice [try await, try break, try case_, try catch_, try class_, with = string "with" *> pure (ReservedWord With) yield = string "yield" *> pure (ReservedWord Yield) -identifier :: (Logger m, Characters s) => Parser s m (Token s) +identifier :: forall s m. 
(Logger m, Characters s) => Parser s m (Token s) identifier = do first <- start_char rem <- many rem_char - let tmp = toString $ tokensToChunk (Proxy :: Proxy s) rem - pure $ Identifier $ fromString (first : tmp) + pure $ Identifier $ fromString (first : rem) where - start_char :: Parser s m (Token s) - start_char = error "TODO" - rem_char :: Parser s m (Token s) - rem_char = error "TODO" + start_char :: Parser s m (MP.Token s) {- MP.Token s is the stream element type; NOTE(review): fromString (first : rem) above presumably needs it to be Char, confirm against Characters -} + start_char = (char '$') <|> char '_' <|> letterChar {- first character: $, _ or any letter -} + rem_char :: Parser s m (MP.Token s) {- later characters: anything start_char accepts, or a digit -} + rem_char = start_char <|> digitChar private_identifier :: (Logger m, Characters s) => Parser s m (Token s) -private_identifier = error "TODO" +private_identifier = {- consumes # and then an identifier, re-tagging the result as PrivateIdentifier; the one-pattern lambda relies on identifier always building Identifier -} + char '#' + *> identifier + <&> \(Identifier i) -> PrivateIdentifier i literal :: (Logger m, Characters s) => Parser s m (Token s) -literal = error "TODO" +literal = {- NOTE(review): every branch below still ends in error "TODO", so running this parser presumably throws rather than trying the next alternative -} + Literal + <$> ( choice + [ try template_fragment, + try string_lit, + num_lit + ] + ) + where + template_fragment = TemplateFragment <$> error "TODO" + string_lit = String <$> error "TODO" + num_lit = Number <$> (choice [try decimal_literal, try decimal_bigint, try plain_bigint, try normal_integer, octal_int]) + decimal_literal = error "TODO" + decimal_bigint = error "TODO" + plain_bigint = error "TODO" + normal_integer = error "TODO" + octal_int = error "TODO" + +fslash_handler :: (Logger m, Characters s) => Parser s m (Token s) {- stub; its TODO message lists regex literals, division and division assignment as its job -} +fslash_handler = error "TODO: Regex literal, division and division assignment" punctuator :: (Logger m, Characters s) => Parser s m (Token s) -punctuator = error "TODO" +punctuator = {- alternatives run longest-first, so an operator is always tried before any of its own prefixes; / and /= are not listed (see the fslash_handler TODO); NOTE(review): there is also no alternative for a lone ?, : or =>, confirm those are handled elsewhere -} + Punc + <$> ( choice + [ try $ string ">>>=" *> pure UnsignedRightShiftAssign, + try $ string "..." 
*> pure Spread, + try $ string "===" *> pure TripleEqual, + try $ string "!==" *> pure DoubleNotEqual, + try $ string "<<=" *> pure LeftShiftAssign, + try $ string ">>=" *> pure RightShiftAssign, + try $ string ">>>" *> pure UnsignedRightShift, + try $ string "**=" *> pure ExpAssign, + try $ string "&&=" *> pure LogicalAndAssign, + try $ string "||=" *> pure LogicalOrAssign, + try $ string "??=" *> pure NullishAssign, + try $ string "?." *> (notFollowedBy digitChar) *> pure OptionalChain, {- ?. directly before a digit is rejected here, so a?.5:b is not tokenized as optional chaining -} + try $ string "**" *> pure Exp, + try $ string "++" *> pure Inc, + try $ string "--" *> pure Dec, + try $ string "<=" *> pure LTEQ, + try $ string ">=" *> pure GTEQ, + try $ string "==" *> pure DoubleEqual, + try $ string "!=" *> pure NotEqual, + try $ string "<<" *> pure LeftShift, + try $ string ">>" *> pure RightShift, + try $ string "+=" *> pure AddAssign, + try $ string "-=" *> pure SubAssign, + try $ string "*=" *> pure MultAssign, + try $ string "%=" *> pure ModAssign, + try $ string "&=" *> pure BitwiseAndAssign, + try $ string "|=" *> pure BitwiseOrAssign, + try $ string "^=" *> pure BitwiseXorAssign, + try $ string "&&" *> pure LogicalAnd, + try $ string "||" *> pure LogicalOr, + try $ string "??" *> pure Nullish, + char '+' *> pure Add, + char '-' *> pure Sub, + char '*' *> pure Mult, + char '%' *> pure Mod, + char '<' *> pure Utilities.Javascript.LT, + char '>' *> pure Utilities.Javascript.GT, + char '&' *> pure BitwiseAnd, + char '|' *> pure BitwiseOr, + char '^' *> pure BitwiseXor, + char '~' *> pure BitwiseNot, + char '=' *> pure Assign, + char '(' *> pure LParen, + char ')' *> pure RParen, + char '{' *> pure LCurly, + char '}' *> pure RCurly, + char '[' *> pure LSquare, + char ']' *> pure RSquare, + char '.' *> pure Dot, + char ';' *> pure Semicolon, + char ',' *> pure Comma, + char '!' *> pure LogicalNot + ] + ) linebreak :: (Logger m, Characters s) => Parser s m (Token s) linebreak = newline *> pure WhiteSpace