JavaScript tokenization continues. Oh boy, working out which identifier characters are allowed is going to be fun, though.

This commit is contained in:
Pagwin 2025-12-26 22:27:12 -05:00
parent aeb70675a9
commit e03bd6c2ef
No known key found for this signature in database
GPG key ID: 81137023740CA260
2 changed files with 97 additions and 26 deletions

View file

@ -1,12 +1,18 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Utilities.Javascript module Utilities.Javascript
( minify, ( minify,
) )
where where
import Control.Applicative (Alternative (many), optional) import Control.Applicative (Alternative (many), optional, (<|>))
import Data.Data (Proxy (Proxy))
import Data.Maybe (maybeToList) import Data.Maybe (maybeToList)
import Data.String (IsString (fromString))
import Logger import Logger
import Text.Megaparsec (MonadParsec (try), choice) import Text.Megaparsec (MonadParsec (notFollowedBy, try), Stream (tokensToChunk), anySingle, choice)
import Text.Megaparsec.Char (alphaNumChar, char, eol, hspace, hspace1, newline, string)
import Utilities.Parsing import Utilities.Parsing
minify :: String -> String minify :: String -> String
@ -25,18 +31,7 @@ data Token s
| PrivateIdentifier s | PrivateIdentifier s
| ReservedWord Reserved | ReservedWord Reserved
| Literal (Literal s) | Literal (Literal s)
| LParen | Punc Punctuator
| RParen
| LCurly
| RCurly
| LSquare
| RSquare
| Dot
| Spread
| Semicolon
| Comma
| OptionalChain
| Operator Operator
-- | JavaScript reserved words recognised by the tokenizer.
-- 'FalseVal' and 'TrueVal' stand for the words @false@ and @true@.
data Reserved
  = Await | Break | Case | Catch | Class | Const | Continue
  | Debugger | Default | Delete | Do
  | Else | Enum | Export | Extends
  | FalseVal | Finally | For | Function
  | If | Import | In | Instanceof
  | New | Null
  | Return
  | Super | Switch
  | This | Throw | TrueVal | Try | Typeof
  | Var | Void
  | While | With
  | Yield
@ -44,7 +39,7 @@ data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragm
-- | One piece of a template literal, carrying its text as an @s@.
data TemplateFragment s
  = NoSub s
  | TemplateHead s
  | TemplateMiddle s
  | TemplateTail s
-- Operator and bracket/separator tokens, merged into a single type.
-- NOTE(review): 'LT' and 'GT' are also constructors of Prelude's 'Ordering';
-- unqualified uses may need disambiguation -- confirm how this module imports
-- Prelude.
data Punctuator
  = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec
  | LT | GT | LTEQ | GTEQ
  | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual
  | LeftShift | RightShift
  | UnsignedRightShift -- >>>
  | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot
  | LogicalAnd | LogicalOr | LogicalNot
  | Nullish -- ??
  | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign
  | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign
  | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign
  | LogicalAndAssign | LogicalOrAssign | NullishAssign
  | LParen | RParen | LCurly | RCurly | LSquare | RSquare
  | Dot | Spread | Semicolon | Comma | OptionalChain
tokens :: (Logger m, Characters s) => Parser s m [Token s] tokens :: (Logger m, Characters s) => Parser s m [Token s]
tokens = do tokens = do
@ -56,28 +51,86 @@ token :: (Logger m, Characters s) => Parser s m (Token s)
token =
  -- Alternatives are attempted top to bottom. Every alternative except the
  -- last is wrapped in 'try' so that a failed attempt rewinds the input
  -- before the next alternative runs.
  choice
    [ try comment
    , try reserved_word
    , try identifier
    , try private_identifier
    , try literal
    , try punctuator
    , try linebreak
    , whitespace
    ]
-- Lex a hashbang line: the literal "#!" followed by every character up to,
-- but not including, the next newline. The comment text is discarded.
hashbang_comment :: (Logger m, Characters s) => Parser s m (Token s)
hashbang_comment =
  HashBangComment <$ (string "#!" *> many (notFollowedBy newline *> anySingle))
-- Lex a JavaScript comment and discard its text. Two forms are accepted:
--
--   * a line comment: "//" followed by everything up to, but not including,
--     the next newline;
--   * a block comment: "/*" through and including the closing "*/".
comment :: (Logger m, Characters s) => Parser s m (Token s)
comment = try singleline_com <|> multiline_com
  where
    singleline_com = do
      _ <- string "//"
      _ <- many (notFollowedBy newline *> anySingle)
      pure SingleLineComment
    multiline_com = do
      _ <- string "/*"
      _ <- many (notFollowedBy (string "*/") *> anySingle)
      -- Consume the terminator too; otherwise the next token would start at
      -- "*/". An unterminated block comment now fails here instead of
      -- silently swallowing the rest of the input.
      _ <- string "*/"
      pure MultiLineComment
-- Lex a single reserved word.
--
-- A word only matches when it is not immediately followed by another
-- identifier character, so "instanceof" is not lexed as "in" and an
-- identifier such as "done" is not lexed as "do". Each alternative is wrapped
-- in 'try', so a rejected candidate leaves the input untouched and the order
-- of the list does not affect the result.
--
-- The boundary check approximates the identifier-part character class with
-- Unicode letters and digits plus '_' and '$'; it does not cover escape
-- sequences or joiner characters.
reserved_word :: (Logger m, Characters s) => Parser s m (Token s)
reserved_word =
  choice
    [ keyword "await" Await
    , keyword "break" Break
    , keyword "case" Case
    , keyword "catch" Catch
    , keyword "class" Class
    , keyword "const" Const
    , keyword "continue" Continue
    , keyword "debugger" Debugger
    , keyword "default" Default
    , keyword "delete" Delete
    , keyword "do" Do
    , keyword "else" Else
    , keyword "enum" Enum
    , keyword "export" Export
    , keyword "extends" Extends
    , keyword "false" FalseVal
    , keyword "finally" Finally
    , keyword "for" For
    , keyword "function" Function
    , keyword "if" If
    , keyword "import" Import
    , keyword "in" In
    , keyword "instanceof" Instanceof
    , keyword "new" New
    , keyword "null" Null
    , keyword "return" Return
    , keyword "super" Super
    , keyword "switch" Switch
    , keyword "this" This
    , keyword "throw" Throw
    , keyword "true" TrueVal
    , keyword "try" Try
    , keyword "typeof" Typeof
    , keyword "var" Var
    , keyword "void" Void
    , keyword "while" While
    , keyword "with" With
    , keyword "yield" Yield
    ]
  where
    -- Match the literal spelling, then require a word boundary after it.
    keyword lit word =
      try $
        ReservedWord word
          <$ string lit
          <* notFollowedBy (alphaNumChar <|> char '_' <|> char '$')
-- Lex an identifier: one start character followed by zero or more
-- continuation characters, packed into an 'Identifier' via 'fromString'.
--
-- The explicit @forall@ brings @s@ and @m@ into scope for the local
-- signatures (ScopedTypeVariables). The helpers yield plain 'Char's so the
-- collected characters can be consed together directly.
--
-- NOTE: both character classes are still unimplemented; running this parser
-- hits 'error' until they are filled in.
identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
identifier = Identifier . fromString <$> ((:) <$> start_char <*> many rem_char)
  where
    -- Characters allowed as the first character of an identifier.
    start_char :: Parser s m Char
    start_char = error "TODO"
    -- Characters allowed after the first character of an identifier.
    rem_char :: Parser s m Char
    rem_char = error "TODO"
-- Placeholder for the private-identifier lexer. Not implemented yet: running
-- this parser raises the "TODO" error.
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
private_identifier = error "TODO"
@ -87,3 +140,9 @@ literal = error "TODO"
-- Placeholder for the punctuator lexer. Not implemented yet: running this
-- parser raises the "TODO" error.
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
punctuator = error "TODO"
-- Lex one line terminator and report it as 'WhiteSpace'.
--
-- Accepts LF and CRLF (via 'eol'), a lone CR, and the Unicode line and
-- paragraph separators U+2028 / U+2029. 'eol' is tried first so that CRLF is
-- consumed as a single terminator rather than as CR followed by LF.
linebreak :: (Logger m, Characters s) => Parser s m (Token s)
linebreak =
  WhiteSpace <$ choice [eol, string "\r", string "\x2028", string "\x2029"]
-- Lex a run of one or more horizontal white-space characters (anything
-- 'hspace1' accepts, which excludes newlines and carriage returns) and report
-- it as 'WhiteSpace'.
--
-- 'hspace1' is used rather than 'hspace' because the latter also succeeds on
-- zero characters: this token would then always succeed without consuming
-- input, and a repetition combinator such as 'many' applied to it would never
-- terminate.
whitespace :: (Logger m, Characters s) => Parser s m (Token s)
whitespace = WhiteSpace <$ hspace1

View file

@ -4,23 +4,35 @@
module Utilities.Parsing where module Utilities.Parsing where
import Data.String (IsString) import Data.String (IsString)
import Data.Text (Text, pack) import Data.Text (Text, pack, unpack)
import Data.Void (Void) import Data.Void (Void)
import Text.Megaparsec (ParsecT, Stream, Token, Tokens) import Text.Megaparsec (ParsecT, Stream, Token, Tokens)
-- | 'ParsecT' with the custom-error component fixed to 'Void'; the stream
-- type, base monad and result type are left for the use site to supply.
type Parser = ParsecT Void
-- | Constraint bundle for input streams whose tokens are 'Char's. The class
-- has no methods of its own; it only groups the superclass constraints below.
class
  ( Token s ~ Char
  , Stream s
  , ToText (Tokens s)
  , ToText s
  , IsString (Tokens s)
  , IsString s
  , Monoid (Tokens s)
  , ToChar (Token s)
  , Eq (Tokens s)
  , Show s
  ) =>
  Characters s
-- | Conversions between a text-like type @t@ and 'Text' / 'String'.
class ToText t where
  -- | Convert a value to 'Text'.
  toText :: t -> Text

  -- | Build a value from 'Text'.
  fromText :: Text -> t

  -- | Convert a value to 'String'.
  toString :: t -> String
-- | Types that can be viewed as a single 'Char'.
class ToChar c where
  -- | Convert a value to a 'Char'.
  toChar :: c -> Char

-- | A 'Char' is already a 'Char', so the conversion is the identity.
instance ToChar Char where
  toChar = id
-- | 'Text' can be used as a character stream.
instance Characters Text

-- | 'Text' converts to and from itself unchanged; 'toString' unpacks it.
instance ToText Text where
  toText = id
  fromText = id
  toString = unpack
-- | 'String' can be used as a character stream.
instance Characters String

-- | 'String' is packed to and unpacked from 'Text'; 'toString' is the
-- identity.
instance ToText String where
  toText = pack
  fromText = unpack
  toString = id