javascript tokenization continues oh boy identifier chars allowed is going to be fun though

This commit is contained in:
Pagwin 2025-12-26 22:27:12 -05:00
parent aeb70675a9
commit e03bd6c2ef
No known key found for this signature in database
GPG key ID: 81137023740CA260
2 changed files with 97 additions and 26 deletions

View file

@ -1,12 +1,18 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Utilities.Javascript
( minify,
)
where
import Control.Applicative (Alternative (many), optional)
import Control.Applicative (Alternative (many), optional, (<|>))
import Data.Data (Proxy (Proxy))
import Data.Maybe (maybeToList)
import Data.String (IsString (fromString))
import Logger
import Text.Megaparsec (MonadParsec (try), choice)
import Text.Megaparsec (MonadParsec (notFollowedBy, try), Stream (tokensToChunk), anySingle, choice)
import Text.Megaparsec.Char (hspace, newline, string)
import Utilities.Parsing
minify :: String -> String
@ -25,18 +31,7 @@ data Token s
| PrivateIdentifier s
| ReservedWord Reserved
| Literal (Literal s)
| LParen
| RParen
| LCurly
| RCurly
| LSquare
| RSquare
| Dot
| Spread
| Semicolon
| Comma
| OptionalChain
| Operator Operator
| Punc Punctuator
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield
@ -44,7 +39,7 @@ data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragm
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s
data Operator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign
data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain
tokens :: (Logger m, Characters s) => Parser s m [Token s]
tokens = do
@ -56,28 +51,86 @@ token :: (Logger m, Characters s) => Parser s m (Token s)
token =
choice
[ try comment,
try operator,
try reserved_word,
try identifier,
try private_identifier,
try literal,
try punctuator
try punctuator,
try linebreak,
whitespace
]
hashbang_comment :: (Logger m, Characters s) => Parser s m (Token s)
hashbang_comment = error "TODO"
hashbang_comment = do
string "#!"
many ((notFollowedBy newline) *> anySingle)
pure HashBangComment
comment :: (Logger m, Characters s) => Parser s m (Token s)
comment = error "TODO"
operator :: (Logger m, Characters s) => Parser s m (Token s)
operator = error "TODO"
comment = (try singleline_com) <|> multiline_com
where
singleline_com = do
string "//"
many ((notFollowedBy newline) *> anySingle)
pure SingleLineComment
multiline_com = do
string "/*"
many ((notFollowedBy $ string "*/") *> anySingle)
pure MultiLineComment
reserved_word :: (Logger m, Characters s) => Parser s m (Token s)
reserved_word = error "TODO"
reserved_word = choice [try await, try break, try case_, try catch_, try class_, try const, try continue, try debugger, try default_, try delete, try do_, try else_, try enum, try export, try extends, try false, try finally_, try for_, try function, try if_, try import_, try in_, try instanceof, try new, try null, try return, try super, try switch, try this, try throw_, try true, try try_, try typeof, try var, try void, try while, try with, yield]
where
await = string "await" *> pure (ReservedWord Await)
break = string "break" *> pure (ReservedWord Break)
case_ = string "case" *> pure (ReservedWord Case)
catch_ = string "catch" *> pure (ReservedWord Catch)
class_ = string "class" *> pure (ReservedWord Class)
const = string "const" *> pure (ReservedWord Const)
continue = string "continue" *> pure (ReservedWord Continue)
debugger = string "debugger" *> pure (ReservedWord Debugger)
default_ = string "default" *> pure (ReservedWord Default)
delete = string "delete" *> pure (ReservedWord Delete)
do_ = string "do" *> pure (ReservedWord Do)
else_ = string "else" *> pure (ReservedWord Else)
enum = string "enum" *> pure (ReservedWord Enum)
export = string "export" *> pure (ReservedWord Export)
extends = string "extends" *> pure (ReservedWord Extends)
false = string "false" *> pure (ReservedWord FalseVal)
finally_ = string "finally" *> pure (ReservedWord Finally)
for_ = string "for" *> pure (ReservedWord For)
function = string "function" *> pure (ReservedWord Function)
if_ = string "if" *> pure (ReservedWord If)
import_ = string "import" *> pure (ReservedWord Import)
in_ = string "in" *> pure (ReservedWord In)
instanceof = string "instanceof" *> pure (ReservedWord Instanceof)
new = string "new" *> pure (ReservedWord New)
null = string "null" *> pure (ReservedWord Null)
return = string "return" *> pure (ReservedWord Return)
super = string "super" *> pure (ReservedWord Super)
switch = string "switch" *> pure (ReservedWord Switch)
this = string "this" *> pure (ReservedWord This)
throw_ = string "throw" *> pure (ReservedWord Throw)
true = string "true" *> pure (ReservedWord TrueVal)
try_ = string "try" *> pure (ReservedWord Try)
typeof = string "typeof" *> pure (ReservedWord Typeof)
var = string "var" *> pure (ReservedWord Var)
void = string "void" *> pure (ReservedWord Void)
while = string "while" *> pure (ReservedWord While)
with = string "with" *> pure (ReservedWord With)
yield = string "yield" *> pure (ReservedWord Yield)
identifier :: (Logger m, Characters s) => Parser s m (Token s)
identifier = error "TODO"
identifier = do
first <- start_char
rem <- many rem_char
let tmp = toString $ tokensToChunk (Proxy :: Proxy s) rem
pure $ Identifier $ fromString (first : tmp)
where
start_char :: Parser s m (Token s)
start_char = error "TODO"
rem_char :: Parser s m (Token s)
rem_char = error "TODO"
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
private_identifier = error "TODO"
@ -87,3 +140,9 @@ literal = error "TODO"
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
punctuator = error "TODO"
linebreak :: (Logger m, Characters s) => Parser s m (Token s)
linebreak = newline *> pure WhiteSpace
whitespace :: (Logger m, Characters s) => Parser s m (Token s)
whitespace = hspace *> pure WhiteSpace

View file

@ -4,23 +4,35 @@
module Utilities.Parsing where
import Data.String (IsString)
import Data.Text (Text, pack)
import Data.Text (Text, pack, unpack)
import Data.Void (Void)
import Text.Megaparsec (ParsecT, Stream, Token, Tokens)
type Parser = ParsecT Void
class (Token s ~ Char, Stream s, ToText (Tokens s), IsString (Tokens s), Monoid (Tokens s), Eq (Tokens s), Show s) => Characters s
class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s) => Characters s
class ToText t where
toText :: t -> Text
fromText :: Text -> t
toString :: t -> String
class ToChar c where
toChar :: c -> Char
instance ToChar Char where
toChar = id
instance Characters Text
instance ToText Text where
toText = id
fromText = id
toString = unpack
instance Characters String
instance ToText String where
toText = pack
fromText = unpack
toString = id