more js tokenization work
This commit is contained in:
parent
0f44431086
commit
7df40c237c
1 changed files with 97 additions and 17 deletions
|
|
@ -10,12 +10,14 @@ where
|
|||
|
||||
import Control.Applicative (Alternative (many), optional, (<|>))
|
||||
import Data.Data (Proxy (Proxy))
|
||||
import Data.Functor ((<&>))
|
||||
import Data.Maybe (maybeToList)
|
||||
import Data.String (IsString (fromString))
|
||||
import Data.Void (Void)
|
||||
import Logger
|
||||
import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, Stream (tokensToChunk), anySingle, choice, parse)
|
||||
import Text.Megaparsec.Char (hspace, newline, string)
|
||||
import qualified Text.Megaparsec as MP
|
||||
import Text.Megaparsec.Char (char, digitChar, hspace, letterChar, newline, string)
|
||||
import Utilities.Parsing
|
||||
|
||||
minify :: (Characters s) => [Token s] -> [Token s]
|
||||
|
|
@ -30,8 +32,8 @@ minify = reduce_identifiers . remove_redundants
|
|||
WhiteSpace -> False
|
||||
_ -> True
|
||||
|
||||
toTokens :: (Characters s) => s -> Either (ParseErrorBundle s Void) [Token s]
|
||||
toTokens = parse tokens ""
|
||||
-- | Run the 'tokens' parser over a piece of JavaScript source.
--
-- Both arguments are handed straight to megaparsec's 'parse': the first is
-- the source name it attaches to the input, the second is the input itself.
-- The result is either the parse error bundle or the token list.
toTokens :: (Characters s) => String -> s -> Either (ParseErrorBundle s Void) [Token s]
toTokens sourceName input = parse tokens sourceName input
|
||||
|
||||
displayToken :: (ToText s) => Token s -> s
|
||||
-- Not implemented yet: the argument is ignored and evaluating the result
-- calls 'error'.
displayToken _ = error "TODO"
|
||||
|
|
@ -50,14 +52,15 @@ data Token s
|
|||
| ReservedWord Reserved
|
||||
| Literal (Literal s)
|
||||
| Punc Punctuator
|
||||
deriving (Eq)
|
||||
|
||||
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield
|
||||
-- | Payload of the 'ReservedWord' token constructor: one constructor per
-- reserved word. NOTE(review): 'FalseVal', 'TrueVal' and 'Null' presumably
-- stand for the @false@, @true@ and @null@ keywords; confirm against
-- @reserved_word@.
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield deriving (Eq)
|
||||
|
||||
data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s)
|
||||
-- | Payload of the 'Literal' token constructor. Numbers, strings and regexes
-- carry a value of the stream type @s@ (presumably the literal's source text;
-- TODO confirm); template pieces are wrapped in 'TemplateFragment'.
data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s) deriving (Eq)
|
||||
|
||||
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s
|
||||
-- | One piece of a template literal, each carrying a value of type @s@.
-- NOTE(review): the constructor names suggest the ECMAScript template tokens
-- (no-substitution template, head, middle, tail); confirm once the
-- template parser is written.
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s deriving (Eq)
|
||||
|
||||
data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain
|
||||
-- | Payload of the 'Punc' token constructor: one constructor per punctuator.
-- The two inline comments name the symbol of the constructor that follows
-- them (@>>>@ for 'UnsignedRightShift', @??@ for 'Nullish').
data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain deriving (Eq)
|
||||
|
||||
tokens :: (Logger m, Characters s) => Parser s m [Token s]
|
||||
tokens = do
|
||||
|
|
@ -138,26 +141,103 @@ reserved_word = choice [try await, try break, try case_, try catch_, try class_,
|
|||
with = string "with" *> pure (ReservedWord With)
|
||||
yield = string "yield" *> pure (ReservedWord Yield)
|
||||
|
||||
identifier :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
-- | Parse an identifier: a start character (@$@, @_@ or a letter) followed
-- by any number of start characters or digits, wrapped in 'Identifier'.
--
-- This parser does not itself reject reserved words.
identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
identifier = do
  first <- start_char
  -- Named `rest` rather than `rem` so Prelude's 'rem' is not shadowed.
  rest <- many rem_char
  -- Consing the characters into a String assumes the stream's token type is
  -- Char (presumably guaranteed by 'Characters'; TODO confirm).
  pure $ Identifier $ fromString (first : rest)
  where
    -- The explicit `forall` on the top-level signature is what lets these
    -- local signatures refer to the same `s` and `m`.
    start_char :: Parser s m (MP.Token s)
    start_char = char '$' <|> char '_' <|> letterChar

    rem_char :: Parser s m (MP.Token s)
    rem_char = start_char <|> digitChar
|
||||
|
||||
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
private_identifier = error "TODO"
|
||||
-- Parse a '#' immediately followed by an identifier and re-wrap the
-- identifier's name as a 'PrivateIdentifier'.
private_identifier = do
  _ <- char '#'
  tok <- identifier
  case tok of
    Identifier name -> pure (PrivateIdentifier name)
    -- 'identifier' only builds 'Identifier' tokens, so this branch is not
    -- expected to run. It exists so the match is total: if 'identifier' ever
    -- returns something else we get a parse error, not a runtime crash.
    -- (Relies on the parser monad's MonadFail instance.)
    _ -> fail "private_identifier: expected an identifier after '#'"
|
||||
|
||||
literal :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
literal = error "TODO"
|
||||
-- Parse a literal and wrap it in the 'Literal' token constructor.
-- Alternatives are tried in order: template fragment, string, number.
-- Only the dispatch structure exists so far; every leaf parser below is
-- still an `error "TODO"` placeholder.
literal = Literal <$> choice alternatives
  where
    alternatives =
      [ try template_fragment
      , try string_lit
      , num_lit
      ]

    template_fragment = TemplateFragment <$> error "TODO"

    string_lit = String <$> error "TODO"

    -- Numeric forms, tried in this order; all but the last are wrapped in
    -- `try` so a failed attempt backtracks before the next one runs.
    num_lit = Number <$> choice numeric_forms

    numeric_forms =
      [ try decimal_literal
      , try decimal_bigint
      , try plain_bigint
      , try normal_integer
      , octal_int
      ]

    decimal_literal = error "TODO"
    decimal_bigint = error "TODO"
    plain_bigint = error "TODO"
    normal_integer = error "TODO"
    octal_int = error "TODO"
|
||||
|
||||
fslash_handler :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
-- Not implemented yet: evaluating this calls 'error'. Per its message it is
-- meant to handle the tokens that start with '/': regex literals, division
-- and division assignment.
fslash_handler = error "TODO: Regex literal, division and division assignment"
|
||||
|
||||
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
punctuator = error "TODO"
|
||||
-- Parse one punctuator and wrap it in the 'Punc' token constructor.
--
-- Alternatives are tried top to bottom. The multi-character symbols come
-- first so that a longer symbol is matched before any shorter alternative
-- that is its prefix (">>>=" before ">>>" before ">>" before ">").
-- No alternative here matches '/' or "/=".
punctuator = Punc <$> choice (multi_char ++ single_char)
  where
    -- Symbols of two or more characters, each wrapped in `try`.
    multi_char =
      [ try (UnsignedRightShiftAssign <$ string ">>>=")
      , try (Spread <$ string "...")
      , try (TripleEqual <$ string "===")
      , try (DoubleNotEqual <$ string "!==")
      , try (LeftShiftAssign <$ string "<<=")
      , try (RightShiftAssign <$ string ">>=")
      , try (UnsignedRightShift <$ string ">>>")
      , try (ExpAssign <$ string "**=")
      , try (LogicalAndAssign <$ string "&&=")
      , try (LogicalOrAssign <$ string "||=")
      , try (NullishAssign <$ string "??=")
      , -- "?." only counts as an optional chain when no digit follows it.
        -- NOTE(review): presumably so `a?.5:b` still lexes as a conditional.
        try (OptionalChain <$ (string "?." *> notFollowedBy digitChar))
      , try (Exp <$ string "**")
      , try (Inc <$ string "++")
      , try (Dec <$ string "--")
      , try (LTEQ <$ string "<=")
      , try (GTEQ <$ string ">=")
      , try (DoubleEqual <$ string "==")
      , try (NotEqual <$ string "!=")
      , try (LeftShift <$ string "<<")
      , try (RightShift <$ string ">>")
      , try (AddAssign <$ string "+=")
      , try (SubAssign <$ string "-=")
      , try (MultAssign <$ string "*=")
      , try (ModAssign <$ string "%=")
      , try (BitwiseAndAssign <$ string "&=")
      , try (BitwiseOrAssign <$ string "|=")
      , try (BitwiseXorAssign <$ string "^=")
      , try (LogicalAnd <$ string "&&")
      , try (LogicalOr <$ string "||")
      , try (Nullish <$ string "??")
      ]

    -- Single-character symbols. LT and GT are written qualified because
    -- Prelude exports constructors with the same names.
    single_char =
      [ Add <$ char '+'
      , Sub <$ char '-'
      , Mult <$ char '*'
      , Mod <$ char '%'
      , Utilities.Javascript.LT <$ char '<'
      , Utilities.Javascript.GT <$ char '>'
      , BitwiseAnd <$ char '&'
      , BitwiseOr <$ char '|'
      , BitwiseXor <$ char '^'
      , BitwiseNot <$ char '~'
      , Assign <$ char '='
      , LParen <$ char '('
      , RParen <$ char ')'
      , LCurly <$ char '{'
      , RCurly <$ char '}'
      , LSquare <$ char '['
      , RSquare <$ char ']'
      , Dot <$ char '.'
      , Semicolon <$ char ';'
      , Comma <$ char ','
      , LogicalNot <$ char '!'
      ]
|
||||
|
||||
-- | Consume a single newline and report it as a 'WhiteSpace' token.
linebreak :: (Logger m, Characters s) => Parser s m (Token s)
linebreak = WhiteSpace <$ newline
|
||||
|
|
|
|||
Loading…
Reference in a new issue