more js tokenization work
This commit is contained in:
parent
0f44431086
commit
7df40c237c
1 changed file with 97 additions and 17 deletions
|
|
@ -10,12 +10,14 @@ where
|
||||||
|
|
||||||
import Control.Applicative (Alternative (many), optional, (<|>))
|
import Control.Applicative (Alternative (many), optional, (<|>))
|
||||||
import Data.Data (Proxy (Proxy))
|
import Data.Data (Proxy (Proxy))
|
||||||
|
import Data.Functor ((<&>))
|
||||||
import Data.Maybe (maybeToList)
|
import Data.Maybe (maybeToList)
|
||||||
import Data.String (IsString (fromString))
|
import Data.String (IsString (fromString))
|
||||||
import Data.Void (Void)
|
import Data.Void (Void)
|
||||||
import Logger
|
import Logger
|
||||||
import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, Stream (tokensToChunk), anySingle, choice, parse)
|
import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, Stream (tokensToChunk), anySingle, choice, parse)
|
||||||
import Text.Megaparsec.Char (hspace, newline, string)
|
import qualified Text.Megaparsec as MP
|
||||||
|
import Text.Megaparsec.Char (char, digitChar, hspace, letterChar, newline, string)
|
||||||
import Utilities.Parsing
|
import Utilities.Parsing
|
||||||
|
|
||||||
minify :: (Characters s) => [Token s] -> [Token s]
|
minify :: (Characters s) => [Token s] -> [Token s]
|
||||||
|
|
@ -30,8 +32,8 @@ minify = reduce_identifiers . remove_redundants
|
||||||
WhiteSpace -> False
|
WhiteSpace -> False
|
||||||
_ -> True
|
_ -> True
|
||||||
|
|
||||||
toTokens :: (Characters s) => s -> Either (ParseErrorBundle s Void) [Token s]
|
-- | Run the 'tokens' parser over a complete input.
--
-- The first argument is forwarded to megaparsec's 'parse' as the source
-- name, the second is the text to tokenize.  The result is either the
-- parse error bundle or the list of tokens produced by 'tokens'.
toTokens :: (Characters s) => String -> s -> Either (ParseErrorBundle s Void) [Token s]
toTokens sourceName input = parse tokens sourceName input
|
||||||
|
|
||||||
displayToken :: (ToText s) => Token s -> s
|
-- | Render a token back to text.
--
-- Not implemented yet: applying this function to any token aborts with
-- @error "TODO"@.
displayToken :: (ToText s) => Token s -> s
displayToken = const (error "TODO")
|
||||||
|
|
@ -50,14 +52,15 @@ data Token s
|
||||||
| ReservedWord Reserved
|
| ReservedWord Reserved
|
||||||
| Literal (Literal s)
|
| Literal (Literal s)
|
||||||
| Punc Punctuator
|
| Punc Punctuator
|
||||||
|
deriving (Eq)
|
||||||
|
|
||||||
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield
|
-- | Keywords carried by the 'ReservedWord' token constructor.
--
-- NOTE(review): 'FalseVal', 'TrueVal' and 'Null' presumably stand for the
-- @false@, @true@ and @null@ keywords -- confirm against @reserved_word@.
data Reserved
  = Await
  | Break
  | Case
  | Catch
  | Class
  | Const
  | Continue
  | Debugger
  | Default
  | Delete
  | Do
  | Else
  | Enum
  | Export
  | Extends
  | FalseVal
  | Finally
  | For
  | Function
  | If
  | Import
  | In
  | Instanceof
  | New
  | Null
  | Return
  | Super
  | Switch
  | This
  | Throw
  | TrueVal
  | Try
  | Typeof
  | Var
  | Void
  | While
  | With
  | Yield
  deriving (Eq)
|
||||||
|
|
||||||
data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s)
|
-- | Payload of the 'Literal' token constructor, parameterised over the
-- text type @s@ that holds the literal's contents.
data Literal s
  = Number s
  | String s
  | Regex s
  | TemplateFragment (TemplateFragment s)
  deriving (Eq)
|
||||||
|
|
||||||
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s
|
-- | The pieces a template literal can be split into, each carrying its
-- text as @s@.
--
-- NOTE(review): the names suggest a template without substitutions
-- ('NoSub') versus the head, middle and tail pieces around substitutions
-- -- confirm once the template parser is implemented.
data TemplateFragment s
  = NoSub s
  | TemplateHead s
  | TemplateMiddle s
  | TemplateTail s
  deriving (Eq)
|
||||||
|
|
||||||
data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain
|
-- | Operators and delimiters carried by the 'Punc' token constructor.
--
-- The trailing comment on each constructor is the lexeme the @punctuator@
-- parser maps to it.  'Div' and 'DivAssign' are not produced by that
-- parser; their lexemes below are presumed from the names.
data Punctuator
  = Add -- "+"
  | Sub -- "-"
  | Mult -- "*"
  | Div -- presumably "/"
  | Mod -- "%"
  | Exp -- "**"
  | Inc -- "++"
  | Dec -- "--"
  | LT -- "<"
  | GT -- ">"
  | LTEQ -- "<="
  | GTEQ -- ">="
  | DoubleEqual -- "=="
  | NotEqual -- "!="
  | TripleEqual -- "==="
  | DoubleNotEqual -- "!=="
  | LeftShift -- "<<"
  | RightShift -- ">>"
  | UnsignedRightShift -- ">>>"
  | BitwiseAnd -- "&"
  | BitwiseOr -- "|"
  | BitwiseXor -- "^"
  | BitwiseNot -- "~"
  | LogicalAnd -- "&&"
  | LogicalOr -- "||"
  | LogicalNot -- "!"
  | Nullish -- "??"
  | Assign -- "="
  | AddAssign -- "+="
  | SubAssign -- "-="
  | MultAssign -- "*="
  | DivAssign -- presumably "/="
  | ModAssign -- "%="
  | ExpAssign -- "**="
  | LeftShiftAssign -- "<<="
  | RightShiftAssign -- ">>="
  | UnsignedRightShiftAssign -- ">>>="
  | BitwiseAndAssign -- "&="
  | BitwiseOrAssign -- "|="
  | BitwiseXorAssign -- "^="
  | LogicalAndAssign -- "&&="
  | LogicalOrAssign -- "||="
  | NullishAssign -- "??="
  | LParen -- "("
  | RParen -- ")"
  | LCurly -- "{"
  | RCurly -- "}"
  | LSquare -- "["
  | RSquare -- "]"
  | Dot -- "."
  | Spread -- "..."
  | Semicolon -- ";"
  | Comma -- ","
  | OptionalChain -- "?."
  deriving (Eq)
|
||||||
|
|
||||||
tokens :: (Logger m, Characters s) => Parser s m [Token s]
|
tokens :: (Logger m, Characters s) => Parser s m [Token s]
|
||||||
tokens = do
|
tokens = do
|
||||||
|
|
@ -138,26 +141,103 @@ reserved_word = choice [try await, try break, try case_, try catch_, try class_,
|
||||||
with = string "with" *> pure (ReservedWord With)
|
with = string "with" *> pure (ReservedWord With)
|
||||||
yield = string "yield" *> pure (ReservedWord Yield)
|
yield = string "yield" *> pure (ReservedWord Yield)
|
||||||
|
|
||||||
identifier :: (Logger m, Characters s) => Parser s m (Token s)
|
-- | Parse an identifier name and wrap it in the 'Identifier' token.
--
-- The first character must be @$@, @_@ or a letter; each following
-- character may additionally be a digit.  The collected characters are
-- converted to @s@ with 'fromString'.  Nothing here rejects reserved
-- words.
identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
identifier = Identifier . fromString <$> ((:) <$> leading <*> many trailing)
  where
    -- Characters accepted in the first position.
    leading :: Parser s m (MP.Token s)
    leading = char '$' <|> char '_' <|> letterChar

    -- Characters accepted in every later position.
    trailing :: Parser s m (MP.Token s)
    trailing = leading <|> digitChar
|
||||||
|
|
||||||
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
|
-- | Parse a private name: a @#@ immediately followed by an identifier,
-- returned as a 'PrivateIdentifier' token carrying the identifier's text
-- (without the @#@).
--
-- The result of 'identifier' is matched with a total @case@ instead of a
-- partial lambda pattern, so an unexpected token shape is reported as an
-- ordinary parse failure rather than a pattern-match exception.
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
private_identifier = do
  _ <- char '#'
  name <- identifier
  case name of
    Identifier i -> pure (PrivateIdentifier i)
    -- 'identifier' only builds 'Identifier' today; stay total regardless.
    _ -> fail "private_identifier: expected an identifier after '#'"
|
||||||
|
|
||||||
literal :: (Logger m, Characters s) => Parser s m (Token s)
|
-- | Parse a literal and wrap it in the 'Literal' token constructor.
--
-- Alternatives are tried in this order: template fragment, string literal,
-- numeric literal.  The numeric alternative in turn tries decimal literal,
-- decimal bigint, plain bigint, normal integer and octal integer.
--
-- NOTE: every leaf parser below is still an @error "TODO"@ placeholder, so
-- this parser cannot be run successfully yet.
literal :: (Logger m, Characters s) => Parser s m (Token s)
literal = fmap Literal (choice [try template_fragment, try string_lit, num_lit])
  where
    template_fragment = fmap TemplateFragment (error "TODO")
    string_lit = fmap String (error "TODO")
    -- Every numeric form except the last is wrapped in 'try'.
    num_lit =
      Number
        <$> choice
          ( map try [decimal_literal, decimal_bigint, plain_bigint, normal_integer]
              ++ [octal_int]
          )
    decimal_literal = error "TODO"
    decimal_bigint = error "TODO"
    plain_bigint = error "TODO"
    normal_integer = error "TODO"
    octal_int = error "TODO"
|
||||||
|
|
||||||
|
-- | Placeholder for the tokens named in its TODO message: regex literals,
-- division and division assignment.  Not implemented; evaluating it aborts
-- with that message.
fslash_handler :: (Logger m, Characters s) => Parser s m (Token s)
fslash_handler =
  error "TODO: Regex literal, division and division assignment"
|
||||||
|
|
||||||
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
|
-- | Parse one punctuator and wrap it in the 'Punc' token constructor.
--
-- Alternatives are listed longest lexeme first (four, three, two, then one
-- character), so a shorter operator never shadows a longer one that starts
-- with the same characters.  @?.@ is only accepted when it is not followed
-- by a digit.
--
-- No alternative here starts with @/@.
-- NOTE(review): there is also no alternative for a lone @?@, for @:@ or for
-- @=>@, and 'Punctuator' has no constructors for them -- confirm whether
-- they are meant to be tokenized elsewhere.
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
punctuator =
  Punc
    <$> choice
      [ -- four characters
        multi ">>>=" UnsignedRightShiftAssign,
        -- three characters
        multi "..." Spread,
        multi "===" TripleEqual,
        multi "!==" DoubleNotEqual,
        multi "<<=" LeftShiftAssign,
        multi ">>=" RightShiftAssign,
        multi ">>>" UnsignedRightShift,
        multi "**=" ExpAssign,
        multi "&&=" LogicalAndAssign,
        multi "||=" LogicalOrAssign,
        multi "??=" NullishAssign,
        -- two characters
        try (OptionalChain <$ (string "?." *> notFollowedBy digitChar)),
        multi "**" Exp,
        multi "++" Inc,
        multi "--" Dec,
        multi "<=" LTEQ,
        multi ">=" GTEQ,
        multi "==" DoubleEqual,
        multi "!=" NotEqual,
        multi "<<" LeftShift,
        multi ">>" RightShift,
        multi "+=" AddAssign,
        multi "-=" SubAssign,
        multi "*=" MultAssign,
        multi "%=" ModAssign,
        multi "&=" BitwiseAndAssign,
        multi "|=" BitwiseOrAssign,
        multi "^=" BitwiseXorAssign,
        multi "&&" LogicalAnd,
        multi "||" LogicalOr,
        multi "??" Nullish,
        -- one character
        single '+' Add,
        single '-' Sub,
        single '*' Mult,
        single '%' Mod,
        -- qualified: LT and GT would otherwise clash with Prelude's Ordering
        single '<' Utilities.Javascript.LT,
        single '>' Utilities.Javascript.GT,
        single '&' BitwiseAnd,
        single '|' BitwiseOr,
        single '^' BitwiseXor,
        single '~' BitwiseNot,
        single '=' Assign,
        single '(' LParen,
        single ')' RParen,
        single '{' LCurly,
        single '}' RCurly,
        single '[' LSquare,
        single ']' RSquare,
        single '.' Dot,
        single ';' Semicolon,
        single ',' Comma,
        single '!' LogicalNot
      ]
  where
    -- Multi-character lexeme, wrapped in 'try' as in every such alternative.
    multi lexeme tok = try (tok <$ string lexeme)
    -- Single-character lexeme.
    single c tok = tok <$ char c
|
||||||
|
|
||||||
linebreak :: (Logger m, Characters s) => Parser s m (Token s)
|
-- | Consume one newline and report it as a 'WhiteSpace' token.
linebreak :: (Logger m, Characters s) => Parser s m (Token s)
linebreak = WhiteSpace <$ newline
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue