javascript tokenization continues oh boy identifier chars allowed is going to be fun though
This commit is contained in:
parent
aeb70675a9
commit
e03bd6c2ef
2 changed files with 97 additions and 26 deletions
|
|
@ -1,12 +1,18 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
|
||||
module Utilities.Javascript
|
||||
( minify,
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Applicative (Alternative (many), optional)
|
||||
import Control.Applicative (Alternative (many), optional, (<|>))
|
||||
import Data.Data (Proxy (Proxy))
|
||||
import Data.Maybe (maybeToList)
|
||||
import Data.String (IsString (fromString))
|
||||
import Logger
|
||||
import Text.Megaparsec (MonadParsec (try), choice)
|
||||
import Text.Megaparsec (MonadParsec (notFollowedBy, try), Stream (tokensToChunk), anySingle, choice)
|
||||
import Text.Megaparsec.Char (hspace, newline, string)
|
||||
import Utilities.Parsing
|
||||
|
||||
minify :: String -> String
|
||||
|
|
@ -25,18 +31,7 @@ data Token s
|
|||
| PrivateIdentifier s
|
||||
| ReservedWord Reserved
|
||||
| Literal (Literal s)
|
||||
| LParen
|
||||
| RParen
|
||||
| LCurly
|
||||
| RCurly
|
||||
| LSquare
|
||||
| RSquare
|
||||
| Dot
|
||||
| Spread
|
||||
| Semicolon
|
||||
| Comma
|
||||
| OptionalChain
|
||||
| Operator Operator
|
||||
| Punc Punctuator
|
||||
|
||||
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield
|
||||
|
||||
|
|
@ -44,7 +39,7 @@ data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragm
|
|||
|
||||
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s
|
||||
|
||||
data Operator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign
|
||||
data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain
|
||||
|
||||
tokens :: (Logger m, Characters s) => Parser s m [Token s]
|
||||
tokens = do
|
||||
|
|
@ -56,28 +51,86 @@ token :: (Logger m, Characters s) => Parser s m (Token s)
|
|||
token =
|
||||
choice
|
||||
[ try comment,
|
||||
try operator,
|
||||
try reserved_word,
|
||||
try identifier,
|
||||
try private_identifier,
|
||||
try literal,
|
||||
try punctuator
|
||||
try punctuator,
|
||||
try linebreak,
|
||||
whitespace
|
||||
]
|
||||
|
||||
hashbang_comment :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
hashbang_comment = error "TODO"
|
||||
hashbang_comment = do
|
||||
string "#!"
|
||||
many ((notFollowedBy newline) *> anySingle)
|
||||
pure HashBangComment
|
||||
|
||||
comment :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
comment = error "TODO"
|
||||
|
||||
operator :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
operator = error "TODO"
|
||||
comment = (try singleline_com) <|> multiline_com
|
||||
where
|
||||
singleline_com = do
|
||||
string "//"
|
||||
many ((notFollowedBy newline) *> anySingle)
|
||||
pure SingleLineComment
|
||||
multiline_com = do
|
||||
string "/*"
|
||||
many ((notFollowedBy $ string "*/") *> anySingle)
|
||||
pure MultiLineComment
|
||||
|
||||
reserved_word :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
reserved_word = error "TODO"
|
||||
reserved_word = choice [try await, try break, try case_, try catch_, try class_, try const, try continue, try debugger, try default_, try delete, try do_, try else_, try enum, try export, try extends, try false, try finally_, try for_, try function, try if_, try import_, try in_, try instanceof, try new, try null, try return, try super, try switch, try this, try throw_, try true, try try_, try typeof, try var, try void, try while, try with, yield]
|
||||
where
|
||||
await = string "await" *> pure (ReservedWord Await)
|
||||
break = string "break" *> pure (ReservedWord Break)
|
||||
case_ = string "case" *> pure (ReservedWord Case)
|
||||
catch_ = string "catch" *> pure (ReservedWord Catch)
|
||||
class_ = string "class" *> pure (ReservedWord Class)
|
||||
const = string "const" *> pure (ReservedWord Const)
|
||||
continue = string "continue" *> pure (ReservedWord Continue)
|
||||
debugger = string "debugger" *> pure (ReservedWord Debugger)
|
||||
default_ = string "default" *> pure (ReservedWord Default)
|
||||
delete = string "delete" *> pure (ReservedWord Delete)
|
||||
do_ = string "do" *> pure (ReservedWord Do)
|
||||
else_ = string "else" *> pure (ReservedWord Else)
|
||||
enum = string "enum" *> pure (ReservedWord Enum)
|
||||
export = string "export" *> pure (ReservedWord Export)
|
||||
extends = string "extends" *> pure (ReservedWord Extends)
|
||||
false = string "false" *> pure (ReservedWord FalseVal)
|
||||
finally_ = string "finally" *> pure (ReservedWord Finally)
|
||||
for_ = string "for" *> pure (ReservedWord For)
|
||||
function = string "function" *> pure (ReservedWord Function)
|
||||
if_ = string "if" *> pure (ReservedWord If)
|
||||
import_ = string "import" *> pure (ReservedWord Import)
|
||||
in_ = string "in" *> pure (ReservedWord In)
|
||||
instanceof = string "instanceof" *> pure (ReservedWord Instanceof)
|
||||
new = string "new" *> pure (ReservedWord New)
|
||||
null = string "null" *> pure (ReservedWord Null)
|
||||
return = string "return" *> pure (ReservedWord Return)
|
||||
super = string "super" *> pure (ReservedWord Super)
|
||||
switch = string "switch" *> pure (ReservedWord Switch)
|
||||
this = string "this" *> pure (ReservedWord This)
|
||||
throw_ = string "throw" *> pure (ReservedWord Throw)
|
||||
true = string "true" *> pure (ReservedWord TrueVal)
|
||||
try_ = string "try" *> pure (ReservedWord Try)
|
||||
typeof = string "typeof" *> pure (ReservedWord Typeof)
|
||||
var = string "var" *> pure (ReservedWord Var)
|
||||
void = string "void" *> pure (ReservedWord Void)
|
||||
while = string "while" *> pure (ReservedWord While)
|
||||
with = string "with" *> pure (ReservedWord With)
|
||||
yield = string "yield" *> pure (ReservedWord Yield)
|
||||
|
||||
identifier :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
identifier = error "TODO"
|
||||
identifier = do
|
||||
first <- start_char
|
||||
rem <- many rem_char
|
||||
let tmp = toString $ tokensToChunk (Proxy :: Proxy s) rem
|
||||
pure $ Identifier $ fromString (first : tmp)
|
||||
where
|
||||
start_char :: Parser s m (Token s)
|
||||
start_char = error "TODO"
|
||||
rem_char :: Parser s m (Token s)
|
||||
rem_char = error "TODO"
|
||||
|
||||
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
private_identifier = error "TODO"
|
||||
|
|
@ -87,3 +140,9 @@ literal = error "TODO"
|
|||
|
||||
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
punctuator = error "TODO"
|
||||
|
||||
linebreak :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
linebreak = newline *> pure WhiteSpace
|
||||
|
||||
whitespace :: (Logger m, Characters s) => Parser s m (Token s)
|
||||
whitespace = hspace *> pure WhiteSpace
|
||||
|
|
|
|||
|
|
@ -4,23 +4,35 @@
|
|||
module Utilities.Parsing where
|
||||
|
||||
import Data.String (IsString)
|
||||
import Data.Text (Text, pack)
|
||||
import Data.Text (Text, pack, unpack)
|
||||
import Data.Void (Void)
|
||||
import Text.Megaparsec (ParsecT, Stream, Token, Tokens)
|
||||
|
||||
type Parser = ParsecT Void
|
||||
|
||||
class (Token s ~ Char, Stream s, ToText (Tokens s), IsString (Tokens s), Monoid (Tokens s), Eq (Tokens s), Show s) => Characters s
|
||||
class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s) => Characters s
|
||||
|
||||
class ToText t where
|
||||
toText :: t -> Text
|
||||
fromText :: Text -> t
|
||||
toString :: t -> String
|
||||
|
||||
class ToChar c where
|
||||
toChar :: c -> Char
|
||||
|
||||
instance ToChar Char where
|
||||
toChar = id
|
||||
|
||||
instance Characters Text
|
||||
|
||||
instance ToText Text where
|
||||
toText = id
|
||||
fromText = id
|
||||
toString = unpack
|
||||
|
||||
instance Characters String
|
||||
|
||||
instance ToText String where
|
||||
toText = pack
|
||||
fromText = unpack
|
||||
toString = id
|
||||
|
|
|
|||
Loading…
Reference in a new issue