Compare commits

...

2 commits

Author SHA1 Message Date
Pagwin
7df40c237c
more js tokenization work 2025-12-28 20:38:13 -05:00
Pagwin
0f44431086
changed main code to handle refactored js parsing API 2025-12-28 20:38:04 -05:00
2 changed files with 107 additions and 20 deletions

View file

@ -21,6 +21,7 @@ import qualified Development.Shake as Shake
import Development.Shake.FilePath ((</>))
import qualified Development.Shake.FilePath as FP
import Templates
import Text.Megaparsec (errorBundlePretty)
import Types
import Utilities.Action (getPublishedPosts, isDraft', markdownToHtml, markdownToPost, now, psbProgress)
import qualified Utilities.CSS as CSS
@ -92,9 +93,15 @@ css_resources =
-- | Shake rule for JavaScript outputs: every pattern in 'jsGlobs' is placed
-- under 'outputDir', and each matching target is produced from the path
-- obtained by dropping the target's first directory component.
--
-- NOTE(review): this listing appears to contain both the pre-change body
-- (read, then @JS.minify src@ directly) and the post-change body (tokenize,
-- minify, display) of a diff whose +/- markers were lost — confirm which
-- lines are live before editing.
js_resources :: Rules ()
js_resources =
map (outputDir </>) jsGlobs |%> \target -> do
src <- Shake.readFile' $ FP.dropDirectory1 target
-- TODO: write to fingerprinted location as well
Shake.writeFileChanged target $ JS.minify src
let src_file = FP.dropDirectory1 target
src <- Shake.readFile' $ src_file
-- The source path is handed to the tokenizer together with the contents.
let tokenization = JS.toTokens src_file src
case tokenization of
-- A tokenizer failure aborts the rule through `error`, with the parse
-- errors rendered by errorBundlePretty.
Left e -> error $ "Attempt to tokenize javascript file failed with: " <> errorBundlePretty e
Right tokens ->
-- TODO: write to fingerprinted location as well
-- Tokens are minified, rendered one by one and concatenated with foldMap.
Shake.writeFileChanged target $ foldMap JS.displayToken $ JS.minify $ tokens
-- There is probably a better way to do this that lets the target's original file extension be passed in, but for now we use brute force.
postsRule :: Rules ()

View file

@ -10,12 +10,14 @@ where
import Control.Applicative (Alternative (many), optional, (<|>))
import Data.Data (Proxy (Proxy))
import Data.Functor ((<&>))
import Data.Maybe (maybeToList)
import Data.String (IsString (fromString))
import Data.Void (Void)
import Logger
import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, Stream (tokensToChunk), anySingle, choice, parse)
import Text.Megaparsec.Char (hspace, newline, string)
import qualified Text.Megaparsec as MP
import Text.Megaparsec.Char (char, digitChar, hspace, letterChar, newline, string)
import Utilities.Parsing
minify :: (Characters s) => [Token s] -> [Token s]
@ -30,8 +32,8 @@ minify = reduce_identifiers . remove_redundants
WhiteSpace -> False
_ -> True
toTokens :: (Characters s) => s -> Either (ParseErrorBundle s Void) [Token s]
toTokens = parse tokens ""
-- | Run the 'tokens' parser over a complete input.
--
-- The first argument is forwarded to 'parse' as the source name and the
-- second is the text to tokenize. The result is either the parse error
-- bundle or the list of tokens.
toTokens :: (Characters s) => String -> s -> Either (ParseErrorBundle s Void) [Token s]
toTokens sourceName input = parse tokens sourceName input
-- | Presumably renders a single token back to text of type @s@ (inferred
-- from the name and signature — TODO confirm once implemented).
--
-- Not implemented yet: the argument is ignored and the result is
-- @error "TODO"@, so forcing the result of any call raises that error.
displayToken :: (ToText s) => Token s -> s
displayToken _ = error "TODO"
@ -50,14 +52,15 @@ data Token s
| ReservedWord Reserved
| Literal (Literal s)
| Punc Punctuator
deriving (Eq)
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield
-- | Reserved words, carried by the @ReservedWord@ token constructor.
--
-- 'FalseVal' and 'TrueVal' presumably stand for the @false@ and @true@
-- keywords (their parsers are not visible here — TODO confirm).
data Reserved
  = Await
  | Break
  | Case
  | Catch
  | Class
  | Const
  | Continue
  | Debugger
  | Default
  | Delete
  | Do
  | Else
  | Enum
  | Export
  | Extends
  | FalseVal
  | Finally
  | For
  | Function
  | If
  | Import
  | In
  | Instanceof
  | New
  | Null
  | Return
  | Super
  | Switch
  | This
  | Throw
  | TrueVal
  | Try
  | Typeof
  | Var
  | Void
  | While
  | With
  | Yield
  deriving (Eq)
data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s)
-- | Literal payloads, carried by the @Literal@ token constructor. Each
-- alternative wraps a value of the text type @s@, except template pieces,
-- which wrap a 'TemplateFragment'.
data Literal s
  = Number s
  | String s
  | Regex s
  | TemplateFragment (TemplateFragment s)
  deriving (Eq)
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s
-- | The pieces a template literal can be split into; each wraps a value of
-- the text type @s@.
data TemplateFragment s
  = NoSub s
  | TemplateHead s
  | TemplateMiddle s
  | TemplateTail s
  deriving (Eq)
data Punctuator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign | LParen | RParen | LCurly | RCurly | LSquare | RSquare | Dot | Spread | Semicolon | Comma | OptionalChain
-- | Punctuators and operators, carried by the @Punc@ token constructor.
--
-- 'LT' and 'GT' share their names with the Prelude's 'Ordering'
-- constructors, so uses of them have to be qualified.
data Punctuator
  = Add
  | Sub
  | Mult
  | Div
  | Mod
  | Exp
  | Inc
  | Dec
  | LT
  | GT
  | LTEQ
  | GTEQ
  | DoubleEqual
  | NotEqual
  | TripleEqual
  | DoubleNotEqual
  | LeftShift
  | RightShift
  | UnsignedRightShift -- ^ the @>>>@ operator
  | BitwiseAnd
  | BitwiseOr
  | BitwiseXor
  | BitwiseNot
  | LogicalAnd
  | LogicalOr
  | LogicalNot
  | Nullish -- ^ the @??@ operator
  | Assign
  | AddAssign
  | SubAssign
  | MultAssign
  | DivAssign
  | ModAssign
  | ExpAssign
  | LeftShiftAssign
  | RightShiftAssign
  | UnsignedRightShiftAssign
  | BitwiseAndAssign
  | BitwiseOrAssign
  | BitwiseXorAssign
  | LogicalAndAssign
  | LogicalOrAssign
  | NullishAssign
  | LParen
  | RParen
  | LCurly
  | RCurly
  | LSquare
  | RSquare
  | Dot
  | Spread
  | Semicolon
  | Comma
  | OptionalChain
  deriving (Eq)
tokens :: (Logger m, Characters s) => Parser s m [Token s]
tokens = do
@ -138,26 +141,103 @@ reserved_word = choice [try await, try break, try case_, try catch_, try class_,
with = string "with" *> pure (ReservedWord With)
yield = string "yield" *> pure (ReservedWord Yield)
-- Parse an identifier: one start character followed by zero or more
-- continuation characters, packed into an Identifier token via fromString.
--
-- NOTE(review): this listing appears to interleave the pre-change and
-- post-change lines of a diff (two signatures, two result lines, two
-- definitions of each helper) with the +/- markers lost — confirm which
-- lines are live before editing.
-- NOTE(review): the character classes below look like a simplification of
-- the ECMAScript IdentifierName grammar (no \u escapes, for instance) —
-- confirm whether that is intended.
identifier :: (Logger m, Characters s) => Parser s m (Token s)
identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
identifier = do
first <- start_char
-- The local name `rem` shadows the Prelude function `rem` inside this block.
rem <- many rem_char
let tmp = toString $ tokensToChunk (Proxy :: Proxy s) rem
pure $ Identifier $ fromString (first : tmp)
pure $ Identifier $ fromString (first : rem)
where
start_char :: Parser s m (Token s)
start_char = error "TODO"
rem_char :: Parser s m (Token s)
rem_char = error "TODO"
-- A start character is '$', '_' or anything letterChar accepts.
start_char :: Parser s m (MP.Token s)
start_char = (char '$') <|> char '_' <|> letterChar
-- A continuation character is any start character or anything digitChar accepts.
rem_char :: Parser s m (MP.Token s)
rem_char = start_char <|> digitChar
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
private_identifier = error "TODO"
-- Parse a private name: a '#' immediately followed by an identifier. The
-- result is a PrivateIdentifier carrying the identifier's text, without the
-- leading '#'.
private_identifier = do
  _ <- char '#'
  name <- identifier
  case name of
    Identifier i -> pure (PrivateIdentifier i)
    -- identifier only builds Identifier tokens today; should that ever
    -- change, report an ordinary parse failure instead of crashing on an
    -- incomplete pattern match.
    _ -> fail "private identifier: expected an identifier name after '#'"
literal :: (Logger m, Characters s) => Parser s m (Token s)
literal = error "TODO"
-- Parse a literal and wrap it in the Literal token constructor. The
-- alternatives are tried in order: template fragment, string, number.
--
-- Every leaf parser below is still `error "TODO"`, so running any branch
-- raises that error.
literal = fmap Literal (choice [try template_fragment, try string_lit, num_lit])
  where
    -- Numeric forms, tried in the order listed.
    num_lit =
      Number
        <$> choice
          [ try decimal_literal,
            try decimal_bigint,
            try plain_bigint,
            try normal_integer,
            octal_int
          ]
    string_lit = String <$> error "TODO"
    template_fragment = TemplateFragment <$> error "TODO"
    decimal_literal = error "TODO"
    decimal_bigint = error "TODO"
    plain_bigint = error "TODO"
    normal_integer = error "TODO"
    octal_int = error "TODO"
-- | Placeholder for the tokens that start with a forward slash. According to
-- its own message it is meant to cover regex literals, division and division
-- assignment.
--
-- Not implemented yet: evaluating this parser raises the error below.
fslash_handler :: (Logger m, Characters s) => Parser s m (Token s)
fslash_handler = error "TODO: Regex literal, division and division assignment"
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
punctuator = error "TODO"
-- Parse one punctuator and wrap it in the Punc token constructor.
--
-- Alternatives are tried top to bottom, so every spelling that is a prefix
-- of a longer one ('>>' of '>>>', '=' of '==', ...) is listed after the
-- longer spelling. '/' and '/=' have no entry in this list.
punctuator =
  Punc
    <$> choice
      [ -- four characters
        try (UnsignedRightShiftAssign <$ string ">>>="),
        -- three characters
        try (Spread <$ string "..."),
        try (TripleEqual <$ string "==="),
        try (DoubleNotEqual <$ string "!=="),
        try (LeftShiftAssign <$ string "<<="),
        try (RightShiftAssign <$ string ">>="),
        try (UnsignedRightShift <$ string ">>>"),
        try (ExpAssign <$ string "**="),
        try (LogicalAndAssign <$ string "&&="),
        try (LogicalOrAssign <$ string "||="),
        try (NullishAssign <$ string "??="),
        -- two characters; "?." only counts when no digit follows it
        try (OptionalChain <$ string "?." <* notFollowedBy digitChar),
        try (Exp <$ string "**"),
        try (Inc <$ string "++"),
        try (Dec <$ string "--"),
        try (LTEQ <$ string "<="),
        try (GTEQ <$ string ">="),
        try (DoubleEqual <$ string "=="),
        try (NotEqual <$ string "!="),
        try (LeftShift <$ string "<<"),
        try (RightShift <$ string ">>"),
        try (AddAssign <$ string "+="),
        try (SubAssign <$ string "-="),
        try (MultAssign <$ string "*="),
        try (ModAssign <$ string "%="),
        try (BitwiseAndAssign <$ string "&="),
        try (BitwiseOrAssign <$ string "|="),
        try (BitwiseXorAssign <$ string "^="),
        try (LogicalAnd <$ string "&&"),
        try (LogicalOr <$ string "||"),
        try (Nullish <$ string "??"),
        -- single characters
        Add <$ char '+',
        Sub <$ char '-',
        Mult <$ char '*',
        Mod <$ char '%',
        Utilities.Javascript.LT <$ char '<',
        Utilities.Javascript.GT <$ char '>',
        BitwiseAnd <$ char '&',
        BitwiseOr <$ char '|',
        BitwiseXor <$ char '^',
        BitwiseNot <$ char '~',
        Assign <$ char '=',
        LParen <$ char '(',
        RParen <$ char ')',
        LCurly <$ char '{',
        RCurly <$ char '}',
        LSquare <$ char '[',
        RSquare <$ char ']',
        Dot <$ char '.',
        Semicolon <$ char ';',
        Comma <$ char ',',
        LogicalNot <$ char '!'
      ]
-- | Consume whatever 'newline' accepts and report it as a 'WhiteSpace' token.
linebreak :: (Logger m, Characters s) => Parser s m (Token s)
linebreak = WhiteSpace <$ newline