factored out some useful parsing utilities to a separate file and began work on javascript tokenization

This commit is contained in:
Pagwin 2025-12-26 14:02:29 -05:00
parent 65e7091899
commit aeb70675a9
No known key found for this signature in database
GPG key ID: 81137023740CA260
5 changed files with 111 additions and 17 deletions

View file

@ -28,6 +28,7 @@ common warnings
library
hs-source-dirs: src
exposed-modules: Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config
other-modules: Utilities.Parsing
build-depends: base >=4.20 && < 4.21, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, css-syntax >= 0.1.0.2
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances

View file

@ -23,23 +23,7 @@ import Text.Megaparsec (ParsecT, Stream, Token, Tokens, anySingle, anySingleBut,
import qualified Text.Megaparsec as MP
import Text.Megaparsec.Char (alphaNumChar, char, digitChar, newline, space, spaceChar)
import qualified Text.Megaparsec.Stream as MPS
type Parser = ParsecT Void
class (Token s ~ Char, Stream s, ToText (Tokens s), IsString (Tokens s), Monoid (Tokens s), Eq (Tokens s), Show s) => Characters s
class ToText t where
toText :: t -> Text
instance Characters Text
instance ToText Text where
toText = id
instance Characters String
instance ToText String where
toText = T.pack
import Utilities.Parsing
string :: (MP.MonadParsec e s m) => Tokens s -> m (Tokens s)
string = chunk

View file

@ -72,6 +72,7 @@ buildRules = do
assets
postsRule
rss
-- TODO: add rules for specifically the checksummed files which depend on the non-checksummed files
css_resources
js_resources

View file

@ -3,5 +3,87 @@ module Utilities.Javascript
)
where
import Control.Applicative (Alternative (many), optional)
import Data.Maybe (maybeToList)
import Logger
import Text.Megaparsec (MonadParsec (try), choice)
import Utilities.Parsing
minify :: String -> String
minify = id
-- yeah I guess I'm making a javascript tokenizer
-- s is either Text or String
-- Regex will be tokenized
data Token s
= WhiteSpace
| LineTerminator
| SingleLineComment
| MultiLineComment
| HashBangComment
| Identifier s
| PrivateIdentifier s
| ReservedWord Reserved
| Literal (Literal s)
| LParen
| RParen
| LCurly
| RCurly
| LSquare
| RSquare
| Dot
| Spread
| Semicolon
| Comma
| OptionalChain
| Operator Operator
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield
data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s)
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s
data Operator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign
tokens :: (Logger m, Characters s) => Parser s m [Token s]
tokens = do
hashbang <- (optional hashbang_comment)
tokens <- many token
pure $ (maybeToList hashbang) ++ tokens
token :: (Logger m, Characters s) => Parser s m (Token s)
token =
choice
[ try comment,
try operator,
try reserved_word,
try identifier,
try private_identifier,
try literal,
try punctuator
]
hashbang_comment :: (Logger m, Characters s) => Parser s m (Token s)
hashbang_comment = error "TODO"
comment :: (Logger m, Characters s) => Parser s m (Token s)
comment = error "TODO"
operator :: (Logger m, Characters s) => Parser s m (Token s)
operator = error "TODO"
reserved_word :: (Logger m, Characters s) => Parser s m (Token s)
reserved_word = error "TODO"
identifier :: (Logger m, Characters s) => Parser s m (Token s)
identifier = error "TODO"
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
private_identifier = error "TODO"
literal :: (Logger m, Characters s) => Parser s m (Token s)
literal = error "TODO"
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
punctuator = error "TODO"

26
src/Utilities/Parsing.hs Normal file
View file

@ -0,0 +1,26 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeOperators #-}
module Utilities.Parsing where
import Data.String (IsString)
import Data.Text (Text, pack)
import Data.Void (Void)
import Text.Megaparsec (ParsecT, Stream, Token, Tokens)
type Parser = ParsecT Void
class (Token s ~ Char, Stream s, ToText (Tokens s), IsString (Tokens s), Monoid (Tokens s), Eq (Tokens s), Show s) => Characters s
class ToText t where
toText :: t -> Text
instance Characters Text
instance ToText Text where
toText = id
instance Characters String
instance ToText String where
toText = pack