factored out some useful parsing utilities to a separate file and began work on javascript tokenization

This commit is contained in:
Pagwin 2025-12-26 14:02:29 -05:00
parent 65e7091899
commit aeb70675a9
No known key found for this signature in database
GPG key ID: 81137023740CA260
5 changed files with 111 additions and 17 deletions

View file

@ -28,6 +28,7 @@ common warnings
library library
hs-source-dirs: src hs-source-dirs: src
exposed-modules: Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config exposed-modules: Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config
other-modules: Utilities.Parsing
build-depends: base >=4.20 && < 4.21, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, css-syntax >= 0.1.0.2 build-depends: base >=4.20 && < 4.21, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, css-syntax >= 0.1.0.2
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances

View file

@ -23,23 +23,7 @@ import Text.Megaparsec (ParsecT, Stream, Token, Tokens, anySingle, anySingleBut,
import qualified Text.Megaparsec as MP import qualified Text.Megaparsec as MP
import Text.Megaparsec.Char (alphaNumChar, char, digitChar, newline, space, spaceChar) import Text.Megaparsec.Char (alphaNumChar, char, digitChar, newline, space, spaceChar)
import qualified Text.Megaparsec.Stream as MPS import qualified Text.Megaparsec.Stream as MPS
import Utilities.Parsing
type Parser = ParsecT Void
class (Token s ~ Char, Stream s, ToText (Tokens s), IsString (Tokens s), Monoid (Tokens s), Eq (Tokens s), Show s) => Characters s
class ToText t where
toText :: t -> Text
instance Characters Text
instance ToText Text where
toText = id
instance Characters String
instance ToText String where
toText = T.pack
string :: (MP.MonadParsec e s m) => Tokens s -> m (Tokens s) string :: (MP.MonadParsec e s m) => Tokens s -> m (Tokens s)
string = chunk string = chunk

View file

@ -72,6 +72,7 @@ buildRules = do
assets assets
postsRule postsRule
rss rss
-- TODO: add rules for specifically the checksummed files which depend on the non-checksummed files
css_resources css_resources
js_resources js_resources

View file

@ -3,5 +3,87 @@ module Utilities.Javascript
) )
where where
import Control.Applicative (Alternative (many), optional)
import Data.Maybe (maybeToList)
import Logger
import Text.Megaparsec (MonadParsec (try), choice)
import Utilities.Parsing
minify :: String -> String minify :: String -> String
minify = id minify = id
-- yeah I guess I'm making a javascript tokenizer
-- s is either Text or String
-- Regex will be tokenized
data Token s
= WhiteSpace
| LineTerminator
| SingleLineComment
| MultiLineComment
| HashBangComment
| Identifier s
| PrivateIdentifier s
| ReservedWord Reserved
| Literal (Literal s)
| LParen
| RParen
| LCurly
| RCurly
| LSquare
| RSquare
| Dot
| Spread
| Semicolon
| Comma
| OptionalChain
| Operator Operator
data Reserved = Await | Break | Case | Catch | Class | Const | Continue | Debugger | Default | Delete | Do | Else | Enum | Export | Extends | FalseVal | Finally | For | Function | If | Import | In | Instanceof | New | Null | Return | Super | Switch | This | Throw | TrueVal | Try | Typeof | Var | Void | While | With | Yield
data Literal s = Number s | String s | Regex s | TemplateFragment (TemplateFragment s)
data TemplateFragment s = NoSub s | TemplateHead s | TemplateMiddle s | TemplateTail s
data Operator = Add | Sub | Mult | Div | Mod | Exp | Inc | Dec | LT | GT | LTEQ | GTEQ | DoubleEqual | NotEqual | TripleEqual | DoubleNotEqual | LeftShift | RightShift {- >>> -} | UnsignedRightShift | BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseNot | LogicalAnd | LogicalOr | LogicalNot {- ?? -} | Nullish | Assign | AddAssign | SubAssign | MultAssign | DivAssign | ModAssign | ExpAssign | LeftShiftAssign | RightShiftAssign | UnsignedRightShiftAssign | BitwiseAndAssign | BitwiseOrAssign | BitwiseXorAssign | LogicalAndAssign | LogicalOrAssign | NullishAssign
tokens :: (Logger m, Characters s) => Parser s m [Token s]
tokens = do
hashbang <- (optional hashbang_comment)
tokens <- many token
pure $ (maybeToList hashbang) ++ tokens
token :: (Logger m, Characters s) => Parser s m (Token s)
token =
choice
[ try comment,
try operator,
try reserved_word,
try identifier,
try private_identifier,
try literal,
try punctuator
]
hashbang_comment :: (Logger m, Characters s) => Parser s m (Token s)
hashbang_comment = error "TODO"
comment :: (Logger m, Characters s) => Parser s m (Token s)
comment = error "TODO"
operator :: (Logger m, Characters s) => Parser s m (Token s)
operator = error "TODO"
reserved_word :: (Logger m, Characters s) => Parser s m (Token s)
reserved_word = error "TODO"
identifier :: (Logger m, Characters s) => Parser s m (Token s)
identifier = error "TODO"
private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
private_identifier = error "TODO"
literal :: (Logger m, Characters s) => Parser s m (Token s)
literal = error "TODO"
punctuator :: (Logger m, Characters s) => Parser s m (Token s)
punctuator = error "TODO"

26
src/Utilities/Parsing.hs Normal file
View file

@ -0,0 +1,26 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeOperators #-}
module Utilities.Parsing where
import Data.String (IsString)
import Data.Text (Text, pack)
import Data.Void (Void)
import Text.Megaparsec (ParsecT, Stream, Token, Tokens)
type Parser = ParsecT Void
class (Token s ~ Char, Stream s, ToText (Tokens s), IsString (Tokens s), Monoid (Tokens s), Eq (Tokens s), Show s) => Characters s
class ToText t where
toText :: t -> Text
instance Characters Text
instance ToText Text where
toText = id
instance Characters String
instance ToText String where
toText = pack