just string and regex literals left barring me fucking up implementation of integer literals
parent 1f89316fdf
commit 11d2228362
4 changed files with 75 additions and 34 deletions
TODO.md
@@ -4,7 +4,6 @@
 - JS
 - Doing our own tokenizer lol
 - [ ] swap from using `draft` to using `date` for determing draft status, lack of date = draft
-- [ ] Fix timestamp(s) shown to use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2
 - [ ] setup fingerprinting in file names for css and js
 - setup lambdas via: https://hackage-content.haskell.org/package/mustache-2.4.3.1/docs/Text-Mustache.html#v:overText
 - This may require a refactor of how we handle templates to use `object` instead of just using aeson integration from the mustache crate
@@ -27,4 +26,5 @@
 - [ ] Make a function which takes IR and spits out some kind of table of contents
 - [ ] Add rst or org support and convert markdown handling to custom parser instead of pandoc
 - [ ] Add in functionality for footnotes
+- [ ] Fix time via timestamps potentially meaning something (via preshim?) and use local offset instead of absolute time https://www.rfc-editor.org/rfc/rfc3339#section-4.2
 - [ ] see if performance can be improved, it shouldn't be necessary but if I'm looking at doing something for this and everything above this got checked off then this is a sensible next thing
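
For the `overText` lambda item above, a minimal sketch of what that setup could look like with the Text.Mustache API linked in the TODO. The template string and the `shout`/`name` keys are made-up examples, and the eventual `object`-based refactor may end up shaped differently:

{-# LANGUAGE OverloadedStrings #-}

import Data.Text (Text)
import qualified Data.Text as T
import Text.Mustache (compileTemplate, object, overText, substitute, (~>))

-- "shout" is a lambda value: the section body is rendered first,
-- then the rendered text is run through T.toUpper.
rendered :: Text
rendered =
  case compileTemplate "inline" "{{#shout}}hello {{name}}{{/shout}}" of
    Left err -> error $ show err
    Right template -> substitute template context
  where
    context =
      object
        [ "name" ~> ("world" :: Text),
          "shout" ~> overText T.toUpper
        ]
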

@@ -97,13 +97,8 @@ js_resources =
   map (outputDir </>) jsGlobs |%> \target -> do
     let src_file = FP.dropDirectory1 target
     src <- Shake.readFile' $ src_file
-    let tokenization = JS.toTokens src_file src
-    case tokenization of
-      Left e -> error $ "Attempt to tokenize javascript file failed with: " <> errorBundlePretty e
-      Right tokens ->
-        -- TODO: write to fingerprinted location as well
-        Shake.writeFileChanged target $ foldMap JS.displayToken $ JS.minify $ tokens
+    -- TODO: write to fingerprinted location as well
+    Shake.writeFileChanged target $ JS.minify src

-- there's probably a better way of doing this that allows for the target's origin file extension to get passed in but for now we're doing brute force
postsRule :: Rules ()
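
The fingerprinting TODO carried through this hunk is still open; a rough sketch of one way it could slot into the same rule, assuming a Data.Hashable digest over the minified output. `fingerprinted` and `writeWithFingerprint` are hypothetical names, not anything in the repo:

import Data.Hashable (hash)
import Development.Shake (Action, writeFileChanged)
import Numeric (showHex)
import System.FilePath (dropExtension, takeExtension)

-- e.g. "out/js/app.js" -> "out/js/app.1a2b3c4d.js", keyed on the minified contents
fingerprinted :: FilePath -> String -> FilePath
fingerprinted path contents =
  dropExtension path <> "." <> showHex (fromIntegral (hash contents) :: Word) "" <> takeExtension path

-- inside the |%> action, after producing `let minified = JS.minify src`:
writeWithFingerprint :: FilePath -> String -> Action ()
writeWithFingerprint target minified = do
  writeFileChanged target minified
  writeFileChanged (fingerprinted target minified) minified
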

@@ -3,31 +3,38 @@

 module Utilities.Javascript
   ( minify,
+    minifyTokens,
     toTokens,
     displayToken,
   )
 where

-import Control.Applicative (Alternative (many), optional, (<|>))
+import Control.Applicative (Alternative (many, some), empty, optional, (<|>))
 import Control.Monad.Trans.Class (MonadTrans (lift))
-import Control.Monad.Trans.State (StateT, evalStateT, put)
+import Control.Monad.Trans.State (StateT, evalStateT, get, put)
 import Data.Data (Proxy (Proxy))
 import Data.Functor (void, (<&>))
+import Data.Functor.Identity (Identity (Identity, runIdentity))
 import Data.Maybe (maybeToList)
 import Data.String (IsString (fromString))
 import Data.Void (Void)
 import Logger
 import Text.Megaparsec (MonadParsec (notFollowedBy, try), ParseErrorBundle, ParsecT, Stream (tokensToChunk), anySingle, choice, parse, runParserT)
 import qualified Text.Megaparsec as MP
-import Text.Megaparsec.Char (char, digitChar, eol, hspace, letterChar, newline, string)
+import Text.Megaparsec.Char (binDigitChar, char, digitChar, eol, hexDigitChar, hspace, letterChar, newline, octDigitChar, string)
 import Utilities.Parsing (Characters, ToChar (fromChar), ToText (fromText, toString, toText))

 data Possibility = ExprAllowed | ExprNotAllowed deriving (Eq)

 type Parser s m = ParsecT Void s (StateT Possibility m)

-minify :: (Characters s) => [Token s] -> [Token s]
-minify = reduce_identifiers . remove_redundants
+minify :: forall s. (Characters s, MP.VisualStream s, MP.TraversableStream s) => s -> s
+minify src = foldMap displayToken $ minifyTokens $ case runIdentity ((toTokens "" src) :: Identity (Either (ParseErrorBundle s Void) [Token s])) of
+  Left e -> error $ "Attempt to tokenize javascript file failed with: " <> MP.errorBundlePretty e
+  Right v -> v
+
+minifyTokens :: (Characters s) => [Token s] -> [Token s]
+minifyTokens = reduce_identifiers . remove_redundants
   where
     -- need to figure out how to add State into this
     reduce_identifiers = map $ \token -> case token of
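
With this change callers hand minify raw source and get minified source back; tokenization (and the fatal error on a parse failure) happens internally, which is why the Shake rule above no longer calls toTokens itself. A small usage sketch, assuming the Characters instance for String that the Shake rule relies on:

import qualified Utilities.Javascript as JS

-- Tokenize, run the token-level passes (remove_redundants, reduce_identifiers),
-- and re-render the token stream in one call.
minified :: String
minified = JS.minify "const answer = 40 + 2;\nconsole.log(answer);"

-- The intermediate steps stay exposed when the tokens themselves are wanted:
-- JS.toTokens yields Either a pretty-printable ParseErrorBundle or the token list,
-- and JS.minifyTokens works on that list directly.
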
@@ -201,10 +208,11 @@ exprNoop :: (Stream s, Monad m) => String -> Parser s m ()
 -- string arg is just as a comment
 exprNoop _ = pure ()

--- TODO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts
+-- INFO: read https://github.com/jquery/esprima/blob/main/src/scanner.ts
 -- and https://github.com/acornjs/acorn/blob/master/acorn/src/tokenize.js
 -- specific logic at https://github.com/acornjs/acorn/blob/54097dcf8c08733695df7168692d0faac3a2f768/acorn/src/tokencontext.js#L92
 -- https://astexplorer.net/
+-- atm this is guesswork
 token :: (Logger m, Characters s) => Parser s m (Token s)
 token =
   choice
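
The acorn tokencontext link above is about the one genuinely context-sensitive bit of JS tokenizing: a `/` is the division operator where an expression cannot start (after an identifier or `)`, as in `a / b`) but opens a regex literal where one can (after `return` or `=`, as in `return /\d+/`). That is what the Possibility state threaded through Parser tracks. The exprAllowed/exprNotAllowed combinators used throughout the punctuator hunks below are not shown in this diff; presumably they are thin wrappers over the StateT, roughly:

-- sketch only: the real definitions live elsewhere in Utilities.Javascript
exprAllowed, exprNotAllowed :: (Stream s, Monad m) => Parser s m ()
exprAllowed = lift $ put ExprAllowed
exprNotAllowed = lift $ put ExprNotAllowed
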
@@ -335,7 +343,7 @@ identifier = do
     rem_char :: Parser s m (MP.Token s)
     rem_char = start_char <|> digitChar

-private_identifier :: (Logger m, Characters s) => Parser s m (Token s)
+private_identifier :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
 private_identifier =
   char '#'
     *> identifier
@@ -375,20 +383,55 @@ literal =
       char '`'
       pure $ TemplateTail $ fromText $ mconcat $ map toText $ contents
     template_char :: Parser s m s
-    template_char = fromText . toText <$> choice [try (string "$" <* (notFollowedBy $ char '{')), try (char '\\' *> ((try template_escape_seq) <|> not_escape_seq)), try ((optional $ char '\\') *> (eol)), (notFollowedBy (choice $ linebreak : (map (fromChar <$> char) "`\\$"))) *> source_char]
-    source_char = error "TODO"
-    template_escape_seq = error "TODO: TemplateEscapeSequence, prepend backslash"
-    not_escape_seq = error "TODO: NotEscapeSequence, prepend backslash"
+    template_char =
+      fromText . toText
+        <$> choice
+          [ try (string "$" <* (notFollowedBy $ char '{')),
+            try escape_seq,
+            try ((optional $ char '\\') *> (eol)),
+            -- I'm sure this is doable without do but do makes it much easier
+            do
+              notFollowedBy (choice [void linebreak, void $ char '`', void $ char '\\', void $ char '$'])
+              c <- source_char
+              pure $ fromString $ c : []
+          ]
+    source_char = anySingle
+    escape_seq = do
+      char '\\'
+      ret <- anySingle
+      pure $ fromString ('\\' : [ret])
+    num_lit = Number <$> (choice [try legacy_oct, try decimal_bigint, try decimal_literal, try hex_int, try oct_int, try bin_int, zero])
+    zero = char '0' *> pure "0"
+    decimal_literal = fromString <$> some (digitChar <|> char '_')
+    decimal_bigint = do
+      most <- decimal_literal
+      char 'n'
+      pure $ fromText $ toText most <> "n"
+    legacy_oct = char '0' *> (fromString <$> some (octDigitChar <|> char '_'))
+    oct_int = char '0' *> (char 'o' <|> char 'O') *> (fromString <$> some (octDigitChar <|> char '_'))
+    hex_int = char '0' *> (char 'x' <|> char 'X') *> (fromString <$> some (hexDigitChar <|> char '_'))
+    bin_int = char '0' *> (char 'b' <|> char 'B') *> (fromString <$> some (binDigitChar <|> char '_'))
     string_lit = String <$> error "TODO"
-    num_lit = Number <$> (choice [try decimal_literal, try decimal_bigint, try plain_bigint, try normal_integer, octal_int])
-    decimal_literal = error "TODO"
-    decimal_bigint = error "TODO"
-    plain_bigint = error "TODO"
-    normal_integer = error "TODO"
-    octal_int = error "TODO"

-fslash_handler :: (Logger m, Characters s) => Parser s m (Token s)
-fslash_handler = error "TODO: Regex literal, division and division assignment"
+fslash_handler :: forall s m. (Logger m, Characters s) => Parser s m (Token s)
+fslash_handler = do
+  allowed <- lift $ get
+  let re = case allowed of
+        ExprNotAllowed -> empty
+        ExprAllowed -> regex_literal
+
+  choice [try re, try division_assign, division]
+  where
+    regex_literal :: Parser s m (Token s)
+    regex_literal = do
+      char '/'
+      error "TODO"
+      pure $ Literal $ Regex {}
+    division_assign :: Parser s m (Token s)
+    division_assign = (string "/=") *> (pure $ Punc $ DivAssign :: Parser s m (Token s))

+    division :: Parser s m (Token s)
+    division = char '/' *> (pure $ Punc $ Div :: Parser s m (Token s))

 punctuator :: (Logger m, Characters s) => Parser s m (Token s)
 punctuator =
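
Per the commit message, string and regex literals are the parts of this hunk still left as TODOs. Not part of the commit, but a rough sketch of how string_lit could be filled in, reusing the escape_seq and linebreak helpers from the diff above and keeping the quotes so displayToken can re-emit the literal verbatim; `quoted` and `plain` are hypothetical local names:

    string_lit = String <$> (quoted '"' <|> quoted '\'')
      where
        quoted q = do
          char q
          body <- many (try escape_seq <|> plain q)
          char q
          pure $ fromString [q] <> mconcat body <> fromString [q]
        plain q = do
          notFollowedBy (choice [void $ char q, void $ char '\\', void linebreak])
          c <- anySingle
          pure $ fromString [c]
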
@@ -405,8 +448,10 @@ punctuator =
       try $ string "&&=" *> pure LogicalAndAssign <* exprAllowed,
       try $ string "||=" *> pure LogicalOrAssign <* exprAllowed,
       try $ string "??=" *> pure NullishAssign <* exprAllowed,
-      try $ string "++" *> pure Inc <* error "TODO: Ambiguous precrement vs postcrement",
-      try $ string "--" *> pure Dec <* error "TODO: Ambiguous precrement postcrement",
+      -- best effort guess based on my usage that it'll always be postcrement
+      -- Shouldn't come up in my use case though
+      try $ string "++" *> pure Inc <* exprNotAllowed,
+      try $ string "--" *> pure Dec <* exprNotAllowed,
       try $ string "?." *> (notFollowedBy digitChar) *> pure OptionalChain <* exprNotAllowed,
       try $ string "**" *> pure Exp <* exprAllowed,
       try $ string "<=" *> pure LTEQ <* exprAllowed,
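
The postcrement guess only matters for what may follow: after a postfix `x++` the next `/` has to be division, which is what exprNotAllowed records, while a prefix `++` would need an operand next. Since a regex literal can never be the operand of `++` or `--` in valid code, treating them as postfix cannot misclassify a following slash.
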
@@ -439,11 +484,12 @@ punctuator =
       char ';' *> pure Semicolon <* exprAllowed,
       char ',' *> pure Comma <* exprAllowed,
       char '!' *> pure LogicalNot <* exprAllowed,
-      -- HERE
-      char '(' *> pure LParen <* exprNotAllowed,
+      -- Note: parens and curlies are unambiguously ambiguous
+      -- Opening ones will generally allow an expression and closing ones will generally not allow an expression
+      char '(' *> pure LParen <* exprAllowed,
       char ')' *> pure RParen <* exprNotAllowed,
-      char '{' *> pure LCurly <* error "TODO: Ambiguous",
-      char '}' *> pure RCurly <* error "TODO: Ambiguous",
+      char '{' *> pure LCurly <* exprAllowed,
+      char '}' *> pure RCurly <* exprNotAllowed,
       char '[' *> pure LSquare <* exprNotAllowed,
       char ']' *> pure RSquare <* exprNotAllowed,
       char '.' *> pure Dot <* exprNotAllowed

@@ -10,7 +10,7 @@ import Text.Megaparsec (ParsecT, Stream, Token, Tokens)

 type Parser = ParsecT Void

-class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s) => Characters s
+class (Token s ~ Char, Stream s, ToText (Tokens s), ToText s, IsString (Tokens s), IsString s, Monoid (Tokens s), ToChar (Token s), Eq (Tokens s), Show s, Monoid s) => Characters s

 class ToText t where
   toText :: t -> Text
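
The added Monoid s superclass lines up with the new minify in Utilities.Javascript: it renders the token stream back to source with foldMap displayToken, which needs mempty and (<>) at type s (assuming displayToken :: Token s -> s, which that foldMap implies), and since minify's only string-ish constraint is Characters s, the Monoid has to come from the class.
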