fixed more bugs but currently reckoning with the whole situation of how markdown deals with singular vs double newlines

This commit is contained in:
Pagwin 2025-11-09 16:13:10 -05:00
parent 79e54b112b
commit 3546654a66
No known key found for this signature in database
GPG key ID: 81137023740CA260
3 changed files with 48 additions and 46 deletions

14
TODO.md
View file

@ -1,14 +1,12 @@
- add rst support and convert markdown handling to custom parser instead of pandoc - [ ] add rst support and convert markdown handling to custom parser instead of pandoc
- process source code blocks with tree sitter https://hackage.haskell.org/package/tree-sitter - [ ] process source code blocks with tree sitter https://hackage.haskell.org/package/tree-sitter
- Alternatively consider skylighting https://hackage.haskell.org/package/skylighting - Alternatively consider skylighting https://hackage.haskell.org/package/skylighting
- minify js and css when copying over instead of just copying - [ ] minify js and css when copying over instead of just copying
- setup fingerprinting in file names for css and js - [ ] setup fingerprinting in file names for css and js
- dev server setup (with live reloading) - [ ] dev server setup (with live reloading)
- see if performance can be improved (it isn't slow atm but it definitely feels like there's a bottleneck) - [ ] see if performance can be improved (it isn't slow atm but it definitely feels like there's a bottleneck)
- look into adding postcss support perhaps

View file

@ -2,6 +2,7 @@
module HTML (compileToHTML) where module HTML (compileToHTML) where
import Data.Char (isAlphaNum, toLower)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import qualified Data.Text as T import qualified Data.Text as T
import IR import IR
@ -19,6 +20,23 @@ escapeChar c = T.singleton c
escapeText :: T.Text -> T.Text escapeText :: T.Text -> T.Text
escapeText = T.concat . map escapeChar . T.unpack escapeText = T.concat . map escapeChar . T.unpack
genHeaderId :: Heading -> T.Text
genHeaderId header =
T.concat
[ " id=\"",
T.map (toLower . textSub) $
T.strip $
T.filter (\c -> isAlphaNum c || c == ' ') $
serializeInlineToHTML header.text,
"\" "
]
where
textSub ' ' = '-'
textSub c = c
genHeaderClasses :: Heading -> T.Text
genHeaderClasses = const ""
tshow :: (Show s) => s -> T.Text tshow :: (Show s) => s -> T.Text
tshow = T.pack . show tshow = T.pack . show
@ -26,7 +44,7 @@ compileToHTML :: Document -> T.Text
compileToHTML (Doc elements) = T.concat $ map elementToHTML elements compileToHTML (Doc elements) = T.concat $ map elementToHTML elements
elementToHTML :: Element -> T.Text elementToHTML :: Element -> T.Text
elementToHTML (Heading header) = T.concat ["<h", tshow header.level, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"] elementToHTML (Heading header) = T.concat ["<h", tshow header.level, genHeaderId header, genHeaderClasses header, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
-- --
elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"] elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
where where
@ -58,6 +76,6 @@ serializeInlineToHTML (Bold elems : remaining) = T.concat ["<b>", serializeInlin
serializeInlineToHTML (Italic elems : remaining) = T.concat ["<i>", serializeInlineToHTML elems, "</i>", serializeInlineToHTML remaining] serializeInlineToHTML (Italic elems : remaining) = T.concat ["<i>", serializeInlineToHTML elems, "</i>", serializeInlineToHTML remaining]
serializeInlineToHTML (Crossed elems : remaining) = T.concat ["<s>", serializeInlineToHTML elems, "</s>", serializeInlineToHTML remaining] serializeInlineToHTML (Crossed elems : remaining) = T.concat ["<s>", serializeInlineToHTML elems, "</s>", serializeInlineToHTML remaining]
serializeInlineToHTML (InlineCode code : remaining) = T.concat ["<code>", escapeText code, "</code>", serializeInlineToHTML remaining] serializeInlineToHTML (InlineCode code : remaining) = T.concat ["<code>", escapeText code, "</code>", serializeInlineToHTML remaining]
serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat ["<a href=\"", url, "\" ", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, "\">", serializeInlineToHTML linkText, "</a>", serializeInlineToHTML remaining] serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat ["<a href=\"", url, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "</a>", serializeInlineToHTML remaining]
serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["<img src=\">", url, "\" alt=\"", escapeText altText, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML remaining] serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["<img src=\">", url, "\" alt=\"", escapeText altText, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML remaining]
serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining

View file

@ -253,7 +253,7 @@ strongUnderscore = do
crossedText :: Parser InlineText crossedText :: Parser InlineText
crossedText = do crossedText = do
string "~~" string "~~"
content <- some (notFollowedBy (string "~~") >> inlineElementNo '~') content <- some (notFollowedBy (string "~~") >> inlineElement)
string "~~" string "~~"
pure $ Crossed content pure $ Crossed content
@ -284,35 +284,23 @@ inlineElementNo c =
try link, try link,
try htmlInline, try htmlInline,
try escapedChar, try escapedChar,
plainTextNo c plainTextNo [c]
] ]
plainTextNo :: Char -> Parser InlineText plainTextNo :: [Char] -> Parser InlineText
plainTextNo c = fmap (Text . T.pack) $ some $ noneOf [c, '\n'] plainTextNo disallow = do
firstChar <- noneOf disallow
remChars <- some $ plainTextCharNo disallow <* notFollowedBy lineEnding
pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars
where
wspHandler '\n' = ' '
wspHandler c = c
inlineElementNoAsterisk :: Parser InlineText inlineElementNoAsterisk :: Parser InlineText
inlineElementNoAsterisk = inlineElementNoAsterisk = inlineElementNo '*'
choice
[ try strong,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainTextNo '*'
]
inlineElementNoUnderscore :: Parser InlineText inlineElementNoUnderscore :: Parser InlineText
inlineElementNoUnderscore = inlineElementNoUnderscore = inlineElementNo '_'
choice
[ try strong,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainTextNo '_'
]
-- Code Span -- Code Span
codeSpan :: Parser InlineText codeSpan :: Parser InlineText
@ -408,29 +396,27 @@ escapedChar = do
pure $ Text (T.singleton c) pure $ Text (T.singleton c)
-- Plain Text -- Plain Text
-- TODO: this eats stuff it shouldn't, inefficient solution is to try other inline elements and exit if they succeed
plainText :: Parser InlineText plainText :: Parser InlineText
plainText = fmap (Text . T.pack) (liftA2 (:) (noneOf "\n") $ many plainTextChar) plainText = plainTextNo []
plainTextChar :: Parser Char plainTextBaseDisallow :: [Char]
plainTextChar = noneOf "\n[~`_*" plainTextBaseDisallow = "[~`_*<"
plainTextCharNo :: [Char] -> Parser Char
plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow
plainTextNoAsterisk :: Parser InlineText plainTextNoAsterisk :: Parser InlineText
plainTextNoAsterisk = fmap (Text . T.pack) $ some $ noneOf "*\n" plainTextNoAsterisk = plainTextNo "*"
plainTextNoUnderscore :: Parser InlineText plainTextNoUnderscore :: Parser InlineText
plainTextNoUnderscore = fmap (Text . T.pack) $ some $ noneOf "_\n" plainTextNoUnderscore = plainTextNo "_"
plainTextNoBracket :: Parser InlineText plainTextNoBracket :: Parser InlineText
plainTextNoBracket = plainTextNoBracket = plainTextNo "[]"
fmap (Text . T.pack) $
some $
satisfy
(`notElem` ("[]" :: String))
-- Helper Parsers -- Helper Parsers
lineEnding :: Parser () lineEnding :: Parser ()
lineEnding = void (try (string "\r\n") <|> try (string "\n") <|> string "\r") lineEnding = void $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r")
wsParser :: Parser () wsParser :: Parser ()
wsParser = void $ some (char ' ' <|> char '\t') wsParser = void $ some (char ' ' <|> char '\t')