From 3546654a66677a71dde2e86a5335785cfc5011e2 Mon Sep 17 00:00:00 2001 From: Pagwin Date: Sun, 9 Nov 2025 16:13:10 -0500 Subject: [PATCH] fixed more bugs but currently reckoning with the whole situation of how markdown deals with singular vs double newlines --- TODO.md | 14 +++++------- app/HTML.hs | 22 +++++++++++++++++-- app/Markdown.hs | 58 +++++++++++++++++++------------------------------ 3 files changed, 48 insertions(+), 46 deletions(-) diff --git a/TODO.md b/TODO.md index 5d0caa0..3fbc422 100644 --- a/TODO.md +++ b/TODO.md @@ -1,14 +1,12 @@ -- add rst support and convert markdown handling to custom parser instead of pandoc +- [ ] add rst support and convert markdown handling to custom parser instead of pandoc -- process source code blocks with tree sitter https://hackage.haskell.org/package/tree-sitter +- [ ] process source code blocks with tree sitter https://hackage.haskell.org/package/tree-sitter - Alternatively consider skylighting https://hackage.haskell.org/package/skylighting -- minify js and css when copying over instead of just copying +- [ ] minify js and css when copying over instead of just copying -- setup fingerprinting in file names for css and js +- [ ] setup fingerprinting in file names for css and js -- dev server setup (with live reloading) +- [ ] dev server setup (with live reloading) -- see if performance can be improved (it isn't slow atm but it definitely feels like there's a bottleneck) - -- look into adding postcss support perhaps +- [ ] see if performance can be improved (it isn't slow atm but it definitely feels like there's a bottleneck) diff --git a/app/HTML.hs b/app/HTML.hs index c324fa7..d5777be 100644 --- a/app/HTML.hs +++ b/app/HTML.hs @@ -2,6 +2,7 @@ module HTML (compileToHTML) where +import Data.Char (isAlphaNum, toLower) import Data.Maybe (fromMaybe) import qualified Data.Text as T import IR @@ -19,6 +20,23 @@ escapeChar c = T.singleton c escapeText :: T.Text -> T.Text escapeText = T.concat . map escapeChar . T.unpack +genHeaderId :: Heading -> T.Text +genHeaderId header = + T.concat + [ " id=\"", + T.map (toLower . textSub) $ + T.strip $ + T.filter (\c -> isAlphaNum c || c == ' ') $ + serializeInlineToHTML header.text, + "\" " + ] + where + textSub ' ' = '-' + textSub c = c + +genHeaderClasses :: Heading -> T.Text +genHeaderClasses = const "" + tshow :: (Show s) => s -> T.Text tshow = T.pack . show @@ -26,7 +44,7 @@ compileToHTML :: Document -> T.Text compileToHTML (Doc elements) = T.concat $ map elementToHTML elements elementToHTML :: Element -> T.Text -elementToHTML (Heading header) = T.concat ["", serializeInlineToHTML header.text, ""] +elementToHTML (Heading header) = T.concat ["", serializeInlineToHTML header.text, ""] -- elementToHTML (Code code_block) = T.concat ["
", escapeText code_block.code, "", "
"] where @@ -58,6 +76,6 @@ serializeInlineToHTML (Bold elems : remaining) = T.concat ["", serializeInlin serializeInlineToHTML (Italic elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] serializeInlineToHTML (Crossed elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] serializeInlineToHTML (InlineCode code : remaining) = T.concat ["", escapeText code, "", serializeInlineToHTML remaining] -serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat [" T.concat ["title=\"", escapeText t, "\""]) title, "\">", serializeInlineToHTML linkText, "", serializeInlineToHTML remaining] +serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat [" T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "", serializeInlineToHTML remaining] serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["", url, "\" alt=\"", escapeText altText, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML remaining] serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining diff --git a/app/Markdown.hs b/app/Markdown.hs index 40ed572..721271a 100644 --- a/app/Markdown.hs +++ b/app/Markdown.hs @@ -253,7 +253,7 @@ strongUnderscore = do crossedText :: Parser InlineText crossedText = do string "~~" - content <- some (notFollowedBy (string "~~") >> inlineElementNo '~') + content <- some (notFollowedBy (string "~~") >> inlineElement) string "~~" pure $ Crossed content @@ -284,35 +284,23 @@ inlineElementNo c = try link, try htmlInline, try escapedChar, - plainTextNo c + plainTextNo [c] ] -plainTextNo :: Char -> Parser InlineText -plainTextNo c = fmap (Text . T.pack) $ some $ noneOf [c, '\n'] +plainTextNo :: [Char] -> Parser InlineText +plainTextNo disallow = do + firstChar <- noneOf disallow + remChars <- some $ plainTextCharNo disallow <* notFollowedBy lineEnding + pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars + where + wspHandler '\n' = ' ' + wspHandler c = c inlineElementNoAsterisk :: Parser InlineText -inlineElementNoAsterisk = - choice - [ try strong, - try codeSpan, - try image, - try link, - try htmlInline, - try escapedChar, - plainTextNo '*' - ] +inlineElementNoAsterisk = inlineElementNo '*' inlineElementNoUnderscore :: Parser InlineText -inlineElementNoUnderscore = - choice - [ try strong, - try codeSpan, - try image, - try link, - try htmlInline, - try escapedChar, - plainTextNo '_' - ] +inlineElementNoUnderscore = inlineElementNo '_' -- Code Span codeSpan :: Parser InlineText @@ -408,29 +396,27 @@ escapedChar = do pure $ Text (T.singleton c) -- Plain Text --- TODO: this eats stuff it shouldn't, inefficient solution is to try other inline elements and exit if they succeed plainText :: Parser InlineText -plainText = fmap (Text . T.pack) (liftA2 (:) (noneOf "\n") $ many plainTextChar) +plainText = plainTextNo [] -plainTextChar :: Parser Char -plainTextChar = noneOf "\n[~`_*" +plainTextBaseDisallow :: [Char] +plainTextBaseDisallow = "[~`_*<" + +plainTextCharNo :: [Char] -> Parser Char +plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow plainTextNoAsterisk :: Parser InlineText -plainTextNoAsterisk = fmap (Text . T.pack) $ some $ noneOf "*\n" +plainTextNoAsterisk = plainTextNo "*" plainTextNoUnderscore :: Parser InlineText -plainTextNoUnderscore = fmap (Text . T.pack) $ some $ noneOf "_\n" +plainTextNoUnderscore = plainTextNo "_" plainTextNoBracket :: Parser InlineText -plainTextNoBracket = - fmap (Text . T.pack) $ - some $ - satisfy - (`notElem` ("[]" :: String)) +plainTextNoBracket = plainTextNo "[]" -- Helper Parsers lineEnding :: Parser () -lineEnding = void (try (string "\r\n") <|> try (string "\n") <|> string "\r") +lineEnding = void $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r") wsParser :: Parser () wsParser = void $ some (char ' ' <|> char '\t')