From 3546654a66677a71dde2e86a5335785cfc5011e2 Mon Sep 17 00:00:00 2001
From: Pagwin <pgp2024@pagwin.xyz>
Date: Sun, 9 Nov 2025 16:13:10 -0500
Subject: [PATCH] fixed more bugs but currently reckoning with the whole
 situation of how markdown deals with singular vs double newlines

---
 TODO.md         | 14 +++++-------
 app/HTML.hs     | 22 +++++++++++++++++--
 app/Markdown.hs | 58 +++++++++++++++++++------------------------------
 3 files changed, 48 insertions(+), 46 deletions(-)
diff --git a/TODO.md b/TODO.md
index 5d0caa0..3fbc422 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,14 +1,12 @@
-- add rst support and convert markdown handling to custom parser instead of pandoc
+- [ ] add rst support and convert markdown handling to custom parser instead of pandoc
 
-- process source code blocks with tree sitter https://hackage.haskell.org/package/tree-sitter
+- [ ] process source code blocks with tree sitter https://hackage.haskell.org/package/tree-sitter
     - Alternatively consider skylighting https://hackage.haskell.org/package/skylighting
 
-- minify js and css when copying over instead of just copying
+- [ ] minify js and css when copying over instead of just copying
 
-- setup fingerprinting in file names for css and js
+- [ ] setup fingerprinting in file names for css and js
 
-- dev server setup (with live reloading)
+- [ ] dev server setup (with live reloading)
 
-- see if performance can be improved (it isn't slow atm but it definitely feels like there's a bottleneck)
-
-- look into adding postcss support perhaps
+-  [ ] see if performance can be improved (it isn't slow atm but it definitely feels like there's a bottleneck)
diff --git a/app/HTML.hs b/app/HTML.hs
index c324fa7..d5777be 100644
--- a/app/HTML.hs
+++ b/app/HTML.hs
@@ -2,6 +2,7 @@
 
 module HTML (compileToHTML) where
 
+import Data.Char (isAlphaNum, toLower)
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import IR
@@ -19,6 +20,23 @@ escapeChar c = T.singleton c
 escapeText :: T.Text -> T.Text
 escapeText = T.concat . map escapeChar . T.unpack
 
+genHeaderId :: Heading -> T.Text
+genHeaderId header =
+  T.concat
+    [ " id=\"",
+      T.map (toLower . textSub) $
+        T.strip $
+          T.filter (\c -> isAlphaNum c || c == ' ') $
+            serializeInlineToHTML header.text,
+      "\" "
+    ]
+  where
+    textSub ' ' = '-'
+    textSub c = c
+
+genHeaderClasses :: Heading -> T.Text
+genHeaderClasses = const ""
+
 tshow :: (Show s) => s -> T.Text
 tshow = T.pack . show
 
@@ -26,7 +44,7 @@ compileToHTML :: Document -> T.Text
 compileToHTML (Doc elements) = T.concat $ map elementToHTML elements
 
 elementToHTML :: Element -> T.Text
-elementToHTML (Heading header) = T.concat ["<h", tshow header.level, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
+elementToHTML (Heading header) = T.concat ["<h", tshow header.level, genHeaderId header, genHeaderClasses header, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
 --
 elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
   where
@@ -58,6 +76,6 @@ serializeInlineToHTML (Bold elems : remaining) = T.concat ["<b>", serializeInlin
 serializeInlineToHTML (Italic elems : remaining) = T.concat ["<i>", serializeInlineToHTML elems, "</i>", serializeInlineToHTML remaining]
 serializeInlineToHTML (Crossed elems : remaining) = T.concat ["<s>", serializeInlineToHTML elems, "</s>", serializeInlineToHTML remaining]
 serializeInlineToHTML (InlineCode code : remaining) = T.concat ["<code>", escapeText code, "</code>", serializeInlineToHTML remaining]
-serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat ["<a href=\"", url, "\" ", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, "\">", serializeInlineToHTML linkText, "</a>", serializeInlineToHTML remaining]
+serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat ["<a href=\"", url, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "</a>", serializeInlineToHTML remaining]
 serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["<img src=\">", url, "\" alt=\"", escapeText altText, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML remaining]
 serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining
diff --git a/app/Markdown.hs b/app/Markdown.hs
index 40ed572..721271a 100644
--- a/app/Markdown.hs
+++ b/app/Markdown.hs
@@ -253,7 +253,7 @@ strongUnderscore = do
 crossedText :: Parser InlineText
 crossedText = do
   string "~~"
-  content <- some (notFollowedBy (string "~~") >> inlineElementNo '~')
+  content <- some (notFollowedBy (string "~~") >> inlineElement)
   string "~~"
   pure $ Crossed content
 
@@ -284,35 +284,23 @@ inlineElementNo c =
       try link,
       try htmlInline,
       try escapedChar,
-      plainTextNo c
+      plainTextNo [c]
     ]
 
-plainTextNo :: Char -> Parser InlineText
-plainTextNo c = fmap (Text . T.pack) $ some $ noneOf [c, '\n']
+plainTextNo :: [Char] -> Parser InlineText
+plainTextNo disallow = do
+  firstChar <- noneOf disallow
+  remChars <- some $ plainTextCharNo disallow <* notFollowedBy lineEnding
+  pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars
+  where
+    wspHandler '\n' = ' '
+    wspHandler c = c
 
 inlineElementNoAsterisk :: Parser InlineText
-inlineElementNoAsterisk =
-  choice
-    [ try strong,
-      try codeSpan,
-      try image,
-      try link,
-      try htmlInline,
-      try escapedChar,
-      plainTextNo '*'
-    ]
+inlineElementNoAsterisk = inlineElementNo '*'
 
 inlineElementNoUnderscore :: Parser InlineText
-inlineElementNoUnderscore =
-  choice
-    [ try strong,
-      try codeSpan,
-      try image,
-      try link,
-      try htmlInline,
-      try escapedChar,
-      plainTextNo '_'
-    ]
+inlineElementNoUnderscore = inlineElementNo '_'
 
 -- Code Span
 codeSpan :: Parser InlineText
@@ -408,29 +396,27 @@ escapedChar = do
   pure $ Text (T.singleton c)
 
 -- Plain Text
--- TODO: this eats stuff it shouldn't, inefficient solution is to try other inline elements and exit if they succeed
 plainText :: Parser InlineText
-plainText = fmap (Text . T.pack) (liftA2 (:) (noneOf "\n") $ many plainTextChar)
+plainText = plainTextNo []
 
-plainTextChar :: Parser Char
-plainTextChar = noneOf "\n[~`_*"
+plainTextBaseDisallow :: [Char]
+plainTextBaseDisallow = "[~`_*<"
+
+plainTextCharNo :: [Char] -> Parser Char
+plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow
 
 plainTextNoAsterisk :: Parser InlineText
-plainTextNoAsterisk = fmap (Text . T.pack) $ some $ noneOf "*\n"
+plainTextNoAsterisk = plainTextNo "*"
 
 plainTextNoUnderscore :: Parser InlineText
-plainTextNoUnderscore = fmap (Text . T.pack) $ some $ noneOf "_\n"
+plainTextNoUnderscore = plainTextNo "_"
 
 plainTextNoBracket :: Parser InlineText
-plainTextNoBracket =
-  fmap (Text . T.pack) $
-    some $
-      satisfy
-        (`notElem` ("[]" :: String))
+plainTextNoBracket = plainTextNo "[]"
 
 -- Helper Parsers
 lineEnding :: Parser ()
-lineEnding = void (try (string "\r\n") <|> try (string "\n") <|> string "\r")
+lineEnding = void $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r")
 
 wsParser :: Parser ()
 wsParser = void $ some (char ' ' <|> char '\t')