bunch of work on djot

2026-04-10 15:22:06 -04:00 · 2026-04-10 15:22:06 -04:00 · 1a2feb95e9
commit 1a2feb95e9
parent cc4f0b7672
3 changed files with 133 additions and 13 deletions
--- a/src/Djot.hs
+++ b/src/Djot.hs
@ -1,3 +1,4 @@
 {-# LANGUAGE BlockArguments #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE RankNTypes #-}
@ -10,14 +11,17 @@ module Djot
 where
 import Control.Applicative (many, optional, some, (<|>))
 import Control.Monad.ST (runST)
 import Data.Foldable (for_)
 import Data.Functor (void, (<$>))
 import Data.List (elemIndex)
 import qualified Data.List.NonEmpty as NE
 import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
 import Data.Text (Text)
 import qualified Data.Text as T
 import IR
 import Logger (Logger (logCallStack, logDebug, logError))
-import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset, someTill)
+import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
 import Text.Megaparsec.Char (char, lowerChar, newline, numberChar, space, string, tab, upperChar)
 import Utilities.Parsing
@ -46,10 +50,10 @@ blockElement accumulated_attributes =
      try $ thematicBreak accumulated_attributes,
      lookAhead (string ":::") *> containerBlock accumulated_attributes,
      -- try used due to table having a non-trivial structure at the start
-      try tableBlock accumulated_attributes,
+      try $ tableBlock accumulated_attributes,
      -- using try due to ambiguity between these and normal text until we've already done some amount of parsing
-      try footnoteDefinition accumulated_attributes,
+      try $ footnoteDefinition accumulated_attributes,
-      try referenceDef accumulated_attributes,
+      try $ referenceDef accumulated_attributes,
      lookAhead (char '{') *> blockAttribute accumulated_attributes,
      paragraph accumulated_attributes
    ]
@ -86,7 +90,9 @@ blockQuote attrs = do
      parseError $ remap $ NE.head errs
  where
    blockQuote' = do
-      pure $ BlockQuote (error "todo") attrs
+      -- using document for convenience
      (Doc contents) <- document
      pure $ BlockQuote (Q contents) attrs
    bq_line = do
      char '>'
      space
@ -98,7 +104,7 @@ listBlock attrs = do
  startOffset <- getOffset
  list_type <- listMarker
  first_item <- list_item_content
-  rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator
+  rem_items <- manyTill (listMarker' list_type *> list_item_content) $ lookAhead blockSeparator
  let items = first_item : rem_items
  case parse listBlock' "" $ error "todo: figure out how to take our list of items and put them toegether for easy parsing" of
    Right ret -> pure ret
@ -127,6 +133,41 @@ lowerAlphabet = ['a' .. 'z']
 upperAlphabet :: [Char]
 upperAlphabet = ['A' .. 'Z']
 -- inner parsing of roman numerals done via Gemini due to lazy
 romanValue :: Char -> Int
 romanValue 'I' = 1
 romanValue 'V' = 5
 romanValue 'X' = 10
 romanValue 'L' = 50
 romanValue 'C' = 100
 romanValue 'D' = 500
 romanValue 'M' = 1000
 romanValue _ = 0
 romanToInt :: Text -> Int
 romanToInt s = runST $ do
  total <- newSTRef 0
  prevVal <- newSTRef 0
  for_ (T.unpack s) $ \char -> do
    let curr = romanValue char
    p <- readSTRef prevVal
    -- If current > previous (e.g., IV), we subtract the previous twice
    if curr > p
      then modifySTRef' total (\t -> t + curr - 2 * p)
      else modifySTRef' total (+ curr)
    writeSTRef prevVal curr
  readSTRef total
 upperRoman :: [Char]
 upperRoman = "IVXLCDM"
 lowerRoman :: [Char]
 lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
 -- consumes whitespace as well for convenience in element parser
 listMarker :: (Logger m, Characters s) => Parser s m ListType
 listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
@ -145,7 +186,14 @@ listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lo
      letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
      let start_number = elemIndex letter upperAlphabet
      pure $ Ordered {start_number, style = Just "A"}
-    roman_numeral = error "todo"
+    lower_roman_numeral = do
      numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
      let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
      pure $ Ordered {start_number, style = Just "i"}
    upper_roman_numeral = do
      numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
      let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
      pure $ Ordered {start_number, style = Just "I"}
    surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
    surroundParen parser = do
@ -180,7 +228,7 @@ codeBlock attrs = do
          else
            Just $ toText language'
  code <- toText <$> manyTill anySingle codeFence
-  pure $ Code $ C {language, code}
+  pure $ Code (C {language, code}) attrs
 blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element
 blockAttribute attrs = do
@ -310,4 +358,76 @@ inlineContent' opened = someTill (inlineElement opened) $ lookAhead blockSeparat
 inlineElement :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 inlineElement opened =
  choice
-    [lookAhead (string "![") *> image, lookAhead]
+    -- spamming try because backtracking is easier than having state for partial parses
    [ try $ image opened,
      try $ Djot.link opened,
      try $ autolink opened,
      try $ verbatim opened,
      try $ emphasis opened,
      try $ highlight opened,
      try $ superscript opened,
      try $ subscript opened,
      try $ insert opened,
      try $ math opened,
      try $ footnoteRef opened,
      try $ linebreak opened,
      try $ symbol opened,
      try $ rawInline opened,
      try $ Djot.span opened,
      try $ inlineAttribute opened,
      plainText
    ]
 image :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 image opened = error "todo"
 link :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 link opened = error "todo"
 autolink :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 autolink opened = error "todo"
 verbatim :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 verbatim opened = error "todo"
 emphasis :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 emphasis opened = error "todo"
 highlight :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 highlight opened = error "todo"
 superscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 superscript opene = error "todo"
 subscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 subscript opened = error "todo"
 insert :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 insert opened = error "todo"
 delete :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 delete = error "todo"
 math :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 math opened = error "todo"
 footnoteRef :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 footnoteRef opened = error "todo"
 linebreak :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 linebreak opened = error "todo"
 symbol :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 symbol opened = error "todo"
 rawInline :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 rawInline opened = error "todo"
 span :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 span opened = error "todo"
 inlineAttribute :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
 inlineAttribute opened = error "todo"
 plainText :: (Logger m, Characters s) => Parser s m InlineText
 plainText = error "todo"
--- a/src/HTML.hs
+++ b/src/HTML.hs
@ -49,16 +49,16 @@ spaceSep = T.intercalate " "
 elementToHTML :: Element -> T.Text
 elementToHTML (Heading header attrs) = T.concat ["<h", tshow header.level, headerAttrs header attrs, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
 --
-elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
+elementToHTML (Code code_block attrs) = T.concat ["<pre class=\"sourceCode ", language, "\" ", handleAttrs attrs, "><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
  where
    language = fromMaybe "" code_block.language
-elementToHTML (BlockQuote (Q elems)) = T.concat ["<blockquote>", elementsToHTML elems, "</blockquote>"]
+elementToHTML (BlockQuote (Q elems) attrs) = T.concat ["<blockquote ", handleAttrs attrs, ">", elementsToHTML elems, "</blockquote>"]
 elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = spaceSep ["<ol", maybe "" handleStart start_number, maybe "" handleStyle style, ">", generateLiElems items, "</ol>"]
 elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = spaceSep ["<ul", maybe "" handleStyle style, ">", generateLiElems items, "</ul>"]
 elementToHTML (HTML (HTMLTag {html_content})) = html_content
 elementToHTML (Paragraph (P snippets) attrs) = spaceSep ["<p", handleAttrs attrs, ">", serializeInlineToHTML snippets, "</p>"]
 elementToHTML (Transparent snippets) = serializeInlineToHTML snippets
-elementToHTML HorizontalRule = "<hr>"
+elementToHTML (HorizontalRule attrs) = "<hr " <> handleAttrs attrs <> ">"
 elementToHTML (Table _ _) = error "TODO"
 elementToHTML (Container _ _) = error "TODO"
 elementToHTML (Footnote _ _) = error "TODO"
--- a/src/Markdown.hs
+++ b/src/Markdown.hs
@ -138,7 +138,7 @@ fencedCodeBlock = between (string "```") (string "```") $ do
  lineEnding
  code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle))
  let language = if language' == "" then Just language' else Nothing
-  pure $ Code $ C {language, code}
+  pure $ Code (C {language, code}) mempty
 blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
 blockquoteBlock = do