bunch of work on djot

This commit is contained in:
Pagwin 2026-04-10 15:22:06 -04:00
parent cc4f0b7672
commit 1a2feb95e9
No known key found for this signature in database
GPG key ID: 81137023740CA260
3 changed files with 133 additions and 13 deletions

View file

@ -1,3 +1,4 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
@ -10,14 +11,17 @@ module Djot
where where
import Control.Applicative (many, optional, some, (<|>)) import Control.Applicative (many, optional, some, (<|>))
import Control.Monad.ST (runST)
import Data.Foldable (for_)
import Data.Functor (void, (<$>)) import Data.Functor (void, (<$>))
import Data.List (elemIndex) import Data.List (elemIndex)
import qualified Data.List.NonEmpty as NE import qualified Data.List.NonEmpty as NE
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import IR import IR
import Logger (Logger (logCallStack, logDebug, logError)) import Logger (Logger (logCallStack, logDebug, logError))
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset, someTill) import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
import Text.Megaparsec.Char (char, lowerChar, newline, numberChar, space, string, tab, upperChar) import Text.Megaparsec.Char (char, lowerChar, newline, numberChar, space, string, tab, upperChar)
import Utilities.Parsing import Utilities.Parsing
@ -46,10 +50,10 @@ blockElement accumulated_attributes =
try $ thematicBreak accumulated_attributes, try $ thematicBreak accumulated_attributes,
lookAhead (string ":::") *> containerBlock accumulated_attributes, lookAhead (string ":::") *> containerBlock accumulated_attributes,
-- try used due to table having a non-trivial structure at the start -- try used due to table having a non-trivial structure at the start
try tableBlock accumulated_attributes, try $ tableBlock accumulated_attributes,
-- using try due to ambiguity between these and normal text until we've already done some amount of parsing -- using try due to ambiguity between these and normal text until we've already done some amount of parsing
try footnoteDefinition accumulated_attributes, try $ footnoteDefinition accumulated_attributes,
try referenceDef accumulated_attributes, try $ referenceDef accumulated_attributes,
lookAhead (char '{') *> blockAttribute accumulated_attributes, lookAhead (char '{') *> blockAttribute accumulated_attributes,
paragraph accumulated_attributes paragraph accumulated_attributes
] ]
@ -86,7 +90,9 @@ blockQuote attrs = do
parseError $ remap $ NE.head errs parseError $ remap $ NE.head errs
where where
blockQuote' = do blockQuote' = do
pure $ BlockQuote (error "todo") attrs -- using document for convenience
(Doc contents) <- document
pure $ BlockQuote (Q contents) attrs
bq_line = do bq_line = do
char '>' char '>'
space space
@ -98,7 +104,7 @@ listBlock attrs = do
startOffset <- getOffset startOffset <- getOffset
list_type <- listMarker list_type <- listMarker
first_item <- list_item_content first_item <- list_item_content
rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator rem_items <- manyTill (listMarker' list_type *> list_item_content) $ lookAhead blockSeparator
let items = first_item : rem_items let items = first_item : rem_items
case parse listBlock' "" $ error "todo: figure out how to take our list of items and put them toegether for easy parsing" of case parse listBlock' "" $ error "todo: figure out how to take our list of items and put them toegether for easy parsing" of
Right ret -> pure ret Right ret -> pure ret
@ -127,6 +133,41 @@ lowerAlphabet = ['a' .. 'z']
upperAlphabet :: [Char] upperAlphabet :: [Char]
upperAlphabet = ['A' .. 'Z'] upperAlphabet = ['A' .. 'Z']
-- inner parsing of roman numerals done via Gemini due to lazy
romanValue :: Char -> Int
romanValue 'I' = 1
romanValue 'V' = 5
romanValue 'X' = 10
romanValue 'L' = 50
romanValue 'C' = 100
romanValue 'D' = 500
romanValue 'M' = 1000
romanValue _ = 0
romanToInt :: Text -> Int
romanToInt s = runST $ do
total <- newSTRef 0
prevVal <- newSTRef 0
for_ (T.unpack s) $ \char -> do
let curr = romanValue char
p <- readSTRef prevVal
-- If current > previous (e.g., IV), we subtract the previous twice
if curr > p
then modifySTRef' total (\t -> t + curr - 2 * p)
else modifySTRef' total (+ curr)
writeSTRef prevVal curr
readSTRef total
upperRoman :: [Char]
upperRoman = "IVXLCDM"
lowerRoman :: [Char]
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
-- consumes whitespace as well for convenience in element parser -- consumes whitespace as well for convenience in element parser
listMarker :: (Logger m, Characters s) => Parser s m ListType listMarker :: (Logger m, Characters s) => Parser s m ListType
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
@ -145,7 +186,14 @@ listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lo
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point] letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
let start_number = elemIndex letter upperAlphabet let start_number = elemIndex letter upperAlphabet
pure $ Ordered {start_number, style = Just "A"} pure $ Ordered {start_number, style = Just "A"}
roman_numeral = error "todo" lower_roman_numeral = do
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
pure $ Ordered {start_number, style = Just "i"}
upper_roman_numeral = do
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
pure $ Ordered {start_number, style = Just "I"}
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
surroundParen parser = do surroundParen parser = do
@ -180,7 +228,7 @@ codeBlock attrs = do
else else
Just $ toText language' Just $ toText language'
code <- toText <$> manyTill anySingle codeFence code <- toText <$> manyTill anySingle codeFence
pure $ Code $ C {language, code} pure $ Code (C {language, code}) attrs
blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element
blockAttribute attrs = do blockAttribute attrs = do
@ -310,4 +358,76 @@ inlineContent' opened = someTill (inlineElement opened) $ lookAhead blockSeparat
inlineElement :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText inlineElement :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
inlineElement opened = inlineElement opened =
choice choice
[lookAhead (string "![") *> image, lookAhead] -- spamming try because backtracking is easier than having state for partial parses
[ try $ image opened,
try $ Djot.link opened,
try $ autolink opened,
try $ verbatim opened,
try $ emphasis opened,
try $ highlight opened,
try $ superscript opened,
try $ subscript opened,
try $ insert opened,
try $ math opened,
try $ footnoteRef opened,
try $ linebreak opened,
try $ symbol opened,
try $ rawInline opened,
try $ Djot.span opened,
try $ inlineAttribute opened,
plainText
]
image :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
image opened = error "todo"
link :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
link opened = error "todo"
autolink :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
autolink opened = error "todo"
verbatim :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
verbatim opened = error "todo"
emphasis :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
emphasis opened = error "todo"
highlight :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
highlight opened = error "todo"
superscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
superscript opene = error "todo"
subscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
subscript opened = error "todo"
insert :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
insert opened = error "todo"
delete :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
delete = error "todo"
math :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
math opened = error "todo"
footnoteRef :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
footnoteRef opened = error "todo"
linebreak :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
linebreak opened = error "todo"
symbol :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
symbol opened = error "todo"
rawInline :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
rawInline opened = error "todo"
span :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
span opened = error "todo"
inlineAttribute :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
inlineAttribute opened = error "todo"
plainText :: (Logger m, Characters s) => Parser s m InlineText
plainText = error "todo"

View file

@ -49,16 +49,16 @@ spaceSep = T.intercalate " "
elementToHTML :: Element -> T.Text elementToHTML :: Element -> T.Text
elementToHTML (Heading header attrs) = T.concat ["<h", tshow header.level, headerAttrs header attrs, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"] elementToHTML (Heading header attrs) = T.concat ["<h", tshow header.level, headerAttrs header attrs, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
-- --
elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"] elementToHTML (Code code_block attrs) = T.concat ["<pre class=\"sourceCode ", language, "\" ", handleAttrs attrs, "><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
where where
language = fromMaybe "" code_block.language language = fromMaybe "" code_block.language
elementToHTML (BlockQuote (Q elems)) = T.concat ["<blockquote>", elementsToHTML elems, "</blockquote>"] elementToHTML (BlockQuote (Q elems) attrs) = T.concat ["<blockquote ", handleAttrs attrs, ">", elementsToHTML elems, "</blockquote>"]
elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = spaceSep ["<ol", maybe "" handleStart start_number, maybe "" handleStyle style, ">", generateLiElems items, "</ol>"] elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = spaceSep ["<ol", maybe "" handleStart start_number, maybe "" handleStyle style, ">", generateLiElems items, "</ol>"]
elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = spaceSep ["<ul", maybe "" handleStyle style, ">", generateLiElems items, "</ul>"] elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = spaceSep ["<ul", maybe "" handleStyle style, ">", generateLiElems items, "</ul>"]
elementToHTML (HTML (HTMLTag {html_content})) = html_content elementToHTML (HTML (HTMLTag {html_content})) = html_content
elementToHTML (Paragraph (P snippets) attrs) = spaceSep ["<p", handleAttrs attrs, ">", serializeInlineToHTML snippets, "</p>"] elementToHTML (Paragraph (P snippets) attrs) = spaceSep ["<p", handleAttrs attrs, ">", serializeInlineToHTML snippets, "</p>"]
elementToHTML (Transparent snippets) = serializeInlineToHTML snippets elementToHTML (Transparent snippets) = serializeInlineToHTML snippets
elementToHTML HorizontalRule = "<hr>" elementToHTML (HorizontalRule attrs) = "<hr " <> handleAttrs attrs <> ">"
elementToHTML (Table _ _) = error "TODO" elementToHTML (Table _ _) = error "TODO"
elementToHTML (Container _ _) = error "TODO" elementToHTML (Container _ _) = error "TODO"
elementToHTML (Footnote _ _) = error "TODO" elementToHTML (Footnote _ _) = error "TODO"

View file

@ -138,7 +138,7 @@ fencedCodeBlock = between (string "```") (string "```") $ do
lineEnding lineEnding
code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle)) code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle))
let language = if language' == "" then Just language' else Nothing let language = if language' == "" then Just language' else Nothing
pure $ Code $ C {language, code} pure $ Code (C {language, code}) mempty
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
blockquoteBlock = do blockquoteBlock = do