lots of block level work for djot

This commit is contained in:
Pagwin 2026-04-07 17:26:19 -04:00
parent 4bf7614eda
commit 12280ef761
No known key found for this signature in database
GPG key ID: 81137023740CA260
3 changed files with 132 additions and 26 deletions

View file

@ -10,13 +10,13 @@ module Djot
where
import Control.Applicative (many, some, (<|>))
import Control.Monad.Trans.Accum (accum)
import Data.Functor (void, (<$>), (<&>))
import Data.Functor (void, (<$>))
import qualified Data.List.NonEmpty as NE
import Data.Text (Text)
import qualified Data.Text as T
import IR
import Logger (Logger (logCallStack, logDebug))
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, try), anySingle, choice, manyTill, sepBy)
import Logger (Logger (logCallStack, logDebug, logError))
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset)
import Text.Megaparsec.Char (char, newline, space, string)
import Utilities.Parsing
@ -36,7 +36,7 @@ blockElement accumulated_attributes =
[ lookAhead (char '#') *> header accumulated_attributes,
lookAhead (char '>') *> blockQuote accumulated_attributes,
lookAhead listMarker
*> (lookAhead taskMarker *> taskListBlock accumulated_attributes)
*> (lookAhead (bullet *> taskMarker) *> taskListBlock accumulated_attributes)
<|> listBlock accumulated_attributes,
lookAhead codeFence
*> (lookAhead rawLang *> rawBlock accumulated_attributes)
@ -53,49 +53,152 @@ blockElement accumulated_attributes =
]
where
rawLang = space *> char '=' *> some (notFollowedBy newline *> anySingle)
taskMarker = void $ char '[' *> choice (map char " xX") *> char ']'
header :: (Logger m, Characters s) => Attrs -> Parser s m Element
header = error "todo"
header attrs = do
level <- length <$> some (char '#')
space
startOffset <- getOffset
raw <- manyTill anySingle $ lookAhead blockSeparator
-- second pass for inline elements
case parse header' "" (fromText $ toText raw) of
Right ret -> pure ret
Left (ParseErrorBundle errs _) ->
let remap err = setErrorOffset (errorOffset err + startOffset) err in parseError $ remap $ NE.head errs
where
header' = do
pure $ Heading (error "todo") attrs
blockQuote :: (Logger m, Characters s) => Attrs -> Parser s m Element
blockQuote = error "todo"
blockQuote attrs = do
startOffset <- getOffset
first_line <- bq_line
lines <- manyTill (newline *> bq_line) $ lookAhead blockSeparator
let lines' = map toText $ first_line : lines
case parse blockQuote' "" (fromText $ T.intercalate "\n" lines') of
Right ret -> pure ret
Left (ParseErrorBundle errs _) -> do
logError "Error in blockQuote offset may be off"
let remap err = setErrorOffset (errorOffset err + startOffset) err
parseError $ remap $ NE.head errs
where
blockQuote' = do
pure $ BlockQuote (error "todo") attrs
bq_line = do
char '>'
space
manyTill anySingle $ lookAhead newline
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
listBlock = error "todo"
listBlock attrs = do
startOffset <- getOffset
list_type <- listMarker
first_item <- list_item_content
rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator
let items = first_item : rem_items
case parse listBlock' "" $ error "todo: fiture out how to take our list of items and put them toegether for easy parsing" of
Right ret -> pure ret
Left (ParseErrorBundle errs _) -> do
logError "Error in blockQuote offset may be off"
let remap err = setErrorOffset (errorOffset err + startOffset) err
parseError $ remap $ NE.head errs
where
list_item_content = error "todo"
listBlock' = error "todo"
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
taskListBlock attrs = do
startOffset <- getOffset
bullet
first_item <- task_item
rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator
pure $ TaskList (TL {items = first_item : rem_items}) attrs
where
task_item = error "todo"
-- consumes whitespace as well for convenience in element parser
listMarker :: (Logger m, Characters s) => Parser s m ListType
listMarker = error "todo"
listMarker = choice [bullet, decimal, try letter, roman_numeral] <* space
where
decimal = error "todo"
letter = error "todo"
roman_numeral = error "todo"
bullet :: (Logger m, Characters s) => Parser s m ListType
bullet = error "todo"
taskMarker :: (Logger m, Characters s) => Parser s m Char
taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space
codeFence :: (Logger m, Characters s) => Parser s m ()
codeFence = void $ string "```"
codeBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
codeBlock = error "todo"
paragraph :: (Logger m, Characters s) => Attrs -> Parser s m Element
paragraph = error "todo"
codeBlock attrs = do
codeFence
space
language' <- manyTill anySingle newline
let language =
if null language'
then
Nothing
else
Just $ toText language'
code <- toText <$> manyTill anySingle codeFence
pure $ Code $ C {language, code}
blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element
blockAttribute attrs = (blockAttribute' <&> (attrs <>) >>= blockElement) <|> error "eof handle"
blockAttribute attrs = do
current <- blockAttribute'
let attrs' = attrs <> current
blockElement attrs' <|> error "eof handle"
blockAttribute' :: (Logger m, Characters s) => Parser s m Attrs
blockAttribute' = error "todo"
blockAttribute' = do
startOffset <- getOffset
input <- fromText . toText <$> getInput
char '{'
contents <- manyTill anySingle $ char '}'
case parse blockAttribute'' input $ fromText $ toText contents of
Right ret -> pure ret
Left (ParseErrorBundle errs _) ->
let remap err = setErrorOffset (errorOffset err + startOffset) err in parseError $ remap $ NE.head errs
where
blockAttribute'' = error "todo"
blockSeparator :: (Logger m, Characters s) => Parser s m ()
blockSeparator = void $ newline *> newline
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
taskListBlock = error "todo"
referenceDef :: (Logger m, Characters s) => Attrs -> Parser s m Element
referenceDef = error "todo"
referenceDef attrs = do
char '['
label <- toText <$> manyTill anySingle (char ']')
char ':'
link <- toText <$> manyTill anySingle (lookAhead blockSeparator)
pure $ ReferenceDefinition $ RD {label, link}
rawBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
rawBlock = error "todo"
thematicBreak :: (Logger m, Characters s) => Attrs -> Parser s m Element
thematicBreak = error "todo"
thematicBreak attrs = do
-- even if there's a more concise way to write this
-- that way would probably be less readable
-- \s*[*\-]\s*[*\-]\s*[*\-]\s*([*\-]\s*)*
-- is more comprehensible than
-- (\s*[*\-]){3}\s*([*\-]\s*)*
-- and only mildlylonger
space
part
space
part
space
part
space
many (part *> space)
pure $ HorizontalRule attrs
where
part = choice $ map char "*-"
containerBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
containerBlock = error "todo"
@ -108,3 +211,6 @@ tableRow = error "todo"
footnoteDefinition :: (Logger m, Characters s) => Attrs -> Parser s m Element
footnoteDefinition = error "todo"
paragraph :: (Logger m, Characters s) => Attrs -> Parser s m Element
paragraph = error "todo"

View file

@ -9,14 +9,14 @@ newtype Document = Doc [Element]
data Element
= Heading Heading Attrs
| Code Code
| BlockQuote BlockQuote
| BlockQuote BlockQuote Attrs
| List List Attrs
| -- Markdown only, DJOT will produce a RawBlock with an html type
HTML HTML
| Paragraph Paragraph Attrs
| -- to avoid breaking generation when swapping InlineText to Element in Markdown parser
Transparent [InlineText]
| HorizontalRule
| HorizontalRule Attrs
| Table Table Attrs
| -- Djot :::
Container [Element] Attrs
@ -176,9 +176,9 @@ data RawBlock = RB
}
deriving (Show)
-- TODO
data RefDef = RD
{
{ label :: Text,
link :: Text
}
deriving (Show)

View file

@ -143,7 +143,7 @@ fencedCodeBlock = between (string "```") (string "```") $ do
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
blockquoteBlock = do
content <- Q . concat <$> some blockquoteLine
pure $ BlockQuote content
pure $ BlockQuote content mempty
where
blockquoteLine = do
char '>'