lots of block level work for djot

This commit is contained in:
Pagwin 2026-04-07 17:26:19 -04:00
parent 4bf7614eda
commit 12280ef761
No known key found for this signature in database
GPG key ID: 81137023740CA260
3 changed files with 132 additions and 26 deletions

View file

@ -10,13 +10,13 @@ module Djot
where where
import Control.Applicative (many, some, (<|>)) import Control.Applicative (many, some, (<|>))
import Control.Monad.Trans.Accum (accum) import Data.Functor (void, (<$>))
import Data.Functor (void, (<$>), (<&>)) import qualified Data.List.NonEmpty as NE
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import IR import IR
import Logger (Logger (logCallStack, logDebug)) import Logger (Logger (logCallStack, logDebug, logError))
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, try), anySingle, choice, manyTill, sepBy) import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset)
import Text.Megaparsec.Char (char, newline, space, string) import Text.Megaparsec.Char (char, newline, space, string)
import Utilities.Parsing import Utilities.Parsing
@ -36,7 +36,7 @@ blockElement accumulated_attributes =
[ lookAhead (char '#') *> header accumulated_attributes, [ lookAhead (char '#') *> header accumulated_attributes,
lookAhead (char '>') *> blockQuote accumulated_attributes, lookAhead (char '>') *> blockQuote accumulated_attributes,
lookAhead listMarker lookAhead listMarker
*> (lookAhead taskMarker *> taskListBlock accumulated_attributes) *> (lookAhead (bullet *> taskMarker) *> taskListBlock accumulated_attributes)
<|> listBlock accumulated_attributes, <|> listBlock accumulated_attributes,
lookAhead codeFence lookAhead codeFence
*> (lookAhead rawLang *> rawBlock accumulated_attributes) *> (lookAhead rawLang *> rawBlock accumulated_attributes)
@ -53,49 +53,152 @@ blockElement accumulated_attributes =
] ]
where where
rawLang = space *> char '=' *> some (notFollowedBy newline *> anySingle) rawLang = space *> char '=' *> some (notFollowedBy newline *> anySingle)
taskMarker = void $ char '[' *> choice (map char " xX") *> char ']'
header :: (Logger m, Characters s) => Attrs -> Parser s m Element header :: (Logger m, Characters s) => Attrs -> Parser s m Element
header = error "todo" header attrs = do
level <- length <$> some (char '#')
space
startOffset <- getOffset
raw <- manyTill anySingle $ lookAhead blockSeparator
-- second pass for inline elements
case parse header' "" (fromText $ toText raw) of
Right ret -> pure ret
Left (ParseErrorBundle errs _) ->
let remap err = setErrorOffset (errorOffset err + startOffset) err in parseError $ remap $ NE.head errs
where
header' = do
pure $ Heading (error "todo") attrs
blockQuote :: (Logger m, Characters s) => Attrs -> Parser s m Element blockQuote :: (Logger m, Characters s) => Attrs -> Parser s m Element
blockQuote = error "todo" blockQuote attrs = do
startOffset <- getOffset
first_line <- bq_line
lines <- manyTill (newline *> bq_line) $ lookAhead blockSeparator
let lines' = map toText $ first_line : lines
case parse blockQuote' "" (fromText $ T.intercalate "\n" lines') of
Right ret -> pure ret
Left (ParseErrorBundle errs _) -> do
logError "Error in blockQuote offset may be off"
let remap err = setErrorOffset (errorOffset err + startOffset) err
parseError $ remap $ NE.head errs
where
blockQuote' = do
pure $ BlockQuote (error "todo") attrs
bq_line = do
char '>'
space
manyTill anySingle $ lookAhead newline
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
listBlock = error "todo" listBlock attrs = do
startOffset <- getOffset
list_type <- listMarker
first_item <- list_item_content
rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator
let items = first_item : rem_items
case parse listBlock' "" $ error "todo: fiture out how to take our list of items and put them toegether for easy parsing" of
Right ret -> pure ret
Left (ParseErrorBundle errs _) -> do
logError "Error in blockQuote offset may be off"
let remap err = setErrorOffset (errorOffset err + startOffset) err
parseError $ remap $ NE.head errs
where
list_item_content = error "todo"
listBlock' = error "todo"
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
taskListBlock attrs = do
startOffset <- getOffset
bullet
first_item <- task_item
rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator
pure $ TaskList (TL {items = first_item : rem_items}) attrs
where
task_item = error "todo"
-- consumes whitespace as well for convenience in element parser
listMarker :: (Logger m, Characters s) => Parser s m ListType listMarker :: (Logger m, Characters s) => Parser s m ListType
listMarker = error "todo" listMarker = choice [bullet, decimal, try letter, roman_numeral] <* space
where
decimal = error "todo"
letter = error "todo"
roman_numeral = error "todo"
bullet :: (Logger m, Characters s) => Parser s m ListType
bullet = error "todo"
taskMarker :: (Logger m, Characters s) => Parser s m Char
taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space
codeFence :: (Logger m, Characters s) => Parser s m () codeFence :: (Logger m, Characters s) => Parser s m ()
codeFence = void $ string "```" codeFence = void $ string "```"
codeBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element codeBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
codeBlock = error "todo" codeBlock attrs = do
codeFence
paragraph :: (Logger m, Characters s) => Attrs -> Parser s m Element space
paragraph = error "todo" language' <- manyTill anySingle newline
let language =
if null language'
then
Nothing
else
Just $ toText language'
code <- toText <$> manyTill anySingle codeFence
pure $ Code $ C {language, code}
blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element
blockAttribute attrs = (blockAttribute' <&> (attrs <>) >>= blockElement) <|> error "eof handle" blockAttribute attrs = do
current <- blockAttribute'
let attrs' = attrs <> current
blockElement attrs' <|> error "eof handle"
blockAttribute' :: (Logger m, Characters s) => Parser s m Attrs blockAttribute' :: (Logger m, Characters s) => Parser s m Attrs
blockAttribute' = error "todo" blockAttribute' = do
startOffset <- getOffset
input <- fromText . toText <$> getInput
char '{'
contents <- manyTill anySingle $ char '}'
case parse blockAttribute'' input $ fromText $ toText contents of
Right ret -> pure ret
Left (ParseErrorBundle errs _) ->
let remap err = setErrorOffset (errorOffset err + startOffset) err in parseError $ remap $ NE.head errs
where
blockAttribute'' = error "todo"
blockSeparator :: (Logger m, Characters s) => Parser s m () blockSeparator :: (Logger m, Characters s) => Parser s m ()
blockSeparator = void $ newline *> newline blockSeparator = void $ newline *> newline
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
taskListBlock = error "todo"
referenceDef :: (Logger m, Characters s) => Attrs -> Parser s m Element referenceDef :: (Logger m, Characters s) => Attrs -> Parser s m Element
referenceDef = error "todo" referenceDef attrs = do
char '['
label <- toText <$> manyTill anySingle (char ']')
char ':'
link <- toText <$> manyTill anySingle (lookAhead blockSeparator)
pure $ ReferenceDefinition $ RD {label, link}
rawBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element rawBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
rawBlock = error "todo" rawBlock = error "todo"
thematicBreak :: (Logger m, Characters s) => Attrs -> Parser s m Element thematicBreak :: (Logger m, Characters s) => Attrs -> Parser s m Element
thematicBreak = error "todo" thematicBreak attrs = do
-- even if there's a more concise way to write this
-- that way would probably be less readable
-- \s*[*\-]\s*[*\-]\s*[*\-]\s*([*\-]\s*)*
-- is more comprehensible than
-- (\s*[*\-]){3}\s*([*\-]\s*)*
-- and only mildlylonger
space
part
space
part
space
part
space
many (part *> space)
pure $ HorizontalRule attrs
where
part = choice $ map char "*-"
containerBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element containerBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
containerBlock = error "todo" containerBlock = error "todo"
@ -108,3 +211,6 @@ tableRow = error "todo"
footnoteDefinition :: (Logger m, Characters s) => Attrs -> Parser s m Element footnoteDefinition :: (Logger m, Characters s) => Attrs -> Parser s m Element
footnoteDefinition = error "todo" footnoteDefinition = error "todo"
paragraph :: (Logger m, Characters s) => Attrs -> Parser s m Element
paragraph = error "todo"

View file

@ -9,14 +9,14 @@ newtype Document = Doc [Element]
data Element data Element
= Heading Heading Attrs = Heading Heading Attrs
| Code Code | Code Code
| BlockQuote BlockQuote | BlockQuote BlockQuote Attrs
| List List Attrs | List List Attrs
| -- Markdown only, DJOT will produce a RawBlock with an html type | -- Markdown only, DJOT will produce a RawBlock with an html type
HTML HTML HTML HTML
| Paragraph Paragraph Attrs | Paragraph Paragraph Attrs
| -- to avoid breaking generation when swapping InlineText to Element in Markdown parser | -- to avoid breaking generation when swapping InlineText to Element in Markdown parser
Transparent [InlineText] Transparent [InlineText]
| HorizontalRule | HorizontalRule Attrs
| Table Table Attrs | Table Table Attrs
| -- Djot ::: | -- Djot :::
Container [Element] Attrs Container [Element] Attrs
@ -176,9 +176,9 @@ data RawBlock = RB
} }
deriving (Show) deriving (Show)
-- TODO
data RefDef = RD data RefDef = RD
{ { label :: Text,
link :: Text
} }
deriving (Show) deriving (Show)

View file

@ -143,7 +143,7 @@ fencedCodeBlock = between (string "```") (string "```") $ do
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
blockquoteBlock = do blockquoteBlock = do
content <- Q . concat <$> some blockquoteLine content <- Q . concat <$> some blockquoteLine
pure $ BlockQuote content pure $ BlockQuote content mempty
where where
blockquoteLine = do blockquoteLine = do
char '>' char '>'