started work on inline elements

This commit is contained in:
Pagwin 2026-04-08 17:16:25 -04:00
parent abf93fbd8b
commit cc4f0b7672
No known key found for this signature in database
GPG key ID: 81137023740CA260

View file

@ -11,13 +11,14 @@ where
import Control.Applicative (many, optional, some, (<|>)) import Control.Applicative (many, optional, some, (<|>))
import Data.Functor (void, (<$>)) import Data.Functor (void, (<$>))
import Data.List (elemIndex)
import qualified Data.List.NonEmpty as NE import qualified Data.List.NonEmpty as NE
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import IR import IR
import Logger (Logger (logCallStack, logDebug, logError)) import Logger (Logger (logCallStack, logDebug, logError))
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset, someTill) import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset, someTill)
import Text.Megaparsec.Char (char, newline, space, string, tab) import Text.Megaparsec.Char (char, lowerChar, newline, numberChar, space, string, tab, upperChar)
import Utilities.Parsing import Utilities.Parsing
(.>) :: (a -> b) -> (b -> c) -> a -> c (.>) :: (a -> b) -> (b -> c) -> a -> c
@ -39,15 +40,16 @@ blockElement accumulated_attributes =
*> (lookAhead (bullet *> taskMarker) *> taskListBlock accumulated_attributes) *> (lookAhead (bullet *> taskMarker) *> taskListBlock accumulated_attributes)
<|> listBlock accumulated_attributes, <|> listBlock accumulated_attributes,
lookAhead codeFence lookAhead codeFence
*> (lookAhead rawLang *> rawBlock accumulated_attributes) *> (lookAhead (codeFence *> rawLang) *> rawBlock accumulated_attributes)
<|> codeBlock accumulated_attributes, <|> codeBlock accumulated_attributes,
-- Why lookAhead when checking is fully equivalent to parsing -- Why lookAhead when checking is fully equivalent to parsing
try $ thematicBreak accumulated_attributes, try $ thematicBreak accumulated_attributes,
lookAhead (string ":::") *> containerBlock accumulated_attributes, lookAhead (string ":::") *> containerBlock accumulated_attributes,
lookAhead tableRow *> tableBlock accumulated_attributes, -- try used due to table having a non-trivial structure at the start
lookAhead (char '[') try tableBlock accumulated_attributes,
*> (lookAhead (char '^') *> footnoteDefinition accumulated_attributes) -- using try due to ambiguity between these and normal text until we've already done some amount of parsing
<|> referenceDef accumulated_attributes, try footnoteDefinition accumulated_attributes,
try referenceDef accumulated_attributes,
lookAhead (char '{') *> blockAttribute accumulated_attributes, lookAhead (char '{') *> blockAttribute accumulated_attributes,
paragraph accumulated_attributes paragraph accumulated_attributes
] ]
@ -61,13 +63,14 @@ header attrs = do
startOffset <- getOffset startOffset <- getOffset
raw <- manyTill anySingle $ lookAhead blockSeparator raw <- manyTill anySingle $ lookAhead blockSeparator
-- second pass for inline elements -- second pass for inline elements
case parse header' "" (fromText $ toText raw) of case parse (header' level) "" (fromText $ toText raw) of
Right ret -> pure ret Right ret -> pure ret
Left (ParseErrorBundle errs _) -> Left (ParseErrorBundle errs _) ->
let remap err = setErrorOffset (errorOffset err + startOffset) err in parseError $ remap $ NE.head errs let remap err = setErrorOffset (errorOffset err + startOffset) err in parseError $ remap $ NE.head errs
where where
header' = do header' level = do
pure $ Heading (error "todo") attrs text <- inlineContent
pure $ Heading (H {level, text}) attrs
blockQuote :: (Logger m, Characters s) => Attrs -> Parser s m Element blockQuote :: (Logger m, Characters s) => Attrs -> Parser s m Element
blockQuote attrs = do blockQuote attrs = do
@ -91,12 +94,13 @@ blockQuote attrs = do
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
listBlock attrs = do listBlock attrs = do
error "this is probably fundamentally wrong and needs to be redone to work"
startOffset <- getOffset startOffset <- getOffset
list_type <- listMarker list_type <- listMarker
first_item <- list_item_content first_item <- list_item_content
rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator
let items = first_item : rem_items let items = first_item : rem_items
case parse listBlock' "" $ error "todo: fiture out how to take our list of items and put them toegether for easy parsing" of case parse listBlock' "" $ error "todo: figure out how to take our list of items and put them toegether for easy parsing" of
Right ret -> pure ret Right ret -> pure ret
Left (ParseErrorBundle errs _) -> do Left (ParseErrorBundle errs _) -> do
logError "Error in blockQuote offset may be off" logError "Error in blockQuote offset may be off"
@ -108,6 +112,7 @@ listBlock attrs = do
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
taskListBlock attrs = do taskListBlock attrs = do
error "this is also probably fundamentally wrong"
startOffset <- getOffset startOffset <- getOffset
bullet bullet
first_item <- task_item first_item <- task_item
@ -116,14 +121,44 @@ taskListBlock attrs = do
where where
task_item = error "todo" task_item = error "todo"
lowerAlphabet :: [Char]
lowerAlphabet = ['a' .. 'z']
upperAlphabet :: [Char]
upperAlphabet = ['A' .. 'Z']
-- consumes whitespace as well for convenience in element parser -- consumes whitespace as well for convenience in element parser
listMarker :: (Logger m, Characters s) => Parser s m ListType listMarker :: (Logger m, Characters s) => Parser s m ListType
listMarker = choice [bullet, decimal, try letter, roman_numeral] <* space listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
where where
decimal = error "todo" decimal = do
letter = error "todo" num <- choice $ map ($ some numberChar) [try . surroundParen, try . rightParen, point]
let start_number = Just $ read num
pure $ Ordered {start_number, style = Just "1"}
lower_letter :: (Logger m, Characters s) => Parser s m ListType
lower_letter = do
letter <- choice $ map ($ lowerChar) [try . surroundParen, try . rightParen, point]
let start_number = elemIndex letter lowerAlphabet
pure $ Ordered {start_number, style = Just "a"}
upper_letter :: (Logger m, Characters s) => Parser s m ListType
upper_letter = do
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
let start_number = elemIndex letter upperAlphabet
pure $ Ordered {start_number, style = Just "A"}
roman_numeral = error "todo" roman_numeral = error "todo"
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
surroundParen parser = do
char '('
ret <- parser
char ')'
space
pure ret
rightParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
rightParen parser = parser <* char ')' <* space
point :: (Logger m, Characters s) => Parser s m a -> Parser s m a
point parser = parser <* char '.' <* space
bullet :: (Logger m, Characters s) => Parser s m ListType bullet :: (Logger m, Characters s) => Parser s m ListType
bullet = error "todo" bullet = error "todo"
@ -225,14 +260,16 @@ containerBlock' n attrs = do
tableBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element tableBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
tableBlock attrs = do tableBlock attrs = do
prefix <- tablePrefix prefix <- tablePrefix
rem_rows <- manyTill tableRow blockSeparator rem_rows <- manyTill tableRow $ lookAhead blockSeparator
case prefix of case prefix of
Just (header, separator) -> pure $ Table (T {tableCaption = Nothing, tableHead = Just header, tableBody = rem_rows, columnAlignments = Just separator}) attrs Just (header, separator) -> pure $ Table (T {tableCaption = Nothing, tableHead = Just header, tableBody = rem_rows, columnAlignments = Just separator}) attrs
Nothing -> pure $ Table (T {tableCaption = Nothing, columnAlignments = Nothing, tableHead = Nothing, tableBody = rem_rows}) attrs Nothing -> pure $ Table (T {tableCaption = Nothing, columnAlignments = Nothing, tableHead = Nothing, tableBody = rem_rows}) attrs
where where
tablePrefix = error "todo" :: Parser s m (Maybe (TableRow, [Alignment]))
tableSeparatorRow = error "todo" :: Parser s m [Alignment] tableSeparatorRow = error "todo" :: Parser s m [Alignment]
tablePrefix :: (Logger m, Characters s) => Parser s m (Maybe (TableRow, [Alignment]))
tablePrefix = error "todo"
tableRow :: (Logger m, Characters s) => Parser s m TableRow tableRow :: (Logger m, Characters s) => Parser s m TableRow
tableRow = do tableRow = do
char '|' char '|'
@ -255,4 +292,22 @@ footnoteDefinition attrs = do
footnoteElement = tab *> footnoteElement' footnoteElement = tab *> footnoteElement'
paragraph :: (Logger m, Characters s) => Attrs -> Parser s m Element paragraph :: (Logger m, Characters s) => Attrs -> Parser s m Element
paragraph = error "todo" paragraph attrs = do
content <- inlineContent
pure $ Paragraph (P content) attrs
data OpenInline = SquareBracket | CurlyBracket | Paren | Underscore | Asterisk | Backtick Int | Insert | Delete | Highlight | Superscript | Subscript | AngleBracket deriving (Show)
closingInline :: (Logger m, Characters s) => [OpenInline] -> Parser s m OpenInline
closingInline = error "todo"
inlineContent :: (Logger m, Characters s) => Parser s m [InlineText]
inlineContent = inlineContent' []
inlineContent' :: (Logger m, Characters s) => [OpenInline] -> Parser s m [InlineText]
inlineContent' opened = someTill (inlineElement opened) $ lookAhead blockSeparator
inlineElement :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
inlineElement opened =
choice
[lookAhead (string "![") *> image, lookAhead]