swapped to being generic over loggers and continuing to suffer from double newlines

This commit is contained in:
Pagwin 2025-11-20 23:17:12 -05:00
parent eef5ec8bc0
commit 36f2529aca
No known key found for this signature in database
GPG key ID: 81137023740CA260

View file

@ -22,45 +22,42 @@ type ParserT m = ParsecT Void String m
type Parser = ParserT Identity type Parser = ParserT Identity
log_ :: T.Text -> Parser () logP :: (Logger m, Show s) => ParserT m s -> ParserT m s
log_ = logDebug
logP :: (Show s) => Parser s -> Parser s
logP v = do logP v = do
underlying <- v underlying <- v
logDebug $ T.show underlying logDebug $ T.show underlying
v v
anyChar :: Parser Char anyChar :: ParserT m Char
anyChar = anySingle anyChar = anySingle
alphaNum :: Parser Char alphaNum :: ParserT m Char
alphaNum = alphaNumChar alphaNum = alphaNumChar
digit :: Parser Char digit :: ParserT m Char
digit = digitChar digit = digitChar
noneOf :: [Char] -> Parser Char noneOf :: [Char] -> ParserT m Char
noneOf = MP.noneOf noneOf = MP.noneOf
oneOf :: [Char] -> Parser Char oneOf :: [Char] -> ParserT m Char
oneOf = MP.oneOf oneOf = MP.oneOf
optionMaybe :: Parser a -> Parser (Maybe a) optionMaybe :: ParserT m a -> ParserT m (Maybe a)
optionMaybe = optional optionMaybe = optional
skipMany1 :: Parser a -> Parser () skipMany1 :: ParserT m a -> ParserT m ()
skipMany1 = skipSome skipMany1 = skipSome
metadata :: Parser Text metadata :: ParserT m Text
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> anySingleBut '-') <* bound metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> anySingleBut '-') <* bound
where where
bound = string "---" bound = string "---"
document :: Parser Document document :: (Logger m) => ParserT m Document
document = Doc <$> many element <* eof document = Doc <$> many element <* eof
element :: Parser Element element :: (Logger m) => ParserT m Element
element = element =
choice choice
[ try headingBlock <?> "Element Heading", [ try headingBlock <?> "Element Heading",
@ -76,30 +73,29 @@ element =
] ]
-- Blank lines (consumed but not stored) -- Blank lines (consumed but not stored)
blankLines :: Parser Element blankLines :: (Logger m) => ParserT m Element
blankLines = do blankLines = do
skipMany1 (blankLine *> notFollowedBy eof) skipMany1 (blankLine *> notFollowedBy eof)
element <|> fmap (const $ HTML HTMLTag {html_content = ""}) eof -- Parse the next element (or handle eof) element <|> fmap (const $ HTML HTMLTag {html_content = ""}) eof -- Parse the next element (or handle eof)
blankLine :: Parser () blankLine :: (Logger m) => ParserT m ()
blankLine = do blankLine = do
many (char ' ' <|> char '\t') many (char ' ' <|> char '\t')
lineEnding lineEnding
pure () pure ()
-- Heading Block -- Heading Block
headingBlock :: Parser Element headingBlock :: (Logger m) => ParserT m Element
headingBlock = do headingBlock = do
hashes <- some (char '#') <?> "Heading Hashes" hashes <- some (char '#') <?> "Heading Hashes"
let level = length hashes let level = length hashes
guard (level <= 6) <?> "Higher than level 6" guard (level <= 6) <?> "Higher than level 6"
many (char ' ' <|> char '\t') <?> "Pre-Text Whitespace" many (char ' ' <|> char '\t') <?> "Pre-Text Whitespace"
log_ "heading content start" content <- manyTill (inlineElement <?> "Header Text") (try lineEnding <?> "Header Ending")
content <- manyTill (inlineElement <?> "Header Text") (log_ "attempt" *> try lineEnding <?> "Header Ending")
pure $ Heading $ H level content pure $ Heading $ H level content
-- Fenced Code Block -- Fenced Code Block
fencedCodeBlock :: Parser Element fencedCodeBlock :: (Logger m) => ParserT m Element
fencedCodeBlock = do fencedCodeBlock = do
fence <- string "```" <|> string "~~~" fence <- string "```" <|> string "~~~"
lang <- optionMaybe languageInfo lang <- optionMaybe languageInfo
@ -107,17 +103,17 @@ fencedCodeBlock = do
codeLines <- manyTill codeLine (try $ string fence) codeLines <- manyTill codeLine (try $ string fence)
pure $ Code $ C lang (T.pack $ unlines codeLines) pure $ Code $ C lang (T.pack $ unlines codeLines)
languageInfo :: Parser Text languageInfo :: (Logger m) => ParserT m Text
languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.') languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.')
codeLine :: Parser String codeLine :: (Logger m) => ParserT m String
codeLine = do codeLine = do
line <- many $ noneOf "\n\r" line <- many $ noneOf "\n\r"
lineEnding lineEnding
pure line pure line
-- Indented Code Block -- Indented Code Block
indentedCodeBlock :: Parser Element indentedCodeBlock :: (Logger m) => ParserT m Element
indentedCodeBlock = do indentedCodeBlock = do
lines' <- some indentedLine lines' <- some indentedLine
pure $ Code $ C Nothing (T.pack $ unlines lines') pure $ Code $ C Nothing (T.pack $ unlines lines')
@ -129,7 +125,7 @@ indentedCodeBlock = do
pure line pure line
-- Blockquote Block -- Blockquote Block
blockquoteBlock :: Parser Element blockquoteBlock :: (Logger m) => ParserT m Element
blockquoteBlock = do blockquoteBlock = do
lines' <- some blockquoteLine lines' <- some blockquoteLine
pure $ BlockQuote $ Q (concat lines') pure $ BlockQuote $ Q (concat lines')
@ -141,7 +137,7 @@ blockquoteBlock = do
pure content pure content
-- Horizontal Rule Block -- Horizontal Rule Block
horizontalRuleBlock :: Parser Element horizontalRuleBlock :: (Logger m) => ParserT m Element
horizontalRuleBlock = do horizontalRuleBlock = do
choice choice
[ try (count 3 (char '*') >> many (char ' ' <|> char '*')), [ try (count 3 (char '*') >> many (char ' ' <|> char '*')),
@ -152,12 +148,12 @@ horizontalRuleBlock = do
pure HorizontalRule pure HorizontalRule
-- Unordered List Block -- Unordered List Block
unorderedListBlock :: Parser Element unorderedListBlock :: (Logger m) => ParserT m Element
unorderedListBlock = do unorderedListBlock = do
items <- some unorderedListItem items <- some unorderedListItem
pure $ List $ L Unordered items pure $ List $ L Unordered items
unorderedListItem :: Parser ListItem unorderedListItem :: (Logger m) => ParserT m ListItem
unorderedListItem = do unorderedListItem = do
oneOf "*-+" oneOf "*-+"
char ' ' <|> char '\t' char ' ' <|> char '\t'
@ -166,7 +162,7 @@ unorderedListItem = do
children <- many (try indentedList) children <- many (try indentedList)
pure $ LI content children pure $ LI content children
listContinuation :: Parser [InlineText] listContinuation :: (Logger m) => ParserT m [InlineText]
listContinuation = do listContinuation = do
count 2 (char ' ' <|> char '\t') count 2 (char ' ' <|> char '\t')
many (char ' ' <|> char '\t') many (char ' ' <|> char '\t')
@ -176,35 +172,35 @@ listContinuation = do
pure content pure content
-- TODO: handle list indentation at all levels -- TODO: handle list indentation at all levels
indentedList :: Parser List indentedList :: (Logger m) => ParserT m List
indentedList = do indentedList = do
let n = 1 let n = 1
void $ count (4 * n) (char ' ') <|> count n (char '\t') void $ count (4 * n) (char ' ') <|> count n (char '\t')
choice [try indentedUnorderedList, indentedOrderedList] choice [try indentedUnorderedList, indentedOrderedList]
indentedUnorderedList :: Parser List indentedUnorderedList :: (Logger m) => ParserT m List
indentedUnorderedList = do indentedUnorderedList = do
items <- some (try $ indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t'))) items <- some (try $ indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t')))
pure $ L Unordered items pure $ L Unordered items
indentedOrderedList :: Parser List indentedOrderedList :: (Logger m) => ParserT m List
indentedOrderedList = do indentedOrderedList = do
items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t'))) items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t')))
pure $ L Ordered items pure $ L Ordered items
indentedListItem :: Parser () -> Parser ListItem indentedListItem :: (Logger m) => ParserT m () -> ParserT m ListItem
indentedListItem marker = do indentedListItem marker = do
marker marker
content <- manyTill inlineElement (try $ lineEnding <|> eof) content <- manyTill inlineElement (try $ lineEnding <|> eof)
pure $ LI content [] pure $ LI content []
-- Ordered List Block -- Ordered List Block
orderedListBlock :: Parser Element orderedListBlock :: (Logger m) => ParserT m Element
orderedListBlock = do orderedListBlock = do
items <- some orderedListItem items <- some orderedListItem
pure $ List $ L Ordered items pure $ List $ L Ordered items
orderedListItem :: Parser ListItem orderedListItem :: (Logger m) => ParserT m ListItem
orderedListItem = do orderedListItem = do
some digit some digit
char '.' char '.'
@ -215,7 +211,7 @@ orderedListItem = do
pure $ LI (content ++ concat continuations) children pure $ LI (content ++ concat continuations) children
-- HTML Block -- HTML Block
htmlBlock :: Parser Element htmlBlock :: (Logger m) => ParserT m Element
htmlBlock = do htmlBlock = do
char '<' char '<'
-- Capture the entire HTML block as raw text -- Capture the entire HTML block as raw text
@ -223,25 +219,25 @@ htmlBlock = do
let content = '<' : rest let content = '<' : rest
return $ HTML $ HTMLTag (T.pack content) return $ HTML $ HTMLTag (T.pack content)
tagName :: Parser String tagName :: (Logger m) => ParserT m String
tagName = do tagName = do
first <- satisfy isAlpha first <- satisfy isAlpha
rest <- many (alphaNum <|> char '-' <|> char ':') rest <- many (alphaNum <|> char '-' <|> char ':')
pure (first : rest) pure (first : rest)
attribute :: Parser (Text, Maybe Text) attribute :: (Logger m) => ParserT m (Text, Maybe Text)
attribute = do attribute = do
name <- attributeName name <- attributeName
value <- optionMaybe (char '=' >> attributeValue) value <- optionMaybe (char '=' >> attributeValue)
pure (T.pack name, fmap T.pack value) pure (T.pack name, fmap T.pack value)
attributeName :: Parser String attributeName :: (Logger m) => ParserT m String
attributeName = do attributeName = do
first <- satisfy isAlpha first <- satisfy isAlpha
rest <- many (alphaNum <|> char '-' <|> char ':') rest <- many (alphaNum <|> char '-' <|> char ':')
pure (first : rest) pure (first : rest)
attributeValue :: Parser String attributeValue :: (Logger m) => ParserT m String
attributeValue = attributeValue =
choice choice
[ between (char '"') (char '"') (many $ anySingleBut '"'), [ between (char '"') (char '"') (many $ anySingleBut '"'),
@ -250,47 +246,46 @@ attributeValue =
] ]
-- Paragraph Block -- Paragraph Block
paragraphBlock :: Parser Element paragraphBlock :: (Logger m) => ParserT m Element
paragraphBlock = do paragraphBlock = do
content <- some inlineElement content <- some inlineElement
lineEnding <|> eof lineEnding <|> eof
pure $ Paragraph $ P content pure $ Paragraph $ P content
-- Inline Elements -- Inline Elements
inlineElement :: Parser InlineText inlineElement :: (Logger m) => ParserT m InlineText
inlineElement = inlineElement =
log_ "inline element call" choice
*> choice [ try strong <?> "Inline Strong Text",
[ try strong <?> "Inline Strong Text", try emphasis <?> "Inline Italic Text",
try emphasis <?> "Inline Italic Text", try crossedText <?> "Inline Crossed Text",
try crossedText <?> "Inline Crossed Text", try codeSpan <?> "Inline Code",
try codeSpan <?> "Inline Code", try image <?> "Inline Image",
try image <?> "Inline Image", try link <?> "Inline Link",
try link <?> "Inline Link", try htmlInline <?> "Inline HTML",
try htmlInline <?> "Inline HTML", try escapedChar <?> "Escaped Character",
try escapedChar <?> "Escaped Character", plainText <?> "Inline Plain Text"
plainText <?> "Inline Plain Text" ]
]
-- Strong (Bold) -- Strong (Bold)
strong :: Parser InlineText strong :: (Logger m) => ParserT m InlineText
strong = strongAsterisk <|> strongUnderscore strong = strongAsterisk <|> strongUnderscore
strongAsterisk :: Parser InlineText strongAsterisk :: (Logger m) => ParserT m InlineText
strongAsterisk = do strongAsterisk = do
string "**" string "**"
content <- some (notFollowedBy (string "**") >> inlineElement) content <- some (notFollowedBy (string "**") >> inlineElement)
string "**" string "**"
pure $ Bold content pure $ Bold content
strongUnderscore :: Parser InlineText strongUnderscore :: (Logger m) => ParserT m InlineText
strongUnderscore = do strongUnderscore = do
string "__" string "__"
content <- some (notFollowedBy (string "__") >> inlineElement) content <- some (notFollowedBy (string "__") >> inlineElement)
string "__" string "__"
pure $ Bold content pure $ Bold content
crossedText :: Parser InlineText crossedText :: (Logger m) => ParserT m InlineText
crossedText = do crossedText = do
string "~~" string "~~"
content <- some (notFollowedBy (string "~~") >> inlineElement) content <- some (notFollowedBy (string "~~") >> inlineElement)
@ -298,24 +293,24 @@ crossedText = do
pure $ Crossed content pure $ Crossed content
-- Emphasis (Italic) -- Emphasis (Italic)
emphasis :: Parser InlineText emphasis :: (Logger m) => ParserT m InlineText
emphasis = emphasisAsterisk <|> emphasisUnderscore emphasis = emphasisAsterisk <|> emphasisUnderscore
emphasisAsterisk :: Parser InlineText emphasisAsterisk :: (Logger m) => ParserT m InlineText
emphasisAsterisk = do emphasisAsterisk = do
char '*' char '*'
content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk) content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk)
char '*' char '*'
pure $ Italic content pure $ Italic content
emphasisUnderscore :: Parser InlineText emphasisUnderscore :: (Logger m) => ParserT m InlineText
emphasisUnderscore = do emphasisUnderscore = do
char '_' char '_'
content <- some inlineElementNoUnderscore content <- some inlineElementNoUnderscore
char '_' char '_'
pure $ Italic content pure $ Italic content
inlineElementNo :: Char -> Parser InlineText inlineElementNo :: (Logger m) => Char -> ParserT m InlineText
inlineElementNo c = inlineElementNo c =
choice choice
[ try strong, [ try strong,
@ -327,25 +322,26 @@ inlineElementNo c =
plainTextNo [c] plainTextNo [c]
] ]
plainTextNo :: [Char] -> Parser InlineText plainTextNo :: (Logger m) => [Char] -> ParserT m InlineText
plainTextNo disallow = do plainTextNo = plainTextNo' False
log_ "a"
firstChar <- noneOf disallow <?> "Plain Text Initial Disallow" plainTextNo' :: (Logger m) => Bool -> [Char] -> ParserT m InlineText
log_ "b" plainTextNo' block_whitespace disallow = do
firstChar <- noneOf (disallow <> if block_whitespace then " \t\r\n" else []) <?> "Plain Text Initial Disallow"
remChars <- many $ notFollowedBy lineEnding *> plainTextCharNo disallow remChars <- many $ notFollowedBy lineEnding *> plainTextCharNo disallow
pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars
where where
wspHandler '\n' = ' ' wspHandler '\n' = ' '
wspHandler c = c wspHandler c = c
inlineElementNoAsterisk :: Parser InlineText inlineElementNoAsterisk :: (Logger m) => ParserT m InlineText
inlineElementNoAsterisk = inlineElementNo '*' inlineElementNoAsterisk = inlineElementNo '*'
inlineElementNoUnderscore :: Parser InlineText inlineElementNoUnderscore :: (Logger m) => ParserT m InlineText
inlineElementNoUnderscore = inlineElementNo '_' inlineElementNoUnderscore = inlineElementNo '_'
-- Code Span -- Code Span
codeSpan :: Parser InlineText codeSpan :: (Logger m) => ParserT m InlineText
codeSpan = codeSpan =
choice choice
[ try tripleBacktick, [ try tripleBacktick,
@ -368,7 +364,7 @@ codeSpan =
pure $ InlineCode (T.pack content) pure $ InlineCode (T.pack content)
-- Image -- Image
image :: Parser InlineText image :: (Logger m) => ParserT m InlineText
image = do image = do
char '!' char '!'
char '[' char '['
@ -378,7 +374,7 @@ image = do
return $ Image {altText = alt, url = url, title = title} return $ Image {altText = alt, url = url, title = title}
-- Link -- Link
link :: Parser InlineText link :: (Logger m) => ParserT m InlineText
link = do link = do
char '[' char '['
content <- some (notFollowedBy (char ']') >> inlineElementNoBracket) content <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
@ -386,7 +382,7 @@ link = do
(url, title) <- linkDestination (url, title) <- linkDestination
pure $ Link content url title pure $ Link content url title
inlineElementNoBracket :: Parser InlineText inlineElementNoBracket :: (Logger m) => ParserT m InlineText
inlineElementNoBracket = inlineElementNoBracket =
choice choice
[ try strong, [ try strong,
@ -397,7 +393,7 @@ inlineElementNoBracket =
plainTextNoBracket plainTextNoBracket
] ]
linkDestination :: Parser (Text, Maybe Text) linkDestination :: (Logger m) => ParserT m (Text, Maybe Text)
linkDestination = directLink <|> referenceLink linkDestination = directLink <|> referenceLink
where where
directLink = do directLink = do
@ -414,7 +410,7 @@ linkDestination = directLink <|> referenceLink
-- In a real implementation, you'd look up the reference -- In a real implementation, you'd look up the reference
pure (T.pack $ "[" ++ ref ++ "]", Nothing) pure (T.pack $ "[" ++ ref ++ "]", Nothing)
titleParser :: Parser Text titleParser :: (Logger m) => ParserT m Text
titleParser = titleParser =
T.pack T.pack
<$> choice <$> choice
@ -424,41 +420,41 @@ titleParser =
] ]
-- HTML Inline -- HTML Inline
htmlInline :: Parser InlineText htmlInline :: (Logger m) => ParserT m InlineText
htmlInline = do htmlInline = do
start <- char '<' start <- char '<'
content <- manyTill anyChar (try $ char '>') content <- manyTill anyChar (try $ char '>')
return $ HTMLInline (T.pack (start : content ++ ">")) return $ HTMLInline (T.pack (start : content ++ ">"))
-- Escaped Character -- Escaped Character
escapedChar :: Parser InlineText escapedChar :: (Logger m) => ParserT m InlineText
escapedChar = do escapedChar = do
char '\\' char '\\'
c <- satisfy (\x -> x >= '!' && x <= '~') c <- satisfy (\x -> x >= '!' && x <= '~')
pure $ Text (T.singleton c) pure $ Text (T.singleton c)
-- Plain Text -- Plain Text
plainText :: Parser InlineText plainText :: (Logger m) => ParserT m InlineText
plainText = plainTextNo [] <?> "Baseline Plain Text" plainText = plainTextNo' True [] <?> "Baseline Plain Text"
plainTextBaseDisallow :: [Char] plainTextBaseDisallow :: [Char]
plainTextBaseDisallow = "[~`_*<" plainTextBaseDisallow = "[~`_*<"
plainTextCharNo :: [Char] -> Parser Char plainTextCharNo :: (Logger m) => [Char] -> ParserT m Char
plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow
plainTextNoAsterisk :: Parser InlineText plainTextNoAsterisk :: (Logger m) => ParserT m InlineText
plainTextNoAsterisk = plainTextNo "*" plainTextNoAsterisk = plainTextNo "*"
plainTextNoUnderscore :: Parser InlineText plainTextNoUnderscore :: (Logger m) => ParserT m InlineText
plainTextNoUnderscore = plainTextNo "_" plainTextNoUnderscore = plainTextNo "_"
plainTextNoBracket :: Parser InlineText plainTextNoBracket :: (Logger m) => ParserT m InlineText
plainTextNoBracket = plainTextNo "[]" plainTextNoBracket = plainTextNo "[]"
-- Helper Parsers -- Helper Parsers
lineEnding :: Parser () lineEnding :: (Logger m) => ParserT m ()
lineEnding = void (try $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r")) <|> eof lineEnding = void (try $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r")) <|> eof
wsParser :: Parser () wsParser :: (Logger m) => ParserT m ()
wsParser = void $ some (char ' ' <|> char '\t') wsParser = void $ some (char ' ' <|> char '\t')