fixed HTML block case, and greedy plain inline text

This commit is contained in:
Pagwin 2025-12-13 16:51:40 -05:00
parent 132496cdca
commit 4b40fa19ad
No known key found for this signature in database
GPG key ID: 81137023740CA260
2 changed files with 41 additions and 2 deletions

View file

@ -122,7 +122,14 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow
char '>'
pure $ mconcat ["<", inner, ">"]
plain_text disallow = Text . T.pack <$> (many ((notFollowedBy disallow) *> anySingle))
plain_text :: Parser s m () -> Parser s m InlineText
plain_text disallow = do
first <- optional $ ((notFollowedBy disallow) *> anySingle)
rem <- many ((notFollowedBy (disallow <|> (void $ choice $ (map char "*[~")))) *> anySingle)
pure $ Text $ T.pack $ case first of
Nothing -> []
Just c -> (c : rem)
headingBlock :: (Logger m, Characters s) => Parser s m Element
headingBlock = do
@ -203,6 +210,9 @@ htmlBlock = do
-- technically not standard markdown but I don't want to write a full HTML parser in my
inside <- many (notFollowedBy ((chunk $ "</" <> tagName <> ">") <|> chunk "</>") *> anySingle)
end <- toText <$> ((chunk $ "</" <> tagName <> ">") <|> chunk "</>")
-- if a blockEnding after some whitespace isn't next when we should parse this as inline text/paragraph
many ((notFollowedBy lineEnding) *> spaceChar)
lookAhead blockEnding
pure $ HTML $ HTMLTag $ T.concat ["<", toText tagName, fromMaybe "" attrs, ">", T.pack inside, if end == "</>" then "" else end]
where
tagNameEnd = (lookAhead spaceChar <* space) <|> char '>'

View file

@ -29,6 +29,7 @@ main = do
Group
"Parse Tests"
[ ("all_compile", all_compiles),
("block_html_compile_edgecase", block_html_compile_edgecase),
("header_and_paragraph", header_and_paragraph),
("paragraph_and_header_and_paragraph", paragraph_and_header_and_paragraph),
("bold_and_header_and_paragraph", bold_and_header_and_paragraph),
@ -41,7 +42,8 @@ main = do
("multiple_ordered_lists", multiple_ordered_lists),
("header_then_ordered_list", header_then_ordered_list),
("simple_nested_ordered_list", simple_nested_ordered_list),
("nested_unordered_list", nested_unordered_list)
("nested_unordered_list", nested_unordered_list),
("greedy_plain_text", greedy_plain_text)
-- ("",),
]
if cond
@ -60,6 +62,18 @@ all_compiles = property $ do
(Just (Right _)) -> success
(Just (Left e)) -> fail $ errorBundlePretty e
block_html_compile_edgecase :: Property
block_html_compile_edgecase = property $ do
let gen = forAll $ Gen.text (Range.linear 0 10) Gen.alphaNum
tagName <- gen
misc1 <- gen
misc2 <- gen
parsed <- generic_parse $ (T.concat ["<", tagName, ">", misc1, "</", tagName, "> ", misc2])
case parsed of
Nothing -> fail $ "Hit Timeout"
(Just (Right _)) -> success
(Just (Left e)) -> fail $ errorBundlePretty e
header_and_paragraph :: Property
header_and_paragraph = property $ do
header_text <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha
@ -307,3 +321,18 @@ header_then_ordered_list = property $ do
) -> success
(Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
greedy_plain_text :: Property
greedy_plain_text = property $ do
let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alphaNum
pretext <- text_gen
shown <- text_gen
link <- text_gen
parsed <- generic_parse $ T.concat [pretext, "[", shown, "]", "(", link, ")"]
case parsed of
Nothing -> fail $ "Hit Timeout"
(Just (Right (Doc [Paragraph (P [Text (pretext), Link {linkText = [Text shown], url = link, title = Nothing}])]))) -> success
(Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e