From 28712089bf3a7254625a370e5a604cbac8ac0328 Mon Sep 17 00:00:00 2001 From: Pagwin Date: Sat, 13 Dec 2025 16:51:40 -0500 Subject: [PATCH] fixed HTML block case, now I have a confusing link/inline text case which is partially fixed in this PR --- src/Markdown.hs | 12 +++++++++++- tests/Markdown/Parse.hs | 31 ++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/Markdown.hs b/src/Markdown.hs index 663d923..673f439 100644 --- a/src/Markdown.hs +++ b/src/Markdown.hs @@ -122,7 +122,14 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow char '>' pure $ mconcat ["<", inner, ">"] - plain_text disallow = Text . T.pack <$> (many ((notFollowedBy disallow) *> anySingle)) + plain_text :: Parser s m () -> Parser s m InlineText + plain_text disallow = do + first <- optional $ ((notFollowedBy disallow) *> anySingle) + rem <- many ((notFollowedBy (disallow <|> (void $ choice $ (map char "*[~")))) *> anySingle) + + pure $ Text $ T.pack $ case first of + Nothing -> [] + Just c -> (c : rem) headingBlock :: (Logger m, Characters s) => Parser s m Element headingBlock = do @@ -203,6 +210,9 @@ htmlBlock = do -- technically not standard markdown but I don't want to write a full HTML parser in my inside <- many (notFollowedBy ((chunk $ " tagName <> ">") <|> chunk "") *> anySingle) end <- toText <$> ((chunk $ " tagName <> ">") <|> chunk "") + -- if a blockEnding after some whitespace isn't next when we should parse this as inline text/paragraph + many ((notFollowedBy lineEnding) *> spaceChar) + lookAhead blockEnding pure $ HTML $ HTMLTag $ T.concat ["<", toText tagName, fromMaybe "" attrs, ">", T.pack inside, if end == "" then "" else end] where tagNameEnd = (lookAhead spaceChar <* space) <|> char '>' diff --git a/tests/Markdown/Parse.hs b/tests/Markdown/Parse.hs index 0c82131..fdb513e 100644 --- a/tests/Markdown/Parse.hs +++ b/tests/Markdown/Parse.hs @@ -29,6 +29,7 @@ main = do Group "Parse Tests" [ ("all_compile", all_compiles), + ("block_html_compile_edgecase", block_html_compile_edgecase), ("header_and_paragraph", header_and_paragraph), ("paragraph_and_header_and_paragraph", paragraph_and_header_and_paragraph), ("bold_and_header_and_paragraph", bold_and_header_and_paragraph), @@ -41,7 +42,8 @@ main = do ("multiple_ordered_lists", multiple_ordered_lists), ("header_then_ordered_list", header_then_ordered_list), ("simple_nested_ordered_list", simple_nested_ordered_list), - ("nested_unordered_list", nested_unordered_list) + ("nested_unordered_list", nested_unordered_list), + ("greedy_plain_text", greedy_plain_text) -- ("",), ] if cond @@ -60,6 +62,18 @@ all_compiles = property $ do (Just (Right _)) -> success (Just (Left e)) -> fail $ errorBundlePretty e +block_html_compile_edgecase :: Property +block_html_compile_edgecase = property $ do + let gen = forAll $ Gen.text (Range.linear 0 10) Gen.alphaNum + tagName <- gen + misc1 <- gen + misc2 <- gen + parsed <- generic_parse $ (T.concat ["<", tagName, ">", misc1, " ", misc2]) + case parsed of + Nothing -> fail $ "Hit Timeout" + (Just (Right _)) -> success + (Just (Left e)) -> fail $ errorBundlePretty e + header_and_paragraph :: Property header_and_paragraph = property $ do header_text <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha @@ -307,3 +321,18 @@ header_then_ordered_list = property $ do ) -> success (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree (Just (Left e)) -> fail $ errorBundlePretty e + +greedy_plain_text :: Property +greedy_plain_text = property $ do + let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alphaNum + pretext <- text_gen + shown <- text_gen + link <- text_gen + + parsed <- generic_parse $ T.concat [pretext, "[", shown, "]", "(", link, ")"] + + case parsed of + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [Paragraph (P [Text (pretext), Link {linkText = [Text shown], url = link, title = Nothing}])]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e