From 6dfccf331deb4560b3812bbbcb3a0cbaef87f9ca Mon Sep 17 00:00:00 2001 From: Pagwin Date: Wed, 26 Nov 2025 21:00:49 -0500 Subject: [PATCH] fixed block quotes bug and implemented tests for remaining issues --- app/Markdown.hs | 11 ++-- app/Tests/Markdown/Parse.hs | 128 +++++++++++++++++++++++++++--------- 2 files changed, 102 insertions(+), 37 deletions(-) diff --git a/app/Markdown.hs b/app/Markdown.hs index f296607..8d8e837 100644 --- a/app/Markdown.hs +++ b/app/Markdown.hs @@ -109,16 +109,17 @@ fencedCodeBlock = do logDebug "langInfo" lineEnding' logDebug "lineEnding" - codeLines <- manyTill codeLine (try $ string fence) + codeLines <- manyTill (codeLine fence) (try $ string fence) logDebug "lines" pure $ Code $ C lang (T.pack $ unlines codeLines) languageInfo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Text languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.') -codeLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m String -codeLine = do - line <- many $ noneOf "\n\r" +codeLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => (Tokens s) -> ParserTG s m String +codeLine fence = do + -- this is a hack which can only haunt me if I continue using markdown + line <- many $ (notFollowedBy $ string fence) *> noneOf "\n\r" lineEnding' pure line @@ -143,7 +144,7 @@ blockquoteBlock = do blockquoteLine = do char '>' optional (char ' ') - content <- manyTill inlineElement (try lineEnding) + content <- many $ notFollowedBy lineEnding' *> inlineElement pure content -- Horizontal Rule Block diff --git a/app/Tests/Markdown/Parse.hs b/app/Tests/Markdown/Parse.hs index 3b97f74..4461737 100644 --- a/app/Tests/Markdown/Parse.hs +++ b/app/Tests/Markdown/Parse.hs @@ -2,9 +2,11 @@ module Main where +import Control.Exception (evaluate) import Control.Monad.Trans.Class (MonadTrans (lift)) import Data.Either (isRight) import Data.Functor.Identity (Identity (Identity)) +import Data.String (IsString) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO @@ -17,6 +19,7 @@ import IR import Markdown import qualified Markdown import System.Exit (exitFailure, exitSuccess) +import System.Timeout (timeout) import Text.Megaparsec main :: IO () @@ -29,20 +32,28 @@ main = do ("header_and_paragraph", header_and_paragraph), ("paragraph_and_header_and_paragraph", paragraph_and_header_and_paragraph), ("bold_and_header_and_paragraph", bold_and_header_and_paragraph), - ("code_block", code_block) + ("code_block", code_block), + ("code_block_hanging", code_block_hanging), + ("two_blockquotes", two_blockquotes), + ("unordered_list", unordered_list), + ("header_after_unordered_list", header_after_unordered_list) ] if cond then exitSuccess else exitFailure +-- timeout of 1 second, all of these tests should be completely clear of that, if they run longer they should fail +generic_parse inp = lift $ timeout 1000000 $ evaluate $ parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" inp + all_compiles :: Property all_compiles = property $ do xs <- forAll $ Gen.text (Range.linear 0 10) Gen.ascii annotate $ T.unpack xs - let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" xs + parsed <- generic_parse xs case parsed of - Right _ -> success - Left e -> fail $ errorBundlePretty e + Nothing -> fail $ "Hit Timeout" + (Just (Right _)) -> success + (Just (Left e)) -> fail $ errorBundlePretty e header_and_paragraph :: Property header_and_paragraph = property $ do @@ -52,12 +63,13 @@ header_and_paragraph = property $ do let input = (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph_text - let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input + parsed <- generic_parse input case parsed of - Right (Doc [Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]) -> success - Right tree -> fail $ "Incorrect syntax tree: " <> show tree - Left e -> fail $ errorBundlePretty e + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e paragraph_and_header_and_paragraph :: Property paragraph_and_header_and_paragraph = property $ do @@ -68,12 +80,13 @@ paragraph_and_header_and_paragraph = property $ do let input = paragraph1_text <> "\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph2_text - let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input + parsed <- generic_parse input case parsed of - Right (Doc [Paragraph (P ([Text paragarph1_text])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph2_text]))]) -> success - Right tree -> fail $ "Incorrect syntax tree: " <> show tree - Left e -> fail $ errorBundlePretty e + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [Paragraph (P ([Text paragarph1_text])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph2_text]))]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e bold_and_header_and_paragraph :: Property bold_and_header_and_paragraph = property $ do @@ -84,37 +97,88 @@ bold_and_header_and_paragraph = property $ do let input = "**" <> bold_text <> "**\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph_text - let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input + parsed <- generic_parse input case parsed of - Right (Doc [Paragraph (P ([Bold [Text bold_text]])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]) -> success - Right tree -> fail $ "Incorrect syntax tree: " <> show tree - Left e -> fail $ errorBundlePretty e + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [Paragraph (P ([Bold [Text bold_text]])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e code_block :: Property code_block = property $ do language <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha code <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha - let input = "```" <> language <> "\n" <> code <> "```" - - let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input + let input = "```" <> language <> "\n" <> code <> "\n```" + annotate $ "Input: " <> T.unpack input + parsed <- generic_parse input case parsed of - Right (Doc [Code (C {language, code})]) -> success - Right tree -> fail $ "Incorrect syntax tree: " <> show tree - Left e -> fail $ errorBundlePretty e + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [Code (C {language, code})]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e --- from new writing learning --- Error case, joins block quotes toegether --- > [*Sic*](https://www.merriam-webster.com/dictionary/sic) usually appears in parentheses or brackets, sometimes with the letters in italics. In this context it means “intentionally so written.” On its own, *sic* means “so” or “thus” and can be found in phrases such as *sic transit gloria mundi* ("so passes away the glory of the world") and *sic semper tyrannis* ("thus ever to tyrants," the motto of the state of Virginia). --- --- > What is denoted by *sic* is that the word or phrase that precedes it occurs in the original passage being quoted or name being used and was not introduced by the writer doing the quoting. --- +code_block_hanging :: Property +code_block_hanging = property $ do + language <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha + code <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha + let input = "```" <> language <> "\n" <> code <> "```" + annotate $ "Input: " <> T.unpack input + parsed <- generic_parse input --- from micro blogs --- Error case 2: made one bullet instead of two, happens for any number of items and probably applicable to numbered list as well --- - item 1 --- - item 2 + case parsed of + Nothing -> fail $ "Hit Timeout" + -- we're just testing for hanging + (Just (Right _)) -> success + (Just (Left e)) -> fail $ errorBundlePretty e + +two_blockquotes :: Property +two_blockquotes = property $ do + let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alpha + text_1 <- text_gen + text_2 <- text_gen + let input = "> " <> text_1 <> "\n\n> " <> text_2 + + parsed <- generic_parse input + + case parsed of + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [BlockQuote (Q [Text text_1]), BlockQuote (Q [Text text_2])]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e + +unordered_list :: Property +unordered_list = property $ do + let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alpha + text_1 <- text_gen + text_2 <- text_gen + let input = "- " <> text_1 <> "\n- " <> text_2 + + parsed <- generic_parse input + + case parsed of + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [List (L {list_type = Unordered, items = [LI {content = [Text text_1], children = []}, LI {content = [Text text_2], children = []}]})]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e + +header_after_unordered_list :: Property +header_after_unordered_list = property $ do + let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alpha + bullet_text <- text_gen + header_text <- text_gen + header_level <- forAll $ Gen.int (Range.linear 1 6) + + let input = "- " <> bullet_text <> "\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text + + parsed <- generic_parse input + + case parsed of + Nothing -> fail $ "Hit Timeout" + (Just (Right (Doc [List (L {list_type = Unordered, items = [LI {content = [Text bullet_text], children = []}]}), Heading (H {level = header_level, text = [Text header_text]})]))) -> success + (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree + (Just (Left e)) -> fail $ errorBundlePretty e -- From weekly notes, doing a header after a unordered list causes it to be seens as an inline element --