fixed block quotes bug and implemented tests for remaining issues

This commit is contained in:
Pagwin 2025-11-26 21:00:49 -05:00
parent c1acbe15f1
commit 6dfccf331d
No known key found for this signature in database
GPG key ID: 81137023740CA260
2 changed files with 102 additions and 37 deletions

View file

@ -109,16 +109,17 @@ fencedCodeBlock = do
logDebug "langInfo" logDebug "langInfo"
lineEnding' lineEnding'
logDebug "lineEnding" logDebug "lineEnding"
codeLines <- manyTill codeLine (try $ string fence) codeLines <- manyTill (codeLine fence) (try $ string fence)
logDebug "lines" logDebug "lines"
pure $ Code $ C lang (T.pack $ unlines codeLines) pure $ Code $ C lang (T.pack $ unlines codeLines)
languageInfo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Text languageInfo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Text
languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.') languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.')
codeLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m String codeLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => (Tokens s) -> ParserTG s m String
codeLine = do codeLine fence = do
line <- many $ noneOf "\n\r" -- this is a hack which can only haunt me if I continue using markdown
line <- many $ (notFollowedBy $ string fence) *> noneOf "\n\r"
lineEnding' lineEnding'
pure line pure line
@ -143,7 +144,7 @@ blockquoteBlock = do
blockquoteLine = do blockquoteLine = do
char '>' char '>'
optional (char ' ') optional (char ' ')
content <- manyTill inlineElement (try lineEnding) content <- many $ notFollowedBy lineEnding' *> inlineElement
pure content pure content
-- Horizontal Rule Block -- Horizontal Rule Block

View file

@ -2,9 +2,11 @@
module Main where module Main where
import Control.Exception (evaluate)
import Control.Monad.Trans.Class (MonadTrans (lift)) import Control.Monad.Trans.Class (MonadTrans (lift))
import Data.Either (isRight) import Data.Either (isRight)
import Data.Functor.Identity (Identity (Identity)) import Data.Functor.Identity (Identity (Identity))
import Data.String (IsString)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.IO as TIO import qualified Data.Text.IO as TIO
@ -17,6 +19,7 @@ import IR
import Markdown import Markdown
import qualified Markdown import qualified Markdown
import System.Exit (exitFailure, exitSuccess) import System.Exit (exitFailure, exitSuccess)
import System.Timeout (timeout)
import Text.Megaparsec import Text.Megaparsec
main :: IO () main :: IO ()
@ -29,20 +32,28 @@ main = do
("header_and_paragraph", header_and_paragraph), ("header_and_paragraph", header_and_paragraph),
("paragraph_and_header_and_paragraph", paragraph_and_header_and_paragraph), ("paragraph_and_header_and_paragraph", paragraph_and_header_and_paragraph),
("bold_and_header_and_paragraph", bold_and_header_and_paragraph), ("bold_and_header_and_paragraph", bold_and_header_and_paragraph),
("code_block", code_block) ("code_block", code_block),
("code_block_hanging", code_block_hanging),
("two_blockquotes", two_blockquotes),
("unordered_list", unordered_list),
("header_after_unordered_list", header_after_unordered_list)
] ]
if cond if cond
then exitSuccess then exitSuccess
else exitFailure else exitFailure
-- timeout of 1 second, all of these tests should be completely clear of that, if they run longer they should fail
generic_parse inp = lift $ timeout 1000000 $ evaluate $ parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" inp
all_compiles :: Property all_compiles :: Property
all_compiles = property $ do all_compiles = property $ do
xs <- forAll $ Gen.text (Range.linear 0 10) Gen.ascii xs <- forAll $ Gen.text (Range.linear 0 10) Gen.ascii
annotate $ T.unpack xs annotate $ T.unpack xs
let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" xs parsed <- generic_parse xs
case parsed of case parsed of
Right _ -> success Nothing -> fail $ "Hit Timeout"
Left e -> fail $ errorBundlePretty e (Just (Right _)) -> success
(Just (Left e)) -> fail $ errorBundlePretty e
header_and_paragraph :: Property header_and_paragraph :: Property
header_and_paragraph = property $ do header_and_paragraph = property $ do
@ -52,12 +63,13 @@ header_and_paragraph = property $ do
let input = (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph_text let input = (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph_text
let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input parsed <- generic_parse input
case parsed of case parsed of
Right (Doc [Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]) -> success Nothing -> fail $ "Hit Timeout"
Right tree -> fail $ "Incorrect syntax tree: " <> show tree (Just (Right (Doc [Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]))) -> success
Left e -> fail $ errorBundlePretty e (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
paragraph_and_header_and_paragraph :: Property paragraph_and_header_and_paragraph :: Property
paragraph_and_header_and_paragraph = property $ do paragraph_and_header_and_paragraph = property $ do
@ -68,12 +80,13 @@ paragraph_and_header_and_paragraph = property $ do
let input = paragraph1_text <> "\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph2_text let input = paragraph1_text <> "\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph2_text
let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input parsed <- generic_parse input
case parsed of case parsed of
Right (Doc [Paragraph (P ([Text paragarph1_text])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph2_text]))]) -> success Nothing -> fail $ "Hit Timeout"
Right tree -> fail $ "Incorrect syntax tree: " <> show tree (Just (Right (Doc [Paragraph (P ([Text paragarph1_text])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph2_text]))]))) -> success
Left e -> fail $ errorBundlePretty e (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
bold_and_header_and_paragraph :: Property bold_and_header_and_paragraph :: Property
bold_and_header_and_paragraph = property $ do bold_and_header_and_paragraph = property $ do
@ -84,37 +97,88 @@ bold_and_header_and_paragraph = property $ do
let input = "**" <> bold_text <> "**\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph_text let input = "**" <> bold_text <> "**\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text <> "\n\n" <> paragraph_text
let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input parsed <- generic_parse input
case parsed of case parsed of
Right (Doc [Paragraph (P ([Bold [Text bold_text]])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]) -> success Nothing -> fail $ "Hit Timeout"
Right tree -> fail $ "Incorrect syntax tree: " <> show tree (Just (Right (Doc [Paragraph (P ([Bold [Text bold_text]])), Heading (H {level = header_level, text = [Text (header_text)]}), Paragraph (P ([Text paragraph_text]))]))) -> success
Left e -> fail $ errorBundlePretty e (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
code_block :: Property code_block :: Property
code_block = property $ do code_block = property $ do
language <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha language <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha
code <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha code <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha
let input = "```" <> language <> "\n" <> code <> "```" let input = "```" <> language <> "\n" <> code <> "\n```"
annotate $ "Input: " <> T.unpack input
let parsed = parse (Markdown.document :: ParsecT Void Text Identity IR.Document) "test_input" input parsed <- generic_parse input
case parsed of case parsed of
Right (Doc [Code (C {language, code})]) -> success Nothing -> fail $ "Hit Timeout"
Right tree -> fail $ "Incorrect syntax tree: " <> show tree (Just (Right (Doc [Code (C {language, code})]))) -> success
Left e -> fail $ errorBundlePretty e (Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
-- from new writing learning code_block_hanging :: Property
-- Error case, joins block quotes toegether code_block_hanging = property $ do
-- > [*Sic*](https://www.merriam-webster.com/dictionary/sic) usually appears in parentheses or brackets, sometimes with the letters in italics. In this context it means “intentionally so written.” On its own, *sic* means “so” or “thus” and can be found in phrases such as *sic transit gloria mundi* ("so passes away the glory of the world") and *sic semper tyrannis* ("thus ever to tyrants," the motto of the state of Virginia). language <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha
-- code <- forAll $ Gen.text (Range.linear 1 10) Gen.alpha
-- > What is denoted by *sic* is that the word or phrase that precedes it occurs in the original passage being quoted or name being used and was not introduced by the writer doing the quoting. let input = "```" <> language <> "\n" <> code <> "```"
-- annotate $ "Input: " <> T.unpack input
parsed <- generic_parse input
-- from micro blogs case parsed of
-- Error case 2: made one bullet instead of two, happens for any number of items and probably applicable to numbered list as well Nothing -> fail $ "Hit Timeout"
-- - item 1 -- we're just testing for hanging
-- - item 2 (Just (Right _)) -> success
(Just (Left e)) -> fail $ errorBundlePretty e
two_blockquotes :: Property
two_blockquotes = property $ do
let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alpha
text_1 <- text_gen
text_2 <- text_gen
let input = "> " <> text_1 <> "\n\n> " <> text_2
parsed <- generic_parse input
case parsed of
Nothing -> fail $ "Hit Timeout"
(Just (Right (Doc [BlockQuote (Q [Text text_1]), BlockQuote (Q [Text text_2])]))) -> success
(Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
unordered_list :: Property
unordered_list = property $ do
let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alpha
text_1 <- text_gen
text_2 <- text_gen
let input = "- " <> text_1 <> "\n- " <> text_2
parsed <- generic_parse input
case parsed of
Nothing -> fail $ "Hit Timeout"
(Just (Right (Doc [List (L {list_type = Unordered, items = [LI {content = [Text text_1], children = []}, LI {content = [Text text_2], children = []}]})]))) -> success
(Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
header_after_unordered_list :: Property
header_after_unordered_list = property $ do
let text_gen = forAll $ Gen.text (Range.linear 1 10) Gen.alpha
bullet_text <- text_gen
header_text <- text_gen
header_level <- forAll $ Gen.int (Range.linear 1 6)
let input = "- " <> bullet_text <> "\n\n" <> (T.pack $ take header_level $ repeat '#') <> header_text
parsed <- generic_parse input
case parsed of
Nothing -> fail $ "Hit Timeout"
(Just (Right (Doc [List (L {list_type = Unordered, items = [LI {content = [Text bullet_text], children = []}]}), Heading (H {level = header_level, text = [Text header_text]})]))) -> success
(Just (Right tree)) -> fail $ "Incorrect syntax tree: " <> show tree
(Just (Left e)) -> fail $ errorBundlePretty e
-- From weekly notes, doing a header after a unordered list causes it to be seens as an inline element -- From weekly notes, doing a header after a unordered list causes it to be seens as an inline element
-- --