redoing markdown parsing and adding underlining
This commit is contained in:
parent
82c9fd8799
commit
115270a70b
2 changed files with 78 additions and 380 deletions
|
|
@ -58,6 +58,7 @@ data InlineText
|
||||||
| Bold [InlineText]
|
| Bold [InlineText]
|
||||||
| Italic [InlineText]
|
| Italic [InlineText]
|
||||||
| Crossed [InlineText]
|
| Crossed [InlineText]
|
||||||
|
| Underlined [InlineText]
|
||||||
| InlineCode Text
|
| InlineCode Text
|
||||||
| Link
|
| Link
|
||||||
{ linkText :: [InlineText],
|
{ linkText :: [InlineText],
|
||||||
|
|
|
||||||
457
src/Markdown.hs
457
src/Markdown.hs
|
|
@ -18,413 +18,110 @@ import IR
|
||||||
import Logger (Logger (logDebug))
|
import Logger (Logger (logDebug))
|
||||||
import Text.Megaparsec (ParsecT, Stream, Token, Tokens, anySingle, anySingleBut, between, choice, count, eof, manyTill, notFollowedBy, satisfy, skipSome, try, (<?>))
|
import Text.Megaparsec (ParsecT, Stream, Token, Tokens, anySingle, anySingleBut, between, choice, count, eof, manyTill, notFollowedBy, satisfy, skipSome, try, (<?>))
|
||||||
import qualified Text.Megaparsec as MP
|
import qualified Text.Megaparsec as MP
|
||||||
import Text.Megaparsec.Char (alphaNumChar, char, digitChar, string)
|
import Text.Megaparsec.Char (alphaNumChar, char, digitChar, newline, string)
|
||||||
|
|
||||||
type ParserTG = ParsecT Void
|
type Parser = ParsecT Void
|
||||||
|
|
||||||
type ParserT m = ParserTG T.Text m
|
class (Token s ~ Char, Stream s, IsString (Tokens s), Show s) => Characters s
|
||||||
|
|
||||||
type Parser = ParserT Identity
|
instance Characters Text
|
||||||
|
|
||||||
anyChar :: (Token s ~ Char, Stream s) => ParserTG s m Char
|
instance Characters String
|
||||||
anyChar = anySingle
|
|
||||||
|
|
||||||
alphaNum :: (Token s ~ Char, Stream s) => ParserTG s m Char
|
metadata :: (Logger m, Characters s) => Parser s m Text
|
||||||
alphaNum = alphaNumChar
|
|
||||||
|
|
||||||
digit :: (Token s ~ Char, Stream s) => ParserTG s m Char
|
|
||||||
digit = digitChar
|
|
||||||
|
|
||||||
noneOf :: (Token s ~ Char, Stream s) => [Char] -> ParserTG s m Char
|
|
||||||
noneOf = MP.noneOf
|
|
||||||
|
|
||||||
oneOf :: (Token s ~ Char, Stream s) => [Char] -> ParserTG s m Char
|
|
||||||
oneOf = MP.oneOf
|
|
||||||
|
|
||||||
optionMaybe :: (Token s ~ Char, Stream s) => ParserTG s m a -> ParserTG s m (Maybe a)
|
|
||||||
optionMaybe = optional
|
|
||||||
|
|
||||||
skipMany1 :: (Token s ~ Char, Stream s) => ParserTG s m a -> ParserTG s m ()
|
|
||||||
skipMany1 = skipSome
|
|
||||||
|
|
||||||
metadata :: (Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Text
|
|
||||||
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> anySingleBut '-') <* bound
|
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> anySingleBut '-') <* bound
|
||||||
where
|
where
|
||||||
bound = string "---"
|
bound = string "---"
|
||||||
|
|
||||||
document :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Document
|
document :: (Logger m, Characters s) => Parser s m Document
|
||||||
document = do
|
document = Doc <$> many element
|
||||||
logDebug "document"
|
|
||||||
Doc <$> many element <* eof
|
|
||||||
|
|
||||||
element :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
element :: (Logger m, Characters s) => Parser s m Element
|
||||||
element =
|
element =
|
||||||
choice
|
choice
|
||||||
[ try headingBlock <?> "Element Heading",
|
[ try headingBlock <?> "Element Heading",
|
||||||
try fencedCodeBlock <?> "Fenced Code Block",
|
try fencedCodeBlock <?> "Fenced Code Block",
|
||||||
try indentedCodeBlock <?> "Indented Code Block",
|
|
||||||
try blockquoteBlock <?> "BlockQuote",
|
try blockquoteBlock <?> "BlockQuote",
|
||||||
try unorderedListBlock <?> "Unordered List",
|
(try $ unorderedListBlock 0) <?> "Unordered List",
|
||||||
try orderedListBlock <?> "Ordered List",
|
(try $ orderedListBlock 0) <?> "Ordered List",
|
||||||
try horizontalRuleBlock <?> "Horizontal Rule",
|
|
||||||
try htmlBlock <?> "HTML Block",
|
try htmlBlock <?> "HTML Block",
|
||||||
try blankLines <?> "Blank Lines", -- Consume blank lines but don't add to AST
|
|
||||||
paragraphBlock <?> "Paragarph"
|
paragraphBlock <?> "Paragarph"
|
||||||
]
|
]
|
||||||
|
<* blockEnding
|
||||||
|
|
||||||
-- Blank lines (consumed but not stored)
|
lineEnding :: (Logger m, Characters s) => Parser s m ()
|
||||||
blankLines :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
lineEnding = (try eof) <|> void newline
|
||||||
blankLines = do
|
|
||||||
skipMany1 (blankLine *> notFollowedBy eof)
|
|
||||||
element <|> fmap (const $ HTML HTMLTag {html_content = ""}) eof -- Parse the next element (or handle eof)
|
|
||||||
|
|
||||||
blankLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ()
|
blockEnding :: (Logger m, Characters s) => Parser s m ()
|
||||||
blankLine = do
|
blockEnding = (try (lineEnding *> lineEnding)) <|> eof
|
||||||
many (char ' ' <|> char '\t')
|
|
||||||
lineEnding
|
|
||||||
pure ()
|
|
||||||
|
|
||||||
-- Heading Block
|
-- TODO: check if inlineHTML needs to be handled in any markdown posts
|
||||||
headingBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
-- TODO: link impl
|
||||||
headingBlock = do
|
-- TODO: image impl
|
||||||
hashes <- some (char '#') <?> "Heading Hashes"
|
inlineText :: (Logger m, Characters s) => Parser s m InlineText
|
||||||
let level = length hashes
|
inlineText = choice [try strikethrough, try bold, try italic, try underlined, try code, try link, try image, plain_text]
|
||||||
guard (level <= 6) <?> "Higher than level 6"
|
|
||||||
many (char ' ' <|> char '\t') <?> "Pre-Text Whitespace"
|
|
||||||
content <- manyTill (inlineElement <?> "Header Text") (try lineEnding <?> "Header Ending")
|
|
||||||
pure $ Heading $ H level content
|
|
||||||
|
|
||||||
-- Fenced Code Block
|
|
||||||
fencedCodeBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
|
||||||
fencedCodeBlock = do
|
|
||||||
logDebug "fenced_coding_block"
|
|
||||||
fence <- string "```" <|> string "~~~"
|
|
||||||
logDebug "fence"
|
|
||||||
lang <- optionMaybe languageInfo
|
|
||||||
logDebug "langInfo"
|
|
||||||
lineEnding'
|
|
||||||
logDebug "lineEnding"
|
|
||||||
codeLines <- manyTill (codeLine fence) (try $ string fence)
|
|
||||||
logDebug "lines"
|
|
||||||
pure $ Code $ C lang (T.pack $ unlines codeLines)
|
|
||||||
|
|
||||||
languageInfo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Text
|
|
||||||
languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.')
|
|
||||||
|
|
||||||
codeLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => (Tokens s) -> ParserTG s m String
|
|
||||||
codeLine fence = do
|
|
||||||
-- this is a hack which can only haunt me if I continue using markdown
|
|
||||||
line <- many $ (notFollowedBy $ string fence) *> noneOf "\n\r"
|
|
||||||
lineEnding'
|
|
||||||
pure line
|
|
||||||
|
|
||||||
-- Indented Code Block
|
|
||||||
indentedCodeBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
|
||||||
indentedCodeBlock = do
|
|
||||||
lines' <- some indentedLine
|
|
||||||
pure $ Code $ C Nothing (T.pack $ unlines lines')
|
|
||||||
where
|
where
|
||||||
indentedLine = do
|
between' start end middle_piece = between start end $ many ((notFollowedBy end) *> middle_piece)
|
||||||
count 4 (char ' ' <|> char '\t')
|
|
||||||
line <- many $ noneOf "\n\r"
|
|
||||||
lineEnding
|
|
||||||
pure line
|
|
||||||
|
|
||||||
-- Blockquote Block
|
strikethrough = Crossed <$> (between' (string "~~") (string "~~") inlineText)
|
||||||
blockquoteBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
|
||||||
blockquoteBlock = do
|
bold = Bold <$> (between' (string "**") (string "**") inlineText)
|
||||||
lines' <- some blockquoteLine
|
|
||||||
pure $ BlockQuote $ Q (concat lines')
|
italic = Italic <$> (between' (char '*') (char '*') inlineText)
|
||||||
|
|
||||||
|
underlined = Underlined <$> (between' (string "__") (string "__") inlineText)
|
||||||
|
|
||||||
|
code = InlineCode . T.pack <$> (between' (char '`') (char '`') (notFollowedBy lineEnding *> anySingle))
|
||||||
|
|
||||||
|
link = do
|
||||||
|
linkText <- error "linkText parser"
|
||||||
|
url <- error "url parser"
|
||||||
|
title <- error "title parser"
|
||||||
|
pure Link {linkText, url, title}
|
||||||
|
|
||||||
|
image = do
|
||||||
|
altText <- error "altText"
|
||||||
|
url <- error "url"
|
||||||
|
title <- error "title"
|
||||||
|
pure Image {altText, url, title}
|
||||||
|
|
||||||
|
plain_text = Text . T.pack <$> (many ((notFollowedBy blockEnding) *> anySingle))
|
||||||
|
|
||||||
|
headingBlock :: (Logger m, Characters s) => Parser s m Element
|
||||||
|
headingBlock = do
|
||||||
|
heading_level <- length <$> (some $ char '#')
|
||||||
|
optional $ char ' '
|
||||||
|
text <- many $ inlineText
|
||||||
|
pure $ Heading $ H {level = heading_level, text}
|
||||||
|
|
||||||
|
fencedCodeBlock :: (Logger m, Characters s) => Parser s m Element
|
||||||
|
fencedCodeBlock = between (string "```") (string "```") $ do
|
||||||
|
language' <- T.pack <$> (many (notFollowedBy lineEnding *> anySingle))
|
||||||
|
lineEnding
|
||||||
|
code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle))
|
||||||
|
let language = if language' == "" then Just language' else Nothing
|
||||||
|
pure $ Code $ C {language, code}
|
||||||
|
|
||||||
|
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
|
||||||
|
blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
|
||||||
where
|
where
|
||||||
blockquoteLine = do
|
blockquoteLine = do
|
||||||
char '>'
|
char '>'
|
||||||
optional (char ' ')
|
optional $ char ' '
|
||||||
content <- many $ notFollowedBy lineEnding' *> inlineElement
|
ret <- (many ((notFollowedBy lineEnding) *> inlineText))
|
||||||
pure content
|
-- this dance with optional and notFollowedBy is done so we
|
||||||
|
-- aren't accidentally consuming part of a block ending
|
||||||
|
(optional ((notFollowedBy blockEnding) *> lineEnding))
|
||||||
|
pure ret
|
||||||
|
|
||||||
-- Horizontal Rule Block
|
unorderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
|
||||||
horizontalRuleBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
unorderedListBlock indent_level = error "TODO: unorderedListBlock"
|
||||||
horizontalRuleBlock = do
|
|
||||||
choice
|
|
||||||
[ try (count 3 (char '*') >> many (char ' ' <|> char '*')),
|
|
||||||
try (count 3 (char '-') >> many (char ' ' <|> char '-')),
|
|
||||||
try (count 3 (char '_') >> many (char ' ' <|> char '_'))
|
|
||||||
]
|
|
||||||
lineEnding
|
|
||||||
pure HorizontalRule
|
|
||||||
|
|
||||||
-- Unordered List Block
|
orderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
|
||||||
unorderedListBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
orderedListBlock indent_level = error "TODO: orderedListBlock"
|
||||||
unorderedListBlock = do
|
|
||||||
items <- some unorderedListItem
|
|
||||||
lineEnding'
|
|
||||||
pure $ List $ L Unordered items
|
|
||||||
|
|
||||||
unorderedListItem :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ListItem
|
htmlBlock :: (Logger m, Characters s) => Parser s m Element
|
||||||
unorderedListItem = do
|
htmlBlock = error "TODO: htmlBlock"
|
||||||
oneOf "*-+"
|
|
||||||
optional (char ' ' <|> char '\t')
|
|
||||||
content <- many $ notFollowedBy lineEnding' *> inlineElement
|
|
||||||
lineEnding'
|
|
||||||
-- continuations <- many listContinuation
|
|
||||||
children <- maybeToList <$> (optional $ indentedList 1)
|
|
||||||
pure $ LI content children
|
|
||||||
|
|
||||||
-- TODO: handle list indentation at all levels
|
paragraphBlock :: (Logger m, Characters s) => Parser s m Element
|
||||||
indentedList :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => Int -> ParserTG s m List
|
paragraphBlock = Paragraph . P <$> (many inlineText)
|
||||||
indentedList n = do
|
|
||||||
void $ (count (4 * n) (char ' ')) <|> count n (char '\t')
|
|
||||||
ret <- (try indentedUnorderedList) <|> indentedOrderedList
|
|
||||||
pure ret <* lineEnding'
|
|
||||||
|
|
||||||
indentedUnorderedList :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m List
|
|
||||||
indentedUnorderedList = do
|
|
||||||
items <- some (indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t')))
|
|
||||||
pure $ L Unordered items
|
|
||||||
|
|
||||||
indentedOrderedList :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m List
|
|
||||||
indentedOrderedList = do
|
|
||||||
items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t')))
|
|
||||||
pure $ L Ordered items
|
|
||||||
|
|
||||||
indentedListItem :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m () -> ParserTG s m ListItem
|
|
||||||
indentedListItem marker = do
|
|
||||||
marker
|
|
||||||
content <- many $ notFollowedBy lineEnding' *> inlineElement
|
|
||||||
pure $ LI content []
|
|
||||||
|
|
||||||
-- Ordered List Block
|
|
||||||
orderedListBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
|
||||||
orderedListBlock = do
|
|
||||||
items <- some orderedListItem
|
|
||||||
lineEnding'
|
|
||||||
pure $ List $ L Ordered items
|
|
||||||
|
|
||||||
orderedListItem :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ListItem
|
|
||||||
orderedListItem = do
|
|
||||||
some digit
|
|
||||||
char '.' <|> char ')'
|
|
||||||
optional (char ' ' <|> char '\t')
|
|
||||||
content <- many $ notFollowedBy lineEnding' *> inlineElement
|
|
||||||
lineEnding'
|
|
||||||
-- continuations <- many listContinuation
|
|
||||||
children <- maybeToList <$> (optional $ indentedList 1)
|
|
||||||
pure $ LI content children
|
|
||||||
|
|
||||||
-- HTML Block
|
|
||||||
htmlBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
|
||||||
htmlBlock = do
|
|
||||||
char '<'
|
|
||||||
-- Capture the entire HTML block as raw text
|
|
||||||
rest <- manyTill anyChar (try $ char '>' >> lineEnding)
|
|
||||||
let content = '<' : (rest <> ">")
|
|
||||||
return $ HTML $ HTMLTag (T.pack content)
|
|
||||||
|
|
||||||
-- Paragraph Block
|
|
||||||
paragraphBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
|
|
||||||
paragraphBlock = do
|
|
||||||
logDebug "paragraph"
|
|
||||||
content <- some (notFollowedBy lineEnding *> inlineElement)
|
|
||||||
lineEnding <|> eof
|
|
||||||
pure $ Paragraph $ P content
|
|
||||||
|
|
||||||
-- Inline Elements
|
|
||||||
inlineElement :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
inlineElement =
|
|
||||||
logDebug "inlineElement"
|
|
||||||
*> choice
|
|
||||||
[ try strong <?> "Inline Strong Text",
|
|
||||||
try emphasis <?> "Inline Italic Text",
|
|
||||||
try crossedText <?> "Inline Crossed Text",
|
|
||||||
try codeSpan <?> "Inline Code",
|
|
||||||
try image <?> "Inline Image",
|
|
||||||
try link <?> "Inline Link",
|
|
||||||
try htmlInline <?> "Inline HTML",
|
|
||||||
try escapedChar <?> "Escaped Character",
|
|
||||||
plainText <?> "Inline Plain Text"
|
|
||||||
]
|
|
||||||
|
|
||||||
-- Strong (Bold)
|
|
||||||
strong :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
strong = strongAsterisk <|> strongUnderscore
|
|
||||||
|
|
||||||
strongAsterisk :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
strongAsterisk = do
|
|
||||||
string "**"
|
|
||||||
content <- some (notFollowedBy (string "**") >> inlineElement)
|
|
||||||
string "**"
|
|
||||||
pure $ Bold content
|
|
||||||
|
|
||||||
strongUnderscore :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
strongUnderscore = do
|
|
||||||
string "__"
|
|
||||||
content <- some (notFollowedBy (string "__") >> inlineElement)
|
|
||||||
string "__"
|
|
||||||
pure $ Bold content
|
|
||||||
|
|
||||||
crossedText :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
crossedText = do
|
|
||||||
string "~~"
|
|
||||||
content <- some (notFollowedBy (string "~~") >> inlineElement)
|
|
||||||
string "~~"
|
|
||||||
pure $ Crossed content
|
|
||||||
|
|
||||||
-- Emphasis (Italic)
|
|
||||||
emphasis :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
emphasis = emphasisAsterisk <|> emphasisUnderscore
|
|
||||||
|
|
||||||
emphasisAsterisk :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
emphasisAsterisk = do
|
|
||||||
char '*'
|
|
||||||
content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk)
|
|
||||||
char '*'
|
|
||||||
pure $ Italic content
|
|
||||||
|
|
||||||
emphasisUnderscore :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
emphasisUnderscore = do
|
|
||||||
char '_'
|
|
||||||
content <- some inlineElementNoUnderscore
|
|
||||||
char '_'
|
|
||||||
pure $ Italic content
|
|
||||||
|
|
||||||
inlineElementNo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => Char -> ParserTG s m InlineText
|
|
||||||
inlineElementNo c =
|
|
||||||
choice
|
|
||||||
[ try strong,
|
|
||||||
try codeSpan,
|
|
||||||
try image,
|
|
||||||
try link,
|
|
||||||
try htmlInline,
|
|
||||||
try escapedChar,
|
|
||||||
plainTextNo [c]
|
|
||||||
]
|
|
||||||
|
|
||||||
plainTextNo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => [Char] -> ParserTG s m InlineText
|
|
||||||
plainTextNo list = do
|
|
||||||
plainTextNo' False list
|
|
||||||
|
|
||||||
plainTextNo' :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => Bool -> [Char] -> ParserTG s m InlineText
|
|
||||||
plainTextNo' block_whitespace disallow = do
|
|
||||||
logDebug $ "base plain Text: " <> T.show block_whitespace <> " " <> T.show disallow
|
|
||||||
firstChar <- noneOf (disallow <> if block_whitespace then " \t\r\n" else []) <?> "Plain Text Initial Disallow"
|
|
||||||
remChars <- many $ notFollowedBy lineEnding' *> plainTextCharNo disallow
|
|
||||||
pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars
|
|
||||||
where
|
|
||||||
wspHandler '\n' = ' '
|
|
||||||
wspHandler c = c
|
|
||||||
|
|
||||||
inlineElementNoAsterisk :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
inlineElementNoAsterisk = inlineElementNo '*'
|
|
||||||
|
|
||||||
inlineElementNoUnderscore :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
inlineElementNoUnderscore = inlineElementNo '_'
|
|
||||||
|
|
||||||
-- Code Span
|
|
||||||
codeSpan :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
codeSpan =
|
|
||||||
choice
|
|
||||||
[ try tripleBacktick,
|
|
||||||
try doubleBacktick,
|
|
||||||
singleBacktick
|
|
||||||
]
|
|
||||||
where
|
|
||||||
singleBacktick = do
|
|
||||||
char '`'
|
|
||||||
content <- many $ noneOf "`\n\r"
|
|
||||||
char '`'
|
|
||||||
pure $ InlineCode (T.pack content)
|
|
||||||
doubleBacktick = do
|
|
||||||
string "``"
|
|
||||||
content <- manyTill anyChar (try $ string "``")
|
|
||||||
pure $ InlineCode (T.pack content)
|
|
||||||
tripleBacktick = do
|
|
||||||
string "```"
|
|
||||||
content <- manyTill anyChar (try $ string "```")
|
|
||||||
pure $ InlineCode (T.pack content)
|
|
||||||
|
|
||||||
-- Image
|
|
||||||
image :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m InlineText
|
|
||||||
image = do
|
|
||||||
char '!'
|
|
||||||
char '['
|
|
||||||
alt <- T.pack <$> many (noneOf "]\n\r")
|
|
||||||
char ']'
|
|
||||||
(url, title) <- linkDestination
|
|
||||||
return $ Image {altText = alt, url = url, title = title}
|
|
||||||
|
|
||||||
-- Link
|
|
||||||
link :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
link = do
|
|
||||||
char '['
|
|
||||||
content <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
|
|
||||||
char ']'
|
|
||||||
(url, title) <- linkDestination
|
|
||||||
pure $ Link content url title
|
|
||||||
|
|
||||||
inlineElementNoBracket :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
inlineElementNoBracket =
|
|
||||||
choice
|
|
||||||
[ try strong,
|
|
||||||
try emphasis,
|
|
||||||
try codeSpan,
|
|
||||||
try htmlInline,
|
|
||||||
try escapedChar,
|
|
||||||
plainTextNo "[]"
|
|
||||||
]
|
|
||||||
|
|
||||||
linkDestination :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m (Text, Maybe Text)
|
|
||||||
linkDestination = directLink <|> referenceLink
|
|
||||||
where
|
|
||||||
directLink = do
|
|
||||||
char '('
|
|
||||||
url <- many $ noneOf " \t\n\r)"
|
|
||||||
title <- optionMaybe (try $ some (char ' ' <|> char '\t') >> titleParser)
|
|
||||||
char ')'
|
|
||||||
pure (T.pack url, title)
|
|
||||||
referenceLink = do
|
|
||||||
char '['
|
|
||||||
ref <- some (alphaNum <|> char ' ' <|> char '\t')
|
|
||||||
char ']'
|
|
||||||
-- For simplicity, we're not resolving references here
|
|
||||||
-- In a real implementation, you'd look up the reference
|
|
||||||
pure (T.pack $ "[" ++ ref ++ "]", Nothing)
|
|
||||||
|
|
||||||
titleParser :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m Text
|
|
||||||
titleParser =
|
|
||||||
T.pack
|
|
||||||
<$> choice
|
|
||||||
[ between (char '"') (char '"') (many $ anySingleBut '"'),
|
|
||||||
between (char '\'') (char '\'') (many $ anySingleBut '\''),
|
|
||||||
between (char '(') (char ')') (many $ anySingleBut ')')
|
|
||||||
]
|
|
||||||
|
|
||||||
-- HTML Inline
|
|
||||||
htmlInline :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m InlineText
|
|
||||||
htmlInline = do
|
|
||||||
start <- char '<'
|
|
||||||
content <- manyTill anyChar (try $ char '>')
|
|
||||||
return $ HTMLInline (T.pack (start : content ++ ">"))
|
|
||||||
|
|
||||||
-- Escaped Character
|
|
||||||
escapedChar :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m InlineText
|
|
||||||
escapedChar = do
|
|
||||||
char '\\'
|
|
||||||
c <- satisfy (\x -> x >= '!' && x <= '~')
|
|
||||||
pure $ Text (T.singleton c)
|
|
||||||
|
|
||||||
-- Plain Text
|
|
||||||
plainText :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
|
|
||||||
plainText = plainTextNo' False [] <?> "Baseline Plain Text"
|
|
||||||
|
|
||||||
plainTextBaseDisallow :: [Char]
|
|
||||||
plainTextBaseDisallow = "[~`_*<"
|
|
||||||
|
|
||||||
plainTextCharNo :: (Logger m, Token s ~ Char, Stream s) => [Char] -> ParserTG s m Char
|
|
||||||
plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow
|
|
||||||
|
|
||||||
-- Helper Parsers
|
|
||||||
lineEnding :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ()
|
|
||||||
lineEnding = void (try $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r")) <|> eof
|
|
||||||
|
|
||||||
lineEnding' :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ()
|
|
||||||
lineEnding' = void (try (string "\r\n") <|> try (string "\n") <|> string "\r") <|> eof
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue