naive refactor of markdown generation for the new IR
This commit is contained in:
parent
24606a9204
commit
22f4f89137
4 changed files with 91 additions and 46 deletions
|
|
@ -36,7 +36,7 @@ test-suite test-markdown-parse
|
|||
hs-source-dirs: tests
|
||||
type: exitcode-stdio-1.0
|
||||
main-is: Markdown/Parse.hs
|
||||
build-depends: base >=4.20 && < 4.21, text >= 2.1.2 , megaparsec >= 9.7.0 , transformers >= 0.6.2 , hedgehog >= 1.7 , time, psb
|
||||
build-depends: base, text, megaparsec, transformers, hedgehog, time, psb
|
||||
|
||||
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
|
||||
default-language: Haskell2010
|
||||
|
|
|
|||
71
src/HTML.hs
71
src/HTML.hs
|
|
@ -34,37 +34,63 @@ genHeaderId header =
|
|||
textSub ' ' = '-'
|
||||
textSub c = c
|
||||
|
||||
genHeaderClasses :: Heading -> T.Text
|
||||
genHeaderClasses = const ""
|
||||
|
||||
tshow :: (Show s) => s -> T.Text
|
||||
tshow = T.pack . show
|
||||
|
||||
compileToHTML :: Document -> T.Text
|
||||
compileToHTML (Doc elements) = T.concat $ map elementToHTML elements
|
||||
|
||||
elementsToHTML :: [Element] -> T.Text
|
||||
elementsToHTML = T.concat . map elementToHTML
|
||||
|
||||
elementToHTML :: Element -> T.Text
|
||||
elementToHTML (Heading header) = T.concat ["<h", tshow header.level, genHeaderId header, genHeaderClasses header, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
|
||||
elementToHTML (Heading header attrs) = T.concat ["<h", tshow header.level, headerAttrs header attrs, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
|
||||
--
|
||||
elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
|
||||
where
|
||||
language = fromMaybe "" code_block.language
|
||||
elementToHTML (BlockQuote (Q elems)) = T.concat ["<blockquote>", serializeInlineToHTML elems, "</blockquote>"]
|
||||
elementToHTML (List (L {list_type = Ordered, items})) = T.concat ["<ol>", generateLiElems items, "</ol>"]
|
||||
elementToHTML (List (L {list_type = Unordered, items})) = T.concat ["<ul>", generateLiElems items, "</ul>"]
|
||||
elementToHTML (BlockQuote (Q elems)) = T.concat ["<blockquote>", elementsToHTML elems, "</blockquote>"]
|
||||
elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = T.concat ["<ol", maybe "" handleStart start_number, maybe "" handleStyle style, ">", generateLiElems items, "</ol>"]
|
||||
elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = T.concat ["<ul", maybe "" handleStyle style, ">", generateLiElems items, "</ul>"]
|
||||
elementToHTML (HTML (HTMLTag {html_content})) = html_content
|
||||
elementToHTML (Paragraph (P snippets)) = T.concat ["<p>", serializeInlineToHTML snippets, "</p>"]
|
||||
elementToHTML (Paragraph (P snippets) attrs) = T.concat ["<p", handleAttrs attrs, ">", serializeInlineToHTML snippets, "</p>"]
|
||||
elementToHTML HorizontalRule = "<hr>"
|
||||
elementToHTML (Table _ _) = error "TODO"
|
||||
elementToHTML (Container _ _) = error "TODO"
|
||||
elementToHTML (Footnote _ _) = error "TODO"
|
||||
elementToHTML (DescriptionList _ _) = error "TODO"
|
||||
elementToHTML (RawBlock _ _) = error "TODO"
|
||||
elementToHTML (TaskList _ _) = error "TODO"
|
||||
|
||||
handleStyle :: T.Text -> T.Text
|
||||
handleStyle style = T.concat ["type=\"", style, "\""]
|
||||
|
||||
handleStart :: Int -> T.Text
|
||||
handleStart start = T.concat ["start=\"", T.show start, "\""]
|
||||
|
||||
handleAttrs :: Attrs -> T.Text
|
||||
handleAttrs Attrs {attrId, attrClasses, attrKV} =
|
||||
T.concat
|
||||
[ maybe "" (\id -> "id=\"" <> id <> "\"") attrId,
|
||||
"class=\"" <> T.intercalate " " attrClasses <> "\"",
|
||||
T.intercalate " " $ map (\(key, val) -> key <> "=\"" <> val <> "\"") attrKV
|
||||
]
|
||||
|
||||
headerAttrs :: Heading -> Attrs -> T.Text
|
||||
headerAttrs header Attrs {attrId, attrClasses, attrKV} =
|
||||
T.concat
|
||||
-- id is overcomplicated due to header having id logic written I don't wanna bother with
|
||||
[ maybe (genHeaderId header) (\id -> "id=\"" <> id <> "\"") attrId,
|
||||
"class=\"" <> T.intercalate " " attrClasses <> "\"",
|
||||
T.intercalate " " $ map (\(key, val) -> key <> "=\"" <> val <> "\"") attrKV
|
||||
]
|
||||
|
||||
generateLiElems :: [ListItem] -> T.Text
|
||||
generateLiElems [] = ""
|
||||
generateLiElems (element : remainder) =
|
||||
T.concat
|
||||
[ "<li>",
|
||||
-- We assume child lists are stricly after our contents
|
||||
-- if they aren't this is fucked
|
||||
serializeInlineToHTML element.content,
|
||||
fromMaybe "" $ fmap (elementToHTML . List) element.child,
|
||||
elementsToHTML element.content,
|
||||
"</li>",
|
||||
generateLiElems remainder
|
||||
]
|
||||
|
|
@ -72,10 +98,19 @@ generateLiElems (element : remainder) =
|
|||
serializeInlineToHTML :: [InlineText] -> T.Text
|
||||
serializeInlineToHTML [] = ""
|
||||
serializeInlineToHTML (Text t : remaining) = escapeText t <> serializeInlineToHTML remaining
|
||||
serializeInlineToHTML (Bold elems : remaining) = T.concat ["<b>", serializeInlineToHTML elems, "</b>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (Italic elems : remaining) = T.concat ["<i>", serializeInlineToHTML elems, "</i>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (Crossed elems : remaining) = T.concat ["<s>", serializeInlineToHTML elems, "</s>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (InlineCode code : remaining) = T.concat ["<code>", escapeText code, "</code>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat ["<a href=\"", url, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "</a>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["<img src=\"", url, "\" alt=\"", escapeText altText, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML ((Bold elems attrs) : remaining) = T.concat ["<b", handleAttrs attrs, ">", serializeInlineToHTML elems, "</b>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML ((Italic elems attrs) : remaining) = T.concat ["<i", handleAttrs attrs, ">", serializeInlineToHTML elems, "</i>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML ((Crossed elems attrs) : remaining) = T.concat ["<s", handleAttrs attrs, ">", serializeInlineToHTML elems, "</s>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML ((InlineCode code attrs) : remaining) = T.concat ["<code", handleAttrs attrs, ">", escapeText code, "</code>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (Link {linkText, url, title, misc_attrs} : remaining) = T.concat ["<a href=\"", url, "\"", handleAttrs misc_attrs, maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "</a>", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (Image {altText, url, title, misc_attrs} : remaining) = T.concat ["<img src=\"", url, "\" alt=\"", escapeText altText, "\"", maybe "" (\t -> T.concat ["title=\"", escapeText t, "\""]) title, handleAttrs misc_attrs, ">", serializeInlineToHTML remaining]
|
||||
serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining
|
||||
serializeInlineToHTML ((Superscript _ _) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((Subscript _ _) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((Highlighted _ _) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((Insert _ _) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((Math _ _) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((FootnoteReference {}) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((Symbol _) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((RawInline _ _) : remaining) = error "TODO"
|
||||
serializeInlineToHTML ((Span _ _) : remaining) = error "TODO"
|
||||
|
|
|
|||
18
src/IR.hs
18
src/IR.hs
|
|
@ -14,12 +14,10 @@ data Element
|
|||
HTML HTML
|
||||
| Paragraph Paragraph Attrs
|
||||
| HorizontalRule
|
||||
| -- TODO
|
||||
Table Table Attrs
|
||||
| Table Table Attrs
|
||||
| -- Djot :::
|
||||
Container [Element] Attrs
|
||||
| -- TODO
|
||||
Footnote Footnote Attrs
|
||||
| Footnote Footnote Attrs
|
||||
| DescriptionList DescriptionList Attrs
|
||||
| RawBlock RawBlock Attrs
|
||||
| TaskList TaskList Attrs
|
||||
|
|
@ -39,15 +37,15 @@ data Code = C
|
|||
}
|
||||
deriving (Show)
|
||||
|
||||
data BlockQuote = Q [Element] deriving (Show)
|
||||
newtype BlockQuote = Q [Element] deriving (Show)
|
||||
|
||||
data ListItem = LI
|
||||
{ content :: [Element], -- Flatten continuations into here
|
||||
child :: Maybe List
|
||||
newtype ListItem = LI
|
||||
-- children are just more elements
|
||||
{ content :: [Element] -- Flatten continuations into here
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
data ListType = Ordered {start_number :: Text, style :: Text} | Unordered {style :: Text} deriving (Show)
|
||||
data ListType = Ordered {start_number :: Maybe Int, style :: Maybe Text} | Unordered {style :: Maybe Text} deriving (Show)
|
||||
|
||||
data List = L
|
||||
{ list_type :: ListType,
|
||||
|
|
@ -64,7 +62,7 @@ newtype HTML
|
|||
newtype Paragraph = P [InlineText] deriving (Show)
|
||||
|
||||
data InlineText
|
||||
= Text Text Attrs -- Combined Normal and Escaped
|
||||
= Text Text -- Combined Normal and Escaped
|
||||
| Bold [InlineText] Attrs
|
||||
| Italic [InlineText] Attrs
|
||||
| Crossed [InlineText] Attrs
|
||||
|
|
|
|||
|
|
@ -65,16 +65,24 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow
|
|||
where
|
||||
between' disallow start end middle_piece = between start end $ many ((notFollowedBy ((try $ void end) <|> disallow)) *> middle_piece)
|
||||
|
||||
strikethrough disallow = Crossed <$> (between' disallow (string "~~") (void $ string "~~") (inlineText' (disallow <|> (void $ string "~~"))))
|
||||
strikethrough disallow = do
|
||||
content <- between' disallow (string "~~") (void $ string "~~") (inlineText' (disallow <|> (void $ string "~~")))
|
||||
pure $ Crossed content emptyAttrs
|
||||
|
||||
bold disallow = Bold <$> (between' disallow (string "**") (disallow <|> (void $ string "**")) (inlineText' (disallow <|> (void $ string "**"))))
|
||||
bold disallow = do
|
||||
content <- between' disallow (string "**") (disallow <|> (void $ string "**")) (inlineText' (disallow <|> (void $ string "**")))
|
||||
pure $ Bold content emptyAttrs
|
||||
|
||||
italic :: (HasCallStack) => Parser s m () -> Parser s m InlineText
|
||||
italic disallow = Italic <$> (between' disallow (char '*') ((void $ char '*') <|> disallow) (inlineText' (disallow <|> (void $ char '*'))))
|
||||
italic disallow = do
|
||||
content <- between' disallow (char '*') ((void $ char '*') <|> disallow) (inlineText' (disallow <|> (void $ char '*')))
|
||||
pure $ Italic content emptyAttrs
|
||||
|
||||
underlined disallow = Underlined <$> (between' disallow (string "__") ((void $ string "__") <|> disallow) (inlineText' (disallow <|> (void $ string "__"))))
|
||||
|
||||
code = InlineCode . T.pack <$> (between' disallow (char '`') (char '`') ((notFollowedBy lineEnding) *> anySingle))
|
||||
code = do
|
||||
contents <- T.pack <$> between' disallow (char '`') (char '`') (notFollowedBy lineEnding *> anySingle)
|
||||
pure $ InlineCode contents emptyAttrs
|
||||
link :: (HasCallStack) => Parser s m () -> Parser s m InlineText
|
||||
link disallow = do
|
||||
linkText <- between' disallow (char '[') ((void $ char ']') <|> disallow) (inlineText' (disallow <|> (void $ char ']')))
|
||||
|
|
@ -88,7 +96,7 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow
|
|||
Nothing -> pure Nothing
|
||||
char ')'
|
||||
pure (url, title)
|
||||
pure Link {linkText, url, title}
|
||||
pure Link {linkText, url, title, misc_attrs = emptyAttrs}
|
||||
|
||||
image disallow = do
|
||||
logDebug "image:before excl"
|
||||
|
|
@ -102,7 +110,7 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow
|
|||
Link {linkText = [Text altText], url, title} -> pure (altText, url, title)
|
||||
Link {linkText = [], url, title} -> pure ("", url, title)
|
||||
_ -> fail "Image alt text must be normal text, cannot be stylized in any way"
|
||||
pure Image {altText, url, title}
|
||||
pure Image {altText, url, title, misc_attrs = emptyAttrs}
|
||||
|
||||
inline_html =
|
||||
HTMLInline <$> do
|
||||
|
|
@ -125,7 +133,7 @@ headingBlock = do
|
|||
heading_level <- length <$> (some $ char '#')
|
||||
optional spaceChar
|
||||
text <- many ((notFollowedBy blockEnding) *> inlineText)
|
||||
pure $ Heading $ H {level = heading_level, text}
|
||||
pure $ Heading (H {level = heading_level, text}) emptyAttrs
|
||||
|
||||
fencedCodeBlock :: (Logger m, Characters s) => Parser s m Element
|
||||
fencedCodeBlock = between (string "```") (string "```") $ do
|
||||
|
|
@ -136,7 +144,9 @@ fencedCodeBlock = between (string "```") (string "```") $ do
|
|||
pure $ Code $ C {language, code}
|
||||
|
||||
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
|
||||
blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
|
||||
blockquoteBlock = do
|
||||
content <- Q . concat <$> some blockquoteLine
|
||||
pure $ BlockQuote content
|
||||
where
|
||||
blockquoteLine = do
|
||||
char '>'
|
||||
|
|
@ -145,7 +155,7 @@ blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
|
|||
-- this dance with optional and notFollowedBy is done so we
|
||||
-- aren't accidentally consuming part of a block ending
|
||||
(optional ((notFollowedBy blockEnding) *> lineEnding))
|
||||
pure ret
|
||||
pure [(Paragraph $ P ret) emptyAttrs]
|
||||
|
||||
-- type of list the parser returns
|
||||
-- parser which grabs the prefix for each item of the list
|
||||
|
|
@ -154,7 +164,7 @@ blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
|
|||
listBlock :: (Logger m, Characters s) => ListType -> Parser s m prefix -> (Int -> Parser s m List) -> Int -> Parser s m Element
|
||||
listBlock list_type prefix child_parser_factory nest_level = do
|
||||
items <- some $ listItem
|
||||
pure $ List $ L {list_type, items}
|
||||
pure $ List (L {list_type, items}) emptyAttrs
|
||||
where
|
||||
listItem = do
|
||||
count nest_level ((try $ void $ char '\t') <|> (void $ (count 4 $ char ' ')))
|
||||
|
|
@ -164,23 +174,23 @@ listBlock list_type prefix child_parser_factory nest_level = do
|
|||
optional ((notFollowedBy blockEnding) *> lineEnding)
|
||||
|
||||
child <- optional $ child_parser_factory $ nest_level + 1
|
||||
|
||||
pure $ LI {content, child}
|
||||
error "TODO, child list handling works different now, child needs to be combined into content"
|
||||
pure $ LI {content = [Paragraph (P content) emptyAttrs]}
|
||||
|
||||
unorderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
|
||||
unorderedListBlock = listBlock Unordered unordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
|
||||
unorderedListBlock = listBlock Unordered {style = Nothing} unordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
|
||||
where
|
||||
unordered_prefix = (choice $ map char "*-+") *> (notFollowedBy newline *> spaceChar)
|
||||
-- not exhaustive but we know listBlock is returning a List
|
||||
unwrap (List l) = l
|
||||
unwrap (List l _) = l
|
||||
|
||||
orderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
|
||||
orderedListBlock = listBlock Ordered ordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
|
||||
orderedListBlock = listBlock Ordered {style = Nothing, start_number = Nothing} ordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
|
||||
where
|
||||
-- regex equivalent: [0-9]+[.)]\s?
|
||||
ordered_prefix = (some digitChar) *> (char '.' <|> char ')') *> (notFollowedBy newline *> spaceChar)
|
||||
-- not exhaustive but we know listBlock is returning a List
|
||||
unwrap (List l) = l
|
||||
unwrap (List l _) = l
|
||||
|
||||
htmlBlock :: forall m s. (Logger m, Characters s) => Parser s m Element
|
||||
htmlBlock = do
|
||||
|
|
@ -216,4 +226,6 @@ htmlBlock = do
|
|||
pure $ mconcat [name, "=\"", value, "\""]
|
||||
|
||||
paragraphBlock :: (Logger m, Characters s) => Parser s m Element
|
||||
paragraphBlock = Paragraph . P <$> (many ((notFollowedBy blockEnding) *> inlineText))
|
||||
paragraphBlock = do
|
||||
content <- P <$> many ((notFollowedBy blockEnding) *> inlineText)
|
||||
pure $ Paragraph content emptyAttrs
|
||||
|
|
|
|||
Loading…
Reference in a new issue