diff --git a/psb.cabal b/psb.cabal
index eff131a..44c0b79 100644
--- a/psb.cabal
+++ b/psb.cabal
@@ -36,7 +36,7 @@ test-suite test-markdown-parse
hs-source-dirs: tests
type: exitcode-stdio-1.0
main-is: Markdown/Parse.hs
- build-depends: base >=4.20 && < 4.21, text >= 2.1.2 , megaparsec >= 9.7.0 , transformers >= 0.6.2 , hedgehog >= 1.7 , time, psb
+ build-depends: base, text, megaparsec, transformers, hedgehog, time, psb
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
default-language: Haskell2010
diff --git a/src/HTML.hs b/src/HTML.hs
index 249f846..42efb85 100644
--- a/src/HTML.hs
+++ b/src/HTML.hs
@@ -34,37 +34,63 @@ genHeaderId header =
textSub ' ' = '-'
textSub c = c
-genHeaderClasses :: Heading -> T.Text
-genHeaderClasses = const ""
-
tshow :: (Show s) => s -> T.Text
tshow = T.pack . show
compileToHTML :: Document -> T.Text
compileToHTML (Doc elements) = T.concat $ map elementToHTML elements
+elementsToHTML :: [Element] -> T.Text
+elementsToHTML = T.concat . map elementToHTML
+
elementToHTML :: Element -> T.Text
-elementToHTML (Heading header) = T.concat ["", serializeInlineToHTML header.text, ""]
+elementToHTML (Heading header attrs) = T.concat ["", serializeInlineToHTML header.text, ""]
--
elementToHTML (Code code_block) = T.concat ["
", escapeText code_block.code, "", "
"]
where
language = fromMaybe "" code_block.language
-elementToHTML (BlockQuote (Q elems)) = T.concat ["", serializeInlineToHTML elems, "
"]
-elementToHTML (List (L {list_type = Ordered, items})) = T.concat ["", generateLiElems items, "
"]
-elementToHTML (List (L {list_type = Unordered, items})) = T.concat ["", generateLiElems items, "
"]
+elementToHTML (BlockQuote (Q elems)) = T.concat ["", elementsToHTML elems, "
"]
+elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = T.concat ["", generateLiElems items, "
"]
+elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = T.concat ["", generateLiElems items, "
"]
elementToHTML (HTML (HTMLTag {html_content})) = html_content
-elementToHTML (Paragraph (P snippets)) = T.concat ["", serializeInlineToHTML snippets, "
"]
+elementToHTML (Paragraph (P snippets) attrs) = T.concat ["", serializeInlineToHTML snippets, "
"]
elementToHTML HorizontalRule = "
"
+elementToHTML (Table _ _) = error "TODO"
+elementToHTML (Container _ _) = error "TODO"
+elementToHTML (Footnote _ _) = error "TODO"
+elementToHTML (DescriptionList _ _) = error "TODO"
+elementToHTML (RawBlock _ _) = error "TODO"
+elementToHTML (TaskList _ _) = error "TODO"
+
+handleStyle :: T.Text -> T.Text
+handleStyle style = T.concat ["type=\"", style, "\""]
+
+handleStart :: Int -> T.Text
+handleStart start = T.concat ["start=\"", T.show start, "\""]
+
+handleAttrs :: Attrs -> T.Text
+handleAttrs Attrs {attrId, attrClasses, attrKV} =
+ T.concat
+ [ maybe "" (\id -> "id=\"" <> id <> "\"") attrId,
+ "class=\"" <> T.intercalate " " attrClasses <> "\"",
+ T.intercalate " " $ map (\(key, val) -> key <> "=\"" <> val <> "\"") attrKV
+ ]
+
+headerAttrs :: Heading -> Attrs -> T.Text
+headerAttrs header Attrs {attrId, attrClasses, attrKV} =
+ T.concat
+ -- id is overcomplicated due to header having id logic written I don't wanna bother with
+ [ maybe (genHeaderId header) (\id -> "id=\"" <> id <> "\"") attrId,
+ "class=\"" <> T.intercalate " " attrClasses <> "\"",
+ T.intercalate " " $ map (\(key, val) -> key <> "=\"" <> val <> "\"") attrKV
+ ]
generateLiElems :: [ListItem] -> T.Text
generateLiElems [] = ""
generateLiElems (element : remainder) =
T.concat
[ "",
- -- We assume child lists are stricly after our contents
- -- if they aren't this is fucked
- serializeInlineToHTML element.content,
- fromMaybe "" $ fmap (elementToHTML . List) element.child,
+ elementsToHTML element.content,
"",
generateLiElems remainder
]
@@ -72,10 +98,19 @@ generateLiElems (element : remainder) =
serializeInlineToHTML :: [InlineText] -> T.Text
serializeInlineToHTML [] = ""
serializeInlineToHTML (Text t : remaining) = escapeText t <> serializeInlineToHTML remaining
-serializeInlineToHTML (Bold elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining]
-serializeInlineToHTML (Italic elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining]
-serializeInlineToHTML (Crossed elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining]
-serializeInlineToHTML (InlineCode code : remaining) = T.concat ["", escapeText code, "", serializeInlineToHTML remaining]
-serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat [" T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "", serializeInlineToHTML remaining]
-serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["
T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML remaining]
+serializeInlineToHTML ((Bold elems attrs) : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining]
+serializeInlineToHTML ((Italic elems attrs) : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining]
+serializeInlineToHTML ((Crossed elems attrs) : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining]
+serializeInlineToHTML ((InlineCode code attrs) : remaining) = T.concat ["", escapeText code, "", serializeInlineToHTML remaining]
+serializeInlineToHTML (Link {linkText, url, title, misc_attrs} : remaining) = T.concat [" T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "", serializeInlineToHTML remaining]
+serializeInlineToHTML (Image {altText, url, title, misc_attrs} : remaining) = T.concat ["
T.concat ["title=\"", escapeText t, "\""]) title, handleAttrs misc_attrs, ">", serializeInlineToHTML remaining]
serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining
+serializeInlineToHTML ((Superscript _ _) : remaining) = error "TODO"
+serializeInlineToHTML ((Subscript _ _) : remaining) = error "TODO"
+serializeInlineToHTML ((Highlighted _ _) : remaining) = error "TODO"
+serializeInlineToHTML ((Insert _ _) : remaining) = error "TODO"
+serializeInlineToHTML ((Math _ _) : remaining) = error "TODO"
+serializeInlineToHTML ((FootnoteReference {}) : remaining) = error "TODO"
+serializeInlineToHTML ((Symbol _) : remaining) = error "TODO"
+serializeInlineToHTML ((RawInline _ _) : remaining) = error "TODO"
+serializeInlineToHTML ((Span _ _) : remaining) = error "TODO"
diff --git a/src/IR.hs b/src/IR.hs
index e9dbe63..6ecbc3f 100644
--- a/src/IR.hs
+++ b/src/IR.hs
@@ -14,12 +14,10 @@ data Element
HTML HTML
| Paragraph Paragraph Attrs
| HorizontalRule
- | -- TODO
- Table Table Attrs
+ | Table Table Attrs
| -- Djot :::
Container [Element] Attrs
- | -- TODO
- Footnote Footnote Attrs
+ | Footnote Footnote Attrs
| DescriptionList DescriptionList Attrs
| RawBlock RawBlock Attrs
| TaskList TaskList Attrs
@@ -39,15 +37,15 @@ data Code = C
}
deriving (Show)
-data BlockQuote = Q [Element] deriving (Show)
+newtype BlockQuote = Q [Element] deriving (Show)
-data ListItem = LI
- { content :: [Element], -- Flatten continuations into here
- child :: Maybe List
+newtype ListItem = LI
+ -- children are just more elements
+ { content :: [Element] -- Flatten continuations into here
}
deriving (Show)
-data ListType = Ordered {start_number :: Text, style :: Text} | Unordered {style :: Text} deriving (Show)
+data ListType = Ordered {start_number :: Maybe Int, style :: Maybe Text} | Unordered {style :: Maybe Text} deriving (Show)
data List = L
{ list_type :: ListType,
@@ -64,7 +62,7 @@ newtype HTML
newtype Paragraph = P [InlineText] deriving (Show)
data InlineText
- = Text Text Attrs -- Combined Normal and Escaped
+ = Text Text -- Combined Normal and Escaped
| Bold [InlineText] Attrs
| Italic [InlineText] Attrs
| Crossed [InlineText] Attrs
diff --git a/src/Markdown.hs b/src/Markdown.hs
index 7c682a9..8f69d01 100644
--- a/src/Markdown.hs
+++ b/src/Markdown.hs
@@ -65,16 +65,24 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow
where
between' disallow start end middle_piece = between start end $ many ((notFollowedBy ((try $ void end) <|> disallow)) *> middle_piece)
- strikethrough disallow = Crossed <$> (between' disallow (string "~~") (void $ string "~~") (inlineText' (disallow <|> (void $ string "~~"))))
+ strikethrough disallow = do
+ content <- between' disallow (string "~~") (void $ string "~~") (inlineText' (disallow <|> (void $ string "~~")))
+ pure $ Crossed content emptyAttrs
- bold disallow = Bold <$> (between' disallow (string "**") (disallow <|> (void $ string "**")) (inlineText' (disallow <|> (void $ string "**"))))
+ bold disallow = do
+ content <- between' disallow (string "**") (disallow <|> (void $ string "**")) (inlineText' (disallow <|> (void $ string "**")))
+ pure $ Bold content emptyAttrs
italic :: (HasCallStack) => Parser s m () -> Parser s m InlineText
- italic disallow = Italic <$> (between' disallow (char '*') ((void $ char '*') <|> disallow) (inlineText' (disallow <|> (void $ char '*'))))
+ italic disallow = do
+ content <- between' disallow (char '*') ((void $ char '*') <|> disallow) (inlineText' (disallow <|> (void $ char '*')))
+ pure $ Italic content emptyAttrs
underlined disallow = Underlined <$> (between' disallow (string "__") ((void $ string "__") <|> disallow) (inlineText' (disallow <|> (void $ string "__"))))
- code = InlineCode . T.pack <$> (between' disallow (char '`') (char '`') ((notFollowedBy lineEnding) *> anySingle))
+ code = do
+ contents <- T.pack <$> between' disallow (char '`') (char '`') (notFollowedBy lineEnding *> anySingle)
+ pure $ InlineCode contents emptyAttrs
link :: (HasCallStack) => Parser s m () -> Parser s m InlineText
link disallow = do
linkText <- between' disallow (char '[') ((void $ char ']') <|> disallow) (inlineText' (disallow <|> (void $ char ']')))
@@ -88,7 +96,7 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow
Nothing -> pure Nothing
char ')'
pure (url, title)
- pure Link {linkText, url, title}
+ pure Link {linkText, url, title, misc_attrs = emptyAttrs}
image disallow = do
logDebug "image:before excl"
@@ -102,7 +110,7 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow
Link {linkText = [Text altText], url, title} -> pure (altText, url, title)
Link {linkText = [], url, title} -> pure ("", url, title)
_ -> fail "Image alt text must be normal text, cannot be stylized in any way"
- pure Image {altText, url, title}
+ pure Image {altText, url, title, misc_attrs = emptyAttrs}
inline_html =
HTMLInline <$> do
@@ -125,7 +133,7 @@ headingBlock = do
heading_level <- length <$> (some $ char '#')
optional spaceChar
text <- many ((notFollowedBy blockEnding) *> inlineText)
- pure $ Heading $ H {level = heading_level, text}
+ pure $ Heading (H {level = heading_level, text}) emptyAttrs
fencedCodeBlock :: (Logger m, Characters s) => Parser s m Element
fencedCodeBlock = between (string "```") (string "```") $ do
@@ -136,7 +144,9 @@ fencedCodeBlock = between (string "```") (string "```") $ do
pure $ Code $ C {language, code}
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
-blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
+blockquoteBlock = do
+ content <- Q . concat <$> some blockquoteLine
+ pure $ BlockQuote content
where
blockquoteLine = do
char '>'
@@ -145,7 +155,7 @@ blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
-- this dance with optional and notFollowedBy is done so we
-- aren't accidentally consuming part of a block ending
(optional ((notFollowedBy blockEnding) *> lineEnding))
- pure ret
+ pure [(Paragraph $ P ret) emptyAttrs]
-- type of list the parser returns
-- parser which grabs the prefix for each item of the list
@@ -154,7 +164,7 @@ blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
listBlock :: (Logger m, Characters s) => ListType -> Parser s m prefix -> (Int -> Parser s m List) -> Int -> Parser s m Element
listBlock list_type prefix child_parser_factory nest_level = do
items <- some $ listItem
- pure $ List $ L {list_type, items}
+ pure $ List (L {list_type, items}) emptyAttrs
where
listItem = do
count nest_level ((try $ void $ char '\t') <|> (void $ (count 4 $ char ' ')))
@@ -164,23 +174,23 @@ listBlock list_type prefix child_parser_factory nest_level = do
optional ((notFollowedBy blockEnding) *> lineEnding)
child <- optional $ child_parser_factory $ nest_level + 1
-
- pure $ LI {content, child}
+ error "TODO, child list handling works different now, child needs to be combined into content"
+ pure $ LI {content = [Paragraph (P content) emptyAttrs]}
unorderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
-unorderedListBlock = listBlock Unordered unordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
+unorderedListBlock = listBlock Unordered {style = Nothing} unordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
where
unordered_prefix = (choice $ map char "*-+") *> (notFollowedBy newline *> spaceChar)
-- not exhaustive but we know listBlock is returning a List
- unwrap (List l) = l
+ unwrap (List l _) = l
orderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
-orderedListBlock = listBlock Ordered ordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
+orderedListBlock = listBlock Ordered {style = Nothing, start_number = Nothing} ordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
where
-- regex equivalent: [0-9]+[.)]\s?
ordered_prefix = (some digitChar) *> (char '.' <|> char ')') *> (notFollowedBy newline *> spaceChar)
-- not exhaustive but we know listBlock is returning a List
- unwrap (List l) = l
+ unwrap (List l _) = l
htmlBlock :: forall m s. (Logger m, Characters s) => Parser s m Element
htmlBlock = do
@@ -216,4 +226,6 @@ htmlBlock = do
pure $ mconcat [name, "=\"", value, "\""]
paragraphBlock :: (Logger m, Characters s) => Parser s m Element
-paragraphBlock = Paragraph . P <$> (many ((notFollowedBy blockEnding) *> inlineText))
+paragraphBlock = do
+ content <- P <$> many ((notFollowedBy blockEnding) *> inlineText)
+ pure $ Paragraph content emptyAttrs