diff --git a/psb.cabal b/psb.cabal index eff131a..44c0b79 100644 --- a/psb.cabal +++ b/psb.cabal @@ -36,7 +36,7 @@ test-suite test-markdown-parse hs-source-dirs: tests type: exitcode-stdio-1.0 main-is: Markdown/Parse.hs - build-depends: base >=4.20 && < 4.21, text >= 2.1.2 , megaparsec >= 9.7.0 , transformers >= 0.6.2 , hedgehog >= 1.7 , time, psb + build-depends: base, text, megaparsec, transformers, hedgehog, time, psb default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances default-language: Haskell2010 diff --git a/src/HTML.hs b/src/HTML.hs index 249f846..42efb85 100644 --- a/src/HTML.hs +++ b/src/HTML.hs @@ -34,37 +34,63 @@ genHeaderId header = textSub ' ' = '-' textSub c = c -genHeaderClasses :: Heading -> T.Text -genHeaderClasses = const "" - tshow :: (Show s) => s -> T.Text tshow = T.pack . show compileToHTML :: Document -> T.Text compileToHTML (Doc elements) = T.concat $ map elementToHTML elements +elementsToHTML :: [Element] -> T.Text +elementsToHTML = T.concat . map elementToHTML + elementToHTML :: Element -> T.Text -elementToHTML (Heading header) = T.concat ["", serializeInlineToHTML header.text, ""] +elementToHTML (Heading header attrs) = T.concat ["", serializeInlineToHTML header.text, ""] -- elementToHTML (Code code_block) = T.concat ["
", escapeText code_block.code, "", "
"] where language = fromMaybe "" code_block.language -elementToHTML (BlockQuote (Q elems)) = T.concat ["
", serializeInlineToHTML elems, "
"] -elementToHTML (List (L {list_type = Ordered, items})) = T.concat ["
    ", generateLiElems items, "
"] -elementToHTML (List (L {list_type = Unordered, items})) = T.concat [""] +elementToHTML (BlockQuote (Q elems)) = T.concat ["
", elementsToHTML elems, "
"] +elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = T.concat ["", generateLiElems items, ""] +elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = T.concat ["", generateLiElems items, ""] elementToHTML (HTML (HTMLTag {html_content})) = html_content -elementToHTML (Paragraph (P snippets)) = T.concat ["

", serializeInlineToHTML snippets, "

"] +elementToHTML (Paragraph (P snippets) attrs) = T.concat ["", serializeInlineToHTML snippets, "

"] elementToHTML HorizontalRule = "
" +elementToHTML (Table _ _) = error "TODO" +elementToHTML (Container _ _) = error "TODO" +elementToHTML (Footnote _ _) = error "TODO" +elementToHTML (DescriptionList _ _) = error "TODO" +elementToHTML (RawBlock _ _) = error "TODO" +elementToHTML (TaskList _ _) = error "TODO" + +handleStyle :: T.Text -> T.Text +handleStyle style = T.concat ["type=\"", style, "\""] + +handleStart :: Int -> T.Text +handleStart start = T.concat ["start=\"", T.show start, "\""] + +handleAttrs :: Attrs -> T.Text +handleAttrs Attrs {attrId, attrClasses, attrKV} = + T.concat + [ maybe "" (\id -> "id=\"" <> id <> "\"") attrId, + "class=\"" <> T.intercalate " " attrClasses <> "\"", + T.intercalate " " $ map (\(key, val) -> key <> "=\"" <> val <> "\"") attrKV + ] + +headerAttrs :: Heading -> Attrs -> T.Text +headerAttrs header Attrs {attrId, attrClasses, attrKV} = + T.concat + -- id is overcomplicated due to header having id logic written I don't wanna bother with + [ maybe (genHeaderId header) (\id -> "id=\"" <> id <> "\"") attrId, + "class=\"" <> T.intercalate " " attrClasses <> "\"", + T.intercalate " " $ map (\(key, val) -> key <> "=\"" <> val <> "\"") attrKV + ] generateLiElems :: [ListItem] -> T.Text generateLiElems [] = "" generateLiElems (element : remainder) = T.concat [ "
  • ", - -- We assume child lists are stricly after our contents - -- if they aren't this is fucked - serializeInlineToHTML element.content, - fromMaybe "" $ fmap (elementToHTML . List) element.child, + elementsToHTML element.content, "
  • ", generateLiElems remainder ] @@ -72,10 +98,19 @@ generateLiElems (element : remainder) = serializeInlineToHTML :: [InlineText] -> T.Text serializeInlineToHTML [] = "" serializeInlineToHTML (Text t : remaining) = escapeText t <> serializeInlineToHTML remaining -serializeInlineToHTML (Bold elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] -serializeInlineToHTML (Italic elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] -serializeInlineToHTML (Crossed elems : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] -serializeInlineToHTML (InlineCode code : remaining) = T.concat ["", escapeText code, "", serializeInlineToHTML remaining] -serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat [" T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "", serializeInlineToHTML remaining] -serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["\"", T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML remaining] +serializeInlineToHTML ((Bold elems attrs) : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] +serializeInlineToHTML ((Italic elems attrs) : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] +serializeInlineToHTML ((Crossed elems attrs) : remaining) = T.concat ["", serializeInlineToHTML elems, "", serializeInlineToHTML remaining] +serializeInlineToHTML ((InlineCode code attrs) : remaining) = T.concat ["", escapeText code, "", serializeInlineToHTML remaining] +serializeInlineToHTML (Link {linkText, url, title, misc_attrs} : remaining) = T.concat [" T.concat ["title=\"", escapeText t, "\""]) title, ">", serializeInlineToHTML linkText, "", serializeInlineToHTML remaining] +serializeInlineToHTML (Image {altText, url, title, misc_attrs} : remaining) = T.concat ["\"", T.concat ["title=\"", escapeText t, "\""]) title, handleAttrs misc_attrs, ">", serializeInlineToHTML remaining] serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining +serializeInlineToHTML ((Superscript _ _) : remaining) = error "TODO" +serializeInlineToHTML ((Subscript _ _) : remaining) = error "TODO" +serializeInlineToHTML ((Highlighted _ _) : remaining) = error "TODO" +serializeInlineToHTML ((Insert _ _) : remaining) = error "TODO" +serializeInlineToHTML ((Math _ _) : remaining) = error "TODO" +serializeInlineToHTML ((FootnoteReference {}) : remaining) = error "TODO" +serializeInlineToHTML ((Symbol _) : remaining) = error "TODO" +serializeInlineToHTML ((RawInline _ _) : remaining) = error "TODO" +serializeInlineToHTML ((Span _ _) : remaining) = error "TODO" diff --git a/src/IR.hs b/src/IR.hs index e9dbe63..6ecbc3f 100644 --- a/src/IR.hs +++ b/src/IR.hs @@ -14,12 +14,10 @@ data Element HTML HTML | Paragraph Paragraph Attrs | HorizontalRule - | -- TODO - Table Table Attrs + | Table Table Attrs | -- Djot ::: Container [Element] Attrs - | -- TODO - Footnote Footnote Attrs + | Footnote Footnote Attrs | DescriptionList DescriptionList Attrs | RawBlock RawBlock Attrs | TaskList TaskList Attrs @@ -39,15 +37,15 @@ data Code = C } deriving (Show) -data BlockQuote = Q [Element] deriving (Show) +newtype BlockQuote = Q [Element] deriving (Show) -data ListItem = LI - { content :: [Element], -- Flatten continuations into here - child :: Maybe List +newtype ListItem = LI + -- children are just more elements + { content :: [Element] -- Flatten continuations into here } deriving (Show) -data ListType = Ordered {start_number :: Text, style :: Text} | Unordered {style :: Text} deriving (Show) +data ListType = Ordered {start_number :: Maybe Int, style :: Maybe Text} | Unordered {style :: Maybe Text} deriving (Show) data List = L { list_type :: ListType, @@ -64,7 +62,7 @@ newtype HTML newtype Paragraph = P [InlineText] deriving (Show) data InlineText - = Text Text Attrs -- Combined Normal and Escaped + = Text Text -- Combined Normal and Escaped | Bold [InlineText] Attrs | Italic [InlineText] Attrs | Crossed [InlineText] Attrs diff --git a/src/Markdown.hs b/src/Markdown.hs index 7c682a9..8f69d01 100644 --- a/src/Markdown.hs +++ b/src/Markdown.hs @@ -65,16 +65,24 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow where between' disallow start end middle_piece = between start end $ many ((notFollowedBy ((try $ void end) <|> disallow)) *> middle_piece) - strikethrough disallow = Crossed <$> (between' disallow (string "~~") (void $ string "~~") (inlineText' (disallow <|> (void $ string "~~")))) + strikethrough disallow = do + content <- between' disallow (string "~~") (void $ string "~~") (inlineText' (disallow <|> (void $ string "~~"))) + pure $ Crossed content emptyAttrs - bold disallow = Bold <$> (between' disallow (string "**") (disallow <|> (void $ string "**")) (inlineText' (disallow <|> (void $ string "**")))) + bold disallow = do + content <- between' disallow (string "**") (disallow <|> (void $ string "**")) (inlineText' (disallow <|> (void $ string "**"))) + pure $ Bold content emptyAttrs italic :: (HasCallStack) => Parser s m () -> Parser s m InlineText - italic disallow = Italic <$> (between' disallow (char '*') ((void $ char '*') <|> disallow) (inlineText' (disallow <|> (void $ char '*')))) + italic disallow = do + content <- between' disallow (char '*') ((void $ char '*') <|> disallow) (inlineText' (disallow <|> (void $ char '*'))) + pure $ Italic content emptyAttrs underlined disallow = Underlined <$> (between' disallow (string "__") ((void $ string "__") <|> disallow) (inlineText' (disallow <|> (void $ string "__")))) - code = InlineCode . T.pack <$> (between' disallow (char '`') (char '`') ((notFollowedBy lineEnding) *> anySingle)) + code = do + contents <- T.pack <$> between' disallow (char '`') (char '`') (notFollowedBy lineEnding *> anySingle) + pure $ InlineCode contents emptyAttrs link :: (HasCallStack) => Parser s m () -> Parser s m InlineText link disallow = do linkText <- between' disallow (char '[') ((void $ char ']') <|> disallow) (inlineText' (disallow <|> (void $ char ']'))) @@ -88,7 +96,7 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow Nothing -> pure Nothing char ')' pure (url, title) - pure Link {linkText, url, title} + pure Link {linkText, url, title, misc_attrs = emptyAttrs} image disallow = do logDebug "image:before excl" @@ -102,7 +110,7 @@ inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow Link {linkText = [Text altText], url, title} -> pure (altText, url, title) Link {linkText = [], url, title} -> pure ("", url, title) _ -> fail "Image alt text must be normal text, cannot be stylized in any way" - pure Image {altText, url, title} + pure Image {altText, url, title, misc_attrs = emptyAttrs} inline_html = HTMLInline <$> do @@ -125,7 +133,7 @@ headingBlock = do heading_level <- length <$> (some $ char '#') optional spaceChar text <- many ((notFollowedBy blockEnding) *> inlineText) - pure $ Heading $ H {level = heading_level, text} + pure $ Heading (H {level = heading_level, text}) emptyAttrs fencedCodeBlock :: (Logger m, Characters s) => Parser s m Element fencedCodeBlock = between (string "```") (string "```") $ do @@ -136,7 +144,9 @@ fencedCodeBlock = between (string "```") (string "```") $ do pure $ Code $ C {language, code} blockquoteBlock :: (Logger m, Characters s) => Parser s m Element -blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine) +blockquoteBlock = do + content <- Q . concat <$> some blockquoteLine + pure $ BlockQuote content where blockquoteLine = do char '>' @@ -145,7 +155,7 @@ blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine) -- this dance with optional and notFollowedBy is done so we -- aren't accidentally consuming part of a block ending (optional ((notFollowedBy blockEnding) *> lineEnding)) - pure ret + pure [(Paragraph $ P ret) emptyAttrs] -- type of list the parser returns -- parser which grabs the prefix for each item of the list @@ -154,7 +164,7 @@ blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine) listBlock :: (Logger m, Characters s) => ListType -> Parser s m prefix -> (Int -> Parser s m List) -> Int -> Parser s m Element listBlock list_type prefix child_parser_factory nest_level = do items <- some $ listItem - pure $ List $ L {list_type, items} + pure $ List (L {list_type, items}) emptyAttrs where listItem = do count nest_level ((try $ void $ char '\t') <|> (void $ (count 4 $ char ' '))) @@ -164,23 +174,23 @@ listBlock list_type prefix child_parser_factory nest_level = do optional ((notFollowedBy blockEnding) *> lineEnding) child <- optional $ child_parser_factory $ nest_level + 1 - - pure $ LI {content, child} + error "TODO, child list handling works different now, child needs to be combined into content" + pure $ LI {content = [Paragraph (P content) emptyAttrs]} unorderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element -unorderedListBlock = listBlock Unordered unordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level)) +unorderedListBlock = listBlock Unordered {style = Nothing} unordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level)) where unordered_prefix = (choice $ map char "*-+") *> (notFollowedBy newline *> spaceChar) -- not exhaustive but we know listBlock is returning a List - unwrap (List l) = l + unwrap (List l _) = l orderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element -orderedListBlock = listBlock Ordered ordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level)) +orderedListBlock = listBlock Ordered {style = Nothing, start_number = Nothing} ordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level)) where -- regex equivalent: [0-9]+[.)]\s? ordered_prefix = (some digitChar) *> (char '.' <|> char ')') *> (notFollowedBy newline *> spaceChar) -- not exhaustive but we know listBlock is returning a List - unwrap (List l) = l + unwrap (List l _) = l htmlBlock :: forall m s. (Logger m, Characters s) => Parser s m Element htmlBlock = do @@ -216,4 +226,6 @@ htmlBlock = do pure $ mconcat [name, "=\"", value, "\""] paragraphBlock :: (Logger m, Characters s) => Parser s m Element -paragraphBlock = Paragraph . P <$> (many ((notFollowedBy blockEnding) *> inlineText)) +paragraphBlock = do + content <- P <$> many ((notFollowedBy blockEnding) *> inlineText) + pure $ Paragraph content emptyAttrs