Compare commits

..

3 commits

5 changed files with 61 additions and 135 deletions

View file

@ -15,14 +15,14 @@ compileToHTML (Doc elements) = T.concat $ map elementToHTML elements
elementToHTML :: Element -> T.Text
elementToHTML (Heading (H {level, text})) = T.concat ["<h", tshow level, ">", serializeInlineToHTML text, "</h", tshow level, ">"]
--
elementToHTML (Code (C {language = m_language, code})) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">"]
elementToHTML (Code (C {language = m_language, code})) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", code, "</code>"]
where
language = fromMaybe "" m_language
elementToHTML (BlockQuote (Q elems)) = T.concat ["<blockquote>", serializeInlineToHTML elems, "</blockquote>"]
elementToHTML (List (L {list_type = Ordered, items})) = T.concat ["<ol>", generateLiElems items, "</ol>"]
elementToHTML (List (L {list_type = Unordered, items})) = T.concat ["<ul>", generateLiElems items, "</ul>"]
elementToHTML (HTML (HTMLTag {tagName, attributes, html_content})) = T.concat ["<", tagName, T.concat $ map (\(name, value) -> T.concat [name, "=", "\"", fromMaybe "" value, "\""]) attributes, ">", html_content, "</", tagName, ">"]
elementToHTML (Paragraph (P snippets)) = serializeInlineToHTML snippets
elementToHTML (HTML (HTMLTag {html_content})) = html_content
elementToHTML (Paragraph (P snippets)) = T.concat ["<p>", serializeInlineToHTML snippets, "</p>"]
elementToHTML HorizontalRule = "<hr>"
generateLiElems :: [ListItem] -> T.Text
@ -34,11 +34,16 @@ generateLiElems (LI {content, children} : remainder) =
-- if they aren't this is fucked
serializeInlineToHTML content,
T.concat $ map (elementToHTML . List) children,
"</li>"
"</li>",
generateLiElems remainder
]
serializeInlineToHTML :: [InlineText] -> T.Text
serializeInlineToHTML [] = ""
serializeInlineToHTML (Text t : rem) = t <> serializeInlineToHTML rem
serializeInlineToHTML (Bold elems : rem) = T.concat ["<b>", serializeInlineToHTML elems, "</b>", serializeInlineToHTML rem]
serializeInlineToHTML (Italic elems : rem) = T.concat ["<i>", serializeInlineToHTML elems, "</i>", serializeInlineToHTML rem]
serializeInlineToHTML (Text t : remaining) = t <> serializeInlineToHTML remaining
serializeInlineToHTML (Bold elems : remaining) = T.concat ["<b>", serializeInlineToHTML elems, "</b>", serializeInlineToHTML remaining]
serializeInlineToHTML (Italic elems : remaining) = T.concat ["<i>", serializeInlineToHTML elems, "</i>", serializeInlineToHTML remaining]
serializeInlineToHTML (InlineCode code : remaining) = T.concat ["<code>", code, "</code>", serializeInlineToHTML remaining]
serializeInlineToHTML (Link {linkText, url, title} : remaining) = T.concat ["<a href=\"", url, "\" ", maybe "" (\t -> T.concat ["title=\"", t, "\""]) title, "\">", serializeInlineToHTML linkText, "</a>", serializeInlineToHTML remaining]
serializeInlineToHTML (Image {altText, url, title} : remaining) = T.concat ["<img src=\">", url, "\" alt=\"", altText, "\"", maybe "" (\t -> T.concat ["title=\"", t, "\""]) title, ">", serializeInlineToHTML remaining]
serializeInlineToHTML (HTMLInline {inline_html_content} : remaining) = inline_html_content <> serializeInlineToHTML remaining

View file

@ -3,6 +3,7 @@ module IR where
import Data.Text
newtype Document = Doc [Element]
deriving (Show)
data Element
= Heading Heading
@ -12,6 +13,7 @@ data Element
| HTML HTML
| Paragraph Paragraph
| HorizontalRule
deriving (Show)
-- Removed: BlankLine
@ -19,34 +21,37 @@ data Heading = H
{ level :: Int,
text :: [InlineText]
}
deriving (Show)
data Code = C
{ language :: Maybe Text,
code :: Text
}
deriving (Show)
data BlockQuote = Q [InlineText]
data BlockQuote = Q [InlineText] deriving (Show)
data ListItem = LI
{ content :: [InlineText], -- Flatten continuations into here
children :: [List]
}
deriving (Show)
data ListType = Ordered | Unordered
data ListType = Ordered | Unordered deriving (Show)
data List = L
{ list_type :: ListType,
items :: [ListItem]
}
deriving (Show)
data HTML
= HTMLTag
{ tagName :: Text,
attributes :: [(Text, Maybe Text)],
html_content :: Text
{ html_content :: Text
}
deriving (Show)
newtype Paragraph = P [InlineText]
newtype Paragraph = P [InlineText] deriving (Show)
data InlineText
= Text Text -- Combined Normal and Escaped
@ -63,10 +68,8 @@ data InlineText
url :: Text,
title :: Maybe Text
}
| HTMLInline
{ inlineTagName :: Text,
inlineAttributes :: [(Text, Maybe Text)]
}
| HTMLInline {inline_html_content :: Text}
deriving (Show)
-- for processing math
-- https://hackage.haskell.org/package/typst-0.6.1/docs/Typst-Parse.html#v:parseTypst

View file

@ -2,10 +2,11 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module Markdown (document, metadata) where
-- (document, metadata)
module Markdown where
import Control.Applicative (many, optional, some, (<|>))
import Control.Monad (guard, void, when)
import Control.Monad (guard, void)
import Data.Char (isAlpha)
import Data.Text (Text)
import qualified Data.Text as T
@ -174,37 +175,12 @@ orderedListItem = do
-- HTML Block
htmlBlock :: Parser Element
htmlBlock = char '<' *> choice [try htmlCommentBlock, try htmlDeclarationBlock, htmlTagBlock]
htmlCommentBlock :: Parser Element
htmlCommentBlock = do
string "!--"
content <- manyTill anyChar (try $ string "-->")
lineEnding
pure $ HTML $ HTMLTag "!--" [] (T.pack content)
htmlDeclarationBlock :: Parser Element
htmlDeclarationBlock = do
char '!'
decl <- some (satisfy isAlpha)
rest <- many (noneOf ">\n\r")
char '>'
lineEnding
pure $ HTML $ HTMLTag (T.pack $ "!" ++ decl) [] (T.pack rest)
htmlTagBlock :: Parser Element
htmlTagBlock = do
name <- Markdown.tagName
attrs <- many (try $ wsParser >> attribute)
optional wsParser
selfClose <- option False (char '/' >> pure True)
char '>'
content <-
if selfClose
then pure ""
else manyTill anyChar (try $ string "</" >> string name >> char '>')
when (not selfClose) lineEnding
pure $ HTML $ HTMLTag (T.pack name) attrs (T.pack content)
htmlBlock = do
char '<'
-- Capture the entire HTML block as raw text
rest <- manyTill anyChar (try $ char '>' >> lineEnding)
let content = '<' : rest
return $ HTML $ HTMLTag (T.pack content)
tagName :: Parser String
tagName = do
@ -243,10 +219,10 @@ paragraphBlock = do
notFollowedBy (string "```" <|> string "~~~")
notFollowedBy (count 4 (char ' ' <|> char '\t'))
notFollowedBy (count 3 (char '*') <|> count 3 (char '-') <|> count 3 (char '_'))
notFollowedBy (char '<')
-- notFollowedBy (char '<')
content <- some inlineElement
lineEnding
lineEnding <|> eof
pure $ Paragraph $ P content
-- Inline Elements
@ -353,10 +329,10 @@ image :: Parser InlineText
image = do
char '!'
char '['
alt <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
alt <- T.pack <$> many (noneOf "]\n\r")
char ']'
(url, title) <- linkDestination
pure $ Image alt url title
return $ Image {altText = alt, url = url, title = title}
-- Link
link :: Parser InlineText
@ -407,13 +383,9 @@ titleParser =
-- HTML Inline
htmlInline :: Parser InlineText
htmlInline = do
char '<'
name <- Markdown.tagName
attrs <- many (try $ wsParser >> attribute)
optional wsParser
_ <- option False (char '/' >> pure True)
char '>'
pure $ HTMLInline (T.pack name) attrs
start <- char '<'
content <- manyTill anyChar (try $ char '>')
return $ HTMLInline (T.pack (start : content ++ ">"))
-- Escaped Character
escapedChar :: Parser InlineText
@ -428,14 +400,14 @@ plainText = Text . T.pack <$> some plainTextChar
plainTextChar :: Parser Char
plainTextChar = satisfy $ \c ->
(c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' '
(c `notElem` ("*_`[<\\\n\r" :: String)) && c >= ' '
plainTextNoAsterisk :: Parser InlineText
plainTextNoAsterisk =
Text . T.pack
<$> some
( satisfy $ \c ->
(c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' '
(c `notElem` ("*_`[<\\\n\r" :: String)) && c >= ' '
)
plainTextNoUnderscore :: Parser InlineText
@ -443,7 +415,7 @@ plainTextNoUnderscore =
Text . T.pack
<$> some
( satisfy $ \c ->
not (c `elem` ("_*`[!<\\\n\r" :: String)) && c >= ' '
not (c `elem` ("_*`[<\\\n\r" :: String)) && c >= ' '
)
plainTextNoBracket :: Parser InlineText
@ -451,7 +423,7 @@ plainTextNoBracket =
Text . T.pack
<$> some
( satisfy $ \c ->
not (c `elem` ("]_*`[!<\\\n\r" :: String)) && c >= ' '
not (c `elem` ("]_*`[<\\\n\r" :: String)) && c >= ' '
)
-- Helper Parsers

View file

@ -2,8 +2,6 @@ module Utilities where
import Config
import Control.Monad (filterM)
import Data.Aeson (Result (Error, Success))
import qualified Data.Aeson as A
import Data.List (find)
import Data.Text (Text)
import qualified Data.Text as T
@ -17,8 +15,6 @@ import Development.Shake.FilePath ((<.>), (</>))
import qualified Development.Shake.FilePath as FP
import HTML
import Markdown
import Text.Pandoc (Block (Plain), Meta (..), MetaValue (..), Pandoc (..))
import qualified Text.Pandoc as Pandoc
import Text.Parsec hiding (Error)
import Types
@ -41,79 +37,29 @@ indexHtmlMarkdownSourcePath =
markdownToHtml :: (FromJSON a) => FilePath -> Action (a, Text)
markdownToHtml filePath = do
content <- Shake.readFile' filePath
let Right (metadataText, document) = parse (liftA2 (,) Markdown.metadata Markdown.document) filePath content
let Right metadata = decodeEither' $ encodeUtf8 metadataText
let (metadataText, document) = case parse (liftA2 (,) Markdown.metadata Markdown.document) filePath content of
Right (a, b) -> (a, b)
Left e -> error $ show e
let metadata = case decodeEither' $ encodeUtf8 metadataText of
Right m -> m
Left e -> error $ show e
pure (metadata, compileToHTML document)
markdownToHtml_ filePath = do
content <- Shake.readFile' filePath
Shake.quietly . Shake.traced "Markdown to HTML" $ do
pandoc@(Pandoc meta _) <-
runPandoc . Pandoc.readMarkdown readerOptions . T.pack $ content
-- WARNING markdown needs to have no whitespace before/after dashes
-- print meta
meta' <- fromMeta meta
html <- runPandoc . Pandoc.writeHtml5String writerOptions $ pandoc
return (meta', html)
where
readerOptions =
Pandoc.def
{ Pandoc.readerStandalone = True,
Pandoc.readerExtensions = Pandoc.enableExtension Pandoc.Ext_yaml_metadata_block Pandoc.pandocExtensions
}
writerOptions =
Pandoc.def {Pandoc.writerExtensions = Pandoc.pandocExtensions}
fromMeta (Meta meta) =
A.fromJSON . A.toJSON <$> traverse metaValueToJSON meta >>= \case
Success res -> pure res
Error err -> fail $ "json conversion error:" <> err
metaValueToJSON = \case
MetaMap m -> A.toJSON <$> traverse metaValueToJSON m
MetaList m -> A.toJSONList <$> traverse metaValueToJSON m
MetaBool m -> pure $ A.toJSON m
MetaString m -> pure $ A.toJSON $ T.strip m
MetaInlines m -> metaValueToJSON $ MetaBlocks [Plain m]
MetaBlocks m ->
fmap (A.toJSON . T.strip)
. runPandoc
. Pandoc.writePlain Pandoc.def
$ Pandoc mempty m
runPandoc :: Pandoc.PandocIO b -> IO b
runPandoc action =
Pandoc.runIO (Pandoc.setVerbosity Pandoc.ERROR >> action)
>>= either (fail . show) return
now :: Action T.Text
now = Shake.liftIO $ fmap (T.pack . iso8601Show) getCurrentTime
markdownToPost :: FilePath -> Action Post
markdownToPost path = do
content <- Shake.readFile' path
(Pandoc meta _) <-
Shake.liftIO . runPandoc . Pandoc.readMarkdown readerOptions . T.pack $ content
Shake.liftIO $ fromMeta meta
where
readerOptions =
Pandoc.def
{ Pandoc.readerStandalone = True,
Pandoc.readerExtensions = Pandoc.enableExtension Pandoc.Ext_yaml_metadata_block Pandoc.pandocExtensions
}
fromMeta (Meta meta) =
A.fromJSON . A.toJSON <$> traverse metaValueToJSON meta >>= \case
Success res -> pure res
Error err -> fail $ "json conversion error:" <> err
metaValueToJSON = \case
MetaMap m -> A.toJSON <$> traverse metaValueToJSON m
MetaList m -> A.toJSONList <$> traverse metaValueToJSON m
MetaBool m -> pure $ A.toJSON m
MetaString m -> pure $ A.toJSON $ T.strip m
MetaInlines m -> metaValueToJSON $ MetaBlocks [Plain m]
MetaBlocks m ->
fmap (A.toJSON . T.strip)
. runPandoc
. Pandoc.writePlain Pandoc.def
$ Pandoc mempty m
-- TODO: error handling
let postData = case parse Markdown.metadata path content of
Right p -> p
Left e -> error $ show e
let post = case decodeEither' $ encodeUtf8 postData of
Right p -> p
Left e -> error $ show e
pure post
yamlToPost :: FilePath -> Action Post
yamlToPost path = do

View file

@ -35,7 +35,7 @@ executable psb
-- Other library packages from which modules are imported.
-- https://hackage.haskell.org/package/texmath
build-depends: base >=4.17.2.1, mustache >=2.4.2, pandoc >=3.2.1, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text, time, unordered-containers, yaml, parsec >= 3.1.18.0, typst >= 0.6.1, typst-symbols >= 0.1.7
build-depends: base >=4.17.2.1, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text, time, unordered-containers, yaml, parsec >= 3.1.18.0
-- Directories containing source files.
hs-source-dirs: app