Compare commits

..

3 commits

5 changed files with 455 additions and 67 deletions

9
app/HTML.hs Normal file
View file

@ -0,0 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
module HTML (compileToHTML) where
import Data.Text
import IR
compileToHTML :: Document -> Text
compileToHTML = const ""

View file

@ -148,8 +148,7 @@ rss =
-- Shake.putInfo $ "Built " <> target -- Shake.putInfo $ "Built " <> target
readPost :: FilePath -> Action Post readPost :: FilePath -> Action Post
readPost postPath = do readPost postPath = case FP.takeExtension postPath of
case FP.takeExtension postPath of
".md" -> readMarkdownPost postPath ".md" -> readMarkdownPost postPath
_ -> error $ "unknown file extension for file" <> postPath _ -> error $ "unknown file extension for file" <> postPath

View file

@ -2,91 +2,461 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
module Markdown (markdownParser) where module Markdown (document, metadata) where
import Data.Functor import Control.Applicative (many, optional, some, (<|>))
import Data.Text import Control.Monad (guard, void, when)
import Data.Char (isAlpha)
import Data.Text (Text)
import qualified Data.Text as T
import IR import IR
import Text.Parsec import Text.Parsec hiding (Line, many, optional, (<|>))
import Text.Parsec.Combinator import Text.Parsec.String (Parser)
type Parser a = forall s u m. (Stream s m Char) => ParsecT s u m a metadata :: Parser Text
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> noneOf "-") <* bound
where
bound = string "---"
markdownParser :: Parser Document document :: Parser Document
markdownParser = Doc <$> many block document = Doc <$> many element <* eof
block :: Parser Element element :: Parser Element
block = choice [heading, codeBlock, quoteBlock, list, table, htmlBlock, paragraph, blankLine] element =
choice
[ try headingBlock,
try fencedCodeBlock,
try indentedCodeBlock,
try blockquoteBlock,
try unorderedListBlock,
try orderedListBlock,
try horizontalRuleBlock,
try htmlBlock,
try blankLines, -- Consume blank lines but don't add to AST
paragraphBlock
]
heading :: Parser Element -- Blank lines (consumed but not stored)
heading = pure $ Heading $ H {level = 1, text = ""} blankLines :: Parser Element
blankLines = do
skipMany1 blankLine
element -- Parse the next element
codeBlock :: Parser Element blankLine :: Parser ()
codeBlock = pure $ Code $ C {language = "", code = ""} blankLine = do
many (char ' ' <|> char '\t')
lineEnding
pure ()
quoteBlock :: Parser Element -- Heading Block
quoteBlock = pure $ BlockQuote $ Q "" headingBlock :: Parser Element
headingBlock = do
hashes <- some (char '#')
let level = length hashes
guard (level <= 6)
many (char ' ' <|> char '\t')
content <- manyTill inlineElement (try lineEnding)
pure $ Heading $ H level content
list :: Parser Element -- Fenced Code Block
list = pure $ List $ L {list_type = Ordered, items = []} fencedCodeBlock :: Parser Element
fencedCodeBlock = do
fence <- string "```" <|> string "~~~"
lang <- optionMaybe languageInfo
lineEnding
codeLines <- manyTill codeLine (try $ string fence >> lineEnding)
pure $ Code $ C lang (T.pack $ unlines codeLines)
table :: Parser Element languageInfo :: Parser Text
table = pure $ Table $ T {header = TH [], rows = []} languageInfo = T.pack <$> many1 (alphaNum <|> char '-' <|> char '+' <|> char '.')
codeLine :: Parser String
codeLine = do
line <- many (noneOf "\n\r")
lineEnding
pure line
-- Indented Code Block
indentedCodeBlock :: Parser Element
indentedCodeBlock = do
lines' <- some indentedLine
pure $ Code $ C Nothing (T.pack $ unlines lines')
where
indentedLine = do
count 4 (char ' ' <|> char '\t')
line <- many (noneOf "\n\r")
lineEnding
pure line
-- Blockquote Block
blockquoteBlock :: Parser Element
blockquoteBlock = do
lines' <- some blockquoteLine
pure $ BlockQuote $ Q (concat lines')
where
blockquoteLine = do
char '>'
optional (char ' ')
content <- manyTill inlineElement (try lineEnding)
pure content
-- Horizontal Rule Block
horizontalRuleBlock :: Parser Element
horizontalRuleBlock = do
choice
[ try (count 3 (char '*') >> many (char ' ' <|> char '*')),
try (count 3 (char '-') >> many (char ' ' <|> char '-')),
try (count 3 (char '_') >> many (char ' ' <|> char '_'))
]
lineEnding
pure HorizontalRule
-- Unordered List Block
unorderedListBlock :: Parser Element
unorderedListBlock = do
items <- some unorderedListItem
pure $ List $ L Unordered items
unorderedListItem :: Parser ListItem
unorderedListItem = do
oneOf "*-+"
char ' ' <|> char '\t'
content <- manyTill inlineElement (try lineEnding)
continuations <- many listContinuation
children <- many (try indentedList)
pure $ LI (content ++ concat continuations) children
listContinuation :: Parser [InlineText]
listContinuation = do
count 2 (char ' ' <|> char '\t')
many (char ' ' <|> char '\t')
notFollowedBy (oneOf "*-+")
notFollowedBy (digit >> char '.')
content <- manyTill inlineElement (try lineEnding)
pure content
indentedList :: Parser List
indentedList = do
void $ count 2 (char ' ' <|> char '\t')
choice [try indentedUnorderedList, indentedOrderedList]
indentedUnorderedList :: Parser List
indentedUnorderedList = do
items <- some (try $ indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t')))
pure $ L Unordered items
indentedOrderedList :: Parser List
indentedOrderedList = do
items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t')))
pure $ L Ordered items
indentedListItem :: Parser () -> Parser ListItem
indentedListItem marker = do
marker
content <- manyTill inlineElement (try lineEnding)
pure $ LI content []
-- Ordered List Block
orderedListBlock :: Parser Element
orderedListBlock = do
items <- some orderedListItem
pure $ List $ L Ordered items
orderedListItem :: Parser ListItem
orderedListItem = do
some digit
char '.'
char ' ' <|> char '\t'
content <- manyTill inlineElement (try lineEnding)
continuations <- many listContinuation
children <- many (try indentedList)
pure $ LI (content ++ concat continuations) children
-- HTML Block
htmlBlock :: Parser Element htmlBlock :: Parser Element
htmlBlock = pure $ HTML $ Raw "" htmlBlock = char '<' *> choice [try htmlCommentBlock, try htmlDeclarationBlock, htmlTagBlock]
paragraph :: Parser Element htmlCommentBlock :: Parser Element
paragraph = do htmlCommentBlock = do
first <- paragraphLine string "!--"
rem <- many paragraphContinuation content <- manyTill anyChar (try $ string "-->")
let combined = Prelude.concat (first : rem) lineEnding
pure $ Paragraph $ P combined pure $ HTML $ HTMLTag "!--" [] (T.pack content)
paragraphLine :: Parser [InlineText] htmlDeclarationBlock :: Parser Element
paragraphLine = many inlineText <* endOfLine htmlDeclarationBlock = do
char '!'
decl <- some (satisfy isAlpha)
rest <- many (noneOf ">\n\r")
char '>'
lineEnding
pure $ HTML $ HTMLTag (T.pack $ "!" ++ decl) [] (T.pack rest)
paragraphContinuation :: Parser [InlineText] htmlTagBlock :: Parser Element
paragraphContinuation = notFollowedBy blockElemStart *> paragraphLine htmlTagBlock = do
name <- Markdown.tagName
attrs <- many (try $ wsParser >> attribute)
optional wsParser
selfClose <- option False (char '/' >> pure True)
char '>'
content <-
if selfClose
then pure ""
else manyTill anyChar (try $ string "</" >> string name >> char '>')
when (not selfClose) lineEnding
pure $ HTML $ HTMLTag (T.pack name) attrs (T.pack content)
inlineText :: Parser InlineText tagName :: Parser String
inlineText = choice [emphasis, strong, inlineCode, link, image, inlineHTML, escapedChar, plainText] tagName = do
first <- satisfy isAlpha
rest <- many (alphaNum <|> char '-' <|> char ':')
pure (first : rest)
plainText :: Parser InlineText attribute :: Parser (Text, Maybe Text)
-- abnf is very specific about what's allowed due to actual ABNF not allowing negation but I'm lazy attribute = do
plainText = fmap (Normal . pack) $ many $ noneOf "*_`[]()<>#+-.!&\\\n" name <- attributeName
value <- optionMaybe (char '=' >> attributeValue)
pure (T.pack name, fmap T.pack value)
escapedChar :: Parser InlineText attributeName :: Parser String
escapedChar = char '\\' *> fmap Escaped visibleChar attributeName = do
first <- satisfy isAlpha
rest <- many (alphaNum <|> char '-' <|> char ':')
pure (first : rest)
attributeValue :: Parser String
attributeValue =
choice
[ between (char '"') (char '"') (many $ noneOf "\""),
between (char '\'') (char '\'') (many $ noneOf "'"),
some (noneOf " \t\n\r>\"'=<`")
]
-- Paragraph Block
paragraphBlock :: Parser Element
paragraphBlock = do
-- Make sure we're not starting another block element
notFollowedBy (char '#')
notFollowedBy (char '>')
notFollowedBy (oneOf "*-+" >> (char ' ' <|> char '\t'))
notFollowedBy (digit >> char '.')
notFollowedBy (string "```" <|> string "~~~")
notFollowedBy (count 4 (char ' ' <|> char '\t'))
notFollowedBy (count 3 (char '*') <|> count 3 (char '-') <|> count 3 (char '_'))
notFollowedBy (char '<')
content <- some inlineElement
lineEnding
pure $ Paragraph $ P content
-- Inline Elements
inlineElement :: Parser InlineText
inlineElement =
choice
[ try strong,
try emphasis,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainText
]
-- Strong (Bold)
strong :: Parser InlineText
strong = strongAsterisk <|> strongUnderscore
strongAsterisk :: Parser InlineText
strongAsterisk = do
string "**"
content <- some (notFollowedBy (string "**") >> inlineElement)
string "**"
pure $ Bold content
strongUnderscore :: Parser InlineText
strongUnderscore = do
string "__"
content <- some (notFollowedBy (string "__") >> inlineElement)
string "__"
pure $ Bold content
-- Emphasis (Italic)
emphasis :: Parser InlineText
emphasis = emphasisAsterisk <|> emphasisUnderscore
emphasisAsterisk :: Parser InlineText
emphasisAsterisk = do
char '*'
notFollowedBy (char '*')
content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk)
char '*'
pure $ Italic content
emphasisUnderscore :: Parser InlineText
emphasisUnderscore = do
char '_'
notFollowedBy (char '_')
content <- some (notFollowedBy (char '_') >> inlineElementNoUnderscore)
char '_'
pure $ Italic content
inlineElementNoAsterisk :: Parser InlineText
inlineElementNoAsterisk =
choice
[ try strong,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainTextNoAsterisk
]
inlineElementNoUnderscore :: Parser InlineText
inlineElementNoUnderscore =
choice
[ try strong,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainTextNoUnderscore
]
-- Code Span
codeSpan :: Parser InlineText
codeSpan =
choice
[ try tripleBacktick,
try doubleBacktick,
singleBacktick
]
where
singleBacktick = do
char '`'
content <- many (noneOf "`\n\r")
char '`'
pure $ InlineCode (T.pack content)
doubleBacktick = do
string "``"
content <- manyTill anyChar (try $ string "``")
pure $ InlineCode (T.pack content)
tripleBacktick = do
string "```"
content <- manyTill anyChar (try $ string "```")
pure $ InlineCode (T.pack content)
-- Image
image :: Parser InlineText
image = do
char '!'
char '['
alt <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
char ']'
(url, title) <- linkDestination
pure $ Image alt url title
-- Link
link :: Parser InlineText
link = do
char '['
content <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
char ']'
(url, title) <- linkDestination
pure $ Link content url title
inlineElementNoBracket :: Parser InlineText
inlineElementNoBracket =
choice
[ try strong,
try emphasis,
try codeSpan,
try htmlInline,
try escapedChar,
plainTextNoBracket
]
linkDestination :: Parser (Text, Maybe Text)
linkDestination = directLink <|> referenceLink
where
directLink = do
char '('
url <- many (noneOf " \t\n\r)")
title <- optionMaybe (try $ some (char ' ' <|> char '\t') >> titleParser)
char ')'
pure (T.pack url, title)
referenceLink = do
char '['
ref <- some (alphaNum <|> char ' ' <|> char '\t')
char ']'
-- For simplicity, we're not resolving references here
-- In a real implementation, you'd look up the reference
pure (T.pack $ "[" ++ ref ++ "]", Nothing)
titleParser :: Parser Text
titleParser =
T.pack
<$> choice
[ between (char '"') (char '"') (many $ noneOf "\""),
between (char '\'') (char '\'') (many $ noneOf "'"),
between (char '(') (char ')') (many $ noneOf ")")
]
-- HTML Inline
htmlInline :: Parser InlineText htmlInline :: Parser InlineText
htmlInline = do htmlInline = do
char '<' char '<'
tagName <- name name <- Markdown.tagName
remaining <- htmlInlineRemainder attrs <- many (try $ wsParser >> attribute)
whiteSpace optional wsParser
_ <- option False (char '/' >> pure True)
char '>' char '>'
let remainingTagText = foldl' (\ongoing current -> ongoing ++ ' ' : current) "" remaining pure $ HTMLInline (T.pack name) attrs
pure $ HTMLIn $ pack $ '<' : name ++ remaining -- Escaped Character
where escapedChar :: Parser InlineText
htmlInlineRemainder = many $ whiteSpace *> attribute escapedChar = do
name = many $ choice [alphaNum, char '-', char ':'] char '\\'
value = do c <- satisfy (\x -> x >= '!' && x <= '~')
char '"' pure $ Text (T.singleton c)
l <- letter
rem <- many $ choice [alphaNum, char '-', char ':']
char '"'
pure '"' : l : rem ++ "\""
attribute = do
attrName <- name
char '='
attrValue <- value
pure attrName ++ ('=' : attrValue)
whiteSpace :: Parser Text -- Plain Text
whiteSpace = pack <$> many space plainText :: Parser InlineText
plainText = Text . T.pack <$> some plainTextChar
visibleChar :: Parser Char plainTextChar :: Parser Char
-- technically more strict but I'm just going to hope I never have to deal with that plainTextChar = satisfy $ \c ->
visibleChar = anyChar (c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' '
plainTextNoAsterisk :: Parser InlineText
plainTextNoAsterisk =
Text . T.pack
<$> some
( satisfy $ \c ->
(c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' '
)
plainTextNoUnderscore :: Parser InlineText
plainTextNoUnderscore =
Text . T.pack
<$> some
( satisfy $ \c ->
not (c `elem` ("_*`[!<\\\n\r" :: String)) && c >= ' '
)
plainTextNoBracket :: Parser InlineText
plainTextNoBracket =
Text . T.pack
<$> some
( satisfy $ \c ->
not (c `elem` ("]_*`[!<\\\n\r" :: String)) && c >= ' '
)
-- Helper Parsers
lineEnding :: Parser ()
lineEnding = void (try (string "\r\n") <|> try (string "\n") <|> string "\r")
wsParser :: Parser ()
wsParser = void $ some (char ' ' <|> char '\t')

View file

@ -7,6 +7,7 @@ import qualified Data.Aeson as A
import Data.List (find) import Data.List (find)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import Data.Time import Data.Time
import Data.Time.Format.ISO8601 (iso8601Show) import Data.Time.Format.ISO8601 (iso8601Show)
import Data.Yaml.Aeson import Data.Yaml.Aeson
@ -14,8 +15,11 @@ import Development.Shake (Action)
import qualified Development.Shake as Shake import qualified Development.Shake as Shake
import Development.Shake.FilePath ((<.>), (</>)) import Development.Shake.FilePath ((<.>), (</>))
import qualified Development.Shake.FilePath as FP import qualified Development.Shake.FilePath as FP
import HTML
import Markdown
import Text.Pandoc (Block (Plain), Meta (..), MetaValue (..), Pandoc (..)) import Text.Pandoc (Block (Plain), Meta (..), MetaValue (..), Pandoc (..))
import qualified Text.Pandoc as Pandoc import qualified Text.Pandoc as Pandoc
import Text.Parsec hiding (Error)
import Types import Types
indexHtmlOutputPath :: FilePath -> FilePath indexHtmlOutputPath :: FilePath -> FilePath
@ -36,6 +40,12 @@ indexHtmlMarkdownSourcePath =
markdownToHtml :: (FromJSON a) => FilePath -> Action (a, Text) markdownToHtml :: (FromJSON a) => FilePath -> Action (a, Text)
markdownToHtml filePath = do markdownToHtml filePath = do
content <- Shake.readFile' filePath
let Right (metadataText, document) = parse (liftA2 (,) Markdown.metadata Markdown.document) filePath content
let Right metadata = decodeEither' $ encodeUtf8 metadataText
pure (metadata, compileToHTML document)
markdownToHtml_ filePath = do
content <- Shake.readFile' filePath content <- Shake.readFile' filePath
Shake.quietly . Shake.traced "Markdown to HTML" $ do Shake.quietly . Shake.traced "Markdown to HTML" $ do
pandoc@(Pandoc meta _) <- pandoc@(Pandoc meta _) <-

View file

@ -29,7 +29,7 @@ executable psb
-- .hs or .lhs file containing the Main module. -- .hs or .lhs file containing the Main module.
main-is: Main.hs main-is: Main.hs
other-modules: Config Utilities Templates Types IR Markdown Restruct other-modules: Config Utilities Templates Types IR Markdown Restruct HTML
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric