Compare commits

..

No commits in common. "d61fd86b76d206670c34086fadaad8c1bc8a749c" and "4893b40cc981f3ec0db1ee9ef2276743477b1935" have entirely different histories.

5 changed files with 67 additions and 455 deletions

View file

@ -1,9 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module HTML (compileToHTML) where
import Data.Text
import IR
compileToHTML :: Document -> Text
compileToHTML = const ""

View file

@ -148,7 +148,8 @@ rss =
-- Shake.putInfo $ "Built " <> target -- Shake.putInfo $ "Built " <> target
readPost :: FilePath -> Action Post readPost :: FilePath -> Action Post
readPost postPath = case FP.takeExtension postPath of readPost postPath = do
case FP.takeExtension postPath of
".md" -> readMarkdownPost postPath ".md" -> readMarkdownPost postPath
_ -> error $ "unknown file extension for file" <> postPath _ -> error $ "unknown file extension for file" <> postPath

View file

@ -2,461 +2,91 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
module Markdown (document, metadata) where module Markdown (markdownParser) where
import Control.Applicative (many, optional, some, (<|>)) import Data.Functor
import Control.Monad (guard, void, when) import Data.Text
import Data.Char (isAlpha)
import Data.Text (Text)
import qualified Data.Text as T
import IR import IR
import Text.Parsec hiding (Line, many, optional, (<|>)) import Text.Parsec
import Text.Parsec.String (Parser) import Text.Parsec.Combinator
metadata :: Parser Text type Parser a = forall s u m. (Stream s m Char) => ParsecT s u m a
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> noneOf "-") <* bound
where
bound = string "---"
document :: Parser Document markdownParser :: Parser Document
document = Doc <$> many element <* eof markdownParser = Doc <$> many block
element :: Parser Element block :: Parser Element
element = block = choice [heading, codeBlock, quoteBlock, list, table, htmlBlock, paragraph, blankLine]
choice
[ try headingBlock,
try fencedCodeBlock,
try indentedCodeBlock,
try blockquoteBlock,
try unorderedListBlock,
try orderedListBlock,
try horizontalRuleBlock,
try htmlBlock,
try blankLines, -- Consume blank lines but don't add to AST
paragraphBlock
]
-- Blank lines (consumed but not stored) heading :: Parser Element
blankLines :: Parser Element heading = pure $ Heading $ H {level = 1, text = ""}
blankLines = do
skipMany1 blankLine
element -- Parse the next element
blankLine :: Parser () codeBlock :: Parser Element
blankLine = do codeBlock = pure $ Code $ C {language = "", code = ""}
many (char ' ' <|> char '\t')
lineEnding
pure ()
-- Heading Block quoteBlock :: Parser Element
headingBlock :: Parser Element quoteBlock = pure $ BlockQuote $ Q ""
headingBlock = do
hashes <- some (char '#')
let level = length hashes
guard (level <= 6)
many (char ' ' <|> char '\t')
content <- manyTill inlineElement (try lineEnding)
pure $ Heading $ H level content
-- Fenced Code Block list :: Parser Element
fencedCodeBlock :: Parser Element list = pure $ List $ L {list_type = Ordered, items = []}
fencedCodeBlock = do
fence <- string "```" <|> string "~~~"
lang <- optionMaybe languageInfo
lineEnding
codeLines <- manyTill codeLine (try $ string fence >> lineEnding)
pure $ Code $ C lang (T.pack $ unlines codeLines)
languageInfo :: Parser Text table :: Parser Element
languageInfo = T.pack <$> many1 (alphaNum <|> char '-' <|> char '+' <|> char '.') table = pure $ Table $ T {header = TH [], rows = []}
codeLine :: Parser String
codeLine = do
line <- many (noneOf "\n\r")
lineEnding
pure line
-- Indented Code Block
indentedCodeBlock :: Parser Element
indentedCodeBlock = do
lines' <- some indentedLine
pure $ Code $ C Nothing (T.pack $ unlines lines')
where
indentedLine = do
count 4 (char ' ' <|> char '\t')
line <- many (noneOf "\n\r")
lineEnding
pure line
-- Blockquote Block
blockquoteBlock :: Parser Element
blockquoteBlock = do
lines' <- some blockquoteLine
pure $ BlockQuote $ Q (concat lines')
where
blockquoteLine = do
char '>'
optional (char ' ')
content <- manyTill inlineElement (try lineEnding)
pure content
-- Horizontal Rule Block
horizontalRuleBlock :: Parser Element
horizontalRuleBlock = do
choice
[ try (count 3 (char '*') >> many (char ' ' <|> char '*')),
try (count 3 (char '-') >> many (char ' ' <|> char '-')),
try (count 3 (char '_') >> many (char ' ' <|> char '_'))
]
lineEnding
pure HorizontalRule
-- Unordered List Block
unorderedListBlock :: Parser Element
unorderedListBlock = do
items <- some unorderedListItem
pure $ List $ L Unordered items
unorderedListItem :: Parser ListItem
unorderedListItem = do
oneOf "*-+"
char ' ' <|> char '\t'
content <- manyTill inlineElement (try lineEnding)
continuations <- many listContinuation
children <- many (try indentedList)
pure $ LI (content ++ concat continuations) children
listContinuation :: Parser [InlineText]
listContinuation = do
count 2 (char ' ' <|> char '\t')
many (char ' ' <|> char '\t')
notFollowedBy (oneOf "*-+")
notFollowedBy (digit >> char '.')
content <- manyTill inlineElement (try lineEnding)
pure content
indentedList :: Parser List
indentedList = do
void $ count 2 (char ' ' <|> char '\t')
choice [try indentedUnorderedList, indentedOrderedList]
indentedUnorderedList :: Parser List
indentedUnorderedList = do
items <- some (try $ indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t')))
pure $ L Unordered items
indentedOrderedList :: Parser List
indentedOrderedList = do
items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t')))
pure $ L Ordered items
indentedListItem :: Parser () -> Parser ListItem
indentedListItem marker = do
marker
content <- manyTill inlineElement (try lineEnding)
pure $ LI content []
-- Ordered List Block
orderedListBlock :: Parser Element
orderedListBlock = do
items <- some orderedListItem
pure $ List $ L Ordered items
orderedListItem :: Parser ListItem
orderedListItem = do
some digit
char '.'
char ' ' <|> char '\t'
content <- manyTill inlineElement (try lineEnding)
continuations <- many listContinuation
children <- many (try indentedList)
pure $ LI (content ++ concat continuations) children
-- HTML Block
htmlBlock :: Parser Element htmlBlock :: Parser Element
htmlBlock = char '<' *> choice [try htmlCommentBlock, try htmlDeclarationBlock, htmlTagBlock] htmlBlock = pure $ HTML $ Raw ""
htmlCommentBlock :: Parser Element paragraph :: Parser Element
htmlCommentBlock = do paragraph = do
string "!--" first <- paragraphLine
content <- manyTill anyChar (try $ string "-->") rem <- many paragraphContinuation
lineEnding let combined = Prelude.concat (first : rem)
pure $ HTML $ HTMLTag "!--" [] (T.pack content) pure $ Paragraph $ P combined
htmlDeclarationBlock :: Parser Element paragraphLine :: Parser [InlineText]
htmlDeclarationBlock = do paragraphLine = many inlineText <* endOfLine
char '!'
decl <- some (satisfy isAlpha)
rest <- many (noneOf ">\n\r")
char '>'
lineEnding
pure $ HTML $ HTMLTag (T.pack $ "!" ++ decl) [] (T.pack rest)
htmlTagBlock :: Parser Element paragraphContinuation :: Parser [InlineText]
htmlTagBlock = do paragraphContinuation = notFollowedBy blockElemStart *> paragraphLine
name <- Markdown.tagName
attrs <- many (try $ wsParser >> attribute)
optional wsParser
selfClose <- option False (char '/' >> pure True)
char '>'
content <-
if selfClose
then pure ""
else manyTill anyChar (try $ string "</" >> string name >> char '>')
when (not selfClose) lineEnding
pure $ HTML $ HTMLTag (T.pack name) attrs (T.pack content)
tagName :: Parser String inlineText :: Parser InlineText
tagName = do inlineText = choice [emphasis, strong, inlineCode, link, image, inlineHTML, escapedChar, plainText]
first <- satisfy isAlpha
rest <- many (alphaNum <|> char '-' <|> char ':')
pure (first : rest)
attribute :: Parser (Text, Maybe Text) plainText :: Parser InlineText
attribute = do -- abnf is very specific about what's allowed due to actual ABNF not allowing negation but I'm lazy
name <- attributeName plainText = fmap (Normal . pack) $ many $ noneOf "*_`[]()<>#+-.!&\\\n"
value <- optionMaybe (char '=' >> attributeValue)
pure (T.pack name, fmap T.pack value)
attributeName :: Parser String escapedChar :: Parser InlineText
attributeName = do escapedChar = char '\\' *> fmap Escaped visibleChar
first <- satisfy isAlpha
rest <- many (alphaNum <|> char '-' <|> char ':')
pure (first : rest)
attributeValue :: Parser String
attributeValue =
choice
[ between (char '"') (char '"') (many $ noneOf "\""),
between (char '\'') (char '\'') (many $ noneOf "'"),
some (noneOf " \t\n\r>\"'=<`")
]
-- Paragraph Block
paragraphBlock :: Parser Element
paragraphBlock = do
-- Make sure we're not starting another block element
notFollowedBy (char '#')
notFollowedBy (char '>')
notFollowedBy (oneOf "*-+" >> (char ' ' <|> char '\t'))
notFollowedBy (digit >> char '.')
notFollowedBy (string "```" <|> string "~~~")
notFollowedBy (count 4 (char ' ' <|> char '\t'))
notFollowedBy (count 3 (char '*') <|> count 3 (char '-') <|> count 3 (char '_'))
notFollowedBy (char '<')
content <- some inlineElement
lineEnding
pure $ Paragraph $ P content
-- Inline Elements
inlineElement :: Parser InlineText
inlineElement =
choice
[ try strong,
try emphasis,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainText
]
-- Strong (Bold)
strong :: Parser InlineText
strong = strongAsterisk <|> strongUnderscore
strongAsterisk :: Parser InlineText
strongAsterisk = do
string "**"
content <- some (notFollowedBy (string "**") >> inlineElement)
string "**"
pure $ Bold content
strongUnderscore :: Parser InlineText
strongUnderscore = do
string "__"
content <- some (notFollowedBy (string "__") >> inlineElement)
string "__"
pure $ Bold content
-- Emphasis (Italic)
emphasis :: Parser InlineText
emphasis = emphasisAsterisk <|> emphasisUnderscore
emphasisAsterisk :: Parser InlineText
emphasisAsterisk = do
char '*'
notFollowedBy (char '*')
content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk)
char '*'
pure $ Italic content
emphasisUnderscore :: Parser InlineText
emphasisUnderscore = do
char '_'
notFollowedBy (char '_')
content <- some (notFollowedBy (char '_') >> inlineElementNoUnderscore)
char '_'
pure $ Italic content
inlineElementNoAsterisk :: Parser InlineText
inlineElementNoAsterisk =
choice
[ try strong,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainTextNoAsterisk
]
inlineElementNoUnderscore :: Parser InlineText
inlineElementNoUnderscore =
choice
[ try strong,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainTextNoUnderscore
]
-- Code Span
codeSpan :: Parser InlineText
codeSpan =
choice
[ try tripleBacktick,
try doubleBacktick,
singleBacktick
]
where
singleBacktick = do
char '`'
content <- many (noneOf "`\n\r")
char '`'
pure $ InlineCode (T.pack content)
doubleBacktick = do
string "``"
content <- manyTill anyChar (try $ string "``")
pure $ InlineCode (T.pack content)
tripleBacktick = do
string "```"
content <- manyTill anyChar (try $ string "```")
pure $ InlineCode (T.pack content)
-- Image
image :: Parser InlineText
image = do
char '!'
char '['
alt <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
char ']'
(url, title) <- linkDestination
pure $ Image alt url title
-- Link
link :: Parser InlineText
link = do
char '['
content <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
char ']'
(url, title) <- linkDestination
pure $ Link content url title
inlineElementNoBracket :: Parser InlineText
inlineElementNoBracket =
choice
[ try strong,
try emphasis,
try codeSpan,
try htmlInline,
try escapedChar,
plainTextNoBracket
]
linkDestination :: Parser (Text, Maybe Text)
linkDestination = directLink <|> referenceLink
where
directLink = do
char '('
url <- many (noneOf " \t\n\r)")
title <- optionMaybe (try $ some (char ' ' <|> char '\t') >> titleParser)
char ')'
pure (T.pack url, title)
referenceLink = do
char '['
ref <- some (alphaNum <|> char ' ' <|> char '\t')
char ']'
-- For simplicity, we're not resolving references here
-- In a real implementation, you'd look up the reference
pure (T.pack $ "[" ++ ref ++ "]", Nothing)
titleParser :: Parser Text
titleParser =
T.pack
<$> choice
[ between (char '"') (char '"') (many $ noneOf "\""),
between (char '\'') (char '\'') (many $ noneOf "'"),
between (char '(') (char ')') (many $ noneOf ")")
]
-- HTML Inline
htmlInline :: Parser InlineText htmlInline :: Parser InlineText
htmlInline = do htmlInline = do
char '<' char '<'
name <- Markdown.tagName tagName <- name
attrs <- many (try $ wsParser >> attribute) remaining <- htmlInlineRemainder
optional wsParser whiteSpace
_ <- option False (char '/' >> pure True)
char '>' char '>'
pure $ HTMLInline (T.pack name) attrs let remainingTagText = foldl' (\ongoing current -> ongoing ++ ' ' : current) "" remaining
-- Escaped Character pure $ HTMLIn $ pack $ '<' : name ++ remaining
escapedChar :: Parser InlineText where
escapedChar = do htmlInlineRemainder = many $ whiteSpace *> attribute
char '\\' name = many $ choice [alphaNum, char '-', char ':']
c <- satisfy (\x -> x >= '!' && x <= '~') value = do
pure $ Text (T.singleton c) char '"'
l <- letter
rem <- many $ choice [alphaNum, char '-', char ':']
char '"'
pure '"' : l : rem ++ "\""
attribute = do
attrName <- name
char '='
attrValue <- value
pure attrName ++ ('=' : attrValue)
-- Plain Text whiteSpace :: Parser Text
plainText :: Parser InlineText whiteSpace = pack <$> many space
plainText = Text . T.pack <$> some plainTextChar
plainTextChar :: Parser Char visibleChar :: Parser Char
plainTextChar = satisfy $ \c -> -- technically more strict but I'm just going to hope I never have to deal with that
(c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' ' visibleChar = anyChar
plainTextNoAsterisk :: Parser InlineText
plainTextNoAsterisk =
Text . T.pack
<$> some
( satisfy $ \c ->
(c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' '
)
plainTextNoUnderscore :: Parser InlineText
plainTextNoUnderscore =
Text . T.pack
<$> some
( satisfy $ \c ->
not (c `elem` ("_*`[!<\\\n\r" :: String)) && c >= ' '
)
plainTextNoBracket :: Parser InlineText
plainTextNoBracket =
Text . T.pack
<$> some
( satisfy $ \c ->
not (c `elem` ("]_*`[!<\\\n\r" :: String)) && c >= ' '
)
-- Helper Parsers
lineEnding :: Parser ()
lineEnding = void (try (string "\r\n") <|> try (string "\n") <|> string "\r")
wsParser :: Parser ()
wsParser = void $ some (char ' ' <|> char '\t')

View file

@ -7,7 +7,6 @@ import qualified Data.Aeson as A
import Data.List (find) import Data.List (find)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import Data.Time import Data.Time
import Data.Time.Format.ISO8601 (iso8601Show) import Data.Time.Format.ISO8601 (iso8601Show)
import Data.Yaml.Aeson import Data.Yaml.Aeson
@ -15,11 +14,8 @@ import Development.Shake (Action)
import qualified Development.Shake as Shake import qualified Development.Shake as Shake
import Development.Shake.FilePath ((<.>), (</>)) import Development.Shake.FilePath ((<.>), (</>))
import qualified Development.Shake.FilePath as FP import qualified Development.Shake.FilePath as FP
import HTML
import Markdown
import Text.Pandoc (Block (Plain), Meta (..), MetaValue (..), Pandoc (..)) import Text.Pandoc (Block (Plain), Meta (..), MetaValue (..), Pandoc (..))
import qualified Text.Pandoc as Pandoc import qualified Text.Pandoc as Pandoc
import Text.Parsec hiding (Error)
import Types import Types
indexHtmlOutputPath :: FilePath -> FilePath indexHtmlOutputPath :: FilePath -> FilePath
@ -40,12 +36,6 @@ indexHtmlMarkdownSourcePath =
markdownToHtml :: (FromJSON a) => FilePath -> Action (a, Text) markdownToHtml :: (FromJSON a) => FilePath -> Action (a, Text)
markdownToHtml filePath = do markdownToHtml filePath = do
content <- Shake.readFile' filePath
let Right (metadataText, document) = parse (liftA2 (,) Markdown.metadata Markdown.document) filePath content
let Right metadata = decodeEither' $ encodeUtf8 metadataText
pure (metadata, compileToHTML document)
markdownToHtml_ filePath = do
content <- Shake.readFile' filePath content <- Shake.readFile' filePath
Shake.quietly . Shake.traced "Markdown to HTML" $ do Shake.quietly . Shake.traced "Markdown to HTML" $ do
pandoc@(Pandoc meta _) <- pandoc@(Pandoc meta _) <-

View file

@ -29,7 +29,7 @@ executable psb
-- .hs or .lhs file containing the Main module. -- .hs or .lhs file containing the Main module.
main-is: Main.hs main-is: Main.hs
other-modules: Config Utilities Templates Types IR Markdown Restruct HTML other-modules: Config Utilities Templates Types IR Markdown Restruct
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric