5 changed files with 67 additions and 455 deletions
--- a/app/HTML.hs
+++ b/app/HTML.hs
@ -1,9 +0,0 @@
 {-# LANGUAGE OverloadedStrings #-}
 module HTML (compileToHTML) where
 import Data.Text
 import IR
 compileToHTML :: Document -> Text
 compileToHTML = const ""
--- a/app/Main.hs
+++ b/app/Main.hs
@ -148,7 +148,8 @@ rss =
 -- Shake.putInfo $ "Built " <> target
 readPost :: FilePath -> Action Post
-readPost postPath = case FP.takeExtension postPath of
+readPost postPath = do
  case FP.takeExtension postPath of
    ".md" -> readMarkdownPost postPath
    _ -> error $ "unknown file extension for file" <> postPath
--- a/app/Markdown.hs
+++ b/app/Markdown.hs
@ -2,461 +2,91 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE RankNTypes #-}
-module Markdown (document, metadata) where
+module Markdown (markdownParser) where
-import Control.Applicative (many, optional, some, (<|>))
+import Data.Functor
-import Control.Monad (guard, void, when)
+import Data.Text
 import Data.Char (isAlpha)
 import Data.Text (Text)
 import qualified Data.Text as T
 import IR
-import Text.Parsec hiding (Line, many, optional, (<|>))
+import Text.Parsec
-import Text.Parsec.String (Parser)
+import Text.Parsec.Combinator
-metadata :: Parser Text
+type Parser a = forall s u m. (Stream s m Char) => ParsecT s u m a
 metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> noneOf "-") <* bound
  where
    bound = string "---"
-document :: Parser Document
+markdownParser :: Parser Document
-document = Doc <$> many element <* eof
+markdownParser = Doc <$> many block
-element :: Parser Element
+block :: Parser Element
-element =
+block = choice [heading, codeBlock, quoteBlock, list, table, htmlBlock, paragraph, blankLine]
  choice
    [ try headingBlock,
      try fencedCodeBlock,
      try indentedCodeBlock,
      try blockquoteBlock,
      try unorderedListBlock,
      try orderedListBlock,
      try horizontalRuleBlock,
      try htmlBlock,
      try blankLines, -- Consume blank lines but don't add to AST
      paragraphBlock
    ]
-- Blank lines (consumed but not stored)
+heading :: Parser Element
-blankLines :: Parser Element
+heading = pure $ Heading $ H {level = 1, text = ""}
 blankLines = do
  skipMany1 blankLine
  element -- Parse the next element
-blankLine :: Parser ()
+codeBlock :: Parser Element
-blankLine = do
+codeBlock = pure $ Code $ C {language = "", code = ""}
  many (char ' ' <|> char '\t')
  lineEnding
  pure ()
-- Heading Block
+quoteBlock :: Parser Element
-headingBlock :: Parser Element
+quoteBlock = pure $ BlockQuote $ Q ""
 headingBlock = do
  hashes <- some (char '#')
  let level = length hashes
  guard (level <= 6)
  many (char ' ' <|> char '\t')
  content <- manyTill inlineElement (try lineEnding)
  pure $ Heading $ H level content
-- Fenced Code Block
+list :: Parser Element
-fencedCodeBlock :: Parser Element
+list = pure $ List $ L {list_type = Ordered, items = []}
 fencedCodeBlock = do
  fence <- string "```" <|> string "~~~"
  lang <- optionMaybe languageInfo
  lineEnding
  codeLines <- manyTill codeLine (try $ string fence >> lineEnding)
  pure $ Code $ C lang (T.pack $ unlines codeLines)
-languageInfo :: Parser Text
+table :: Parser Element
-languageInfo = T.pack <$> many1 (alphaNum <|> char '-' <|> char '+' <|> char '.')
+table = pure $ Table $ T {header = TH [], rows = []}
 codeLine :: Parser String
 codeLine = do
  line <- many (noneOf "\n\r")
  lineEnding
  pure line
 -- Indented Code Block
 indentedCodeBlock :: Parser Element
 indentedCodeBlock = do
  lines' <- some indentedLine
  pure $ Code $ C Nothing (T.pack $ unlines lines')
  where
    indentedLine = do
      count 4 (char ' ' <|> char '\t')
      line <- many (noneOf "\n\r")
      lineEnding
      pure line
 -- Blockquote Block
 blockquoteBlock :: Parser Element
 blockquoteBlock = do
  lines' <- some blockquoteLine
  pure $ BlockQuote $ Q (concat lines')
  where
    blockquoteLine = do
      char '>'
      optional (char ' ')
      content <- manyTill inlineElement (try lineEnding)
      pure content
 -- Horizontal Rule Block
 horizontalRuleBlock :: Parser Element
 horizontalRuleBlock = do
  choice
    [ try (count 3 (char '*') >> many (char ' ' <|> char '*')),
      try (count 3 (char '-') >> many (char ' ' <|> char '-')),
      try (count 3 (char '_') >> many (char ' ' <|> char '_'))
    ]
  lineEnding
  pure HorizontalRule
 -- Unordered List Block
 unorderedListBlock :: Parser Element
 unorderedListBlock = do
  items <- some unorderedListItem
  pure $ List $ L Unordered items
 unorderedListItem :: Parser ListItem
 unorderedListItem = do
  oneOf "*-+"
  char ' ' <|> char '\t'
  content <- manyTill inlineElement (try lineEnding)
  continuations <- many listContinuation
  children <- many (try indentedList)
  pure $ LI (content ++ concat continuations) children
 listContinuation :: Parser [InlineText]
 listContinuation = do
  count 2 (char ' ' <|> char '\t')
  many (char ' ' <|> char '\t')
  notFollowedBy (oneOf "*-+")
  notFollowedBy (digit >> char '.')
  content <- manyTill inlineElement (try lineEnding)
  pure content
 indentedList :: Parser List
 indentedList = do
  void $ count 2 (char ' ' <|> char '\t')
  choice [try indentedUnorderedList, indentedOrderedList]
 indentedUnorderedList :: Parser List
 indentedUnorderedList = do
  items <- some (try $ indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t')))
  pure $ L Unordered items
 indentedOrderedList :: Parser List
 indentedOrderedList = do
  items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t')))
  pure $ L Ordered items
 indentedListItem :: Parser () -> Parser ListItem
 indentedListItem marker = do
  marker
  content <- manyTill inlineElement (try lineEnding)
  pure $ LI content []
 -- Ordered List Block
 orderedListBlock :: Parser Element
 orderedListBlock = do
  items <- some orderedListItem
  pure $ List $ L Ordered items
 orderedListItem :: Parser ListItem
 orderedListItem = do
  some digit
  char '.'
  char ' ' <|> char '\t'
  content <- manyTill inlineElement (try lineEnding)
  continuations <- many listContinuation
  children <- many (try indentedList)
  pure $ LI (content ++ concat continuations) children
 -- HTML Block
 htmlBlock :: Parser Element
-htmlBlock = char '<' *> choice [try htmlCommentBlock, try htmlDeclarationBlock, htmlTagBlock]
+htmlBlock = pure $ HTML $ Raw ""
-htmlCommentBlock :: Parser Element
+paragraph :: Parser Element
-htmlCommentBlock = do
+paragraph = do
-  string "!--"
+  first <- paragraphLine
-  content <- manyTill anyChar (try $ string "-->")
+  rem <- many paragraphContinuation
-  lineEnding
+  let combined = Prelude.concat (first : rem)
-  pure $ HTML $ HTMLTag "!--" [] (T.pack content)
+  pure $ Paragraph $ P combined
-htmlDeclarationBlock :: Parser Element
+paragraphLine :: Parser [InlineText]
-htmlDeclarationBlock = do
+paragraphLine = many inlineText <* endOfLine
  char '!'
  decl <- some (satisfy isAlpha)
  rest <- many (noneOf ">\n\r")
  char '>'
  lineEnding
  pure $ HTML $ HTMLTag (T.pack $ "!" ++ decl) [] (T.pack rest)
-htmlTagBlock :: Parser Element
+paragraphContinuation :: Parser [InlineText]
-htmlTagBlock = do
+paragraphContinuation = notFollowedBy blockElemStart *> paragraphLine
  name <- Markdown.tagName
  attrs <- many (try $ wsParser >> attribute)
  optional wsParser
  selfClose <- option False (char '/' >> pure True)
  char '>'
  content <-
    if selfClose
      then pure ""
      else manyTill anyChar (try $ string "</" >> string name >> char '>')
  when (not selfClose) lineEnding
  pure $ HTML $ HTMLTag (T.pack name) attrs (T.pack content)
-tagName :: Parser String
+inlineText :: Parser InlineText
-tagName = do
+inlineText = choice [emphasis, strong, inlineCode, link, image, inlineHTML, escapedChar, plainText]
  first <- satisfy isAlpha
  rest <- many (alphaNum <|> char '-' <|> char ':')
  pure (first : rest)
-attribute :: Parser (Text, Maybe Text)
+plainText :: Parser InlineText
-attribute = do
+-- abnf is very specific about what's allowed due to actual ABNF not allowing negation but I'm lazy
-  name <- attributeName
+plainText = fmap (Normal . pack) $ many $ noneOf "*_`[]()<>#+-.!&\\\n"
  value <- optionMaybe (char '=' >> attributeValue)
  pure (T.pack name, fmap T.pack value)
-attributeName :: Parser String
+escapedChar :: Parser InlineText
-attributeName = do
+escapedChar = char '\\' *> fmap Escaped visibleChar
  first <- satisfy isAlpha
  rest <- many (alphaNum <|> char '-' <|> char ':')
  pure (first : rest)
 attributeValue :: Parser String
 attributeValue =
  choice
    [ between (char '"') (char '"') (many $ noneOf "\""),
      between (char '\'') (char '\'') (many $ noneOf "'"),
      some (noneOf " \t\n\r>\"'=<`")
    ]
 -- Paragraph Block
 paragraphBlock :: Parser Element
 paragraphBlock = do
  -- Make sure we're not starting another block element
  notFollowedBy (char '#')
  notFollowedBy (char '>')
  notFollowedBy (oneOf "*-+" >> (char ' ' <|> char '\t'))
  notFollowedBy (digit >> char '.')
  notFollowedBy (string "```" <|> string "~~~")
  notFollowedBy (count 4 (char ' ' <|> char '\t'))
  notFollowedBy (count 3 (char '*') <|> count 3 (char '-') <|> count 3 (char '_'))
  notFollowedBy (char '<')
  content <- some inlineElement
  lineEnding
  pure $ Paragraph $ P content
 -- Inline Elements
 inlineElement :: Parser InlineText
 inlineElement =
  choice
    [ try strong,
      try emphasis,
      try codeSpan,
      try image,
      try link,
      try htmlInline,
      try escapedChar,
      plainText
    ]
 -- Strong (Bold)
 strong :: Parser InlineText
 strong = strongAsterisk <|> strongUnderscore
 strongAsterisk :: Parser InlineText
 strongAsterisk = do
  string "**"
  content <- some (notFollowedBy (string "**") >> inlineElement)
  string "**"
  pure $ Bold content
 strongUnderscore :: Parser InlineText
 strongUnderscore = do
  string "__"
  content <- some (notFollowedBy (string "__") >> inlineElement)
  string "__"
  pure $ Bold content
 -- Emphasis (Italic)
 emphasis :: Parser InlineText
 emphasis = emphasisAsterisk <|> emphasisUnderscore
 emphasisAsterisk :: Parser InlineText
 emphasisAsterisk = do
  char '*'
  notFollowedBy (char '*')
  content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk)
  char '*'
  pure $ Italic content
 emphasisUnderscore :: Parser InlineText
 emphasisUnderscore = do
  char '_'
  notFollowedBy (char '_')
  content <- some (notFollowedBy (char '_') >> inlineElementNoUnderscore)
  char '_'
  pure $ Italic content
 inlineElementNoAsterisk :: Parser InlineText
 inlineElementNoAsterisk =
  choice
    [ try strong,
      try codeSpan,
      try image,
      try link,
      try htmlInline,
      try escapedChar,
      plainTextNoAsterisk
    ]
 inlineElementNoUnderscore :: Parser InlineText
 inlineElementNoUnderscore =
  choice
    [ try strong,
      try codeSpan,
      try image,
      try link,
      try htmlInline,
      try escapedChar,
      plainTextNoUnderscore
    ]
 -- Code Span
 codeSpan :: Parser InlineText
 codeSpan =
  choice
    [ try tripleBacktick,
      try doubleBacktick,
      singleBacktick
    ]
  where
    singleBacktick = do
      char '`'
      content <- many (noneOf "`\n\r")
      char '`'
      pure $ InlineCode (T.pack content)
    doubleBacktick = do
      string "``"
      content <- manyTill anyChar (try $ string "``")
      pure $ InlineCode (T.pack content)
    tripleBacktick = do
      string "```"
      content <- manyTill anyChar (try $ string "```")
      pure $ InlineCode (T.pack content)
 -- Image
 image :: Parser InlineText
 image = do
  char '!'
  char '['
  alt <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
  char ']'
  (url, title) <- linkDestination
  pure $ Image alt url title
 -- Link
 link :: Parser InlineText
 link = do
  char '['
  content <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
  char ']'
  (url, title) <- linkDestination
  pure $ Link content url title
 inlineElementNoBracket :: Parser InlineText
 inlineElementNoBracket =
  choice
    [ try strong,
      try emphasis,
      try codeSpan,
      try htmlInline,
      try escapedChar,
      plainTextNoBracket
    ]
 linkDestination :: Parser (Text, Maybe Text)
 linkDestination = directLink <|> referenceLink
  where
    directLink = do
      char '('
      url <- many (noneOf " \t\n\r)")
      title <- optionMaybe (try $ some (char ' ' <|> char '\t') >> titleParser)
      char ')'
      pure (T.pack url, title)
    referenceLink = do
      char '['
      ref <- some (alphaNum <|> char ' ' <|> char '\t')
      char ']'
      -- For simplicity, we're not resolving references here
      -- In a real implementation, you'd look up the reference
      pure (T.pack $ "[" ++ ref ++ "]", Nothing)
 titleParser :: Parser Text
 titleParser =
  T.pack
    <$> choice
      [ between (char '"') (char '"') (many $ noneOf "\""),
        between (char '\'') (char '\'') (many $ noneOf "'"),
        between (char '(') (char ')') (many $ noneOf ")")
      ]
 -- HTML Inline
 htmlInline :: Parser InlineText
 htmlInline = do
  char '<'
-  name <- Markdown.tagName
+  tagName <- name
-  attrs <- many (try $ wsParser >> attribute)
+  remaining <- htmlInlineRemainder
-  optional wsParser
+  whiteSpace
  _ <- option False (char '/' >> pure True)
  char '>'
-  pure $ HTMLInline (T.pack name) attrs
+  let remainingTagText = foldl' (\ongoing current -> ongoing ++ ' ' : current) "" remaining
-- Escaped Character
+  pure $ HTMLIn $ pack $ '<' : name ++ remaining
-escapedChar :: Parser InlineText
+  where
-escapedChar = do
+    htmlInlineRemainder = many $ whiteSpace *> attribute
-  char '\\'
+    name = many $ choice [alphaNum, char '-', char ':']
-  c <- satisfy (\x -> x >= '!' && x <= '~')
+    value = do
-  pure $ Text (T.singleton c)
+      char '"'
      l <- letter
      rem <- many $ choice [alphaNum, char '-', char ':']
      char '"'
      pure '"' : l : rem ++ "\""
    attribute = do
      attrName <- name
      char '='
      attrValue <- value
      pure attrName ++ ('=' : attrValue)
-- Plain Text
+whiteSpace :: Parser Text
-plainText :: Parser InlineText
+whiteSpace = pack <$> many space
 plainText = Text . T.pack <$> some plainTextChar
-plainTextChar :: Parser Char
+visibleChar :: Parser Char
-plainTextChar = satisfy $ \c ->
+-- technically more strict but I'm just going to hope I never have to deal with that
-  (c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' '
+visibleChar = anyChar
 plainTextNoAsterisk :: Parser InlineText
 plainTextNoAsterisk =
  Text . T.pack
    <$> some
      ( satisfy $ \c ->
          (c `notElem` ("*_`[!<\\\n\r" :: String)) && c >= ' '
      )
 plainTextNoUnderscore :: Parser InlineText
 plainTextNoUnderscore =
  Text . T.pack
    <$> some
      ( satisfy $ \c ->
          not (c `elem` ("_*`[!<\\\n\r" :: String)) && c >= ' '
      )
 plainTextNoBracket :: Parser InlineText
 plainTextNoBracket =
  Text . T.pack
    <$> some
      ( satisfy $ \c ->
          not (c `elem` ("]_*`[!<\\\n\r" :: String)) && c >= ' '
      )
 -- Helper Parsers
 lineEnding :: Parser ()
 lineEnding = void (try (string "\r\n") <|> try (string "\n") <|> string "\r")
 wsParser :: Parser ()
 wsParser = void $ some (char ' ' <|> char '\t')
--- a/app/Utilities.hs
+++ b/app/Utilities.hs
@ -7,7 +7,6 @@ import qualified Data.Aeson as A
 import Data.List (find)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.Text.Encoding (encodeUtf8)
 import Data.Time
 import Data.Time.Format.ISO8601 (iso8601Show)
 import Data.Yaml.Aeson
@ -15,11 +14,8 @@ import Development.Shake (Action)
 import qualified Development.Shake as Shake
 import Development.Shake.FilePath ((<.>), (</>))
 import qualified Development.Shake.FilePath as FP
 import HTML
 import Markdown
 import Text.Pandoc (Block (Plain), Meta (..), MetaValue (..), Pandoc (..))
 import qualified Text.Pandoc as Pandoc
 import Text.Parsec hiding (Error)
 import Types
 indexHtmlOutputPath :: FilePath -> FilePath
@ -40,12 +36,6 @@ indexHtmlMarkdownSourcePath =
 markdownToHtml :: (FromJSON a) => FilePath -> Action (a, Text)
 markdownToHtml filePath = do
  content <- Shake.readFile' filePath
  let Right (metadataText, document) = parse (liftA2 (,) Markdown.metadata Markdown.document) filePath content
  let Right metadata = decodeEither' $ encodeUtf8 metadataText
  pure (metadata, compileToHTML document)
 markdownToHtml_ filePath = do
  content <- Shake.readFile' filePath
  Shake.quietly . Shake.traced "Markdown to HTML" $ do
    pandoc@(Pandoc meta _) <-
--- a/psb.cabal
+++ b/psb.cabal
@ -29,7 +29,7 @@ executable psb
    -- .hs or .lhs file containing the Main module.
    main-is:          Main.hs
-    other-modules: Config Utilities Templates Types IR Markdown Restruct HTML
+    other-modules: Config Utilities Templates Types IR Markdown Restruct
    default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric