Compare commits

...

15 commits

Author SHA1 Message Date
Pagwin
667477e01d
fixed toText issue 2025-12-11 13:18:05 -05:00
Pagwin
b8d76d7663
struggling more with the type system than html 2025-12-10 21:41:21 -05:00
Pagwin
0acf1668c0
ordered list prefix 2025-12-10 15:22:08 -05:00
Pagwin
628d85a8f3
handled nested lists 2025-12-10 15:16:11 -05:00
Pagwin
bc1ea7e84b
finished link and image, only need to handle lists and HTML 2025-12-10 15:04:11 -05:00
Pagwin
9330e44b58
list item and handling children lists potentially being different from the parent 2025-12-10 11:41:22 -05:00
Pagwin
dbb501b1da
redid list handling to all be done in 1 function 2025-12-10 00:49:13 -05:00
Pagwin
656490ffe6
started handling lists 2025-12-10 00:30:26 -05:00
Pagwin
ce5794e7ea
simplified block ending 2025-12-08 21:23:08 -05:00
Pagwin
115270a70b
redoing markdown parsing and adding underlining 2025-12-08 21:18:22 -05:00
Pagwin
82c9fd8799
may need to accept the inevitable 2025-12-05 21:16:50 -05:00
Pagwin
b35fa4d699
better progress bar 2025-12-05 20:45:56 -05:00
Pagwin
cd0cf5f016
fixed a test and then passed it 2025-12-05 20:07:19 -05:00
Pagwin
c4ff078785
moved everything into src for ease of testing 2025-12-05 19:42:15 -05:00
Pagwin
6c81f761d6
wrote out the scaffolding for work to do a live server later 2025-12-05 13:26:09 -05:00
15 changed files with 355 additions and 568 deletions

View file

@ -1,3 +1,4 @@
- [ ] Figure out what the fuck is going on with the soft engineering retrospective post or accept it's broken and rewrite the markdown parser later
- [ ] add rst or org support and convert markdown handling to custom parser instead of pandoc
- [ ] process source code blocks with tree sitter https://hackage.haskell.org/package/tree-sitter
- Alternatively consider skylighting https://hackage.haskell.org/package/skylighting
@ -6,4 +7,5 @@
- [ ] dev server setup (with live reloading)
- https://hackage-content.haskell.org/package/warp-3.4.10
- https://hackage.haskell.org/package/file-embed
- https://hackage.haskell.org/package/fsnotify
- [ ] see if performance can be improved (it isn't slow atm but it definitely feels like there's a bottleneck)

View file

@ -1,170 +1,6 @@
-- pulling heavily from https://abhinavsarkar.net/posts/static-site-generator-using-shake/
-- docs:
-- https://hackage.haskell.org/package/shake-0.19.8/docs/doc-index-All.html
-- https://hackage.haskell.org/package/pandoc-3.2.1/docs/doc-index-All.html
-- https://hackage.haskell.org/package/mustache-2.4.2/docs/doc-index.html
--
module Main where
import Config
import Control.Monad (forM, when)
import qualified Data.HashMap.Strict as HM
import Data.List (sortOn)
import qualified Data.Ord as Ord
import qualified Data.Text as T
import Deriving.Aeson
import Deriving.Aeson.Stock (Vanilla)
import Development.Shake (Action, Rules, (%>), (|%>), (~>))
import qualified Development.Shake as Shake
import Development.Shake.FilePath ((</>))
import qualified Development.Shake.FilePath as FP
import Templates
import Types
import Utilities.Action (getPublishedPosts, isDraft', markdownToHtml, markdownToPost, now)
import Utilities.FilePath (indexHtmlOutputPath, indexHtmlSourcePaths, isMarkdownPost, urlConvert)
-- target = thing we want
-- Rule = pattern of thing being made + actions to produce the thing
-- Action = actions to produce a thing
import qualified Psb.Main as Psb
main :: IO ()
main = do
Shake.shakeArgs Shake.shakeOptions {Shake.shakeProgress = Shake.progressSimple} $ do
Shake.withTargetDocs "Build the site" $
"build" ~> buildSite
Shake.withTargetDocs "Clean the built site" $
"clean" ~> Shake.removeFilesAfter outputDir ["//*"]
Shake.withoutTargets buildRules
buildSite :: Action ()
buildSite = do
-- static files
assetPaths <- Shake.getDirectoryFiles "" assetGlobs
-- path concat each asset path so it's output into the outputDir
Shake.need $ map (outputDir </>) assetPaths
-- take the misc pages which aren't blog posts and make their html files
Shake.need $ map indexHtmlOutputPath pagePaths
-- handle posts
postPaths <- getPublishedPosts isDraft
Shake.need $ map indexHtmlOutputPath postPaths
-- remaining pages, index.xml = rss feed
Shake.need $ map (outputDir </>) ["index.html", "index.xml"]
buildRules :: Rules ()
buildRules = do
home
assets
postsRule
rss
-- make a rule of the pattern outputDir/asset_name which copes from outputDir/../pages
assets :: Rules ()
assets =
map (outputDir </>) assetGlobs |%> \target -> do
let src = FP.dropDirectory1 target
Shake.copyFileChanged src target
-- there's probably a better way of doing this that allows for the target's origin file extension to get passed in but for now we're doing brute force
postsRule :: Rules ()
postsRule =
map indexHtmlOutputPath postGlobs |%> \target -> do
let potentials = indexHtmlSourcePaths target
Shake.forP
potentials
( \path -> do
exists <- Shake.doesFileExist path
should <- if exists then not <$> isDraft path else pure False
when
should
( case FP.takeExtension path of
".md" -> markdownPost path
_ -> error $ "invalid file extension for post " <> target
)
)
return ()
markdownPost :: FP.FilePath -> Action ()
markdownPost src = do
Shake.need [src]
let target = indexHtmlOutputPath src
post <- readMarkdownPost src
let rPost = fromPost post
postHtml <- applyTemplate "post.html" rPost
time <- Utilities.Action.now
-- Shake.putInfo $ T.unpack $ urlConvert target
let page =
Page
{ pageTitle = rPostTitle rPost,
pageContent = postHtml,
pageNow = time,
pageUrl = urlConvert target
}
applyTemplateAndWrite "default.html" page target
-- Shake.putInfo $ "Built " <> target <> " from " <> src
home :: Rules ()
home =
outputDir </> "index.html" %> \target -> do
postPaths <- getPublishedPosts isDraft
posts <-
sortOn (Ord.Down . postDate)
<$> forM postPaths readPost
let posts' = map fromPost posts
html <- applyTemplate "home.html" $ HM.singleton "posts" posts'
time <- Utilities.Action.now
-- Shake.putInfo $ T.unpack $ urlConvert target
let page =
Page
{ pageTitle = T.pack "Home",
pageContent = html,
pageNow = time,
pageUrl = urlConvert target
}
applyTemplateAndWrite "default.html" page target
-- Shake.putInfo $ "Built " <> target
data Rss = Rss
{ now :: T.Text,
posts :: [RenderedPost]
}
deriving (Show, Generic)
deriving (ToJSON) via Vanilla Rss
rss :: Rules ()
rss =
outputDir </> "index.xml" %> \target -> do
postPaths <- getPublishedPosts isDraft
posts <- map fromPost . sortOn (Ord.Down . postDate) <$> forM postPaths readPost
time <- Utilities.Action.now
applyTemplateAndWrite "feed.xml" (Rss time posts) target
-- Shake.putInfo $ "Built " <> target
readPost :: FilePath -> Action Post
readPost postPath = case FP.takeExtension postPath of
".md" -> readMarkdownPost postPath
_ -> error $ "unknown file extension for file" <> postPath
readMarkdownPost :: FilePath -> Action Post
readMarkdownPost postPath = do
(post, html) <- markdownToHtml postPath
-- Shake.putInfo $ "Read " <> postPath
return $
post
{ postContent = Just html,
postLink = Just . T.pack $ "/" <> FP.dropExtension postPath <> "/"
}
postHandles :: [(FilePath -> Bool, FilePath -> Action Post)]
postHandles = [(isMarkdownPost, markdownToPost)]
isDraft :: FilePath -> Action Bool
isDraft = isDraft' postHandles
main = Psb.main

View file

@ -27,43 +27,30 @@ common warnings
library
hs-source-dirs: src
exposed-modules: Markdown HTML Logger IR Logger.Shake
build-depends: base >=4.20 && < 4.21, text >= 2.1.2 , megaparsec >= 9.7.0 , transformers >= 0.6.2, shake >= 0.19.8,
exposed-modules: Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Templates Types Config
build-depends: base >=4.20 && < 4.21, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
test-suite test-markdown-parse
hs-source-dirs: app
hs-source-dirs: tests
type: exitcode-stdio-1.0
main-is: Tests/Markdown/Parse.hs
main-is: Markdown/Parse.hs
build-depends: base >=4.20 && < 4.21, text >= 2.1.2 , megaparsec >= 9.7.0 , transformers >= 0.6.2 , hedgehog >= 1.7 , time, psb
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
other-modules: Utilities
default-language: Haskell2010
executable psb
-- Import common warning flags.
import: warnings
hs-source-dirs: app
-- .hs or .lhs file containing the Main module.
main-is: Main.hs
other-modules: Config Utilities Utilities.FilePath Utilities.Action Templates Types
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
-- Other library packages from which modules are imported.
-- https://hackage.haskell.org/package/texmath
build-depends: base >=4.20 && < 4.21, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, psb
--parsec >= 3.1.18.0
-- Directories containing source files.
hs-source-dirs: app
-- Base language which the package is written in.
default-language: Haskell2010
ghc-options: -threaded
build-depends: psb, base
-- default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
source-repository head
type: git

View file

@ -64,7 +64,7 @@ generateLiElems (element : remainder) =
-- We assume child lists are stricly after our contents
-- if they aren't this is fucked
serializeInlineToHTML element.content,
T.concat $ map (elementToHTML . List) element.children,
fromMaybe "" $ fmap (elementToHTML . List) element.child,
"</li>",
generateLiElems remainder
]

View file

@ -33,7 +33,7 @@ data BlockQuote = Q [InlineText] deriving (Show)
data ListItem = LI
{ content :: [InlineText], -- Flatten continuations into here
children :: [List]
child :: Maybe List
}
deriving (Show)
@ -58,6 +58,7 @@ data InlineText
| Bold [InlineText]
| Italic [InlineText]
| Crossed [InlineText]
| Underlined [InlineText]
| InlineCode Text
| Link
{ linkText :: [InlineText],

View file

@ -1,6 +1,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
-- (document, metadata)
@ -9,421 +10,188 @@ module Markdown (document, metadata) where
import Control.Applicative (many, optional, some, (<|>))
import Control.Monad (guard, void)
import Data.Functor.Identity (Identity)
import Data.Maybe (fromMaybe, maybeToList)
import Data.Proxy (Proxy (Proxy))
import Data.String (IsString)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Void (Void)
import IR
import Logger (Logger (logDebug))
import Text.Megaparsec (ParsecT, Stream, Token, Tokens, anySingle, anySingleBut, between, choice, count, eof, manyTill, notFollowedBy, satisfy, skipSome, try, (<?>))
import Text.Megaparsec (ParsecT, Stream, Token, Tokens, anySingle, anySingleBut, between, choice, chunk, count, eof, manyTill, notFollowedBy, satisfy, skipSome, try, (<?>))
import qualified Text.Megaparsec as MP
import Text.Megaparsec.Char (alphaNumChar, char, digitChar, string)
import Text.Megaparsec.Char (alphaNumChar, char, digitChar, newline, spaceChar)
import qualified Text.Megaparsec.Stream as MPS
type ParserTG = ParsecT Void
type Parser = ParsecT Void
type ParserT m = ParserTG T.Text m
class (Token s ~ Char, Stream s, ToText (Tokens s), IsString (Tokens s), Monoid (Tokens s), Eq (Tokens s), Show s) => Characters s
type Parser = ParserT Identity
class ToText t where
toText :: t -> Text
anyChar :: (Token s ~ Char, Stream s) => ParserTG s m Char
anyChar = anySingle
instance Characters Text
alphaNum :: (Token s ~ Char, Stream s) => ParserTG s m Char
alphaNum = alphaNumChar
instance ToText Text where
toText = id
digit :: (Token s ~ Char, Stream s) => ParserTG s m Char
digit = digitChar
instance Characters String
noneOf :: (Token s ~ Char, Stream s) => [Char] -> ParserTG s m Char
noneOf = MP.noneOf
instance ToText String where
toText = T.pack
oneOf :: (Token s ~ Char, Stream s) => [Char] -> ParserTG s m Char
oneOf = MP.oneOf
string :: (MP.MonadParsec e s m) => Tokens s -> m (Tokens s)
string = chunk
optionMaybe :: (Token s ~ Char, Stream s) => ParserTG s m a -> ParserTG s m (Maybe a)
optionMaybe = optional
skipMany1 :: (Token s ~ Char, Stream s) => ParserTG s m a -> ParserTG s m ()
skipMany1 = skipSome
metadata :: (Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Text
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> anySingleBut '-') <* bound
metadata :: (Logger m, Characters s) => Parser s m Text
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (chunk "--")) <|> anySingleBut '-') <* bound
where
bound = string "---"
document :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Document
document = do
logDebug "document"
Doc <$> many element <* eof
document :: (Logger m, Characters s) => Parser s m Document
document = Doc <$> many element
element :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
element :: (Logger m, Characters s) => Parser s m Element
element =
choice
[ try headingBlock <?> "Element Heading",
try fencedCodeBlock <?> "Fenced Code Block",
try indentedCodeBlock <?> "Indented Code Block",
try blockquoteBlock <?> "BlockQuote",
try unorderedListBlock <?> "Unordered List",
try orderedListBlock <?> "Ordered List",
try horizontalRuleBlock <?> "Horizontal Rule",
(try $ unorderedListBlock 0) <?> "Unordered List",
(try $ orderedListBlock 0) <?> "Ordered List",
try htmlBlock <?> "HTML Block",
try blankLines <?> "Blank Lines", -- Consume blank lines but don't add to AST
paragraphBlock <?> "Paragarph"
]
<* blockEnding
-- Blank lines (consumed but not stored)
blankLines :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
blankLines = do
skipMany1 (blankLine *> notFollowedBy eof)
element <|> fmap (const $ HTML HTMLTag {html_content = ""}) eof -- Parse the next element (or handle eof)
lineEnding :: (Logger m, Characters s) => Parser s m ()
lineEnding = (try eof) <|> void newline
blankLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ()
blankLine = do
many (char ' ' <|> char '\t')
lineEnding
pure ()
-- we don't need to parse eof, lineEnding does that, eof *> eof works just fine in place of eof
blockEnding :: (Logger m, Characters s) => Parser s m ()
blockEnding = lineEnding *> lineEnding
-- Heading Block
headingBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
headingBlock = do
hashes <- some (char '#') <?> "Heading Hashes"
let level = length hashes
guard (level <= 6) <?> "Higher than level 6"
many (char ' ' <|> char '\t') <?> "Pre-Text Whitespace"
content <- manyTill (inlineElement <?> "Header Text") (try lineEnding <?> "Header Ending")
pure $ Heading $ H level content
-- Fenced Code Block
fencedCodeBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
fencedCodeBlock = do
logDebug "fenced_coding_block"
fence <- string "```" <|> string "~~~"
logDebug "fence"
lang <- optionMaybe languageInfo
logDebug "langInfo"
lineEnding'
logDebug "lineEnding"
codeLines <- manyTill (codeLine fence) (try $ string fence)
logDebug "lines"
pure $ Code $ C lang (T.pack $ unlines codeLines)
languageInfo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Text
languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.')
codeLine :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => (Tokens s) -> ParserTG s m String
codeLine fence = do
-- this is a hack which can only haunt me if I continue using markdown
line <- many $ (notFollowedBy $ string fence) *> noneOf "\n\r"
lineEnding'
pure line
-- Indented Code Block
indentedCodeBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
indentedCodeBlock = do
lines' <- some indentedLine
pure $ Code $ C Nothing (T.pack $ unlines lines')
-- TODO: check if inlineHTML needs to be handled in any markdown posts
inlineText :: (Logger m, Characters s) => Parser s m InlineText
inlineText = inlineText' (fail "notFollowedBy noop")
where
indentedLine = do
count 4 (char ' ' <|> char '\t')
line <- many $ noneOf "\n\r"
lineEnding
pure line
inlineText' disallow = choice [try $ strikethrough disallow, try $ bold disallow, try $ italic disallow, try $ underlined disallow, try code, try $ link disallow, try $ image disallow, plain_text disallow]
between' start end middle_piece = between start end $ many ((notFollowedBy end) *> middle_piece)
-- Blockquote Block
blockquoteBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
blockquoteBlock = do
lines' <- some blockquoteLine
pure $ BlockQuote $ Q (concat lines')
strikethrough disallow = Crossed <$> (between' (string "~~") (string "~~") (inlineText' (disallow <|> (void $ string "~~"))))
bold disallow = Bold <$> (between' (string "**") (string "**") (inlineText' (disallow <|> (void $ string "**"))))
italic disallow = Italic <$> (between' (char '*') (char '*') (inlineText' (disallow <|> (void $ char '*'))))
underlined disallow = Underlined <$> (between' (string "__") (string "__") (inlineText' (disallow <|> (void $ string "__"))))
code = InlineCode . T.pack <$> (between' (char '`') (char '`') (notFollowedBy lineEnding *> anySingle))
link disallow = do
linkText <- between' (char '[') (char ']') (inlineText' (disallow <|> (void $ char ']')))
(url, title) <- do
char '('
-- might fail on newline char situation
url <- T.pack <$> (many (notFollowedBy (char ')' <|> spaceChar) *> anySingle))
hasTitle <- optional spaceChar
title <- case hasTitle of
Just _ -> Just . T.pack <$> (many (notFollowedBy ((void $ char ')') <|> lineEnding) *> anySingle))
Nothing -> pure Nothing
char ')'
pure (url, title)
pure Link {linkText, url, title}
image disallow = do
char '!'
-- Is this a hack? Yes. Bite me
link_hack <- link disallow
(altText, url, title) <- case link_hack of
Link {linkText = [Text altText], url, title} -> pure (altText, url, title)
_ -> fail "Image alt text must be normal text, cannot be stylized in any way"
pure Image {altText, url, title}
plain_text disallow = Text . T.pack <$> (many ((notFollowedBy (blockEnding <|> disallow)) *> anySingle))
headingBlock :: (Logger m, Characters s) => Parser s m Element
headingBlock = do
heading_level <- length <$> (some $ char '#')
optional $ char ' '
text <- many $ inlineText
pure $ Heading $ H {level = heading_level, text}
fencedCodeBlock :: (Logger m, Characters s) => Parser s m Element
fencedCodeBlock = between (string "```") (string "```") $ do
language' <- T.pack <$> (many (notFollowedBy lineEnding *> anySingle))
lineEnding
code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle))
let language = if language' == "" then Just language' else Nothing
pure $ Code $ C {language, code}
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
blockquoteBlock = BlockQuote . Q . concat <$> (some blockquoteLine)
where
blockquoteLine = do
char '>'
optional (char ' ')
content <- many $ notFollowedBy lineEnding' *> inlineElement
pure content
optional $ char ' '
ret <- (many ((notFollowedBy lineEnding) *> inlineText))
-- this dance with optional and notFollowedBy is done so we
-- aren't accidentally consuming part of a block ending
(optional ((notFollowedBy blockEnding) *> lineEnding))
pure ret
-- Horizontal Rule Block
horizontalRuleBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
horizontalRuleBlock = do
choice
[ try (count 3 (char '*') >> many (char ' ' <|> char '*')),
try (count 3 (char '-') >> many (char ' ' <|> char '-')),
try (count 3 (char '_') >> many (char ' ' <|> char '_'))
]
lineEnding
pure HorizontalRule
-- type of list the parser returns
-- parser which grabs the prefix for each item of the list
-- parser used for child lists
-- nesting amount
listBlock :: (Logger m, Characters s) => ListType -> Parser s m prefix -> (Int -> Parser s m List) -> Int -> Parser s m Element
listBlock list_type prefix child_parser_factory nest_level = do
items <- some $ (try (listItem <* notFollowedBy blockEnding)) <|> (listItem <* lineEnding)
pure $ List $ L {list_type, items}
where
listItem = do
count nest_level ((try $ void $ char '\t') <|> (void $ string " "))
prefix
content <- many inlineText
child <- optional $ child_parser_factory $ nest_level + 1
pure $ LI {content, child}
-- Unordered List Block
unorderedListBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
unorderedListBlock = do
items <- some unorderedListItem
lineEnding'
pure $ List $ L Unordered items
unorderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
unorderedListBlock = listBlock Unordered unordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
where
unordered_prefix = (choice $ map char "*-+") *> optional spaceChar
-- not exhaustive but we know listBlock is returning a List
unwrap (List l) = l
unorderedListItem :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ListItem
unorderedListItem = do
oneOf "*-+"
char ' ' <|> char '\t'
content <- many $ notFollowedBy lineEnding' *> inlineElement
lineEnding'
-- continuations <- many listContinuation
children <- many (try indentedList)
pure $ LI content children
orderedListBlock :: (Logger m, Characters s) => Int -> Parser s m Element
orderedListBlock = listBlock Ordered ordered_prefix (\level -> unwrap <$> ((try $ unorderedListBlock level) <|> orderedListBlock level))
where
-- regex equivalent: [0-9]+[.)]\s?
ordered_prefix = (some digitChar) *> (char '.' <|> char ')') *> optional spaceChar
-- not exhaustive but we know listBlock is returning a List
unwrap (List l) = l
-- TODO: handle list indentation at all levels
indentedList :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m List
indentedList = do
let n = 1
void $ (count (4 * n) (char ' ')) <|> count n (char '\t')
choice [try indentedUnorderedList, indentedOrderedList]
indentedUnorderedList :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m List
indentedUnorderedList = do
items <- some (try $ indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t')))
pure $ L Unordered items
indentedOrderedList :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m List
indentedOrderedList = do
items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t')))
pure $ L Ordered items
indentedListItem :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m () -> ParserTG s m ListItem
indentedListItem marker = do
marker
content <- many $ notFollowedBy lineEnding' *> inlineElement
pure $ LI content []
-- Ordered List Block
orderedListBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
orderedListBlock = do
items <- some orderedListItem
lineEnding'
pure $ List $ L Ordered items
orderedListItem :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ListItem
orderedListItem = do
some digit
char '.' <|> char ')'
optional (char ' ' <|> char '\t')
content <- many $ notFollowedBy lineEnding' *> inlineElement
lineEnding'
-- continuations <- many listContinuation
children <- many (try indentedList)
pure $ LI content children
-- HTML Block
htmlBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
htmlBlock :: forall m s. (Logger m, Characters s) => Parser s m Element
htmlBlock = do
char '<'
-- Capture the entire HTML block as raw text
rest <- manyTill anyChar (try $ char '>' >> lineEnding)
let content = '<' : (rest <> ">")
return $ HTML $ HTMLTag (T.pack content)
-- Paragraph Block
paragraphBlock :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m Element
paragraphBlock = do
logDebug "paragraph"
content <- some (notFollowedBy lineEnding *> inlineElement)
lineEnding <|> eof
pure $ Paragraph $ P content
-- Inline Elements
inlineElement :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
inlineElement =
logDebug "inlineElement"
*> choice
[ try strong <?> "Inline Strong Text",
try emphasis <?> "Inline Italic Text",
try crossedText <?> "Inline Crossed Text",
try codeSpan <?> "Inline Code",
try image <?> "Inline Image",
try link <?> "Inline Link",
try htmlInline <?> "Inline HTML",
try escapedChar <?> "Escaped Character",
plainText <?> "Inline Plain Text"
]
-- Strong (Bold)
strong :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
strong = strongAsterisk <|> strongUnderscore
strongAsterisk :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
strongAsterisk = do
string "**"
content <- some (notFollowedBy (string "**") >> inlineElement)
string "**"
pure $ Bold content
strongUnderscore :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
strongUnderscore = do
string "__"
content <- some (notFollowedBy (string "__") >> inlineElement)
string "__"
pure $ Bold content
crossedText :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
crossedText = do
string "~~"
content <- some (notFollowedBy (string "~~") >> inlineElement)
string "~~"
pure $ Crossed content
-- Emphasis (Italic)
emphasis :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
emphasis = emphasisAsterisk <|> emphasisUnderscore
emphasisAsterisk :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
emphasisAsterisk = do
char '*'
content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk)
char '*'
pure $ Italic content
emphasisUnderscore :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
emphasisUnderscore = do
char '_'
content <- some inlineElementNoUnderscore
char '_'
pure $ Italic content
inlineElementNo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => Char -> ParserTG s m InlineText
inlineElementNo c =
choice
[ try strong,
try codeSpan,
try image,
try link,
try htmlInline,
try escapedChar,
plainTextNo [c]
]
plainTextNo :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => [Char] -> ParserTG s m InlineText
plainTextNo list = do
plainTextNo' False list
plainTextNo' :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => Bool -> [Char] -> ParserTG s m InlineText
plainTextNo' block_whitespace disallow = do
logDebug $ "base plain Text: " <> T.show block_whitespace <> " " <> T.show disallow
firstChar <- noneOf (disallow <> if block_whitespace then " \t\r\n" else []) <?> "Plain Text Initial Disallow"
remChars <- many $ notFollowedBy lineEnding' *> plainTextCharNo disallow
pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars
tagName <- MPS.tokensToChunk (Proxy :: Proxy s) <$> (some ((notFollowedBy ((try $ void tagNameEnd) <|> blockEnding)) *> (anySingle :: Parser s m (Token s))))
notFollowedBy blockEnding
ending <- tagNameEnd
hasEnded <- case ending of
'>' -> pure True
_ -> pure False
attrs <-
if not hasEnded
then
error "TODO: handle attributes"
else pure Nothing
-- technically not standard markdown but I don't want to write a full HTML parser in my
inside <- many (notFollowedBy ((chunk $ "</" <> tagName <> ">") <|> chunk "</>") *> anySingle)
end <- toText <$> ((chunk $ "</" <> tagName <> ">") <|> chunk "</>")
pure $ HTML $ HTMLTag $ T.concat ["<", toText tagName, fromMaybe "" attrs, ">", T.pack inside, if end == "</>" then "" else end]
where
wspHandler '\n' = ' '
wspHandler c = c
tagNameEnd :: Parser s m Char
tagNameEnd = spaceChar <|> char '>'
inlineElementNoAsterisk :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
inlineElementNoAsterisk = inlineElementNo '*'
inlineElementNoUnderscore :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
inlineElementNoUnderscore = inlineElementNo '_'
-- Code Span
codeSpan :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
codeSpan =
choice
[ try tripleBacktick,
try doubleBacktick,
singleBacktick
]
where
singleBacktick = do
char '`'
content <- many $ noneOf "`\n\r"
char '`'
pure $ InlineCode (T.pack content)
doubleBacktick = do
string "``"
content <- manyTill anyChar (try $ string "``")
pure $ InlineCode (T.pack content)
tripleBacktick = do
string "```"
content <- manyTill anyChar (try $ string "```")
pure $ InlineCode (T.pack content)
-- Image
image :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m InlineText
image = do
char '!'
char '['
alt <- T.pack <$> many (noneOf "]\n\r")
char ']'
(url, title) <- linkDestination
return $ Image {altText = alt, url = url, title = title}
-- Link
link :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
link = do
char '['
content <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
char ']'
(url, title) <- linkDestination
pure $ Link content url title
inlineElementNoBracket :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
inlineElementNoBracket =
choice
[ try strong,
try emphasis,
try codeSpan,
try htmlInline,
try escapedChar,
plainTextNo "[]"
]
linkDestination :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m (Text, Maybe Text)
linkDestination = directLink <|> referenceLink
where
directLink = do
char '('
url <- many $ noneOf " \t\n\r)"
title <- optionMaybe (try $ some (char ' ' <|> char '\t') >> titleParser)
char ')'
pure (T.pack url, title)
referenceLink = do
char '['
ref <- some (alphaNum <|> char ' ' <|> char '\t')
char ']'
-- For simplicity, we're not resolving references here
-- In a real implementation, you'd look up the reference
pure (T.pack $ "[" ++ ref ++ "]", Nothing)
titleParser :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m Text
titleParser =
T.pack
<$> choice
[ between (char '"') (char '"') (many $ anySingleBut '"'),
between (char '\'') (char '\'') (many $ anySingleBut '\''),
between (char '(') (char ')') (many $ anySingleBut ')')
]
-- HTML Inline
htmlInline :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m InlineText
htmlInline = do
start <- char '<'
content <- manyTill anyChar (try $ char '>')
return $ HTMLInline (T.pack (start : content ++ ">"))
-- Escaped Character
escapedChar :: (Logger m, Token s ~ Char, Stream s) => ParserTG s m InlineText
escapedChar = do
char '\\'
c <- satisfy (\x -> x >= '!' && x <= '~')
pure $ Text (T.singleton c)
-- Plain Text
plainText :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m InlineText
plainText = plainTextNo' False [] <?> "Baseline Plain Text"
plainTextBaseDisallow :: [Char]
plainTextBaseDisallow = "[~`_*<"
plainTextCharNo :: (Logger m, Token s ~ Char, Stream s) => [Char] -> ParserTG s m Char
plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow
-- Helper Parsers
lineEnding :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ()
lineEnding = void (try $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r")) <|> eof
lineEnding' :: (Logger m, Token s ~ Char, Stream s, IsString (Tokens s)) => ParserTG s m ()
lineEnding' = void (try (string "\r\n") <|> try (string "\n") <|> string "\r") <|> eof
paragraphBlock :: (Logger m, Characters s) => Parser s m Element
paragraphBlock = Paragraph . P <$> (many inlineText)

176
src/Psb/Main.hs Normal file
View file

@ -0,0 +1,176 @@
-- pulling heavily from https://abhinavsarkar.net/posts/static-site-generator-using-shake/
-- docs:
-- https://hackage.haskell.org/package/shake-0.19.8/docs/doc-index-All.html
-- https://hackage.haskell.org/package/pandoc-3.2.1/docs/doc-index-All.html
-- https://hackage.haskell.org/package/mustache-2.4.2/docs/doc-index.html
--
module Psb.Main where
import Config
import Control.Monad (forM, when)
import qualified Data.HashMap.Strict as HM
import Data.List (sortOn)
import qualified Data.Ord as Ord
import qualified Data.Text as T
import Deriving.Aeson
import Deriving.Aeson.Stock (Vanilla)
import Development.Shake (Action, Rules, (%>), (|%>), (~>))
import qualified Development.Shake as Shake
import Development.Shake.FilePath ((</>))
import qualified Development.Shake.FilePath as FP
import Templates
import Types
import Utilities.Action (getPublishedPosts, isDraft', markdownToHtml, markdownToPost, now, psbProgress)
import Utilities.FilePath (indexHtmlOutputPath, indexHtmlSourcePaths, isMarkdownPost, urlConvert)
-- target = thing we want
-- Rule = pattern of thing being made + actions to produce the thing
-- Action = actions to produce a thing
-- note: live watch should be done outside of shake with the watcher then running shake which is rather annoying
main :: IO ()
main = do
Shake.shakeArgs
Shake.shakeOptions
{ Shake.shakeProgress = psbProgress,
Shake.shakeColor = True
}
$ do
Shake.withTargetDocs "Build the site" $
"build" ~> buildSite
Shake.withTargetDocs "Clean the built site" $
"clean" ~> Shake.removeFilesAfter outputDir ["//*"]
Shake.withoutTargets buildRules
buildSite :: Action ()
buildSite = do
-- static files
assetPaths <- Shake.getDirectoryFiles "" assetGlobs
-- path concat each asset path so it's output into the outputDir
Shake.need $ map (outputDir </>) assetPaths
-- take the misc pages which aren't blog posts and make their html files
Shake.need $ map indexHtmlOutputPath pagePaths
-- handle posts
postPaths <- getPublishedPosts isDraft
Shake.need $ map indexHtmlOutputPath postPaths
-- remaining pages, index.xml = rss feed
Shake.need $ map (outputDir </>) ["index.html", "index.xml"]
buildRules :: Rules ()
buildRules = do
home
assets
postsRule
rss
-- make a rule of the pattern outputDir/asset_name which copes from outputDir/../pages
assets :: Rules ()
assets =
map (outputDir </>) assetGlobs |%> \target -> do
let src = FP.dropDirectory1 target
Shake.copyFileChanged src target
-- there's probably a better way of doing this that allows for the target's origin file extension to get passed in but for now we're doing brute force
postsRule :: Rules ()
postsRule =
map indexHtmlOutputPath postGlobs |%> \target -> do
let potentials = indexHtmlSourcePaths target
Shake.forP
potentials
( \path -> do
exists <- Shake.doesFileExist path
should <- if exists then not <$> isDraft path else pure False
when
should
( case FP.takeExtension path of
".md" -> markdownPost path
_ -> error $ "invalid file extension for post " <> target
)
)
return ()
markdownPost :: FP.FilePath -> Action ()
markdownPost src = do
Shake.need [src]
let target = indexHtmlOutputPath src
post <- readMarkdownPost src
let rPost = fromPost post
postHtml <- applyTemplate "post.html" rPost
time <- Utilities.Action.now
-- Shake.putInfo $ T.unpack $ urlConvert target
let page =
Page
{ pageTitle = rPostTitle rPost,
pageContent = postHtml,
pageNow = time,
pageUrl = urlConvert target
}
applyTemplateAndWrite "default.html" page target
-- Shake.putInfo $ "Built " <> target <> " from " <> src
home :: Rules ()
home =
outputDir </> "index.html" %> \target -> do
postPaths <- getPublishedPosts isDraft
posts <-
sortOn (Ord.Down . postDate)
<$> forM postPaths readPost
let posts' = map fromPost posts
html <- applyTemplate "home.html" $ HM.singleton "posts" posts'
time <- Utilities.Action.now
-- Shake.putInfo $ T.unpack $ urlConvert target
let page =
Page
{ pageTitle = T.pack "Home",
pageContent = html,
pageNow = time,
pageUrl = urlConvert target
}
applyTemplateAndWrite "default.html" page target
-- Shake.putInfo $ "Built " <> target
data Rss = Rss
{ now :: T.Text,
posts :: [RenderedPost]
}
deriving (Show, Generic)
deriving (ToJSON) via Vanilla Rss
rss :: Rules ()
rss =
outputDir </> "index.xml" %> \target -> do
postPaths <- getPublishedPosts isDraft
posts <- map fromPost . sortOn (Ord.Down . postDate) <$> forM postPaths readPost
time <- Utilities.Action.now
applyTemplateAndWrite "feed.xml" (Rss time posts) target
-- Shake.putInfo $ "Built " <> target
readPost :: FilePath -> Action Post
readPost postPath = case FP.takeExtension postPath of
".md" -> readMarkdownPost postPath
_ -> error $ "unknown file extension for file" <> postPath
readMarkdownPost :: FilePath -> Action Post
readMarkdownPost postPath = do
(post, html) <- markdownToHtml postPath
-- Shake.putInfo $ "Read " <> postPath
return $
post
{ postContent = Just html,
postLink = Just . T.pack $ "/" <> FP.dropExtension postPath <> "/"
}
postHandles :: [(FilePath -> Bool, FilePath -> Action Post)]
postHandles = [(isMarkdownPost, markdownToPost)]
isDraft :: FilePath -> Action Bool
isDraft = isDraft' postHandles

View file

@ -1,19 +1,25 @@
{-# LANGUAGE OverloadedStrings #-}
module Utilities.Action where
import Config (postGlobs)
import Control.Monad (filterM)
import Data.Functor.Identity (Identity (runIdentity))
import Data.IORef (IORef, newIORef, readIORef, writeIORef)
import Data.List (find)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import qualified Data.Text.IO as TIO
import Data.Time (getCurrentTime)
import Data.Time.Format.ISO8601 (iso8601Show)
import Data.Yaml.Aeson
import Development.Shake (Action)
import qualified Development.Shake as Shake
import GHC.IO (unsafePerformIO)
import HTML
import Markdown
import System.IO (hFlush, hPutStr, stderr)
import Text.Megaparsec (errorBundlePretty, runParserT)
import Types
@ -60,3 +66,14 @@ getPublishedPosts :: (FilePath -> Action Bool) -> Action [FilePath]
getPublishedPosts draftCheck = do
postPaths <- Shake.getDirectoryFiles "" postGlobs
filterM (fmap not . draftCheck) postPaths
psbProgress :: IO Shake.Progress -> IO ()
psbProgress getProgress = do
Shake.progressDisplay 0.01 psbProgress' getProgress
where
psbProgress' msg = do
TIO.hPutStr stderr "\x1b[K\r"
hPutStr stderr msg
hFlush stderr
p <- getProgress
if (Shake.countTodo p + Shake.countUnknown p) < 5 then putStrLn "" else pure ()

View file

@ -242,7 +242,7 @@ nested_unordered_list = property $ do
( Just
( Right
( Doc
[ List (L {list_type = Unordered, items = [LI {content = [Text item_1], children = [L {list_type = Unordered, items = [LI {content = [Text item_2]}]}]}, LI {content = [Text item_3], children = []}]})
[ List (L {list_type = Unordered, items = [LI {content = [Text item_1], children = [L {list_type = Unordered, items = [LI {content = [Text item_2], children = []}]}]}, LI {content = [Text item_3], children = []}]})
]
)
)