Compare commits
No commits in common. "aa69b3f6ef2938f64e3525311dafb32674b63c26" and "b6a689868be4bd883e0d8aef0fa67b630e893367" have entirely different histories.
aa69b3f6ef
...
b6a689868b
3 changed files with 320 additions and 6 deletions
|
|
@ -27,7 +27,7 @@ common warnings
|
||||||
|
|
||||||
library
|
library
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
exposed-modules: Djot Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config Utilities.Bundling
|
exposed-modules: Djot Djot.Lists Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config Utilities.Bundling
|
||||||
other-modules: Utilities.Parsing
|
other-modules: Utilities.Parsing
|
||||||
build-depends: base, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, bytestring
|
build-depends: base, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, bytestring
|
||||||
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
|
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
|
||||||
|
|
|
||||||
103
src/Djot.hs
103
src/Djot.hs
|
|
@ -11,12 +11,15 @@ module Djot
|
||||||
where
|
where
|
||||||
|
|
||||||
import Control.Applicative (many, optional, some, (<|>))
|
import Control.Applicative (many, optional, some, (<|>))
|
||||||
|
import Control.Monad.ST (runST)
|
||||||
import Data.Foldable (for_)
|
import Data.Foldable (for_)
|
||||||
import Data.Functor (void, (<$>))
|
import Data.Functor (void, (<$>))
|
||||||
import Data.List (elemIndex)
|
import Data.List (elemIndex)
|
||||||
import qualified Data.List.NonEmpty as NE
|
import qualified Data.List.NonEmpty as NE
|
||||||
|
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
import qualified Djot.Lists
|
||||||
import IR
|
import IR
|
||||||
import Logger (Logger (logCallStack, logDebug, logError))
|
import Logger (Logger (logCallStack, logDebug, logError))
|
||||||
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
|
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
|
||||||
|
|
@ -97,15 +100,105 @@ blockQuote attrs = do
|
||||||
|
|
||||||
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||||
listBlock attrs = do
|
listBlock attrs = do
|
||||||
error "todo"
|
list <- Djot.Lists.djotList
|
||||||
|
error ""
|
||||||
|
|
||||||
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||||
taskListBlock attrs = do
|
taskListBlock attrs = do
|
||||||
error "todo"
|
error "this is also probably fundamentally wrong"
|
||||||
|
startOffset <- getOffset
|
||||||
|
bullet
|
||||||
|
first_item <- task_item
|
||||||
|
rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator
|
||||||
|
pure $ TaskList (TL {items = first_item : rem_items}) attrs
|
||||||
|
where
|
||||||
|
task_item = error "todo"
|
||||||
|
|
||||||
descriptionListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
lowerAlphabet :: [Char]
|
||||||
descriptionListBlock attrs = do
|
lowerAlphabet = ['a' .. 'z']
|
||||||
error "todo"
|
|
||||||
|
upperAlphabet :: [Char]
|
||||||
|
upperAlphabet = ['A' .. 'Z']
|
||||||
|
|
||||||
|
-- inner parsing of roman numerals done via Gemini due to lazy
|
||||||
|
romanValue :: Char -> Int
|
||||||
|
romanValue 'I' = 1
|
||||||
|
romanValue 'V' = 5
|
||||||
|
romanValue 'X' = 10
|
||||||
|
romanValue 'L' = 50
|
||||||
|
romanValue 'C' = 100
|
||||||
|
romanValue 'D' = 500
|
||||||
|
romanValue 'M' = 1000
|
||||||
|
romanValue _ = 0
|
||||||
|
|
||||||
|
romanToInt :: Text -> Int
|
||||||
|
romanToInt s = runST $ do
|
||||||
|
total <- newSTRef 0
|
||||||
|
prevVal <- newSTRef 0
|
||||||
|
|
||||||
|
for_ (T.unpack s) $ \char -> do
|
||||||
|
let curr = romanValue char
|
||||||
|
p <- readSTRef prevVal
|
||||||
|
|
||||||
|
-- If current > previous (e.g., IV), we subtract the previous twice
|
||||||
|
if curr > p
|
||||||
|
then modifySTRef' total (\t -> t + curr - 2 * p)
|
||||||
|
else modifySTRef' total (+ curr)
|
||||||
|
|
||||||
|
writeSTRef prevVal curr
|
||||||
|
|
||||||
|
readSTRef total
|
||||||
|
|
||||||
|
upperRoman :: [Char]
|
||||||
|
upperRoman = "IVXLCDM"
|
||||||
|
|
||||||
|
lowerRoman :: [Char]
|
||||||
|
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
|
||||||
|
|
||||||
|
-- consumes whitespace as well for convenience in element parser
|
||||||
|
listMarker :: (Logger m, Characters s) => Parser s m ListType
|
||||||
|
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
|
||||||
|
where
|
||||||
|
decimal = do
|
||||||
|
num <- choice $ map ($ some numberChar) [try . surroundParen, try . rightParen, point]
|
||||||
|
let start_number = Just $ read num
|
||||||
|
pure $ Ordered {start_number, style = Just "1"}
|
||||||
|
lower_letter :: (Logger m, Characters s) => Parser s m ListType
|
||||||
|
lower_letter = do
|
||||||
|
letter <- choice $ map ($ lowerChar) [try . surroundParen, try . rightParen, point]
|
||||||
|
let start_number = elemIndex letter lowerAlphabet
|
||||||
|
pure $ Ordered {start_number, style = Just "a"}
|
||||||
|
upper_letter :: (Logger m, Characters s) => Parser s m ListType
|
||||||
|
upper_letter = do
|
||||||
|
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
|
||||||
|
let start_number = elemIndex letter upperAlphabet
|
||||||
|
pure $ Ordered {start_number, style = Just "A"}
|
||||||
|
lower_roman_numeral = do
|
||||||
|
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
|
||||||
|
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||||
|
pure $ Ordered {start_number, style = Just "i"}
|
||||||
|
upper_roman_numeral = do
|
||||||
|
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
|
||||||
|
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||||
|
pure $ Ordered {start_number, style = Just "I"}
|
||||||
|
|
||||||
|
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||||
|
surroundParen parser = do
|
||||||
|
char '('
|
||||||
|
ret <- parser
|
||||||
|
char ')'
|
||||||
|
space
|
||||||
|
pure ret
|
||||||
|
rightParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||||
|
rightParen parser = parser <* char ')' <* space
|
||||||
|
point :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||||
|
point parser = parser <* char '.' <* space
|
||||||
|
|
||||||
|
bullet :: (Logger m, Characters s) => Parser s m ListType
|
||||||
|
bullet = error "todo"
|
||||||
|
|
||||||
|
taskMarker :: (Logger m, Characters s) => Parser s m Char
|
||||||
|
taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space
|
||||||
|
|
||||||
codeFence :: (Logger m, Characters s) => Parser s m ()
|
codeFence :: (Logger m, Characters s) => Parser s m ()
|
||||||
codeFence = void $ string "```"
|
codeFence = void $ string "```"
|
||||||
|
|
|
||||||
221
src/Djot/Lists.hs
Normal file
221
src/Djot/Lists.hs
Normal file
|
|
@ -0,0 +1,221 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
-- This is written by Gemini barring minor adjustments made manually to fit
|
||||||
|
-- into the Parsing utilities and logger stuff
|
||||||
|
|
||||||
|
module Djot.Lists (List, djotList, TaskList, djotTaskList) where
|
||||||
|
|
||||||
|
import Data.Functor (void)
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Void
|
||||||
|
import Logger (Logger (logCallStack, logDebug, logError))
|
||||||
|
import Text.Megaparsec
|
||||||
|
import Text.Megaparsec.Char
|
||||||
|
import qualified Text.Megaparsec.Char.Lexer as L
|
||||||
|
import Utilities.Parsing
|
||||||
|
|
||||||
|
-- | Djot supports three types of ordered list delimiters.
|
||||||
|
data Delimiter = Period | RightParen | Enclosed -- e.g., '1.', '1)', '(1)'
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
-- | The style of the list marker.
|
||||||
|
data MarkerStyle
|
||||||
|
= Bullet Char -- '*', '-', '+'
|
||||||
|
| Decimal Delimiter -- '1'
|
||||||
|
| LowerAlpha Delimiter -- 'a'
|
||||||
|
| UpperAlpha Delimiter -- 'A'
|
||||||
|
| LowerRoman Delimiter -- 'i'
|
||||||
|
| UpperRoman Delimiter -- 'I'
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
-- | A single item in a list.
|
||||||
|
data ListItem = ListItem
|
||||||
|
{ itemStyle :: MarkerStyle,
|
||||||
|
itemIndent :: Int, -- Column where the block content starts
|
||||||
|
itemContent :: [Text] -- Simplified: usually this would be [Block]
|
||||||
|
}
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
-- | A full list is a collection of items sharing the exact same MarkerStyle.
|
||||||
|
data List = List MarkerStyle [ListItem]
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
-- | Parses whitespace but does not consume newlines.
|
||||||
|
sc :: (Logger m, Characters s) => Parser s m ()
|
||||||
|
sc = hidden (void $ takeWhileP (Just "space") (== ' '))
|
||||||
|
|
||||||
|
-- | Parses the punctuation after/around an ordered list number/letter.
|
||||||
|
-- E.g., takes a parser for the numeral (like `lowerRoman`) and applies delimiters.
|
||||||
|
orderedDelimiter :: (Logger m, Characters s) => Parser s m a -> Parser s m (a, Delimiter)
|
||||||
|
orderedDelimiter inner =
|
||||||
|
(try (between (char '(') (char ')') inner) >>= \x -> return (x, Enclosed))
|
||||||
|
<|> (try (inner <* char ')') >>= \x -> return (x, RightParen))
|
||||||
|
<|> (try (inner <* char '.') >>= \x -> return (x, Period))
|
||||||
|
|
||||||
|
-- | Parses a bullet marker.
|
||||||
|
bulletMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
bulletMarker = Bullet <$> oneOf ['*', '-', '+']
|
||||||
|
|
||||||
|
-- | Decimals: '1.', '1)', '(1)'
|
||||||
|
decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
decimalMarker = Decimal . snd <$> orderedDelimiter L.decimal
|
||||||
|
|
||||||
|
-- | Lower/Upper Alpha: 'a.', 'A)', etc.
|
||||||
|
-- (Note: In a full implementation, you must ensure these are single letters)
|
||||||
|
lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
lowerAlphaMarker = LowerAlpha . snd <$> orderedDelimiter lowerChar
|
||||||
|
|
||||||
|
upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
upperAlphaMarker = UpperAlpha . snd <$> orderedDelimiter upperChar
|
||||||
|
|
||||||
|
-- | Lower/Upper Roman
|
||||||
|
-- Uses a simplified roman numeral regex/parser for illustration.
|
||||||
|
romanLower :: (Logger m, Characters s) => Parser s m Text
|
||||||
|
romanLower = toText <$> takeWhile1P (Just "lower roman") (`elem` ['i', 'v', 'x', 'l', 'c', 'd', 'm'])
|
||||||
|
|
||||||
|
romanUpper :: (Logger m, Characters s) => Parser s m Text
|
||||||
|
romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M'])
|
||||||
|
|
||||||
|
lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
lowerRomanMarker = LowerRoman . snd <$> orderedDelimiter romanLower
|
||||||
|
|
||||||
|
upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
upperRomanMarker = UpperRoman . snd <$> orderedDelimiter romanUpper
|
||||||
|
|
||||||
|
-- | Attempts to parse any valid Djot list marker.
|
||||||
|
listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
listMarker =
|
||||||
|
choice
|
||||||
|
[ bulletMarker,
|
||||||
|
decimalMarker,
|
||||||
|
lowerRomanMarker, -- Order matters here to prevent 'i.' parsing as LowerAlpha
|
||||||
|
lowerAlphaMarker,
|
||||||
|
upperRomanMarker,
|
||||||
|
upperAlphaMarker
|
||||||
|
]
|
||||||
|
|
||||||
|
-- | Parses a single list item.
|
||||||
|
listItem :: (Logger m, Characters s) => Parser s m ListItem
|
||||||
|
listItem = do
|
||||||
|
-- 1. Grab the current indentation level before the item
|
||||||
|
startPos <- L.indentLevel
|
||||||
|
|
||||||
|
-- 2. Parse the marker
|
||||||
|
marker <- listMarker
|
||||||
|
|
||||||
|
-- 3. Consume spaces between marker and content (mandatory in Djot)
|
||||||
|
void $ some (char ' ')
|
||||||
|
|
||||||
|
-- 4. Find the column where the actual content begins.
|
||||||
|
-- Djot relies on this column to group subsequent lines.
|
||||||
|
contentPos <- L.indentLevel
|
||||||
|
|
||||||
|
-- 5. Parse the block content based on indentation.
|
||||||
|
-- We use `L.indentBlock` or manually grab lines that are indented >= contentPos.
|
||||||
|
content <- some (indentedLine contentPos)
|
||||||
|
|
||||||
|
return $ ListItem marker (unPos contentPos) content
|
||||||
|
|
||||||
|
-- | Helper to grab a line that respects the item's indentation level.
|
||||||
|
indentedLine :: (Logger m, Characters s) => Pos -> Parser s m Text
|
||||||
|
indentedLine minIndent = do
|
||||||
|
currentIndent <- L.indentLevel
|
||||||
|
if currentIndent >= minIndent
|
||||||
|
then do
|
||||||
|
line <- takeWhileP (Just "character") (/= '\n')
|
||||||
|
void (char '\n') <|> eof
|
||||||
|
return $ toText line
|
||||||
|
else fail "Line not indented enough for list item"
|
||||||
|
|
||||||
|
-- | Parses a complete list by ensuring all subsequent items share the same MarkerStyle.
|
||||||
|
djotList :: (Logger m, Characters s) => Parser s m List
|
||||||
|
djotList = do
|
||||||
|
firstItem <- listItem
|
||||||
|
let style = itemStyle firstItem
|
||||||
|
|
||||||
|
-- Try to parse more items, but ONLY if they have the exact same style.
|
||||||
|
restItems <- many (try $ itemWithStyle style)
|
||||||
|
|
||||||
|
return $ List style (firstItem : restItems)
|
||||||
|
|
||||||
|
-- | Parses an item only if it matches the expected style.
|
||||||
|
itemWithStyle :: (Logger m, Characters s) => MarkerStyle -> Parser s m ListItem
|
||||||
|
itemWithStyle expectedStyle = do
|
||||||
|
item <- listItem
|
||||||
|
if itemStyle item == expectedStyle
|
||||||
|
then return item
|
||||||
|
else fail "List marker style mismatch"
|
||||||
|
|
||||||
|
-- | Represents the state of the task checkbox.
|
||||||
|
data TaskStatus = Unchecked | Checked
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
-- | A specific list item that is guaranteed to be a task.
|
||||||
|
data TaskListItem = TaskListItem
|
||||||
|
{ taskStyle :: MarkerStyle,
|
||||||
|
taskIndent :: Int,
|
||||||
|
taskStatus :: TaskStatus,
|
||||||
|
taskContent :: [Text]
|
||||||
|
}
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
-- | A list composed entirely of TaskListItems.
|
||||||
|
data TaskList = TaskList MarkerStyle [TaskListItem]
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
-- | Parses the Djot task checkbox and returns its status.
|
||||||
|
taskCheckbox :: (Logger m, Characters s) => Parser s m TaskStatus
|
||||||
|
taskCheckbox = do
|
||||||
|
void $ char '['
|
||||||
|
status <-
|
||||||
|
(char ' ' *> pure Unchecked)
|
||||||
|
<|> (oneOf ['x', 'X'] *> pure Checked)
|
||||||
|
void $ char ']'
|
||||||
|
|
||||||
|
-- Djot requires a space after the checkbox (or a newline if the item is blank)
|
||||||
|
void (char ' ') <|> lookAhead (void $ char '\n') <|> eof
|
||||||
|
|
||||||
|
pure status
|
||||||
|
|
||||||
|
-- | Parses a single task list item.
|
||||||
|
taskListItem :: (Logger m, Characters s) => Parser s m TaskListItem
|
||||||
|
taskListItem = do
|
||||||
|
_startPos <- L.indentLevel
|
||||||
|
|
||||||
|
-- 1. Parse the list marker (bullets or ordered)
|
||||||
|
marker <- listMarker
|
||||||
|
|
||||||
|
-- 2. Consume mandatory spaces after the marker
|
||||||
|
void $ some (char ' ')
|
||||||
|
|
||||||
|
-- 3. Capture the indentation level right HERE.
|
||||||
|
-- The '[' of the checkbox dictates the column that subsequent lines must respect.
|
||||||
|
contentPos <- L.indentLevel
|
||||||
|
|
||||||
|
-- 4. Parse the mandatory checkbox (if this fails, it's not a task list item)
|
||||||
|
status <- taskCheckbox
|
||||||
|
|
||||||
|
-- 5. Parse the content, ensuring continuation lines respect the `contentPos`
|
||||||
|
-- (Using the same `indentedLine` helper from the previous parser)
|
||||||
|
content <- some (indentedLine contentPos)
|
||||||
|
|
||||||
|
pure $ TaskListItem marker (unPos contentPos) status content
|
||||||
|
|
||||||
|
-- | Parses a complete task list, ensuring consistent marker styles.
|
||||||
|
djotTaskList :: (Logger m, Characters s) => Parser s m TaskList
|
||||||
|
djotTaskList = do
|
||||||
|
firstItem <- taskListItem
|
||||||
|
let style = taskStyle firstItem
|
||||||
|
|
||||||
|
restItems <- many (try $ taskItemWithStyle style)
|
||||||
|
|
||||||
|
pure $ TaskList style (firstItem : restItems)
|
||||||
|
|
||||||
|
-- | Helper to strictly enforce uniform marker styles across the task list.
|
||||||
|
taskItemWithStyle :: (Logger m, Characters s) => MarkerStyle -> Parser s m TaskListItem
|
||||||
|
taskItemWithStyle expectedStyle = do
|
||||||
|
item <- taskListItem
|
||||||
|
if taskStyle item == expectedStyle
|
||||||
|
then pure item
|
||||||
|
else fail "Task list marker style mismatch"
|
||||||
Loading…
Reference in a new issue