Compare commits
No commits in common. "aa69b3f6ef2938f64e3525311dafb32674b63c26" and "b6a689868be4bd883e0d8aef0fa67b630e893367" have entirely different histories.
aa69b3f6ef
...
b6a689868b
3 changed files with 320 additions and 6 deletions
|
|
@ -27,7 +27,7 @@ common warnings
|
|||
|
||||
library
|
||||
hs-source-dirs: src
|
||||
exposed-modules: Djot Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config Utilities.Bundling
|
||||
exposed-modules: Djot Djot.Lists Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config Utilities.Bundling
|
||||
other-modules: Utilities.Parsing
|
||||
build-depends: base, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, bytestring
|
||||
default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
|
||||
|
|
|
|||
103
src/Djot.hs
103
src/Djot.hs
|
|
@ -11,12 +11,15 @@ module Djot
|
|||
where
|
||||
|
||||
import Control.Applicative (many, optional, some, (<|>))
|
||||
import Control.Monad.ST (runST)
|
||||
import Data.Foldable (for_)
|
||||
import Data.Functor (void, (<$>))
|
||||
import Data.List (elemIndex)
|
||||
import qualified Data.List.NonEmpty as NE
|
||||
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import qualified Djot.Lists
|
||||
import IR
|
||||
import Logger (Logger (logCallStack, logDebug, logError))
|
||||
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
|
||||
|
|
@ -97,15 +100,105 @@ blockQuote attrs = do
|
|||
|
||||
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||
listBlock attrs = do
|
||||
error "todo"
|
||||
list <- Djot.Lists.djotList
|
||||
error ""
|
||||
|
||||
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||
taskListBlock attrs = do
|
||||
error "todo"
|
||||
error "this is also probably fundamentally wrong"
|
||||
startOffset <- getOffset
|
||||
bullet
|
||||
first_item <- task_item
|
||||
rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator
|
||||
pure $ TaskList (TL {items = first_item : rem_items}) attrs
|
||||
where
|
||||
task_item = error "todo"
|
||||
|
||||
descriptionListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||
descriptionListBlock attrs = do
|
||||
error "todo"
|
||||
lowerAlphabet :: [Char]
|
||||
lowerAlphabet = ['a' .. 'z']
|
||||
|
||||
upperAlphabet :: [Char]
|
||||
upperAlphabet = ['A' .. 'Z']
|
||||
|
||||
-- inner parsing of roman numerals done via Gemini due to lazy
|
||||
romanValue :: Char -> Int
|
||||
romanValue 'I' = 1
|
||||
romanValue 'V' = 5
|
||||
romanValue 'X' = 10
|
||||
romanValue 'L' = 50
|
||||
romanValue 'C' = 100
|
||||
romanValue 'D' = 500
|
||||
romanValue 'M' = 1000
|
||||
romanValue _ = 0
|
||||
|
||||
romanToInt :: Text -> Int
|
||||
romanToInt s = runST $ do
|
||||
total <- newSTRef 0
|
||||
prevVal <- newSTRef 0
|
||||
|
||||
for_ (T.unpack s) $ \char -> do
|
||||
let curr = romanValue char
|
||||
p <- readSTRef prevVal
|
||||
|
||||
-- If current > previous (e.g., IV), we subtract the previous twice
|
||||
if curr > p
|
||||
then modifySTRef' total (\t -> t + curr - 2 * p)
|
||||
else modifySTRef' total (+ curr)
|
||||
|
||||
writeSTRef prevVal curr
|
||||
|
||||
readSTRef total
|
||||
|
||||
upperRoman :: [Char]
|
||||
upperRoman = "IVXLCDM"
|
||||
|
||||
lowerRoman :: [Char]
|
||||
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
|
||||
|
||||
-- consumes whitespace as well for convenience in element parser
|
||||
listMarker :: (Logger m, Characters s) => Parser s m ListType
|
||||
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
|
||||
where
|
||||
decimal = do
|
||||
num <- choice $ map ($ some numberChar) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ read num
|
||||
pure $ Ordered {start_number, style = Just "1"}
|
||||
lower_letter :: (Logger m, Characters s) => Parser s m ListType
|
||||
lower_letter = do
|
||||
letter <- choice $ map ($ lowerChar) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = elemIndex letter lowerAlphabet
|
||||
pure $ Ordered {start_number, style = Just "a"}
|
||||
upper_letter :: (Logger m, Characters s) => Parser s m ListType
|
||||
upper_letter = do
|
||||
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = elemIndex letter upperAlphabet
|
||||
pure $ Ordered {start_number, style = Just "A"}
|
||||
lower_roman_numeral = do
|
||||
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||
pure $ Ordered {start_number, style = Just "i"}
|
||||
upper_roman_numeral = do
|
||||
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||
pure $ Ordered {start_number, style = Just "I"}
|
||||
|
||||
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||
surroundParen parser = do
|
||||
char '('
|
||||
ret <- parser
|
||||
char ')'
|
||||
space
|
||||
pure ret
|
||||
rightParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||
rightParen parser = parser <* char ')' <* space
|
||||
point :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||
point parser = parser <* char '.' <* space
|
||||
|
||||
bullet :: (Logger m, Characters s) => Parser s m ListType
|
||||
bullet = error "todo"
|
||||
|
||||
taskMarker :: (Logger m, Characters s) => Parser s m Char
|
||||
taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space
|
||||
|
||||
codeFence :: (Logger m, Characters s) => Parser s m ()
|
||||
codeFence = void $ string "```"
|
||||
|
|
|
|||
221
src/Djot/Lists.hs
Normal file
221
src/Djot/Lists.hs
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
-- This is written by Gemini barring minor adjustments made manually to fit
|
||||
-- into the Parsing utilities and logger stuff
|
||||
|
||||
module Djot.Lists (List, djotList, TaskList, djotTaskList) where
|
||||
|
||||
import Data.Functor (void)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Data.Void
|
||||
import Logger (Logger (logCallStack, logDebug, logError))
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.Char
|
||||
import qualified Text.Megaparsec.Char.Lexer as L
|
||||
import Utilities.Parsing
|
||||
|
||||
-- | Djot supports three types of ordered list delimiters.
|
||||
data Delimiter = Period | RightParen | Enclosed -- e.g., '1.', '1)', '(1)'
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | The style of the list marker.
|
||||
data MarkerStyle
|
||||
= Bullet Char -- '*', '-', '+'
|
||||
| Decimal Delimiter -- '1'
|
||||
| LowerAlpha Delimiter -- 'a'
|
||||
| UpperAlpha Delimiter -- 'A'
|
||||
| LowerRoman Delimiter -- 'i'
|
||||
| UpperRoman Delimiter -- 'I'
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | A single item in a list.
|
||||
data ListItem = ListItem
|
||||
{ itemStyle :: MarkerStyle,
|
||||
itemIndent :: Int, -- Column where the block content starts
|
||||
itemContent :: [Text] -- Simplified: usually this would be [Block]
|
||||
}
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | A full list is a collection of items sharing the exact same MarkerStyle.
|
||||
data List = List MarkerStyle [ListItem]
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | Parses whitespace but does not consume newlines.
|
||||
sc :: (Logger m, Characters s) => Parser s m ()
|
||||
sc = hidden (void $ takeWhileP (Just "space") (== ' '))
|
||||
|
||||
-- | Parses the punctuation after/around an ordered list number/letter.
|
||||
-- E.g., takes a parser for the numeral (like `lowerRoman`) and applies delimiters.
|
||||
orderedDelimiter :: (Logger m, Characters s) => Parser s m a -> Parser s m (a, Delimiter)
|
||||
orderedDelimiter inner =
|
||||
(try (between (char '(') (char ')') inner) >>= \x -> return (x, Enclosed))
|
||||
<|> (try (inner <* char ')') >>= \x -> return (x, RightParen))
|
||||
<|> (try (inner <* char '.') >>= \x -> return (x, Period))
|
||||
|
||||
-- | Parses a bullet marker.
|
||||
bulletMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
bulletMarker = Bullet <$> oneOf ['*', '-', '+']
|
||||
|
||||
-- | Decimals: '1.', '1)', '(1)'
|
||||
decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
decimalMarker = Decimal . snd <$> orderedDelimiter L.decimal
|
||||
|
||||
-- | Lower/Upper Alpha: 'a.', 'A)', etc.
|
||||
-- (Note: In a full implementation, you must ensure these are single letters)
|
||||
lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
lowerAlphaMarker = LowerAlpha . snd <$> orderedDelimiter lowerChar
|
||||
|
||||
upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
upperAlphaMarker = UpperAlpha . snd <$> orderedDelimiter upperChar
|
||||
|
||||
-- | Lower/Upper Roman
|
||||
-- Uses a simplified roman numeral regex/parser for illustration.
|
||||
romanLower :: (Logger m, Characters s) => Parser s m Text
|
||||
romanLower = toText <$> takeWhile1P (Just "lower roman") (`elem` ['i', 'v', 'x', 'l', 'c', 'd', 'm'])
|
||||
|
||||
romanUpper :: (Logger m, Characters s) => Parser s m Text
|
||||
romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M'])
|
||||
|
||||
lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
lowerRomanMarker = LowerRoman . snd <$> orderedDelimiter romanLower
|
||||
|
||||
upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
upperRomanMarker = UpperRoman . snd <$> orderedDelimiter romanUpper
|
||||
|
||||
-- | Attempts to parse any valid Djot list marker.
|
||||
listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
listMarker =
|
||||
choice
|
||||
[ bulletMarker,
|
||||
decimalMarker,
|
||||
lowerRomanMarker, -- Order matters here to prevent 'i.' parsing as LowerAlpha
|
||||
lowerAlphaMarker,
|
||||
upperRomanMarker,
|
||||
upperAlphaMarker
|
||||
]
|
||||
|
||||
-- | Parses a single list item.
|
||||
listItem :: (Logger m, Characters s) => Parser s m ListItem
|
||||
listItem = do
|
||||
-- 1. Grab the current indentation level before the item
|
||||
startPos <- L.indentLevel
|
||||
|
||||
-- 2. Parse the marker
|
||||
marker <- listMarker
|
||||
|
||||
-- 3. Consume spaces between marker and content (mandatory in Djot)
|
||||
void $ some (char ' ')
|
||||
|
||||
-- 4. Find the column where the actual content begins.
|
||||
-- Djot relies on this column to group subsequent lines.
|
||||
contentPos <- L.indentLevel
|
||||
|
||||
-- 5. Parse the block content based on indentation.
|
||||
-- We use `L.indentBlock` or manually grab lines that are indented >= contentPos.
|
||||
content <- some (indentedLine contentPos)
|
||||
|
||||
return $ ListItem marker (unPos contentPos) content
|
||||
|
||||
-- | Helper to grab a line that respects the item's indentation level.
|
||||
indentedLine :: (Logger m, Characters s) => Pos -> Parser s m Text
|
||||
indentedLine minIndent = do
|
||||
currentIndent <- L.indentLevel
|
||||
if currentIndent >= minIndent
|
||||
then do
|
||||
line <- takeWhileP (Just "character") (/= '\n')
|
||||
void (char '\n') <|> eof
|
||||
return $ toText line
|
||||
else fail "Line not indented enough for list item"
|
||||
|
||||
-- | Parses a complete list by ensuring all subsequent items share the same MarkerStyle.
|
||||
djotList :: (Logger m, Characters s) => Parser s m List
|
||||
djotList = do
|
||||
firstItem <- listItem
|
||||
let style = itemStyle firstItem
|
||||
|
||||
-- Try to parse more items, but ONLY if they have the exact same style.
|
||||
restItems <- many (try $ itemWithStyle style)
|
||||
|
||||
return $ List style (firstItem : restItems)
|
||||
|
||||
-- | Parses an item only if it matches the expected style.
|
||||
itemWithStyle :: (Logger m, Characters s) => MarkerStyle -> Parser s m ListItem
|
||||
itemWithStyle expectedStyle = do
|
||||
item <- listItem
|
||||
if itemStyle item == expectedStyle
|
||||
then return item
|
||||
else fail "List marker style mismatch"
|
||||
|
||||
-- | Represents the state of the task checkbox.
|
||||
data TaskStatus = Unchecked | Checked
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | A specific list item that is guaranteed to be a task.
|
||||
data TaskListItem = TaskListItem
|
||||
{ taskStyle :: MarkerStyle,
|
||||
taskIndent :: Int,
|
||||
taskStatus :: TaskStatus,
|
||||
taskContent :: [Text]
|
||||
}
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | A list composed entirely of TaskListItems.
|
||||
data TaskList = TaskList MarkerStyle [TaskListItem]
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | Parses the Djot task checkbox and returns its status.
|
||||
taskCheckbox :: (Logger m, Characters s) => Parser s m TaskStatus
|
||||
taskCheckbox = do
|
||||
void $ char '['
|
||||
status <-
|
||||
(char ' ' *> pure Unchecked)
|
||||
<|> (oneOf ['x', 'X'] *> pure Checked)
|
||||
void $ char ']'
|
||||
|
||||
-- Djot requires a space after the checkbox (or a newline if the item is blank)
|
||||
void (char ' ') <|> lookAhead (void $ char '\n') <|> eof
|
||||
|
||||
pure status
|
||||
|
||||
-- | Parses a single task list item.
|
||||
taskListItem :: (Logger m, Characters s) => Parser s m TaskListItem
|
||||
taskListItem = do
|
||||
_startPos <- L.indentLevel
|
||||
|
||||
-- 1. Parse the list marker (bullets or ordered)
|
||||
marker <- listMarker
|
||||
|
||||
-- 2. Consume mandatory spaces after the marker
|
||||
void $ some (char ' ')
|
||||
|
||||
-- 3. Capture the indentation level right HERE.
|
||||
-- The '[' of the checkbox dictates the column that subsequent lines must respect.
|
||||
contentPos <- L.indentLevel
|
||||
|
||||
-- 4. Parse the mandatory checkbox (if this fails, it's not a task list item)
|
||||
status <- taskCheckbox
|
||||
|
||||
-- 5. Parse the content, ensuring continuation lines respect the `contentPos`
|
||||
-- (Using the same `indentedLine` helper from the previous parser)
|
||||
content <- some (indentedLine contentPos)
|
||||
|
||||
pure $ TaskListItem marker (unPos contentPos) status content
|
||||
|
||||
-- | Parses a complete task list, ensuring consistent marker styles.
|
||||
djotTaskList :: (Logger m, Characters s) => Parser s m TaskList
|
||||
djotTaskList = do
|
||||
firstItem <- taskListItem
|
||||
let style = taskStyle firstItem
|
||||
|
||||
restItems <- many (try $ taskItemWithStyle style)
|
||||
|
||||
pure $ TaskList style (firstItem : restItems)
|
||||
|
||||
-- | Helper to strictly enforce uniform marker styles across the task list.
|
||||
taskItemWithStyle :: (Logger m, Characters s) => MarkerStyle -> Parser s m TaskListItem
|
||||
taskItemWithStyle expectedStyle = do
|
||||
item <- taskListItem
|
||||
if taskStyle item == expectedStyle
|
||||
then pure item
|
||||
else fail "Task list marker style mismatch"
|
||||
Loading…
Reference in a new issue