diff --git a/src/Djot.hs b/src/Djot.hs index 024492e..f9f982d 100644 --- a/src/Djot.hs +++ b/src/Djot.hs @@ -11,12 +11,10 @@ module Djot where import Control.Applicative (many, optional, some, (<|>)) -import Control.Monad.ST (runST) import Data.Foldable (for_) import Data.Functor (void, (<$>)) import Data.List (elemIndex) import qualified Data.List.NonEmpty as NE -import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef) import Data.Text (Text) import qualified Data.Text as T import qualified Djot.Lists @@ -100,105 +98,15 @@ blockQuote attrs = do listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element listBlock attrs = do - list <- Djot.Lists.djotList - error "" + error "todo" taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element taskListBlock attrs = do - error "this is also probably fundamentally wrong" - startOffset <- getOffset - bullet - first_item <- task_item - rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator - pure $ TaskList (TL {items = first_item : rem_items}) attrs - where - task_item = error "todo" + error "todo" -lowerAlphabet :: [Char] -lowerAlphabet = ['a' .. 'z'] - -upperAlphabet :: [Char] -upperAlphabet = ['A' .. 'Z'] - --- inner parsing of roman numerals done via Gemini due to lazy -romanValue :: Char -> Int -romanValue 'I' = 1 -romanValue 'V' = 5 -romanValue 'X' = 10 -romanValue 'L' = 50 -romanValue 'C' = 100 -romanValue 'D' = 500 -romanValue 'M' = 1000 -romanValue _ = 0 - -romanToInt :: Text -> Int -romanToInt s = runST $ do - total <- newSTRef 0 - prevVal <- newSTRef 0 - - for_ (T.unpack s) $ \char -> do - let curr = romanValue char - p <- readSTRef prevVal - - -- If current > previous (e.g., IV), we subtract the previous twice - if curr > p - then modifySTRef' total (\t -> t + curr - 2 * p) - else modifySTRef' total (+ curr) - - writeSTRef prevVal curr - - readSTRef total - -upperRoman :: [Char] -upperRoman = "IVXLCDM" - -lowerRoman :: [Char] -lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman - --- consumes whitespace as well for convenience in element parser -listMarker :: (Logger m, Characters s) => Parser s m ListType -listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space - where - decimal = do - num <- choice $ map ($ some numberChar) [try . surroundParen, try . rightParen, point] - let start_number = Just $ read num - pure $ Ordered {start_number, style = Just "1"} - lower_letter :: (Logger m, Characters s) => Parser s m ListType - lower_letter = do - letter <- choice $ map ($ lowerChar) [try . surroundParen, try . rightParen, point] - let start_number = elemIndex letter lowerAlphabet - pure $ Ordered {start_number, style = Just "a"} - upper_letter :: (Logger m, Characters s) => Parser s m ListType - upper_letter = do - letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point] - let start_number = elemIndex letter upperAlphabet - pure $ Ordered {start_number, style = Just "A"} - lower_roman_numeral = do - numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point] - let start_number = Just $ romanToInt $ T.toUpper $ toText numeral - pure $ Ordered {start_number, style = Just "i"} - upper_roman_numeral = do - numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point] - let start_number = Just $ romanToInt $ T.toUpper $ toText numeral - pure $ Ordered {start_number, style = Just "I"} - - surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a - surroundParen parser = do - char '(' - ret <- parser - char ')' - space - pure ret - rightParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a - rightParen parser = parser <* char ')' <* space - point :: (Logger m, Characters s) => Parser s m a -> Parser s m a - point parser = parser <* char '.' <* space - -bullet :: (Logger m, Characters s) => Parser s m ListType -bullet = error "todo" - -taskMarker :: (Logger m, Characters s) => Parser s m Char -taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space +descriptionListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element +descriptionListBlock attrs = do + error "todo" codeFence :: (Logger m, Characters s) => Parser s m () codeFence = void $ string "```" diff --git a/src/Djot/Lists.hs b/src/Djot/Lists.hs index ac2579c..1cc72f1 100644 --- a/src/Djot/Lists.hs +++ b/src/Djot/Lists.hs @@ -2,13 +2,21 @@ -- This is written by Gemini barring minor adjustments made manually to fit -- into the Parsing utilities and logger stuff +-- +-- the Djot root module just does +-- type translation currently -module Djot.Lists (List, djotList, TaskList, djotTaskList) where +module Djot.Lists (List (List), djotList, TaskList (TaskList), djotTaskList) where +import Control.Monad.ST (runST) +import Data.Char (isUpper, ord) +import Data.Foldable (for_) import Data.Functor (void) +import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef) import Data.Text (Text) import qualified Data.Text as T import Data.Void +import qualified IR import Logger (Logger (logCallStack, logDebug, logError)) import Text.Megaparsec import Text.Megaparsec.Char @@ -22,13 +30,17 @@ data Delimiter = Period | RightParen | Enclosed -- e.g., '1.', '1)', '(1)' -- | The style of the list marker. data MarkerStyle = Bullet Char -- '*', '-', '+' - | Decimal Delimiter -- '1' - | LowerAlpha Delimiter -- 'a' - | UpperAlpha Delimiter -- 'A' - | LowerRoman Delimiter -- 'i' - | UpperRoman Delimiter -- 'I' + | Decimal Int Delimiter -- '1' + | LowerAlpha Int Delimiter -- 'a' + | UpperAlpha Int Delimiter -- 'A' + | LowerRoman Int Delimiter -- 'i' + | UpperRoman Int Delimiter -- 'I' deriving (Show, Eq) +intoIRType :: MarkerStyle -> IR.ListType +intoIRType (Bullet _) = IR.Unordered {style = Nothing} +intoIRType (Decimal start _) = IR.Ordered {style = Just "1", start_number = Just start} + -- | A single item in a list. data ListItem = ListItem { itemStyle :: MarkerStyle, @@ -38,7 +50,7 @@ data ListItem = ListItem deriving (Show, Eq) -- | A full list is a collection of items sharing the exact same MarkerStyle. -data List = List MarkerStyle [ListItem] +data List = List MarkerStyle [Djot.Lists.ListItem] deriving (Show, Eq) -- | Parses whitespace but does not consume newlines. @@ -59,15 +71,21 @@ bulletMarker = Bullet <$> oneOf ['*', '-', '+'] -- | Decimals: '1.', '1)', '(1)' decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle -decimalMarker = Decimal . snd <$> orderedDelimiter L.decimal +decimalMarker = do + (val, delim) <- orderedDelimiter L.decimal + return $ Decimal val delim -- | Lower/Upper Alpha: 'a.', 'A)', etc. -- (Note: In a full implementation, you must ensure these are single letters) lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle -lowerAlphaMarker = LowerAlpha . snd <$> orderedDelimiter lowerChar +lowerAlphaMarker = do + (c, delim) <- orderedDelimiter lowerChar + return $ LowerAlpha (alphaToOffset c) delim upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle -upperAlphaMarker = UpperAlpha . snd <$> orderedDelimiter upperChar +upperAlphaMarker = do + (c, delim) <- orderedDelimiter upperChar + return $ UpperAlpha (alphaToOffset c) delim -- | Lower/Upper Roman -- Uses a simplified roman numeral regex/parser for illustration. @@ -78,10 +96,16 @@ romanUpper :: (Logger m, Characters s) => Parser s m Text romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M']) lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle -lowerRomanMarker = LowerRoman . snd <$> orderedDelimiter romanLower +lowerRomanMarker = do + (numeral, delim) <- orderedDelimiter romanLower + let num = romanToInt numeral + pure $ LowerRoman num delim upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle -upperRomanMarker = UpperRoman . snd <$> orderedDelimiter romanUpper +upperRomanMarker = do + (numeral, delim) <- orderedDelimiter romanUpper + let num = romanToInt numeral + pure $ UpperRoman num delim -- | Attempts to parse any valid Djot list marker. listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle @@ -96,7 +120,7 @@ listMarker = ] -- | Parses a single list item. -listItem :: (Logger m, Characters s) => Parser s m ListItem +listItem :: (Logger m, Characters s) => Parser s m Djot.Lists.ListItem listItem = do -- 1. Grab the current indentation level before the item startPos <- L.indentLevel @@ -219,3 +243,43 @@ taskItemWithStyle expectedStyle = do if taskStyle item == expectedStyle then pure item else fail "Task list marker style mismatch" + +-- | Converts a single alpha char to its 1-indexed position (a=1, b=2...) +alphaToOffset :: Char -> Int +alphaToOffset c = + let base = if isUpper c then ord 'A' else ord 'a' + in ord c - base + 1 + +-- | A real Roman numeral to Int converter (simplified for brevity) +-- In a production parser, use a library or a more robust fold. +romanToOffset :: Text -> Int +romanToOffset = romanToInt -- Implementation of Roman numeral logic + +-- inner parsing of roman numerals done via Gemini due to lazy +romanValue :: Char -> Int +romanValue 'I' = 1 +romanValue 'V' = 5 +romanValue 'X' = 10 +romanValue 'L' = 50 +romanValue 'C' = 100 +romanValue 'D' = 500 +romanValue 'M' = 1000 +romanValue _ = 0 + +romanToInt :: Text -> Int +romanToInt s = runST $ do + total <- newSTRef 0 + prevVal <- newSTRef 0 + + for_ (T.unpack s) $ \char -> do + let curr = romanValue char + p <- readSTRef prevVal + + -- If current > previous (e.g., IV), we subtract the previous twice + if curr > p + then modifySTRef' total (\t -> t + curr - 2 * p) + else modifySTRef' total (+ curr) + + writeSTRef prevVal curr + + readSTRef total