realizing LLM code is inadequate
This commit is contained in:
parent
b6a689868b
commit
4db3e8f912
2 changed files with 82 additions and 110 deletions
102
src/Djot.hs
102
src/Djot.hs
|
|
@ -11,12 +11,10 @@ module Djot
|
||||||
where
|
where
|
||||||
|
|
||||||
import Control.Applicative (many, optional, some, (<|>))
|
import Control.Applicative (many, optional, some, (<|>))
|
||||||
import Control.Monad.ST (runST)
|
|
||||||
import Data.Foldable (for_)
|
import Data.Foldable (for_)
|
||||||
import Data.Functor (void, (<$>))
|
import Data.Functor (void, (<$>))
|
||||||
import Data.List (elemIndex)
|
import Data.List (elemIndex)
|
||||||
import qualified Data.List.NonEmpty as NE
|
import qualified Data.List.NonEmpty as NE
|
||||||
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import qualified Djot.Lists
|
import qualified Djot.Lists
|
||||||
|
|
@ -100,105 +98,15 @@ blockQuote attrs = do
|
||||||
|
|
||||||
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||||
listBlock attrs = do
|
listBlock attrs = do
|
||||||
list <- Djot.Lists.djotList
|
error "todo"
|
||||||
error ""
|
|
||||||
|
|
||||||
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||||
taskListBlock attrs = do
|
taskListBlock attrs = do
|
||||||
error "this is also probably fundamentally wrong"
|
error "todo"
|
||||||
startOffset <- getOffset
|
|
||||||
bullet
|
|
||||||
first_item <- task_item
|
|
||||||
rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator
|
|
||||||
pure $ TaskList (TL {items = first_item : rem_items}) attrs
|
|
||||||
where
|
|
||||||
task_item = error "todo"
|
|
||||||
|
|
||||||
lowerAlphabet :: [Char]
|
descriptionListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||||
lowerAlphabet = ['a' .. 'z']
|
descriptionListBlock attrs = do
|
||||||
|
error "todo"
|
||||||
upperAlphabet :: [Char]
|
|
||||||
upperAlphabet = ['A' .. 'Z']
|
|
||||||
|
|
||||||
-- inner parsing of roman numerals done via Gemini due to lazy
|
|
||||||
romanValue :: Char -> Int
|
|
||||||
romanValue 'I' = 1
|
|
||||||
romanValue 'V' = 5
|
|
||||||
romanValue 'X' = 10
|
|
||||||
romanValue 'L' = 50
|
|
||||||
romanValue 'C' = 100
|
|
||||||
romanValue 'D' = 500
|
|
||||||
romanValue 'M' = 1000
|
|
||||||
romanValue _ = 0
|
|
||||||
|
|
||||||
romanToInt :: Text -> Int
|
|
||||||
romanToInt s = runST $ do
|
|
||||||
total <- newSTRef 0
|
|
||||||
prevVal <- newSTRef 0
|
|
||||||
|
|
||||||
for_ (T.unpack s) $ \char -> do
|
|
||||||
let curr = romanValue char
|
|
||||||
p <- readSTRef prevVal
|
|
||||||
|
|
||||||
-- If current > previous (e.g., IV), we subtract the previous twice
|
|
||||||
if curr > p
|
|
||||||
then modifySTRef' total (\t -> t + curr - 2 * p)
|
|
||||||
else modifySTRef' total (+ curr)
|
|
||||||
|
|
||||||
writeSTRef prevVal curr
|
|
||||||
|
|
||||||
readSTRef total
|
|
||||||
|
|
||||||
upperRoman :: [Char]
|
|
||||||
upperRoman = "IVXLCDM"
|
|
||||||
|
|
||||||
lowerRoman :: [Char]
|
|
||||||
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
|
|
||||||
|
|
||||||
-- consumes whitespace as well for convenience in element parser
|
|
||||||
listMarker :: (Logger m, Characters s) => Parser s m ListType
|
|
||||||
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
|
|
||||||
where
|
|
||||||
decimal = do
|
|
||||||
num <- choice $ map ($ some numberChar) [try . surroundParen, try . rightParen, point]
|
|
||||||
let start_number = Just $ read num
|
|
||||||
pure $ Ordered {start_number, style = Just "1"}
|
|
||||||
lower_letter :: (Logger m, Characters s) => Parser s m ListType
|
|
||||||
lower_letter = do
|
|
||||||
letter <- choice $ map ($ lowerChar) [try . surroundParen, try . rightParen, point]
|
|
||||||
let start_number = elemIndex letter lowerAlphabet
|
|
||||||
pure $ Ordered {start_number, style = Just "a"}
|
|
||||||
upper_letter :: (Logger m, Characters s) => Parser s m ListType
|
|
||||||
upper_letter = do
|
|
||||||
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
|
|
||||||
let start_number = elemIndex letter upperAlphabet
|
|
||||||
pure $ Ordered {start_number, style = Just "A"}
|
|
||||||
lower_roman_numeral = do
|
|
||||||
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
|
|
||||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
|
||||||
pure $ Ordered {start_number, style = Just "i"}
|
|
||||||
upper_roman_numeral = do
|
|
||||||
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
|
|
||||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
|
||||||
pure $ Ordered {start_number, style = Just "I"}
|
|
||||||
|
|
||||||
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
|
||||||
surroundParen parser = do
|
|
||||||
char '('
|
|
||||||
ret <- parser
|
|
||||||
char ')'
|
|
||||||
space
|
|
||||||
pure ret
|
|
||||||
rightParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
|
||||||
rightParen parser = parser <* char ')' <* space
|
|
||||||
point :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
|
||||||
point parser = parser <* char '.' <* space
|
|
||||||
|
|
||||||
bullet :: (Logger m, Characters s) => Parser s m ListType
|
|
||||||
bullet = error "todo"
|
|
||||||
|
|
||||||
taskMarker :: (Logger m, Characters s) => Parser s m Char
|
|
||||||
taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space
|
|
||||||
|
|
||||||
codeFence :: (Logger m, Characters s) => Parser s m ()
|
codeFence :: (Logger m, Characters s) => Parser s m ()
|
||||||
codeFence = void $ string "```"
|
codeFence = void $ string "```"
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,21 @@
|
||||||
|
|
||||||
-- This is written by Gemini barring minor adjustments made manually to fit
|
-- This is written by Gemini barring minor adjustments made manually to fit
|
||||||
-- into the Parsing utilities and logger stuff
|
-- into the Parsing utilities and logger stuff
|
||||||
|
--
|
||||||
|
-- the Djot root module just does
|
||||||
|
-- type translation currently
|
||||||
|
|
||||||
module Djot.Lists (List, djotList, TaskList, djotTaskList) where
|
module Djot.Lists (List (List), djotList, TaskList (TaskList), djotTaskList) where
|
||||||
|
|
||||||
|
import Control.Monad.ST (runST)
|
||||||
|
import Data.Char (isUpper, ord)
|
||||||
|
import Data.Foldable (for_)
|
||||||
import Data.Functor (void)
|
import Data.Functor (void)
|
||||||
|
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Data.Void
|
import Data.Void
|
||||||
|
import qualified IR
|
||||||
import Logger (Logger (logCallStack, logDebug, logError))
|
import Logger (Logger (logCallStack, logDebug, logError))
|
||||||
import Text.Megaparsec
|
import Text.Megaparsec
|
||||||
import Text.Megaparsec.Char
|
import Text.Megaparsec.Char
|
||||||
|
|
@ -22,13 +30,17 @@ data Delimiter = Period | RightParen | Enclosed -- e.g., '1.', '1)', '(1)'
|
||||||
-- | The style of the list marker.
|
-- | The style of the list marker.
|
||||||
data MarkerStyle
|
data MarkerStyle
|
||||||
= Bullet Char -- '*', '-', '+'
|
= Bullet Char -- '*', '-', '+'
|
||||||
| Decimal Delimiter -- '1'
|
| Decimal Int Delimiter -- '1'
|
||||||
| LowerAlpha Delimiter -- 'a'
|
| LowerAlpha Int Delimiter -- 'a'
|
||||||
| UpperAlpha Delimiter -- 'A'
|
| UpperAlpha Int Delimiter -- 'A'
|
||||||
| LowerRoman Delimiter -- 'i'
|
| LowerRoman Int Delimiter -- 'i'
|
||||||
| UpperRoman Delimiter -- 'I'
|
| UpperRoman Int Delimiter -- 'I'
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
intoIRType :: MarkerStyle -> IR.ListType
|
||||||
|
intoIRType (Bullet _) = IR.Unordered {style = Nothing}
|
||||||
|
intoIRType (Decimal start _) = IR.Ordered {style = Just "1", start_number = Just start}
|
||||||
|
|
||||||
-- | A single item in a list.
|
-- | A single item in a list.
|
||||||
data ListItem = ListItem
|
data ListItem = ListItem
|
||||||
{ itemStyle :: MarkerStyle,
|
{ itemStyle :: MarkerStyle,
|
||||||
|
|
@ -38,7 +50,7 @@ data ListItem = ListItem
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
-- | A full list is a collection of items sharing the exact same MarkerStyle.
|
-- | A full list is a collection of items sharing the exact same MarkerStyle.
|
||||||
data List = List MarkerStyle [ListItem]
|
data List = List MarkerStyle [Djot.Lists.ListItem]
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
-- | Parses whitespace but does not consume newlines.
|
-- | Parses whitespace but does not consume newlines.
|
||||||
|
|
@ -59,15 +71,21 @@ bulletMarker = Bullet <$> oneOf ['*', '-', '+']
|
||||||
|
|
||||||
-- | Decimals: '1.', '1)', '(1)'
|
-- | Decimals: '1.', '1)', '(1)'
|
||||||
decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
decimalMarker = Decimal . snd <$> orderedDelimiter L.decimal
|
decimalMarker = do
|
||||||
|
(val, delim) <- orderedDelimiter L.decimal
|
||||||
|
return $ Decimal val delim
|
||||||
|
|
||||||
-- | Lower/Upper Alpha: 'a.', 'A)', etc.
|
-- | Lower/Upper Alpha: 'a.', 'A)', etc.
|
||||||
-- (Note: In a full implementation, you must ensure these are single letters)
|
-- (Note: In a full implementation, you must ensure these are single letters)
|
||||||
lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
lowerAlphaMarker = LowerAlpha . snd <$> orderedDelimiter lowerChar
|
lowerAlphaMarker = do
|
||||||
|
(c, delim) <- orderedDelimiter lowerChar
|
||||||
|
return $ LowerAlpha (alphaToOffset c) delim
|
||||||
|
|
||||||
upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
upperAlphaMarker = UpperAlpha . snd <$> orderedDelimiter upperChar
|
upperAlphaMarker = do
|
||||||
|
(c, delim) <- orderedDelimiter upperChar
|
||||||
|
return $ UpperAlpha (alphaToOffset c) delim
|
||||||
|
|
||||||
-- | Lower/Upper Roman
|
-- | Lower/Upper Roman
|
||||||
-- Uses a simplified roman numeral regex/parser for illustration.
|
-- Uses a simplified roman numeral regex/parser for illustration.
|
||||||
|
|
@ -78,10 +96,16 @@ romanUpper :: (Logger m, Characters s) => Parser s m Text
|
||||||
romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M'])
|
romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M'])
|
||||||
|
|
||||||
lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
lowerRomanMarker = LowerRoman . snd <$> orderedDelimiter romanLower
|
lowerRomanMarker = do
|
||||||
|
(numeral, delim) <- orderedDelimiter romanLower
|
||||||
|
let num = romanToInt numeral
|
||||||
|
pure $ LowerRoman num delim
|
||||||
|
|
||||||
upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
upperRomanMarker = UpperRoman . snd <$> orderedDelimiter romanUpper
|
upperRomanMarker = do
|
||||||
|
(numeral, delim) <- orderedDelimiter romanUpper
|
||||||
|
let num = romanToInt numeral
|
||||||
|
pure $ UpperRoman num delim
|
||||||
|
|
||||||
-- | Attempts to parse any valid Djot list marker.
|
-- | Attempts to parse any valid Djot list marker.
|
||||||
listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||||
|
|
@ -96,7 +120,7 @@ listMarker =
|
||||||
]
|
]
|
||||||
|
|
||||||
-- | Parses a single list item.
|
-- | Parses a single list item.
|
||||||
listItem :: (Logger m, Characters s) => Parser s m ListItem
|
listItem :: (Logger m, Characters s) => Parser s m Djot.Lists.ListItem
|
||||||
listItem = do
|
listItem = do
|
||||||
-- 1. Grab the current indentation level before the item
|
-- 1. Grab the current indentation level before the item
|
||||||
startPos <- L.indentLevel
|
startPos <- L.indentLevel
|
||||||
|
|
@ -219,3 +243,43 @@ taskItemWithStyle expectedStyle = do
|
||||||
if taskStyle item == expectedStyle
|
if taskStyle item == expectedStyle
|
||||||
then pure item
|
then pure item
|
||||||
else fail "Task list marker style mismatch"
|
else fail "Task list marker style mismatch"
|
||||||
|
|
||||||
|
-- | Converts a single alpha char to its 1-indexed position (a=1, b=2...)
|
||||||
|
alphaToOffset :: Char -> Int
|
||||||
|
alphaToOffset c =
|
||||||
|
let base = if isUpper c then ord 'A' else ord 'a'
|
||||||
|
in ord c - base + 1
|
||||||
|
|
||||||
|
-- | A real Roman numeral to Int converter (simplified for brevity)
|
||||||
|
-- In a production parser, use a library or a more robust fold.
|
||||||
|
romanToOffset :: Text -> Int
|
||||||
|
romanToOffset = romanToInt -- Implementation of Roman numeral logic
|
||||||
|
|
||||||
|
-- inner parsing of roman numerals done via Gemini due to lazy
|
||||||
|
romanValue :: Char -> Int
|
||||||
|
romanValue 'I' = 1
|
||||||
|
romanValue 'V' = 5
|
||||||
|
romanValue 'X' = 10
|
||||||
|
romanValue 'L' = 50
|
||||||
|
romanValue 'C' = 100
|
||||||
|
romanValue 'D' = 500
|
||||||
|
romanValue 'M' = 1000
|
||||||
|
romanValue _ = 0
|
||||||
|
|
||||||
|
romanToInt :: Text -> Int
|
||||||
|
romanToInt s = runST $ do
|
||||||
|
total <- newSTRef 0
|
||||||
|
prevVal <- newSTRef 0
|
||||||
|
|
||||||
|
for_ (T.unpack s) $ \char -> do
|
||||||
|
let curr = romanValue char
|
||||||
|
p <- readSTRef prevVal
|
||||||
|
|
||||||
|
-- If current > previous (e.g., IV), we subtract the previous twice
|
||||||
|
if curr > p
|
||||||
|
then modifySTRef' total (\t -> t + curr - 2 * p)
|
||||||
|
else modifySTRef' total (+ curr)
|
||||||
|
|
||||||
|
writeSTRef prevVal curr
|
||||||
|
|
||||||
|
readSTRef total
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue