realizing LLM code is inadequate
This commit is contained in:
parent
b6a689868b
commit
4db3e8f912
2 changed files with 82 additions and 110 deletions
102
src/Djot.hs
102
src/Djot.hs
|
|
@ -11,12 +11,10 @@ module Djot
|
|||
where
|
||||
|
||||
import Control.Applicative (many, optional, some, (<|>))
|
||||
import Control.Monad.ST (runST)
|
||||
import Data.Foldable (for_)
|
||||
import Data.Functor (void, (<$>))
|
||||
import Data.List (elemIndex)
|
||||
import qualified Data.List.NonEmpty as NE
|
||||
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import qualified Djot.Lists
|
||||
|
|
@ -100,105 +98,15 @@ blockQuote attrs = do
|
|||
|
||||
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||
listBlock attrs = do
|
||||
list <- Djot.Lists.djotList
|
||||
error ""
|
||||
error "todo"
|
||||
|
||||
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||
taskListBlock attrs = do
|
||||
error "this is also probably fundamentally wrong"
|
||||
startOffset <- getOffset
|
||||
bullet
|
||||
first_item <- task_item
|
||||
rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator
|
||||
pure $ TaskList (TL {items = first_item : rem_items}) attrs
|
||||
where
|
||||
task_item = error "todo"
|
||||
error "todo"
|
||||
|
||||
lowerAlphabet :: [Char]
|
||||
lowerAlphabet = ['a' .. 'z']
|
||||
|
||||
upperAlphabet :: [Char]
|
||||
upperAlphabet = ['A' .. 'Z']
|
||||
|
||||
-- inner parsing of roman numerals done via Gemini due to lazy
|
||||
romanValue :: Char -> Int
|
||||
romanValue 'I' = 1
|
||||
romanValue 'V' = 5
|
||||
romanValue 'X' = 10
|
||||
romanValue 'L' = 50
|
||||
romanValue 'C' = 100
|
||||
romanValue 'D' = 500
|
||||
romanValue 'M' = 1000
|
||||
romanValue _ = 0
|
||||
|
||||
romanToInt :: Text -> Int
|
||||
romanToInt s = runST $ do
|
||||
total <- newSTRef 0
|
||||
prevVal <- newSTRef 0
|
||||
|
||||
for_ (T.unpack s) $ \char -> do
|
||||
let curr = romanValue char
|
||||
p <- readSTRef prevVal
|
||||
|
||||
-- If current > previous (e.g., IV), we subtract the previous twice
|
||||
if curr > p
|
||||
then modifySTRef' total (\t -> t + curr - 2 * p)
|
||||
else modifySTRef' total (+ curr)
|
||||
|
||||
writeSTRef prevVal curr
|
||||
|
||||
readSTRef total
|
||||
|
||||
upperRoman :: [Char]
|
||||
upperRoman = "IVXLCDM"
|
||||
|
||||
lowerRoman :: [Char]
|
||||
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
|
||||
|
||||
-- consumes whitespace as well for convenience in element parser
|
||||
listMarker :: (Logger m, Characters s) => Parser s m ListType
|
||||
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
|
||||
where
|
||||
decimal = do
|
||||
num <- choice $ map ($ some numberChar) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ read num
|
||||
pure $ Ordered {start_number, style = Just "1"}
|
||||
lower_letter :: (Logger m, Characters s) => Parser s m ListType
|
||||
lower_letter = do
|
||||
letter <- choice $ map ($ lowerChar) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = elemIndex letter lowerAlphabet
|
||||
pure $ Ordered {start_number, style = Just "a"}
|
||||
upper_letter :: (Logger m, Characters s) => Parser s m ListType
|
||||
upper_letter = do
|
||||
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = elemIndex letter upperAlphabet
|
||||
pure $ Ordered {start_number, style = Just "A"}
|
||||
lower_roman_numeral = do
|
||||
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||
pure $ Ordered {start_number, style = Just "i"}
|
||||
upper_roman_numeral = do
|
||||
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||
pure $ Ordered {start_number, style = Just "I"}
|
||||
|
||||
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||
surroundParen parser = do
|
||||
char '('
|
||||
ret <- parser
|
||||
char ')'
|
||||
space
|
||||
pure ret
|
||||
rightParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||
rightParen parser = parser <* char ')' <* space
|
||||
point :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||
point parser = parser <* char '.' <* space
|
||||
|
||||
bullet :: (Logger m, Characters s) => Parser s m ListType
|
||||
bullet = error "todo"
|
||||
|
||||
taskMarker :: (Logger m, Characters s) => Parser s m Char
|
||||
taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space
|
||||
descriptionListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||
descriptionListBlock attrs = do
|
||||
error "todo"
|
||||
|
||||
codeFence :: (Logger m, Characters s) => Parser s m ()
|
||||
codeFence = void $ string "```"
|
||||
|
|
|
|||
|
|
@ -2,13 +2,21 @@
|
|||
|
||||
-- This is written by Gemini barring minor adjustments made manually to fit
|
||||
-- into the Parsing utilities and logger stuff
|
||||
--
|
||||
-- the Djot root module just does
|
||||
-- type translation currently
|
||||
|
||||
module Djot.Lists (List, djotList, TaskList, djotTaskList) where
|
||||
module Djot.Lists (List (List), djotList, TaskList (TaskList), djotTaskList) where
|
||||
|
||||
import Control.Monad.ST (runST)
|
||||
import Data.Char (isUpper, ord)
|
||||
import Data.Foldable (for_)
|
||||
import Data.Functor (void)
|
||||
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Data.Void
|
||||
import qualified IR
|
||||
import Logger (Logger (logCallStack, logDebug, logError))
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.Char
|
||||
|
|
@ -22,13 +30,17 @@ data Delimiter = Period | RightParen | Enclosed -- e.g., '1.', '1)', '(1)'
|
|||
-- | The style of the list marker.
|
||||
data MarkerStyle
|
||||
= Bullet Char -- '*', '-', '+'
|
||||
| Decimal Delimiter -- '1'
|
||||
| LowerAlpha Delimiter -- 'a'
|
||||
| UpperAlpha Delimiter -- 'A'
|
||||
| LowerRoman Delimiter -- 'i'
|
||||
| UpperRoman Delimiter -- 'I'
|
||||
| Decimal Int Delimiter -- '1'
|
||||
| LowerAlpha Int Delimiter -- 'a'
|
||||
| UpperAlpha Int Delimiter -- 'A'
|
||||
| LowerRoman Int Delimiter -- 'i'
|
||||
| UpperRoman Int Delimiter -- 'I'
|
||||
deriving (Show, Eq)
|
||||
|
||||
intoIRType :: MarkerStyle -> IR.ListType
|
||||
intoIRType (Bullet _) = IR.Unordered {style = Nothing}
|
||||
intoIRType (Decimal start _) = IR.Ordered {style = Just "1", start_number = Just start}
|
||||
|
||||
-- | A single item in a list.
|
||||
data ListItem = ListItem
|
||||
{ itemStyle :: MarkerStyle,
|
||||
|
|
@ -38,7 +50,7 @@ data ListItem = ListItem
|
|||
deriving (Show, Eq)
|
||||
|
||||
-- | A full list is a collection of items sharing the exact same MarkerStyle.
|
||||
data List = List MarkerStyle [ListItem]
|
||||
data List = List MarkerStyle [Djot.Lists.ListItem]
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | Parses whitespace but does not consume newlines.
|
||||
|
|
@ -59,15 +71,21 @@ bulletMarker = Bullet <$> oneOf ['*', '-', '+']
|
|||
|
||||
-- | Decimals: '1.', '1)', '(1)'
|
||||
decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
decimalMarker = Decimal . snd <$> orderedDelimiter L.decimal
|
||||
decimalMarker = do
|
||||
(val, delim) <- orderedDelimiter L.decimal
|
||||
return $ Decimal val delim
|
||||
|
||||
-- | Lower/Upper Alpha: 'a.', 'A)', etc.
|
||||
-- (Note: In a full implementation, you must ensure these are single letters)
|
||||
lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
lowerAlphaMarker = LowerAlpha . snd <$> orderedDelimiter lowerChar
|
||||
lowerAlphaMarker = do
|
||||
(c, delim) <- orderedDelimiter lowerChar
|
||||
return $ LowerAlpha (alphaToOffset c) delim
|
||||
|
||||
upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
upperAlphaMarker = UpperAlpha . snd <$> orderedDelimiter upperChar
|
||||
upperAlphaMarker = do
|
||||
(c, delim) <- orderedDelimiter upperChar
|
||||
return $ UpperAlpha (alphaToOffset c) delim
|
||||
|
||||
-- | Lower/Upper Roman
|
||||
-- Uses a simplified roman numeral regex/parser for illustration.
|
||||
|
|
@ -78,10 +96,16 @@ romanUpper :: (Logger m, Characters s) => Parser s m Text
|
|||
romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M'])
|
||||
|
||||
lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
lowerRomanMarker = LowerRoman . snd <$> orderedDelimiter romanLower
|
||||
lowerRomanMarker = do
|
||||
(numeral, delim) <- orderedDelimiter romanLower
|
||||
let num = romanToInt numeral
|
||||
pure $ LowerRoman num delim
|
||||
|
||||
upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
upperRomanMarker = UpperRoman . snd <$> orderedDelimiter romanUpper
|
||||
upperRomanMarker = do
|
||||
(numeral, delim) <- orderedDelimiter romanUpper
|
||||
let num = romanToInt numeral
|
||||
pure $ UpperRoman num delim
|
||||
|
||||
-- | Attempts to parse any valid Djot list marker.
|
||||
listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
|
||||
|
|
@ -96,7 +120,7 @@ listMarker =
|
|||
]
|
||||
|
||||
-- | Parses a single list item.
|
||||
listItem :: (Logger m, Characters s) => Parser s m ListItem
|
||||
listItem :: (Logger m, Characters s) => Parser s m Djot.Lists.ListItem
|
||||
listItem = do
|
||||
-- 1. Grab the current indentation level before the item
|
||||
startPos <- L.indentLevel
|
||||
|
|
@ -219,3 +243,43 @@ taskItemWithStyle expectedStyle = do
|
|||
if taskStyle item == expectedStyle
|
||||
then pure item
|
||||
else fail "Task list marker style mismatch"
|
||||
|
||||
-- | Converts a single alpha char to its 1-indexed position (a=1, b=2...)
|
||||
alphaToOffset :: Char -> Int
|
||||
alphaToOffset c =
|
||||
let base = if isUpper c then ord 'A' else ord 'a'
|
||||
in ord c - base + 1
|
||||
|
||||
-- | A real Roman numeral to Int converter (simplified for brevity)
|
||||
-- In a production parser, use a library or a more robust fold.
|
||||
romanToOffset :: Text -> Int
|
||||
romanToOffset = romanToInt -- Implementation of Roman numeral logic
|
||||
|
||||
-- inner parsing of roman numerals done via Gemini due to lazy
|
||||
romanValue :: Char -> Int
|
||||
romanValue 'I' = 1
|
||||
romanValue 'V' = 5
|
||||
romanValue 'X' = 10
|
||||
romanValue 'L' = 50
|
||||
romanValue 'C' = 100
|
||||
romanValue 'D' = 500
|
||||
romanValue 'M' = 1000
|
||||
romanValue _ = 0
|
||||
|
||||
romanToInt :: Text -> Int
|
||||
romanToInt s = runST $ do
|
||||
total <- newSTRef 0
|
||||
prevVal <- newSTRef 0
|
||||
|
||||
for_ (T.unpack s) $ \char -> do
|
||||
let curr = romanValue char
|
||||
p <- readSTRef prevVal
|
||||
|
||||
-- If current > previous (e.g., IV), we subtract the previous twice
|
||||
if curr > p
|
||||
then modifySTRef' total (\t -> t + curr - 2 * p)
|
||||
else modifySTRef' total (+ curr)
|
||||
|
||||
writeSTRef prevVal curr
|
||||
|
||||
readSTRef total
|
||||
|
|
|
|||
Loading…
Reference in a new issue