realizing LLM code is inadequate

This commit is contained in:
Pagwin 2026-04-13 14:06:44 -04:00
parent b6a689868b
commit 4db3e8f912
No known key found for this signature in database
GPG key ID: 81137023740CA260
2 changed files with 82 additions and 110 deletions

View file

@ -11,12 +11,10 @@ module Djot
where where
import Control.Applicative (many, optional, some, (<|>)) import Control.Applicative (many, optional, some, (<|>))
import Control.Monad.ST (runST)
import Data.Foldable (for_) import Data.Foldable (for_)
import Data.Functor (void, (<$>)) import Data.Functor (void, (<$>))
import Data.List (elemIndex) import Data.List (elemIndex)
import qualified Data.List.NonEmpty as NE import qualified Data.List.NonEmpty as NE
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Djot.Lists import qualified Djot.Lists
@ -100,105 +98,15 @@ blockQuote attrs = do
listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element listBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
listBlock attrs = do listBlock attrs = do
list <- Djot.Lists.djotList error "todo"
error ""
taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element taskListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
taskListBlock attrs = do taskListBlock attrs = do
error "this is also probably fundamentally wrong" error "todo"
startOffset <- getOffset
bullet
first_item <- task_item
rem_items <- manyTill (listMarker *> task_item) $ lookAhead blockSeparator
pure $ TaskList (TL {items = first_item : rem_items}) attrs
where
task_item = error "todo"
lowerAlphabet :: [Char] descriptionListBlock :: (Logger m, Characters s) => Attrs -> Parser s m Element
lowerAlphabet = ['a' .. 'z'] descriptionListBlock attrs = do
error "todo"
upperAlphabet :: [Char]
upperAlphabet = ['A' .. 'Z']
-- inner parsing of roman numerals done via Gemini due to lazy
romanValue :: Char -> Int
romanValue 'I' = 1
romanValue 'V' = 5
romanValue 'X' = 10
romanValue 'L' = 50
romanValue 'C' = 100
romanValue 'D' = 500
romanValue 'M' = 1000
romanValue _ = 0
romanToInt :: Text -> Int
romanToInt s = runST $ do
total <- newSTRef 0
prevVal <- newSTRef 0
for_ (T.unpack s) $ \char -> do
let curr = romanValue char
p <- readSTRef prevVal
-- If current > previous (e.g., IV), we subtract the previous twice
if curr > p
then modifySTRef' total (\t -> t + curr - 2 * p)
else modifySTRef' total (+ curr)
writeSTRef prevVal curr
readSTRef total
upperRoman :: [Char]
upperRoman = "IVXLCDM"
lowerRoman :: [Char]
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
-- consumes whitespace as well for convenience in element parser
listMarker :: (Logger m, Characters s) => Parser s m ListType
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
where
decimal = do
num <- choice $ map ($ some numberChar) [try . surroundParen, try . rightParen, point]
let start_number = Just $ read num
pure $ Ordered {start_number, style = Just "1"}
lower_letter :: (Logger m, Characters s) => Parser s m ListType
lower_letter = do
letter <- choice $ map ($ lowerChar) [try . surroundParen, try . rightParen, point]
let start_number = elemIndex letter lowerAlphabet
pure $ Ordered {start_number, style = Just "a"}
upper_letter :: (Logger m, Characters s) => Parser s m ListType
upper_letter = do
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
let start_number = elemIndex letter upperAlphabet
pure $ Ordered {start_number, style = Just "A"}
lower_roman_numeral = do
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
pure $ Ordered {start_number, style = Just "i"}
upper_roman_numeral = do
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
pure $ Ordered {start_number, style = Just "I"}
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
surroundParen parser = do
char '('
ret <- parser
char ')'
space
pure ret
rightParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
rightParen parser = parser <* char ')' <* space
point :: (Logger m, Characters s) => Parser s m a -> Parser s m a
point parser = parser <* char '.' <* space
bullet :: (Logger m, Characters s) => Parser s m ListType
bullet = error "todo"
taskMarker :: (Logger m, Characters s) => Parser s m Char
taskMarker = char '[' *> choice (map char " xX") <* char ']' <* space
codeFence :: (Logger m, Characters s) => Parser s m () codeFence :: (Logger m, Characters s) => Parser s m ()
codeFence = void $ string "```" codeFence = void $ string "```"

View file

@ -2,13 +2,21 @@
-- This is written by Gemini barring minor adjustments made manually to fit -- This is written by Gemini barring minor adjustments made manually to fit
-- into the Parsing utilities and logger stuff -- into the Parsing utilities and logger stuff
--
-- the Djot root module just does
-- type translation currently
module Djot.Lists (List, djotList, TaskList, djotTaskList) where module Djot.Lists (List (List), djotList, TaskList (TaskList), djotTaskList) where
import Control.Monad.ST (runST)
import Data.Char (isUpper, ord)
import Data.Foldable (for_)
import Data.Functor (void) import Data.Functor (void)
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Void import Data.Void
import qualified IR
import Logger (Logger (logCallStack, logDebug, logError)) import Logger (Logger (logCallStack, logDebug, logError))
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Char import Text.Megaparsec.Char
@ -22,13 +30,17 @@ data Delimiter = Period | RightParen | Enclosed -- e.g., '1.', '1)', '(1)'
-- | The style of the list marker. -- | The style of the list marker.
data MarkerStyle data MarkerStyle
= Bullet Char -- '*', '-', '+' = Bullet Char -- '*', '-', '+'
| Decimal Delimiter -- '1' | Decimal Int Delimiter -- '1'
| LowerAlpha Delimiter -- 'a' | LowerAlpha Int Delimiter -- 'a'
| UpperAlpha Delimiter -- 'A' | UpperAlpha Int Delimiter -- 'A'
| LowerRoman Delimiter -- 'i' | LowerRoman Int Delimiter -- 'i'
| UpperRoman Delimiter -- 'I' | UpperRoman Int Delimiter -- 'I'
deriving (Show, Eq) deriving (Show, Eq)
intoIRType :: MarkerStyle -> IR.ListType
intoIRType (Bullet _) = IR.Unordered {style = Nothing}
intoIRType (Decimal start _) = IR.Ordered {style = Just "1", start_number = Just start}
-- | A single item in a list. -- | A single item in a list.
data ListItem = ListItem data ListItem = ListItem
{ itemStyle :: MarkerStyle, { itemStyle :: MarkerStyle,
@ -38,7 +50,7 @@ data ListItem = ListItem
deriving (Show, Eq) deriving (Show, Eq)
-- | A full list is a collection of items sharing the exact same MarkerStyle. -- | A full list is a collection of items sharing the exact same MarkerStyle.
data List = List MarkerStyle [ListItem] data List = List MarkerStyle [Djot.Lists.ListItem]
deriving (Show, Eq) deriving (Show, Eq)
-- | Parses whitespace but does not consume newlines. -- | Parses whitespace but does not consume newlines.
@ -59,15 +71,21 @@ bulletMarker = Bullet <$> oneOf ['*', '-', '+']
-- | Decimals: '1.', '1)', '(1)' -- | Decimals: '1.', '1)', '(1)'
decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
decimalMarker = Decimal . snd <$> orderedDelimiter L.decimal decimalMarker = do
(val, delim) <- orderedDelimiter L.decimal
return $ Decimal val delim
-- | Lower/Upper Alpha: 'a.', 'A)', etc. -- | Lower/Upper Alpha: 'a.', 'A)', etc.
-- (Note: In a full implementation, you must ensure these are single letters) -- (Note: In a full implementation, you must ensure these are single letters)
lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
lowerAlphaMarker = LowerAlpha . snd <$> orderedDelimiter lowerChar lowerAlphaMarker = do
(c, delim) <- orderedDelimiter lowerChar
return $ LowerAlpha (alphaToOffset c) delim
upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
upperAlphaMarker = UpperAlpha . snd <$> orderedDelimiter upperChar upperAlphaMarker = do
(c, delim) <- orderedDelimiter upperChar
return $ UpperAlpha (alphaToOffset c) delim
-- | Lower/Upper Roman -- | Lower/Upper Roman
-- Uses a simplified roman numeral regex/parser for illustration. -- Uses a simplified roman numeral regex/parser for illustration.
@ -78,10 +96,16 @@ romanUpper :: (Logger m, Characters s) => Parser s m Text
romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M']) romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M'])
lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
lowerRomanMarker = LowerRoman . snd <$> orderedDelimiter romanLower lowerRomanMarker = do
(numeral, delim) <- orderedDelimiter romanLower
let num = romanToInt numeral
pure $ LowerRoman num delim
upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
upperRomanMarker = UpperRoman . snd <$> orderedDelimiter romanUpper upperRomanMarker = do
(numeral, delim) <- orderedDelimiter romanUpper
let num = romanToInt numeral
pure $ UpperRoman num delim
-- | Attempts to parse any valid Djot list marker. -- | Attempts to parse any valid Djot list marker.
listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
@ -96,7 +120,7 @@ listMarker =
] ]
-- | Parses a single list item. -- | Parses a single list item.
listItem :: (Logger m, Characters s) => Parser s m ListItem listItem :: (Logger m, Characters s) => Parser s m Djot.Lists.ListItem
listItem = do listItem = do
-- 1. Grab the current indentation level before the item -- 1. Grab the current indentation level before the item
startPos <- L.indentLevel startPos <- L.indentLevel
@ -219,3 +243,43 @@ taskItemWithStyle expectedStyle = do
if taskStyle item == expectedStyle if taskStyle item == expectedStyle
then pure item then pure item
else fail "Task list marker style mismatch" else fail "Task list marker style mismatch"
-- | Converts a single alpha char to its 1-indexed position (a=1, b=2...)
alphaToOffset :: Char -> Int
alphaToOffset c =
let base = if isUpper c then ord 'A' else ord 'a'
in ord c - base + 1
-- | A real Roman numeral to Int converter (simplified for brevity)
-- In a production parser, use a library or a more robust fold.
romanToOffset :: Text -> Int
romanToOffset = romanToInt -- Implementation of Roman numeral logic
-- inner parsing of roman numerals done via Gemini due to lazy
romanValue :: Char -> Int
romanValue 'I' = 1
romanValue 'V' = 5
romanValue 'X' = 10
romanValue 'L' = 50
romanValue 'C' = 100
romanValue 'D' = 500
romanValue 'M' = 1000
romanValue _ = 0
romanToInt :: Text -> Int
romanToInt s = runST $ do
total <- newSTRef 0
prevVal <- newSTRef 0
for_ (T.unpack s) $ \char -> do
let curr = romanValue char
p <- readSTRef prevVal
-- If current > previous (e.g., IV), we subtract the previous twice
if curr > p
then modifySTRef' total (\t -> t + curr - 2 * p)
else modifySTRef' total (+ curr)
writeSTRef prevVal curr
readSTRef total