removed LLM code, unfortunately the only way to do lists correctly so I can pull out structure is to do it myself

2026-04-13 14:07:38 -04:00 · 2026-04-13 14:07:38 -04:00 · aa69b3f6ef
commit aa69b3f6ef
parent 4db3e8f912
3 changed files with 1 additions and 287 deletions
--- a/psb.cabal
+++ b/psb.cabal
@ -27,7 +27,7 @@ common warnings
 library
    hs-source-dirs: src
-    exposed-modules: Djot Djot.Lists Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config Utilities.Bundling
+    exposed-modules: Djot Markdown HTML Logger IR Logger.Shake Psb.Main Utilities Utilities.FilePath Utilities.Action Utilities.Javascript Utilities.CSS Templates Types Config Utilities.Bundling
    other-modules: Utilities.Parsing
    build-depends:    base, mustache >=2.4.2, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text >= 2.1.2, time, unordered-containers, yaml, megaparsec >= 9.7.0, transformers >= 0.6.2, bytestring
    default-extensions: ApplicativeDo DataKinds NamedFieldPuns DerivingVia LambdaCase TypeApplications DeriveGeneric OverloadedRecordDot NamedFieldPuns DuplicateRecordFields DisambiguateRecordFields FlexibleInstances
--- a/src/Djot.hs
+++ b/src/Djot.hs
@ -17,7 +17,6 @@ import Data.List (elemIndex)
 import qualified Data.List.NonEmpty as NE
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Djot.Lists
 import IR
 import Logger (Logger (logCallStack, logDebug, logError))
 import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
--- a/src/Djot/Lists.hs
+++ b/src/Djot/Lists.hs
@ -1,285 +0,0 @@
 {-# LANGUAGE OverloadedStrings #-}
 -- This is written by Gemini barring minor adjustments made manually to fit
 -- into the Parsing utilities and logger stuff
 --
 -- the Djot root module just does
 -- type translation currently
 module Djot.Lists (List (List), djotList, TaskList (TaskList), djotTaskList) where
 import Control.Monad.ST (runST)
 import Data.Char (isUpper, ord)
 import Data.Foldable (for_)
 import Data.Functor (void)
 import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.Void
 import qualified IR
 import Logger (Logger (logCallStack, logDebug, logError))
 import Text.Megaparsec
 import Text.Megaparsec.Char
 import qualified Text.Megaparsec.Char.Lexer as L
 import Utilities.Parsing
 -- | Djot supports three types of ordered list delimiters.
 data Delimiter = Period | RightParen | Enclosed -- e.g., '1.', '1)', '(1)'
  deriving (Show, Eq)
 -- | The style of the list marker.
 data MarkerStyle
  = Bullet Char -- '*', '-', '+'
  | Decimal Int Delimiter -- '1'
  | LowerAlpha Int Delimiter -- 'a'
  | UpperAlpha Int Delimiter -- 'A'
  | LowerRoman Int Delimiter -- 'i'
  | UpperRoman Int Delimiter -- 'I'
  deriving (Show, Eq)
 intoIRType :: MarkerStyle -> IR.ListType
 intoIRType (Bullet _) = IR.Unordered {style = Nothing}
 intoIRType (Decimal start _) = IR.Ordered {style = Just "1", start_number = Just start}
 -- | A single item in a list.
 data ListItem = ListItem
  { itemStyle :: MarkerStyle,
    itemIndent :: Int, -- Column where the block content starts
    itemContent :: [Text] -- Simplified: usually this would be [Block]
  }
  deriving (Show, Eq)
 -- | A full list is a collection of items sharing the exact same MarkerStyle.
 data List = List MarkerStyle [Djot.Lists.ListItem]
  deriving (Show, Eq)
 -- | Parses whitespace but does not consume newlines.
 sc :: (Logger m, Characters s) => Parser s m ()
 sc = hidden (void $ takeWhileP (Just "space") (== ' '))
 -- | Parses the punctuation after/around an ordered list number/letter.
 -- E.g., takes a parser for the numeral (like `lowerRoman`) and applies delimiters.
 orderedDelimiter :: (Logger m, Characters s) => Parser s m a -> Parser s m (a, Delimiter)
 orderedDelimiter inner =
  (try (between (char '(') (char ')') inner) >>= \x -> return (x, Enclosed))
    <|> (try (inner <* char ')') >>= \x -> return (x, RightParen))
    <|> (try (inner <* char '.') >>= \x -> return (x, Period))
 -- | Parses a bullet marker.
 bulletMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
 bulletMarker = Bullet <$> oneOf ['*', '-', '+']
 -- | Decimals: '1.', '1)', '(1)'
 decimalMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
 decimalMarker = do
  (val, delim) <- orderedDelimiter L.decimal
  return $ Decimal val delim
 -- | Lower/Upper Alpha: 'a.', 'A)', etc.
 -- (Note: In a full implementation, you must ensure these are single letters)
 lowerAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
 lowerAlphaMarker = do
  (c, delim) <- orderedDelimiter lowerChar
  return $ LowerAlpha (alphaToOffset c) delim
 upperAlphaMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
 upperAlphaMarker = do
  (c, delim) <- orderedDelimiter upperChar
  return $ UpperAlpha (alphaToOffset c) delim
 -- | Lower/Upper Roman
 -- Uses a simplified roman numeral regex/parser for illustration.
 romanLower :: (Logger m, Characters s) => Parser s m Text
 romanLower = toText <$> takeWhile1P (Just "lower roman") (`elem` ['i', 'v', 'x', 'l', 'c', 'd', 'm'])
 romanUpper :: (Logger m, Characters s) => Parser s m Text
 romanUpper = toText <$> takeWhile1P (Just "upper roman") (`elem` ['I', 'V', 'X', 'L', 'C', 'D', 'M'])
 lowerRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
 lowerRomanMarker = do
  (numeral, delim) <- orderedDelimiter romanLower
  let num = romanToInt numeral
  pure $ LowerRoman num delim
 upperRomanMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
 upperRomanMarker = do
  (numeral, delim) <- orderedDelimiter romanUpper
  let num = romanToInt numeral
  pure $ UpperRoman num delim
 -- | Attempts to parse any valid Djot list marker.
 listMarker :: (Logger m, Characters s) => Parser s m MarkerStyle
 listMarker =
  choice
    [ bulletMarker,
      decimalMarker,
      lowerRomanMarker, -- Order matters here to prevent 'i.' parsing as LowerAlpha
      lowerAlphaMarker,
      upperRomanMarker,
      upperAlphaMarker
    ]
 -- | Parses a single list item.
 listItem :: (Logger m, Characters s) => Parser s m Djot.Lists.ListItem
 listItem = do
  -- 1. Grab the current indentation level before the item
  startPos <- L.indentLevel
  -- 2. Parse the marker
  marker <- listMarker
  -- 3. Consume spaces between marker and content (mandatory in Djot)
  void $ some (char ' ')
  -- 4. Find the column where the actual content begins.
  -- Djot relies on this column to group subsequent lines.
  contentPos <- L.indentLevel
  -- 5. Parse the block content based on indentation.
  -- We use `L.indentBlock` or manually grab lines that are indented >= contentPos.
  content <- some (indentedLine contentPos)
  return $ ListItem marker (unPos contentPos) content
 -- | Helper to grab a line that respects the item's indentation level.
 indentedLine :: (Logger m, Characters s) => Pos -> Parser s m Text
 indentedLine minIndent = do
  currentIndent <- L.indentLevel
  if currentIndent >= minIndent
    then do
      line <- takeWhileP (Just "character") (/= '\n')
      void (char '\n') <|> eof
      return $ toText line
    else fail "Line not indented enough for list item"
 -- | Parses a complete list by ensuring all subsequent items share the same MarkerStyle.
 djotList :: (Logger m, Characters s) => Parser s m List
 djotList = do
  firstItem <- listItem
  let style = itemStyle firstItem
  -- Try to parse more items, but ONLY if they have the exact same style.
  restItems <- many (try $ itemWithStyle style)
  return $ List style (firstItem : restItems)
 -- | Parses an item only if it matches the expected style.
 itemWithStyle :: (Logger m, Characters s) => MarkerStyle -> Parser s m ListItem
 itemWithStyle expectedStyle = do
  item <- listItem
  if itemStyle item == expectedStyle
    then return item
    else fail "List marker style mismatch"
 -- | Represents the state of the task checkbox.
 data TaskStatus = Unchecked | Checked
  deriving (Show, Eq)
 -- | A specific list item that is guaranteed to be a task.
 data TaskListItem = TaskListItem
  { taskStyle :: MarkerStyle,
    taskIndent :: Int,
    taskStatus :: TaskStatus,
    taskContent :: [Text]
  }
  deriving (Show, Eq)
 -- | A list composed entirely of TaskListItems.
 data TaskList = TaskList MarkerStyle [TaskListItem]
  deriving (Show, Eq)
 -- | Parses the Djot task checkbox and returns its status.
 taskCheckbox :: (Logger m, Characters s) => Parser s m TaskStatus
 taskCheckbox = do
  void $ char '['
  status <-
    (char ' ' *> pure Unchecked)
      <|> (oneOf ['x', 'X'] *> pure Checked)
  void $ char ']'
  -- Djot requires a space after the checkbox (or a newline if the item is blank)
  void (char ' ') <|> lookAhead (void $ char '\n') <|> eof
  pure status
 -- | Parses a single task list item.
 taskListItem :: (Logger m, Characters s) => Parser s m TaskListItem
 taskListItem = do
  _startPos <- L.indentLevel
  -- 1. Parse the list marker (bullets or ordered)
  marker <- listMarker
  -- 2. Consume mandatory spaces after the marker
  void $ some (char ' ')
  -- 3. Capture the indentation level right HERE.
  -- The '[' of the checkbox dictates the column that subsequent lines must respect.
  contentPos <- L.indentLevel
  -- 4. Parse the mandatory checkbox (if this fails, it's not a task list item)
  status <- taskCheckbox
  -- 5. Parse the content, ensuring continuation lines respect the `contentPos`
  -- (Using the same `indentedLine` helper from the previous parser)
  content <- some (indentedLine contentPos)
  pure $ TaskListItem marker (unPos contentPos) status content
 -- | Parses a complete task list, ensuring consistent marker styles.
 djotTaskList :: (Logger m, Characters s) => Parser s m TaskList
 djotTaskList = do
  firstItem <- taskListItem
  let style = taskStyle firstItem
  restItems <- many (try $ taskItemWithStyle style)
  pure $ TaskList style (firstItem : restItems)
 -- | Helper to strictly enforce uniform marker styles across the task list.
 taskItemWithStyle :: (Logger m, Characters s) => MarkerStyle -> Parser s m TaskListItem
 taskItemWithStyle expectedStyle = do
  item <- taskListItem
  if taskStyle item == expectedStyle
    then pure item
    else fail "Task list marker style mismatch"
 -- | Converts a single alpha char to its 1-indexed position (a=1, b=2...)
 alphaToOffset :: Char -> Int
 alphaToOffset c =
  let base = if isUpper c then ord 'A' else ord 'a'
   in ord c - base + 1
 -- | A real Roman numeral to Int converter (simplified for brevity)
 -- In a production parser, use a library or a more robust fold.
 romanToOffset :: Text -> Int
 romanToOffset = romanToInt -- Implementation of Roman numeral logic
 -- inner parsing of roman numerals done via Gemini due to lazy
 romanValue :: Char -> Int
 romanValue 'I' = 1
 romanValue 'V' = 5
 romanValue 'X' = 10
 romanValue 'L' = 50
 romanValue 'C' = 100
 romanValue 'D' = 500
 romanValue 'M' = 1000
 romanValue _ = 0
 romanToInt :: Text -> Int
 romanToInt s = runST $ do
  total <- newSTRef 0
  prevVal <- newSTRef 0
  for_ (T.unpack s) $ \char -> do
    let curr = romanValue char
    p <- readSTRef prevVal
    -- If current > previous (e.g., IV), we subtract the previous twice
    if curr > p
      then modifySTRef' total (\t -> t + curr - 2 * p)
      else modifySTRef' total (+ curr)
    writeSTRef prevVal curr
  readSTRef total