Compare commits

..

4 commits

Author SHA1 Message Date
Pagwin
4893b40cc9
minor LLM fuckup fixes 2025-11-01 15:41:40 -04:00
Pagwin
6212c3fd05
fixed up output from LLM for IR a bit 2025-11-01 15:39:18 -04:00
Pagwin
33b049e64e
IR changed via Claude 2025-11-01 15:36:52 -04:00
Pagwin
010351c6b1
giving up and having an LLM do markdown parser next commit 2025-11-01 15:36:04 -04:00
2 changed files with 66 additions and 20 deletions

View file

@ -4,33 +4,73 @@ import Data.Text
-- | A whole document: a list of block-level 'Element's.
newtype Document
  = Doc [Element]
data Element = Heading Heading | Code Code | BlockQuote BlockQuote | List List | Table Table | HTML HTML | Paragraph Paragraph | BlankLine BlankLine
-- | A block-level node of the document IR. Each constructor wraps the
-- payload type of the same name, except 'HorizontalRule', which carries
-- no data.
data Element
  = Heading Heading
  | Code Code
  | BlockQuote BlockQuote
  | List List
  | HTML HTML
  | Paragraph Paragraph
  | HorizontalRule
data Heading = H {level :: Int, text :: Text}
-- Removed: BlankLine
data Code = C {language :: Text, code :: Text}
-- | A heading: a numeric level plus its inline content.
-- NOTE(review): the type does not restrict the range of 'level';
-- presumably the parser is expected to keep it valid -- confirm.
data Heading = H
  { level :: Int
  , text :: [InlineText]
  }
newtype BlockQuote = Q Text
-- | A code block: an optional language tag and the code text itself.
data Code = C
  { language :: Maybe Text
  , code :: Text
  }
-- | A block quote holding inline content.
--
-- Declared as a @newtype@ (one constructor, one field), matching
-- 'Paragraph', so the wrapper has no runtime cost.
newtype BlockQuote = Q [InlineText]
-- | One entry of a 'List': its own inline content plus any nested lists.
data ListItem = LI
  { content :: [InlineText] -- Flatten continuations into here
  , children :: [List]
  }
-- | Whether a 'List' is ordered or unordered.
data ListType
  = Ordered
  | Unordered
data ListItem = LI {content :: Text, children :: [List]}
-- | A list: its kind ('Ordered' or 'Unordered') and its items.
data List = L
  { list_type :: ListType
  , items :: [ListItem]
  }
data List = L {list_type :: ListType, items :: [ListItem]}
-- Table: keep as-is or simplify based on your needs
-- | A table: one header plus a list of rows.
data Table = T
  { header :: TableHeader
  , rows :: [TableRow]
  }
-- | A block-level HTML tag.
-- NOTE(review): 'attributes' presumably holds (name, optional value)
-- pairs -- confirm against the parser that builds this value.
data HTML
  = HTMLTag
      { tagName :: Text
      , attributes :: [(Text, Maybe Text)]
      , html_content :: Text
      }
-- | The header of a 'Table': a list of 'Text' values.
newtype TableHeader
  = TH [Text]

-- | A single row of a 'Table', stored as one 'Text'.
newtype TableRow
  = TR Text
newtype HTML = Raw Text
-- Optionally skip: HTMLComment, HTMLDeclaration
-- | A paragraph: a sequence of inline nodes.
newtype Paragraph
  = P [InlineText]
data InlineText = Normal Text | Escaped Char | Bold InlineText | Italic InlineText | CodeLine Text | Link {nest :: InlineText, href :: Text} | HTMLIn Text
data BlankLine = BL
-- | Inline-level content.
--
-- The record fields below belong to a multi-constructor type, so their
-- selectors are partial: applying e.g. 'url' to a 'Bold' value fails at
-- runtime. Prefer pattern matching over the selectors.
data InlineText
  = Text Text -- Combined Normal and Escaped
  | Bold [InlineText]
  | Italic [InlineText]
  | InlineCode Text
  | Link
      { linkText :: [InlineText]
      , url :: Text
      , title :: Maybe Text
      }
  | Image
      { altText :: [InlineText]
      , url :: Text
      , title :: Maybe Text
      }
  | HTMLInline
      { inlineTagName :: Text
      , inlineAttributes :: [(Text, Maybe Text)]
      }
-- For processing math content, see the Typst parser (parseTypst):
-- https://hackage.haskell.org/package/typst-0.6.1/docs/Typst-Parse.html#v:parseTypst

View file

@ -72,11 +72,17 @@ htmlInline = do
where
-- Repeatedly parse whitespace followed by one attribute, collecting the attributes.
htmlInlineRemainder = many (whiteSpace *> attribute)
-- An attribute name: a run of alphanumerics, '-' or ':' (built with `many`).
name = many (choice [alphaNum, char '-', char ':'])
-- A double-quoted attribute value: a letter followed by any number of
-- alphanumerics, '-' or ':'. The result includes the surrounding quotes.
value = do
  _ <- char '"'
  l <- letter
  rest <- many $ choice [alphaNum, char '-', char ':']
  _ <- char '"'
  -- The whole string must be the argument of `pure`; without the
  -- parentheses this parses as `(pure '"') : ...` and does not type-check.
  pure ('"' : l : rest ++ "\"")
-- A single `name="value"` attribute, returned as the string `name=value`
-- (the value keeps the quotes that `value` includes in its result).
attribute = do
  attrName <- name
  _ <- char '='
  attrValue <- value
  -- Parenthesised so the concatenation is the argument of `pure`;
  -- `pure attrName ++ ...` would parse as `(pure attrName) ++ ...`.
  pure (attrName ++ ('=' : attrValue))
-- | Run the `space` parser repeatedly and pack the collected results
-- into a 'Text'.
whiteSpace :: Parser Text
whiteSpace = fmap pack (many space)