Compare commits

...

4 commits

Author SHA1 Message Date
Pagwin
4893b40cc9
minor LLM fuckup fixes 2025-11-01 15:41:40 -04:00
Pagwin
6212c3fd05
fixed up output from LLM for IR a bit 2025-11-01 15:39:18 -04:00
Pagwin
33b049e64e
IR changed via Claude 2025-11-01 15:36:52 -04:00
Pagwin
010351c6b1
giving up and having an LLM do markdown parser next commit 2025-11-01 15:36:04 -04:00
2 changed files with 66 additions and 20 deletions

View file

@ -4,33 +4,73 @@ import Data.Text
newtype Document = Doc [Element] newtype Document = Doc [Element]
data Element = Heading Heading | Code Code | BlockQuote BlockQuote | List List | Table Table | HTML HTML | Paragraph Paragraph | BlankLine BlankLine data Element
= Heading Heading
| Code Code
| BlockQuote BlockQuote
| List List
| HTML HTML
| Paragraph Paragraph
| HorizontalRule
data Heading = H {level :: Int, text :: Text} -- Removed: BlankLine
data Code = C {language :: Text, code :: Text} data Heading = H
{ level :: Int,
text :: [InlineText]
}
newtype BlockQuote = Q Text data Code = C
{ language :: Maybe Text,
code :: Text
}
data BlockQuote = Q [InlineText]
data ListItem = LI
{ content :: [InlineText], -- Flatten continuations into here
children :: [List]
}
data ListType = Ordered | Unordered data ListType = Ordered | Unordered
data ListItem = LI {content :: Text, children :: [List]} data List = L
{ list_type :: ListType,
items :: [ListItem]
}
data List = L {list_type :: ListType, items :: [ListItem]} -- Table: keep as-is or simplify based on your needs
data Table = T {header :: TableHeader, rows :: [TableRow]} data HTML
= HTMLTag
{ tagName :: Text,
attributes :: [(Text, Maybe Text)],
html_content :: Text
}
newtype TableHeader = TH [Text] -- Optionally skip: HTMLComment, HTMLDeclaration
newtype TableRow = TR Text
newtype HTML = Raw Text
newtype Paragraph = P [InlineText] newtype Paragraph = P [InlineText]
data InlineText = Normal Text | Escaped Char | Bold InlineText | Italic InlineText | CodeLine Text | Link {nest :: InlineText, href :: Text} | HTMLIn Text data InlineText
= Text Text -- Combined Normal and Escaped
data BlankLine = BL | Bold [InlineText]
| Italic [InlineText]
| InlineCode Text
| Link
{ linkText :: [InlineText],
url :: Text,
title :: Maybe Text
}
| Image
{ altText :: [InlineText],
url :: Text,
title :: Maybe Text
}
| HTMLInline
{ inlineTagName :: Text,
inlineAttributes :: [(Text, Maybe Text)]
}
-- for processing math -- for processing math
-- https://hackage.haskell.org/package/typst-0.6.1/docs/Typst-Parse.html#v:parseTypst -- https://hackage.haskell.org/package/typst-0.6.1/docs/Typst-Parse.html#v:parseTypst

View file

@ -72,11 +72,17 @@ htmlInline = do
where where
htmlInlineRemainder = many $ whiteSpace *> attribute htmlInlineRemainder = many $ whiteSpace *> attribute
name = many $ choice [alphaNum, char '-', char ':'] name = many $ choice [alphaNum, char '-', char ':']
-- Parses a double-quoted attribute value: an opening '"', one letter, any
-- run of alphanumerics / '-' / ':', and a closing '"'. The returned string
-- includes both quote characters.
-- NOTE(review): an empty value ("") or one starting with a digit is rejected
-- because the first character must satisfy `letter` -- confirm this is intended.
value = do
  _ <- char '"'
  firstChar <- letter
  restChars <- many $ choice [alphaNum, char '-', char ':']
  _ <- char '"'
  -- `pure` has to wrap the whole string. Function application binds tighter
  -- than (:), so `pure '"' : l : ...` applies `pure` to the quote alone and
  -- fails to type-check.
  pure $ '"' : firstChar : restChars ++ "\""
attribute = do attribute = do
attrName <- name attrName <- name
char '=' char '='
attrValue <- value attrValue <- value
pure attrName ++ '=' : pure attrName ++ ('=' : attrValue)
whiteSpace :: Parser Text whiteSpace :: Parser Text
whiteSpace = pack <$> many space whiteSpace = pack <$> many space