began scaffolding down to markdown parser
This commit is contained in:
parent
016d50aff1
commit
0e4613f27c
3 changed files with 68 additions and 30 deletions
33
app/IR.hs
33
app/IR.hs
|
@ -2,13 +2,36 @@ module IR where
|
|||
|
||||
import Data.Text
|
||||
|
||||
-- Html and Math tags come with their data because they are leaves for us
|
||||
-- We aren't parsing that if we can avoid it
|
||||
data Tag = Heading {level :: Int} | Paragraph | Blockquote | Code | Html {html :: Text} | Anchor | Italic | Bold | Math {mathML :: Text}
|
||||
newtype Document = Doc [Element]
|
||||
|
||||
data Data = Ast {ast :: AST} | Text {text :: Text}
|
||||
data Element = Heading Heading | Code Code | BlockQuote BlockQuote | List List | Table Table | HTML HTML | Paragraph Paragraph | BlankLine BlankLine
|
||||
|
||||
data AST = AST {tag :: Tag, child :: [Data]}
|
||||
data Heading = H {level :: Int, text :: Text}
|
||||
|
||||
data Code = C {language :: Text, code :: Text}
|
||||
|
||||
newtype BlockQuote = Q Text
|
||||
|
||||
data ListType = Ordered | Unordered
|
||||
|
||||
data ListItem = LI {content :: Text, children :: [List]}
|
||||
|
||||
data List = L {list_type :: ListType, items :: [ListItem]}
|
||||
|
||||
data Table = T {header :: TableHeader, rows :: [TableRow]}
|
||||
|
||||
-- TODO: layout/sizing info?
|
||||
newtype TableHeader = TH [Text]
|
||||
|
||||
newtype TableRow = TR Text
|
||||
|
||||
newtype HTML = Raw Text
|
||||
|
||||
newtype Paragraph = P [InlineText]
|
||||
|
||||
data InlineText = Normal Text | Bold InlineText | Italic InlineText | CodeLine Text | Link {nest :: InlineText, href :: Text}
|
||||
|
||||
data BlankLine = BL
|
||||
|
||||
-- for processing math
|
||||
-- https://hackage.haskell.org/package/typst-0.6.1/docs/Typst-Parse.html#v:parseTypst
|
||||
|
|
|
@ -1,4 +1,41 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Markdown () where
|
||||
module Markdown (markdownParser) where
|
||||
|
||||
import Data.Text
|
||||
import IR
|
||||
import Text.Parsec
|
||||
import Text.Parsec.Combinator
|
||||
|
||||
type Parser a = forall s u m t. (Stream s m t) => ParsecT s u m a
|
||||
|
||||
markdownParser :: Parser Document
|
||||
markdownParser = Doc <$> many block
|
||||
|
||||
block :: Parser Element
|
||||
block = choice [heading, codeBlock, quoteBlock, list, table, htmlBlock, paragraph, blankLine]
|
||||
|
||||
heading :: Parser Element
|
||||
heading = pure $ Heading $ H {level = 1, text = ""}
|
||||
|
||||
codeBlock :: Parser Element
|
||||
codeBlock = pure $ Code $ C {language = "", code = ""}
|
||||
|
||||
quoteBlock :: Parser Element
|
||||
quoteBlock = pure $ BlockQuote $ Q ""
|
||||
|
||||
list :: Parser Element
|
||||
list = pure $ List $ L {list_type = Ordered, items = []}
|
||||
|
||||
table :: Parser Element
|
||||
table = pure $ Table $ T {header = TH [], rows = []}
|
||||
|
||||
htmlBlock :: Parser Element
|
||||
htmlBlock = pure $ HTML $ Raw ""
|
||||
|
||||
paragraph :: Parser Element
|
||||
paragraph = pure $ Paragraph $ P ""
|
||||
|
||||
blankline :: Parser Element
|
||||
blankline = pure $ BlankLine BL
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
document = { block } ;
|
||||
|
||||
block = heading
|
||||
| horizontal_rule
|
||||
| code_block
|
||||
| quote_block
|
||||
| list
|
||||
|
@ -13,27 +12,17 @@ block = heading
|
|||
| blank_line ;
|
||||
|
||||
(* Headings *)
|
||||
heading = atx_heading | setext_heading ;
|
||||
heading = atx_heading ;
|
||||
|
||||
atx_heading = "#" { "#" } [ " " ] inline_text newline ;
|
||||
|
||||
setext_heading = inline_text newline
|
||||
( ( "=" { "=" } ) | ( "-" { "-" } ) ) newline ;
|
||||
|
||||
(* Horizontal Rule *)
|
||||
horizontal_rule = ( ( "*" [ " " ] "*" [ " " ] "*" { [ " " ] "*" } )
|
||||
| ( "-" [ " " ] "-" [ " " ] "-" { [ " " ] "-" } )
|
||||
| ( "_" [ " " ] "_" [ " " ] "_" { [ " " ] "_" } ) ) newline ;
|
||||
|
||||
(* Code Blocks *)
|
||||
code_block = fenced_code_block | indented_code_block ;
|
||||
code_block = fenced_code_block;
|
||||
|
||||
fenced_code_block = "```" [ language_identifier ] newline
|
||||
{ code_line }
|
||||
"```" newline ;
|
||||
|
||||
indented_code_block = { " " code_line } ;
|
||||
|
||||
code_line = { character - newline } newline ;
|
||||
|
||||
language_identifier = { letter | digit | "-" | "+" } ;
|
||||
|
@ -122,20 +111,9 @@ inline_element = emphasis
|
|||
| link
|
||||
| image
|
||||
| autolink
|
||||
| html_inline
|
||||
| line_break
|
||||
| plain_text ;
|
||||
|
||||
html_inline = html_inline_element | html_comment | html_processing_instruction ;
|
||||
|
||||
html_inline_element = html_open_tag { html_inline_content } html_close_tag
|
||||
| html_self_closing_tag
|
||||
| html_void_tag ;
|
||||
|
||||
html_inline_content = html_inline_text | html_inline_element | html_comment | html_processing_instruction ;
|
||||
|
||||
html_inline_text = { character - "<" } ;
|
||||
|
||||
emphasis = ( "*" non_asterisk_text "*" )
|
||||
| ( "_" non_underscore_text "_" ) ;
|
||||
|
||||
|
|
Loading…
Reference in a new issue