began scaffolding down to markdown parser
This commit is contained in:
parent
016d50aff1
commit
0e4613f27c
3 changed files with 68 additions and 30 deletions
33
app/IR.hs
33
app/IR.hs
|
@ -2,13 +2,36 @@ module IR where
|
||||||
|
|
||||||
import Data.Text
|
import Data.Text
|
||||||
|
|
||||||
-- Html and Math tags come with their data because they are leaves for us
|
newtype Document = Doc [Element]
|
||||||
-- We aren't parsing that if we can avoid it
|
|
||||||
data Tag = Heading {level :: Int} | Paragraph | Blockquote | Code | Html {html :: Text} | Anchor | Italic | Bold | Math {mathML :: Text}
|
|
||||||
|
|
||||||
data Data = Ast {ast :: AST} | Text {text :: Text}
|
data Element = Heading Heading | Code Code | BlockQuote BlockQuote | List List | Table Table | HTML HTML | Paragraph Paragraph | BlankLine BlankLine
|
||||||
|
|
||||||
data AST = AST {tag :: Tag, child :: [Data]}
|
data Heading = H {level :: Int, text :: Text}
|
||||||
|
|
||||||
|
data Code = C {language :: Text, code :: Text}
|
||||||
|
|
||||||
|
newtype BlockQuote = Q Text
|
||||||
|
|
||||||
|
data ListType = Ordered | Unordered
|
||||||
|
|
||||||
|
data ListItem = LI {content :: Text, children :: [List]}
|
||||||
|
|
||||||
|
data List = L {list_type :: ListType, items :: [ListItem]}
|
||||||
|
|
||||||
|
data Table = T {header :: TableHeader, rows :: [TableRow]}
|
||||||
|
|
||||||
|
-- TODO: layout/sizing info?
|
||||||
|
newtype TableHeader = TH [Text]
|
||||||
|
|
||||||
|
newtype TableRow = TR Text
|
||||||
|
|
||||||
|
newtype HTML = Raw Text
|
||||||
|
|
||||||
|
newtype Paragraph = P [InlineText]
|
||||||
|
|
||||||
|
data InlineText = Normal Text | Bold InlineText | Italic InlineText | CodeLine Text | Link {nest :: InlineText, href :: Text}
|
||||||
|
|
||||||
|
data BlankLine = BL
|
||||||
|
|
||||||
-- for processing math
|
-- for processing math
|
||||||
-- https://hackage.haskell.org/package/typst-0.6.1/docs/Typst-Parse.html#v:parseTypst
|
-- https://hackage.haskell.org/package/typst-0.6.1/docs/Typst-Parse.html#v:parseTypst
|
||||||
|
|
|
@ -1,4 +1,41 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE RankNTypes #-}
|
||||||
|
|
||||||
module Markdown () where
|
module Markdown (markdownParser) where
|
||||||
|
|
||||||
|
import Data.Text
|
||||||
|
import IR
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
|
||||||
|
type Parser a = forall s u m t. (Stream s m t) => ParsecT s u m a
|
||||||
|
|
||||||
|
markdownParser :: Parser Document
|
||||||
|
markdownParser = Doc <$> many block
|
||||||
|
|
||||||
|
block :: Parser Element
|
||||||
|
block = choice [heading, codeBlock, quoteBlock, list, table, htmlBlock, paragraph, blankLine]
|
||||||
|
|
||||||
|
heading :: Parser Element
|
||||||
|
heading = pure $ Heading $ H {level = 1, text = ""}
|
||||||
|
|
||||||
|
codeBlock :: Parser Element
|
||||||
|
codeBlock = pure $ Code $ C {language = "", code = ""}
|
||||||
|
|
||||||
|
quoteBlock :: Parser Element
|
||||||
|
quoteBlock = pure $ BlockQuote $ Q ""
|
||||||
|
|
||||||
|
list :: Parser Element
|
||||||
|
list = pure $ List $ L {list_type = Ordered, items = []}
|
||||||
|
|
||||||
|
table :: Parser Element
|
||||||
|
table = pure $ Table $ T {header = TH [], rows = []}
|
||||||
|
|
||||||
|
htmlBlock :: Parser Element
|
||||||
|
htmlBlock = pure $ HTML $ Raw ""
|
||||||
|
|
||||||
|
paragraph :: Parser Element
|
||||||
|
paragraph = pure $ Paragraph $ P ""
|
||||||
|
|
||||||
|
blankline :: Parser Element
|
||||||
|
blankline = pure $ BlankLine BL
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
document = { block } ;
|
document = { block } ;
|
||||||
|
|
||||||
block = heading
|
block = heading
|
||||||
| horizontal_rule
|
|
||||||
| code_block
|
| code_block
|
||||||
| quote_block
|
| quote_block
|
||||||
| list
|
| list
|
||||||
|
@ -13,27 +12,17 @@ block = heading
|
||||||
| blank_line ;
|
| blank_line ;
|
||||||
|
|
||||||
(* Headings *)
|
(* Headings *)
|
||||||
heading = atx_heading | setext_heading ;
|
heading = atx_heading ;
|
||||||
|
|
||||||
atx_heading = "#" { "#" } [ " " ] inline_text newline ;
|
atx_heading = "#" { "#" } [ " " ] inline_text newline ;
|
||||||
|
|
||||||
setext_heading = inline_text newline
|
|
||||||
( ( "=" { "=" } ) | ( "-" { "-" } ) ) newline ;
|
|
||||||
|
|
||||||
(* Horizontal Rule *)
|
|
||||||
horizontal_rule = ( ( "*" [ " " ] "*" [ " " ] "*" { [ " " ] "*" } )
|
|
||||||
| ( "-" [ " " ] "-" [ " " ] "-" { [ " " ] "-" } )
|
|
||||||
| ( "_" [ " " ] "_" [ " " ] "_" { [ " " ] "_" } ) ) newline ;
|
|
||||||
|
|
||||||
(* Code Blocks *)
|
(* Code Blocks *)
|
||||||
code_block = fenced_code_block | indented_code_block ;
|
code_block = fenced_code_block;
|
||||||
|
|
||||||
fenced_code_block = "```" [ language_identifier ] newline
|
fenced_code_block = "```" [ language_identifier ] newline
|
||||||
{ code_line }
|
{ code_line }
|
||||||
"```" newline ;
|
"```" newline ;
|
||||||
|
|
||||||
indented_code_block = { " " code_line } ;
|
|
||||||
|
|
||||||
code_line = { character - newline } newline ;
|
code_line = { character - newline } newline ;
|
||||||
|
|
||||||
language_identifier = { letter | digit | "-" | "+" } ;
|
language_identifier = { letter | digit | "-" | "+" } ;
|
||||||
|
@ -122,20 +111,9 @@ inline_element = emphasis
|
||||||
| link
|
| link
|
||||||
| image
|
| image
|
||||||
| autolink
|
| autolink
|
||||||
| html_inline
|
|
||||||
| line_break
|
| line_break
|
||||||
| plain_text ;
|
| plain_text ;
|
||||||
|
|
||||||
html_inline = html_inline_element | html_comment | html_processing_instruction ;
|
|
||||||
|
|
||||||
html_inline_element = html_open_tag { html_inline_content } html_close_tag
|
|
||||||
| html_self_closing_tag
|
|
||||||
| html_void_tag ;
|
|
||||||
|
|
||||||
html_inline_content = html_inline_text | html_inline_element | html_comment | html_processing_instruction ;
|
|
||||||
|
|
||||||
html_inline_text = { character - "<" } ;
|
|
||||||
|
|
||||||
emphasis = ( "*" non_asterisk_text "*" )
|
emphasis = ( "*" non_asterisk_text "*" )
|
||||||
| ( "_" non_underscore_text "_" ) ;
|
| ( "_" non_underscore_text "_" ) ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue