From 016d50aff191ff14faa2555e4e9946059e8f6956 Mon Sep 17 00:00:00 2001 From: Pagwin Date: Thu, 24 Jul 2025 18:39:40 -0400 Subject: [PATCH] starting to move towards making my own markdown parser --- app/Markdown.hs | 1 - markdown.ebnf | 57 +++++++++++++++++++++++++++++++++++++++++++++++-- psb.cabal | 3 +-- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/app/Markdown.hs b/app/Markdown.hs index 752c613..5e73599 100644 --- a/app/Markdown.hs +++ b/app/Markdown.hs @@ -2,4 +2,3 @@ module Markdown () where -import CMark diff --git a/markdown.ebnf b/markdown.ebnf index e812e48..9c39e98 100644 --- a/markdown.ebnf +++ b/markdown.ebnf @@ -1,4 +1,4 @@ -(* Markdown EBNF Grammar *) +(* Markdown EBNF Grammar with HTML Support *) document = { block } ; @@ -8,6 +8,7 @@ block = heading | quote_block | list | table + | html_block | paragraph | blank_line ; @@ -68,6 +69,47 @@ table_cell = { character - ( "|" | newline ) } ; table_align_spec = [ ":" ] "-" { "-" } [ ":" ] ; +(* HTML Support *) +html_block = html_block_element; + +html_block_element = html_open_tag { html_content } html_close_tag newline + | html_self_closing_tag newline + | html_void_tag newline ; + +html_open_tag = "<" html_tag_name { " " html_attribute } [ " " ] ">" ; + +html_close_tag = "</" html_tag_name ">" ; + +html_self_closing_tag = "<" html_tag_name { " " html_attribute } [ " " ] "/>" ; + +html_void_tag = "<" html_void_tag_name { " " html_attribute } [ " " ] [ "/" ] ">" ; + +html_void_tag_name = "area" | "base" | "br" | "col" | "embed" | "hr" | "img" | "input" + | "link" | "meta" | "param" | "source" | "track" | "wbr" ; + +html_tag_name = letter { letter | digit | "-" | "_" | ":" | "." } ; + +html_attribute = html_attribute_name [ "=" html_attribute_value ] ; + +html_attribute_name = ( letter | "_" | ":" ) { letter | digit | "-" | "_" | ":" | "." 
} ; + +html_attribute_value = html_quoted_value | html_unquoted_value ; + +html_quoted_value = ( '"' { character - '"' } '"' ) + | ( "'" { character - "'" } "'" ) ; + +html_unquoted_value = { character - ( " " | "\t" | "\n" | "\r" | ">" | "/" | "=" ) } ; + +html_content = html_text | html_element | html_comment | html_processing_instruction ; + +html_text = { character - "<" } ; + +html_element = html_open_tag { html_content } html_close_tag + | html_self_closing_tag + | html_void_tag ; + +html_comment = "<!--" { character } "-->" ; + (* Paragraphs *) paragraph = inline_text { newline inline_text } newline ; @@ -80,9 +122,20 @@ inline_element = emphasis | link | image | autolink + | html_inline | line_break | plain_text ; +html_inline = html_inline_element | html_comment | html_processing_instruction ; + +html_inline_element = html_open_tag { html_inline_content } html_close_tag + | html_self_closing_tag + | html_void_tag ; + +html_inline_content = html_inline_text | html_inline_element | html_comment | html_processing_instruction ; + +html_inline_text = { character - "<" } ; + emphasis = ( "*" non_asterisk_text "*" ) | ( "_" non_underscore_text "_" ) ; @@ -130,7 +183,7 @@ email = { character - ( "@" | ">" ) } "@" { character - ">" } ; (* Utilities *) blank_line = newline ; -special_char = "*" | "_" | "`" | "[" | "]" | "(" | ")" | "#" | ">" | "|" | "!" | "\\" ; +special_char = "*" | "_" | "`" | "[" | "]" | "(" | ")" | "#" | ">" | "|" | "!" | "\\" | "<" ; newline = "\n" | "\r\n" ; diff --git a/psb.cabal b/psb.cabal index a17fadd..0f9ef17 100644 --- a/psb.cabal +++ b/psb.cabal @@ -35,8 +35,7 @@ executable psb -- Other library packages from which modules are imported. 
-- https://hackage.haskell.org/package/texmath - -- cmark is pinned because I don't want to touch it unless I rewrite to my own code - build-depends: base >=4.17.2.1, mustache >=2.4.2, pandoc >=3.2.1, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text, time, unordered-containers, yaml, parsec >= 3.1.18.0, typst >= 0.6.1, typst-symbols >= 0.1.7, cmark == 0.6.1 + build-depends: base >=4.17.2.1, mustache >=2.4.2, pandoc >=3.2.1, shake >= 0.19.8, deriving-aeson >= 0.2.9, aeson, text, time, unordered-containers, yaml, parsec >= 3.1.18.0, typst >= 0.6.1, typst-symbols >= 0.1.7 -- Directories containing source files. hs-source-dirs: app