From dc17b1e8169692bfafa531d623023a77834601e6 Mon Sep 17 00:00:00 2001 From: Pagwin Date: Mon, 9 Jun 2025 16:29:53 -0400 Subject: [PATCH] ebnf added so parser making can be easier now --- markdown.ebnf | 148 ++++++++++++++++++++++++ restructured.ebnf | 285 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 433 insertions(+) create mode 100644 markdown.ebnf create mode 100644 restructured.ebnf diff --git a/markdown.ebnf b/markdown.ebnf new file mode 100644 index 0000000..e812e48 --- /dev/null +++ b/markdown.ebnf @@ -0,0 +1,148 @@ +(* Markdown EBNF Grammar *) + +document = { block } ; + +block = heading + | horizontal_rule + | code_block + | quote_block + | list + | table + | paragraph + | blank_line ; + +(* Headings *) +heading = atx_heading | setext_heading ; + +atx_heading = "#" { "#" } [ " " ] inline_text newline ; + +setext_heading = inline_text newline + ( ( "=" { "=" } ) | ( "-" { "-" } ) ) newline ; + +(* Horizontal Rule *) +horizontal_rule = ( ( "*" [ " " ] "*" [ " " ] "*" { [ " " ] "*" } ) + | ( "-" [ " " ] "-" [ " " ] "-" { [ " " ] "-" } ) + | ( "_" [ " " ] "_" [ " " ] "_" { [ " " ] "_" } ) ) newline ; + +(* Code Blocks *) +code_block = fenced_code_block | indented_code_block ; + +fenced_code_block = "```" [ language_identifier ] newline + { code_line } + "```" newline ; + +indented_code_block = { " " code_line } ; + +code_line = { character - newline } newline ; + +language_identifier = { letter | digit | "-" | "+" } ; + +(* Quote Blocks *) +quote_block = { ">" [ " " ] ( inline_text | "" ) newline } ; + +(* Lists *) +list = unordered_list | ordered_list ; + +unordered_list = { unordered_list_item } ; + +ordered_list = { ordered_list_item } ; + +unordered_list_item = [ " " { " " } ] ( "*" | "+" | "-" ) " " inline_text newline + { continuation_line } ; + +ordered_list_item = [ " " { " " } ] digit { digit } "." " " inline_text newline + { continuation_line } ; + +continuation_line = " " inline_text newline ; + +(* Tables *) +table = table_header table_separator { table_row } ; + +table_header = "|" { table_cell "|" } newline ; + +table_separator = "|" { table_align_spec "|" } newline ; + +table_row = "|" { table_cell "|" } newline ; + +table_cell = { character - ( "|" | newline ) } ; + +table_align_spec = [ ":" ] "-" { "-" } [ ":" ] ; + +(* Paragraphs *) +paragraph = inline_text { newline inline_text } newline ; + +(* Inline Elements *) +inline_text = { inline_element } ; + +inline_element = emphasis + | strong + | code_span + | link + | image + | autolink + | line_break + | plain_text ; + +emphasis = ( "*" non_asterisk_text "*" ) + | ( "_" non_underscore_text "_" ) ; + +strong = ( "**" non_asterisk_text "**" ) + | ( "__" non_underscore_text "__" ) ; + +code_span = "`" { "`" } non_backtick_text { "`" } "`" ; + +link = "[" link_text "]" "(" link_url [ " " link_title ] ")" ; + +image = "!" "[" alt_text "]" "(" image_url [ " " image_title ] ")" ; + +autolink = "<" ( url | email ) ">" ; + +line_break = " " newline | "\\" newline ; + +(* Text Content *) +plain_text = { character - special_char } ; + +non_asterisk_text = { character - "*" } ; + +non_underscore_text = { character - "_" } ; + +non_backtick_text = { character - "`" } ; + +link_text = { character - ( "[" | "]" ) } ; + +alt_text = { character - ( "[" | "]" ) } ; + +link_url = { character - ( "(" | ")" | " " ) } ; + +image_url = { character - ( "(" | ")" | " " ) } ; + +link_title = quote_string ; + +image_title = quote_string ; + +quote_string = ( '"' { character - '"' } '"' ) + | ( "'" { character - "'" } "'" ) ; + +url = "http" [ "s" ] "://" { character - ">" } ; + +email = { character - ( "@" | ">" ) } "@" { character - ">" } ; + +(* Utilities *) +blank_line = newline ; + +special_char = "*" | "_" | "`" | "[" | "]" | "(" | ")" | "#" | ">" | "|" | "!" | "\\" ; + +newline = "\n" | "\r\n" ; + +character = letter | digit | symbol | " " ; + +letter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" + | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" + | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" + | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" ; + +digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + +symbol = "!" | "@" | "#" | "$" | "%" | "^" | "&" | "*" | "(" | ")" | "-" | "_" | "=" + | "+" | "[" | "]" | "{" | "}" | "\\" | "|" | ";" | ":" | "'" | '"' | "," | "." + | "<" | ">" | "/" | "?" | "~" | "`" ; diff --git a/restructured.ebnf b/restructured.ebnf new file mode 100644 index 0000000..1d22040 --- /dev/null +++ b/restructured.ebnf @@ -0,0 +1,285 @@ +(* reStructuredText EBNF Grammar *) + +document = { block } ; + +block = section + | transition + | paragraph + | literal_block + | line_block + | block_quote + | doctest_block + | table + | bullet_list + | enumerated_list + | definition_list + | field_list + | option_list + | directive + | comment + | substitution_definition + | target + | blank_line ; + +(* Sections *) +section = section_title section_underline [ section_overline ] ; + +section_title = inline_text newline ; + +section_underline = section_adornment newline ; + +section_overline = section_adornment newline ; + +section_adornment = adornment_char { adornment_char } ; + +adornment_char = "!" | '"' | "#" | "$" | "%" | "&" | "'" | "(" | ")" | "*" | "+" | "," | "-" | "." | "/" | ":" | ";" | "<" | "=" | ">" | "?" | "@" | "[" | "\\" | "]" | "^" | "_" | "`" | "{" | "|" | "}" | "~" ; + +(* Transitions *) +transition = transition_marker newline ; + +transition_marker = transition_char { transition_char } ; + +transition_char = adornment_char ; + +(* Paragraphs *) +paragraph = inline_text { newline inline_text } newline ; + +(* Literal Blocks *) +literal_block = literal_block_marker newline { indented_line } ; + +literal_block_marker = "::" | paragraph "::" ; + +indented_line = indent line_content newline ; + +indent = " " | "\t" ; + +line_content = { character - newline } ; + +(* Line Blocks *) +line_block = { line_block_line } ; + +line_block_line = "|" [ " " ] inline_text newline ; + +(* Block Quotes *) +block_quote = { indented_paragraph } [ attribution ] ; + +indented_paragraph = indent inline_text { newline indent inline_text } newline ; + +attribution = indent "-- " inline_text newline ; + +(* Doctest Blocks *) +doctest_block = { doctest_line } ; + +doctest_line = ">>>" " " line_content newline + | "..." " " line_content newline ; + +(* Tables *) +table = simple_table | grid_table ; + +simple_table = simple_table_row { simple_table_row } simple_table_separator { simple_table_row } ; + +simple_table_row = { table_cell } newline ; + +simple_table_separator = "=" { ( "=" | " " ) } newline ; + +grid_table = grid_table_border { grid_table_row grid_table_border } ; + +grid_table_border = "+" { ( "-" | "+" ) } newline ; + +grid_table_row = "|" { table_cell "|" } newline ; + +table_cell = { character - ( "|" | newline ) } ; + +(* Lists *) +bullet_list = { bullet_list_item } ; + +bullet_list_item = bullet_marker " " list_item_content ; + +bullet_marker = "*" | "+" | "-" | "•" | "‣" | "⁃" ; + +enumerated_list = { enumerated_list_item } ; + +enumerated_list_item = enumeration_marker " " list_item_content ; + +enumeration_marker = ( digit { digit } "." ) + | ( digit { digit } ")" ) + | ( "(" digit { digit } ")" ) + | ( letter "." ) + | ( letter ")" ) + | ( "(" letter ")" ) + | ( roman "." ) + | ( roman ")" ) + | ( "(" roman ")" ) + | "#." | "#)" | "(#)" ; + +list_item_content = inline_text { newline [ indent ] inline_text } newline ; + +definition_list = { definition_list_item } ; + +definition_list_item = term newline indent definition newline ; + +term = inline_text ; + +definition = inline_text { newline indent inline_text } ; + +(* Field Lists *) +field_list = { field_list_item } ; + +field_list_item = ":" field_name ":" " " field_body newline ; + +field_name = { letter | digit | " " | "-" | "_" } ; + +field_body = inline_text { newline indent inline_text } ; + +(* Option Lists *) +option_list = { option_list_item } ; + +option_list_item = option_group " " option_description newline ; + +option_group = option { ", " option } ; + +option = short_option | long_option ; + +short_option = "-" letter [ " " option_argument ] ; + +long_option = "--" { letter | digit | "-" } [ "=" option_argument ] ; + +option_argument = { letter | digit | "-" | "_" } ; + +option_description = inline_text { newline indent inline_text } ; + +(* Directives *) +directive = ".." " " directive_name "::" [ " " directive_arguments ] newline + [ directive_options ] + [ blank_line ] + [ directive_content ] ; + +directive_name = { letter | digit | "-" | "_" } ; + +directive_arguments = { character - newline } ; + +directive_options = { directive_option } ; + +directive_option = indent ":" option_name ":" [ " " option_value ] newline ; + +option_name = { letter | digit | "-" | "_" } ; + +option_value = { character - newline } ; + +directive_content = { indented_line } ; + +(* Comments *) +comment = ".." [ " " comment_text ] newline { indented_line } ; + +comment_text = { character - newline } ; + +(* Substitution Definitions *) +substitution_definition = ".." " " "|" substitution_name "|" " " directive_name "::" [ " " directive_arguments ] newline + [ directive_options ] + [ directive_content ] ; + +substitution_name = { character - ( "|" | newline ) } ; + +(* Targets *) +target = internal_target | external_target ; + +internal_target = ".." " " "_" target_name ":" newline ; + +external_target = ".." " " "_" target_name ":" " " target_url newline ; + +target_name = { character - ( ":" | newline ) } ; + +target_url = { character - newline } ; + +(* Inline Elements *) +inline_text = { inline_element } ; + +inline_element = emphasis + | strong + | literal + | interpreted_text + | phrase_reference + | substitution_reference + | inline_internal_target + | hyperlink_reference + | footnote_reference + | citation_reference + | inline_literal + | plain_text ; + +emphasis = "*" emphasis_text "*" ; + +strong = "**" strong_text "**" ; + +literal = "``" literal_text "``" ; + +interpreted_text = "`" interpreted_text_content "`" [ role_suffix ] + | role_prefix "`" interpreted_text_content "`" ; + +role_prefix = ":" role_name ":" ; + +role_suffix = ":" role_name ":" ; + +role_name = { letter | digit | "-" | "_" | "." } ; + +interpreted_text_content = { character - "`" } ; + +phrase_reference = "`" phrase_reference_text "`_" [ "_" ] ; + +phrase_reference_text = { character - ( "`" | "<" ) } [ " " "<" target_url ">" ] ; + +substitution_reference = "|" substitution_name "|" [ "_" [ "_" ] ] ; + +inline_internal_target = "_`" target_text "`" ; + +target_text = { character - "`" } ; + +hyperlink_reference = reference_name "_" [ "_" ] ; + +reference_name = { letter | digit | "-" | "_" | "." } ; + +footnote_reference = "[" footnote_label "]_" ; + +footnote_label = digit { digit } | "#" [ footnote_name ] | "*" ; + +footnote_name = { letter | digit | "-" | "_" } ; + +citation_reference = "[" citation_label "]_" ; + +citation_label = { letter | digit | "-" | "_" | "." } ; + +inline_literal = "`" "`" literal_content "`" "`" ; + +literal_content = { character - "`" } ; + +(* Text Content *) +plain_text = { character - markup_char } ; + +emphasis_text = { character - ( "*" | newline ) } ; + +strong_text = { character - ( "*" | newline ) } ; + +literal_text = { character - ( "`" | newline ) } ; + +markup_char = "*" | "`" | "_" | "|" | "[" | "]" | ":" | "." | ">" | "<" ; + +(* Utilities *) +blank_line = newline ; + +roman = "i" | "ii" | "iii" | "iv" | "v" | "vi" | "vii" | "viii" | "ix" | "x" + | "I" | "II" | "III" | "IV" | "V" | "VI" | "VII" | "VIII" | "IX" | "X" ; + +newline = "\n" | "\r\n" ; + +character = letter | digit | symbol | " " ; + +letter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" + | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" + | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" + | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" ; + +digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + +symbol = "!" | "@" | "#" | "$" | "%" | "^" | "&" | "*" | "(" | ")" | "-" | "_" | "=" + | "+" | "[" | "]" | "{" | "}" | "\\" | "|" | ";" | ":" | "'" | '"' | "," | "." + | "<" | ">" | "/" | "?" | "~" | "`" ;