ebnf added so parser making can be easier now

This commit is contained in:
Pagwin 2025-06-09 16:29:53 -04:00
parent 4a5d5e541a
commit dc17b1e816
No known key found for this signature in database
GPG key ID: 81137023740CA260
2 changed files with 433 additions and 0 deletions

148
markdown.ebnf Normal file
View file

@@ -0,0 +1,148 @@
(* Markdown EBNF Grammar *)

(* A document is a sequence of zero or more block-level constructs. *)
document = { block } ;

(* Every block-level construct this grammar knows about. *)
block
    = heading
    | horizontal_rule
    | code_block
    | quote_block
    | list
    | table
    | paragraph
    | blank_line
    ;
(* Headings *)
heading = atx_heading | setext_heading ;

(* ATX heading: a marker of one to six "#" characters (heading levels 1-6),
   an optional separating space, the heading text and the line ending.
   The nested options cap the marker at six characters; a further "#" is
   no longer part of the marker. *)
atx_heading = "#" [ "#" [ "#" [ "#" [ "#" [ "#" ] ] ] ] ] [ " " ] inline_text newline ;

(* Setext heading: a text line followed by an underline made of a run of
   "=" or a run of "-". *)
setext_heading = inline_text newline
                 ( ( "=" { "=" } ) | ( "-" { "-" } ) ) newline ;
(* Horizontal Rule *)
(* Three or more identical rule characters on one line, each pair optionally
   separated by a single space, terminated by the line ending. *)
horizontal_rule = ( star_rule | dash_rule | underscore_rule ) newline ;

star_rule       = "*" [ " " ] "*" [ " " ] "*" { [ " " ] "*" } ;
dash_rule       = "-" [ " " ] "-" [ " " ] "-" { [ " " ] "-" } ;
underscore_rule = "_" [ " " ] "_" [ " " ] "_" { [ " " ] "_" } ;
(* Code Blocks *)
code_block = fenced_code_block | indented_code_block ;

(* Fenced block: an opening fence with an optional language identifier,
   any number of raw lines, and a closing fence. *)
fenced_code_block = "```" [ language_identifier ] newline
                    { code_line }
                    "```" newline ;

(* Indented block: one or more lines, each prefixed by four spaces or a tab.
   At least one line is required so the rule cannot derive the empty string;
   an empty block inside `document = { block }` could repeat forever without
   consuming input. *)
indented_code_block = code_indent code_line { code_indent code_line } ;
code_indent = "    " | "\t" ;

(* One raw line of code, including its line ending. *)
code_line = { character - newline } newline ;

(* One or more identifier characters; absence of the identifier is expressed
   only by the surrounding option in fenced_code_block. *)
language_identifier = language_char { language_char } ;
language_char = letter | digit | "-" | "+" ;
(* Quote Blocks *)
(* One or more consecutive lines that each start with ">".  Requiring the
   first line keeps the rule from deriving the empty string.  A quote line
   with no text is still accepted because inline_text may be empty. *)
quote_block = quote_line { quote_line } ;
quote_line = ">" [ " " ] inline_text newline ;
(* Lists *)
list = unordered_list | ordered_list ;

(* A list consists of at least one item, so neither list rule can derive
   the empty string. *)
unordered_list = unordered_list_item { unordered_list_item } ;
ordered_list = ordered_list_item { ordered_list_item } ;

(* Item: optional leading spaces, a bullet character, one space, the item
   text, and any continuation lines that belong to the item. *)
unordered_list_item = { " " } ( "*" | "+" | "-" ) " " inline_text newline
                      { continuation_line } ;

(* Item: optional leading spaces, a decimal number followed by ".", one
   space, the item text, and any continuation lines. *)
ordered_list_item = { " " } digit { digit } "." " " inline_text newline
                    { continuation_line } ;

(* NOTE(review): the continuation indent is a single space here; the
   intended width may have been larger -- confirm against the parser. *)
continuation_line = " " inline_text newline ;
(* Tables *)
(* A header row, a separator row, then any number of body rows. *)
table = table_header table_separator { table_row } ;

(* Every row holds at least one cell, so a line consisting of a single "|"
   is not a table row. *)
table_header = "|" table_cell "|" { table_cell "|" } newline ;
table_separator = "|" table_separator_cell "|" { table_separator_cell "|" } newline ;
table_row = "|" table_cell "|" { table_cell "|" } newline ;

(* Cell content: anything up to the next "|" or the end of the line. *)
table_cell = { character - ( "|" | newline ) } ;

(* A separator cell may pad its alignment spec with spaces, as in the
   commonly written form "| --- | :---: |". *)
table_separator_cell = { " " } table_align_spec { " " } ;

(* One or more dashes with an optional colon on either side. *)
table_align_spec = [ ":" ] "-" { "-" } [ ":" ] ;
(* Paragraphs *)
(* One or more consecutive text lines followed by a line ending.  Each line
   requires at least one inline_element instead of a bare inline_text
   (which may be empty), so that a lone line ending is not also derivable
   as an empty paragraph alongside blank_line. *)
paragraph = paragraph_line { newline paragraph_line } newline ;
paragraph_line = inline_element inline_text ;
(* Inline Elements *)
(* A possibly empty run of inline elements. *)
inline_text = { inline_element } ;

(* The last two alternatives make ordinary prose derivable: escaped_char
   covers a backslash-escaped markup character, and a bare special_char
   covers a markup character that does not open any construct (for example
   the parentheses in "see (here)" or the ">" in "a > b"). *)
inline_element = emphasis
               | strong
               | code_span
               | link
               | image
               | autolink
               | line_break
               | escaped_char
               | plain_text
               | special_char ;

emphasis = ( "*" non_asterisk_text "*" )
         | ( "_" non_underscore_text "_" ) ;
strong = ( "**" non_asterisk_text "**" )
       | ( "__" non_underscore_text "__" ) ;

(* NOTE(review): the opening and closing backtick runs are not forced to
   have the same length; EBNF cannot express that constraint directly. *)
code_span = "`" { "`" } non_backtick_text { "`" } "`" ;

link = "[" link_text "]" "(" link_url [ " " link_title ] ")" ;
image = "!" "[" alt_text "]" "(" image_url [ " " image_title ] ")" ;
autolink = "<" ( url | email ) ">" ;

(* A backslash followed by a markup character stands for that character. *)
escaped_char = "\\" special_char ;

(* Hard line break: two or more trailing spaces, or a backslash, directly
   before the line ending.  A single trailing space is not a break. *)
line_break = "  " { " " } newline | "\\" newline ;
(* Text Content *)
(* A non-empty run of characters that are not markup characters.  The run
   must contain at least one character: plain_text is an alternative of
   inline_element, which is repeated by inline_text, and a repeated rule
   that can match nothing lets the repetition spin without consuming input. *)
plain_text = plain_char { plain_char } ;
plain_char = character - special_char ;

(* Span contents, each excluding the delimiter that closes the span. *)
non_asterisk_text = { character - "*" } ;
non_underscore_text = { character - "_" } ;
non_backtick_text = { character - "`" } ;

(* Bracketed text of links and images; nested brackets are not allowed. *)
link_text = { character - ( "[" | "]" ) } ;
alt_text = { character - ( "[" | "]" ) } ;

(* Destinations: no parentheses and no spaces. *)
link_url = { character - ( "(" | ")" | " " ) } ;
image_url = { character - ( "(" | ")" | " " ) } ;

(* Optional titles are quoted strings. *)
link_title = quote_string ;
image_title = quote_string ;
quote_string = ( '"' { character - '"' } '"' )
             | ( "'" { character - "'" } "'" ) ;

(* Autolink targets. *)
url = "http" [ "s" ] "://" { character - ">" } ;

(* Both the part before "@" and the part after it must be non-empty, so a
   bare "@" is not an e-mail address. *)
email = ( character - ( "@" | ">" ) ) { character - ( "@" | ">" ) }
        "@"
        ( character - ">" ) { character - ">" } ;
(* Utilities *)
(* A line that contains nothing but its line ending. *)
blank_line = newline ;

(* Characters that can start or end inline markup; plain_text excludes them. *)
special_char = "*" | "_" | "`" | "[" | "]" | "(" | ")" | "#" | ">" | "|" | "!" | "\\" ;

(* Unix or Windows line ending. *)
newline = "\n" | "\r\n" ;

(* Any single character that may appear within a line.  The tab is included
   so that lines containing tabs (for example inside code) are derivable.
   NOTE(review): only ASCII is enumerated; non-ASCII text is not covered. *)
character = letter | digit | symbol | " " | "\t" ;
(* ASCII letters, lower case then upper case. *)
letter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m"
| "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
| "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M"
| "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" ;
(* Decimal digits. *)
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
(* Punctuation accepted as a `character`; this set also contains every
   member of special_char. *)
symbol = "!" | "@" | "#" | "$" | "%" | "^" | "&" | "*" | "(" | ")" | "-" | "_" | "="
| "+" | "[" | "]" | "{" | "}" | "\\" | "|" | ";" | ":" | "'" | '"' | "," | "."
| "<" | ">" | "/" | "?" | "~" | "`" ;

285
restructured.ebnf Normal file
View file

@@ -0,0 +1,285 @@
(* reStructuredText EBNF Grammar *)

(* A document is a sequence of zero or more body elements. *)
document = { block } ;

(* Every body element this grammar knows about. *)
block
    = section
    | transition
    | paragraph
    | literal_block
    | line_block
    | block_quote
    | doctest_block
    | table
    | bullet_list
    | enumerated_list
    | definition_list
    | field_list
    | option_list
    | directive
    | comment
    | substitution_definition
    | target
    | blank_line
    ;
(* Sections *)
(* A section title is a text line with an underline; it may additionally
   carry an overline, which is written on the line above the title.  The
   optional overline therefore comes first in the sequence. *)
section = [ section_overline ] section_title section_underline ;
section_title = inline_text newline ;
section_underline = section_adornment newline ;
section_overline = section_adornment newline ;

(* A run of one or more adornment characters.
   NOTE(review): this grammar does not force the run to repeat a single
   character, nor the overline to match the underline. *)
section_adornment = adornment_char { adornment_char } ;

adornment_char = "!" | '"' | "#" | "$" | "%" | "&" | "'" | "(" | ")" | "*" | "+"
               | "," | "-" | "." | "/" | ":" | ";" | "<" | "=" | ">" | "?" | "@"
               | "[" | "\\" | "]" | "^" | "_" | "`" | "{" | "|" | "}" | "~" ;
(* Transitions *)
(* A transition is a line of four or more punctuation characters; shorter
   runs are not transitions.
   NOTE(review): the characters are not forced to be identical here. *)
transition = transition_marker newline ;
transition_marker = transition_char transition_char transition_char transition_char
                    { transition_char } ;
transition_char = adornment_char ;
(* Paragraphs *)
(* One or more consecutive text lines followed by a line ending.  Each line
   requires at least one inline_element instead of a bare inline_text
   (which may be empty), so that a lone line ending is not also derivable
   as an empty paragraph alongside blank_line. *)
paragraph = paragraph_line { newline paragraph_line } newline ;
paragraph_line = inline_element inline_text ;
(* Literal Blocks *)
(* A marker line ending in "::", an optional blank line, then one or more
   indented lines.  At least one indented line is required; without a body
   the marker line is just text ending in "::". *)
literal_block = literal_block_marker newline
                [ blank_line ]
                indented_line { indented_line } ;

(* The marker is "::" at the end of a line: either alone (inline_text
   empty) or directly after the text on that line.  It cannot follow a
   whole `paragraph`, because that rule already consumes the line ending. *)
literal_block_marker = inline_text "::" ;

indented_line = indent line_content newline ;
indent = " " | "\t" ;
line_content = { character - newline } ;
(* Line Blocks *)
(* One or more lines that each start with "|".  Requiring the first line
   keeps the rule from deriving the empty string. *)
line_block = line_block_line { line_block_line } ;
line_block_line = "|" [ " " ] inline_text newline ;
(* Block Quotes *)
(* One or more indented paragraphs, optionally followed by an attribution.
   Requiring the first paragraph keeps the rule from deriving the empty
   string. *)
block_quote = indented_paragraph { indented_paragraph } [ attribution ] ;
indented_paragraph = indent inline_text { newline indent inline_text } newline ;

(* Attribution line: indent, "-- ", then the attribution text. *)
attribution = indent "-- " inline_text newline ;
(* Doctest Blocks *)
(* A doctest block starts with a ">>> " prompt line and continues with
   further prompt lines, "... " continuation lines, or interpreter output
   lines.  The mandatory first line keeps the rule from deriving the empty
   string and from starting on a continuation line. *)
doctest_block = doctest_prompt_line { doctest_line } ;
doctest_prompt_line = ">>>" " " line_content newline ;
doctest_line = doctest_prompt_line
             | "..." " " line_content newline
             | doctest_output_line ;

(* Output is any non-empty line; a blank line ends the block. *)
doctest_output_line = character line_content newline ;
(* Tables *)
table = simple_table | grid_table ;

(* Simple table: a top border, an optional header (rows plus a separator),
   one or more body rows, and a bottom border.  Borders and the header
   separator are all written with "=" runs. *)
simple_table = simple_table_separator
               [ simple_table_row { simple_table_row } simple_table_separator ]
               simple_table_row { simple_table_row }
               simple_table_separator ;

(* A row is a non-empty line.  It must not be written as a repetition of
   table_cell: table_cell can match nothing, and repeating a rule that can
   match nothing never terminates. *)
simple_table_row = character { character } newline ;
simple_table_separator = "=" { ( "=" | " " ) } newline ;

(* Grid table: a border, then one or more groups of row lines, each group
   closed by a border.  A table row may span several text lines. *)
grid_table = grid_table_border
             grid_table_row { grid_table_row } grid_table_border
             { grid_table_row { grid_table_row } grid_table_border } ;

(* Borders use "-" between rows and "=" below the header rows. *)
grid_table_border = "+" { ( "-" | "=" | "+" ) } newline ;
grid_table_row = "|" { table_cell "|" } newline ;
table_cell = { character - ( "|" | newline ) } ;
(* Lists *)
(* One or more bullet items; the mandatory first item keeps the rule from
   deriving the empty string. *)
bullet_list = bullet_list_item { bullet_list_item } ;
bullet_list_item = bullet_marker " " list_item_content ;

(* The six bullet characters.  The last one is the hyphen bullet (U+2043);
   an empty string in its place would let an item start without any marker. *)
bullet_marker = "*" | "+" | "-" | "•" | "‣" | "⁃" ;
(* One or more enumerated items; the mandatory first item keeps the rule
   from deriving the empty string. *)
enumerated_list = enumerated_list_item { enumerated_list_item } ;
enumerated_list_item = enumeration_marker " " list_item_content ;

(* Enumerators are decimal numbers, single letters, roman numerals or the
   auto-enumerator "#", each written as "x.", "x)" or "(x)". *)
enumeration_marker = ( digit { digit } "." )
                   | ( digit { digit } ")" )
                   | ( "(" digit { digit } ")" )
                   | ( letter "." )
                   | ( letter ")" )
                   | ( "(" letter ")" )
                   | ( roman "." )
                   | ( roman ")" )
                   | ( "(" roman ")" )
                   | "#." | "#)" | "(#)" ;

(* Item text: a first line plus optionally indented continuation lines. *)
list_item_content = inline_text { newline [ indent ] inline_text } newline ;
(* One or more term/definition pairs; the mandatory first item keeps the
   rule from deriving the empty string. *)
definition_list = definition_list_item { definition_list_item } ;

(* A term on its own line followed by an indented definition. *)
definition_list_item = term newline indent definition newline ;
term = inline_text ;
definition = inline_text { newline indent inline_text } ;
(* Field Lists *)
(* One or more fields; the mandatory first field keeps the rule from
   deriving the empty string. *)
field_list = field_list_item { field_list_item } ;
field_list_item = ":" field_name ":" " " field_body newline ;

(* A field name has at least one character, so "::" followed by a space is
   not read as a field with an empty name. *)
field_name = field_name_char { field_name_char } ;
field_name_char = letter | digit | " " | "-" | "_" ;

(* Body text, optionally continued on indented lines. *)
field_body = inline_text { newline indent inline_text } ;
(* Option Lists *)
(* One or more option entries; the mandatory first entry keeps the rule
   from deriving the empty string. *)
option_list = option_list_item { option_list_item } ;

(* Options and description are separated by at least two spaces; a single
   space already occurs inside an option between a short option and its
   argument. *)
option_list_item = option_group "  " { " " } option_description newline ;
option_group = option { ", " option } ;
option = short_option | long_option ;
short_option = "-" letter [ " " option_argument ] ;

(* A long option has at least one name character after "--". *)
long_option = "--" long_option_char { long_option_char } [ "=" option_argument ] ;
long_option_char = letter | digit | "-" ;

option_argument = { letter | digit | "-" | "_" } ;
option_description = inline_text { newline indent inline_text } ;
(* Directives *)
(* ".. name:: arguments" on the first line, then optional option lines, an
   optional blank line and optional indented content. *)
directive = ".." " " directive_name "::" [ " " directive_arguments ] newline
            [ directive_options ]
            [ blank_line ]
            [ directive_content ] ;

(* A directive name has at least one character, so ".. ::" is not read as
   a directive with an empty name. *)
directive_name = directive_name_char { directive_name_char } ;
directive_name_char = letter | digit | "-" | "_" ;

directive_arguments = { character - newline } ;
directive_options = { directive_option } ;

(* One indented ":name: value" line. *)
directive_option = indent ":" option_name ":" [ " " option_value ] newline ;

(* An option name has at least one character. *)
option_name = option_name_char { option_name_char } ;
option_name_char = letter | digit | "-" | "_" ;

option_value = { character - newline } ;
directive_content = { indented_line } ;
(* Comments *)
(* A ".." line, optionally carrying text after one space, followed by any
   number of indented lines. *)
comment = comment_start_line { indented_line } ;
comment_start_line = ".." [ " " comment_text ] newline ;
comment_text = { character - newline } ;
(* Substitution Definitions *)
(* ".. |name| directive:: arguments", followed by optional directive
   options and optional directive content. *)
substitution_definition = ".." " " "|" substitution_name "|" " " directive_name "::" [ " " directive_arguments ] newline
                          [ directive_options ]
                          [ directive_content ] ;

(* A substitution name has at least one character, so "||" is not read as
   a substitution with an empty name. *)
substitution_name = substitution_char { substitution_char } ;
substitution_char = character - ( "|" | newline ) ;
(* Targets *)
target = internal_target | external_target ;

(* Both target forms share the ".. _name:" prefix; an external target adds
   one space and a URL before the line ending. *)
internal_target = target_prefix newline ;
external_target = target_prefix " " target_url newline ;
target_prefix = ".." " " "_" target_name ":" ;

target_name = { character - ( ":" | newline ) } ;
target_url = { character - newline } ;
(* Inline Elements *)
(* A possibly empty run of inline elements. *)
inline_text = { inline_element } ;

(* The final markup_char alternative makes ordinary prose derivable:
   plain_text excludes every markup_char (including "." and ":"), so a
   markup character that does not open any construct -- such as the full
   stop ending a sentence -- is taken as a literal character here. *)
inline_element = emphasis
               | strong
               | literal
               | interpreted_text
               | phrase_reference
               | substitution_reference
               | inline_internal_target
               | hyperlink_reference
               | footnote_reference
               | citation_reference
               | inline_literal
               | plain_text
               | markup_char ;
(* Emphasis, strong emphasis and double-backquoted literals. *)
emphasis = "*" emphasis_text "*" ;
strong = "**" strong_text "**" ;
literal = "``" literal_text "``" ;

(* Interpreted text with an optional role after it, or a role before it. *)
interpreted_text = "`" interpreted_text_content "`" [ role_suffix ]
                 | role_prefix "`" interpreted_text_content "`" ;
role_prefix = ":" role_name ":" ;
role_suffix = ":" role_name ":" ;

(* A role name has at least one character, so "::" is not read as a role. *)
role_name = ( letter | digit | "-" | "_" | "." ) { letter | digit | "-" | "_" | "." } ;
interpreted_text_content = { character - "`" } ;

(* Phrase reference, optionally with an embedded " <url>"; a second
   trailing underscore makes it anonymous. *)
phrase_reference = "`" phrase_reference_text "`_" [ "_" ] ;
phrase_reference_text = { character - ( "`" | "<" ) } [ " " "<" target_url ">" ] ;

substitution_reference = "|" substitution_name "|" [ "_" [ "_" ] ] ;
inline_internal_target = "_`" target_text "`" ;
target_text = { character - "`" } ;

(* Simple reference: a name followed by "_" or "__".  The name has at least
   one character, so a bare underscore is not a reference. *)
hyperlink_reference = reference_name "_" [ "_" ] ;
reference_name = ( letter | digit | "-" | "_" | "." ) { letter | digit | "-" | "_" | "." } ;

(* Footnote labels: a number, "#" with an optional name, or "*". *)
footnote_reference = "[" footnote_label "]_" ;
footnote_label = digit { digit } | "#" [ footnote_name ] | "*" ;
footnote_name = { letter | digit | "-" | "_" } ;

(* A citation label has at least one character, so "[]_" is not a citation. *)
citation_reference = "[" citation_label "]_" ;
citation_label = ( letter | digit | "-" | "_" | "." ) { letter | digit | "-" | "_" | "." } ;

(* NOTE(review): inline_literal accepts the same delimiters as `literal`;
   the two rules overlap. *)
inline_literal = "`" "`" literal_content "`" "`" ;
literal_content = { character - "`" } ;
(* Text Content *)
(* A non-empty run of characters that are not markup characters.  The run
   must contain at least one character: plain_text is an alternative of
   inline_element, which is repeated by inline_text, and a repeated rule
   that can match nothing lets the repetition spin without consuming input. *)
plain_text = plain_char { plain_char } ;
plain_char = character - markup_char ;

(* Span contents, each excluding the delimiter that closes the span. *)
emphasis_text = { character - ( "*" | newline ) } ;
strong_text = { character - ( "*" | newline ) } ;
literal_text = { character - ( "`" | newline ) } ;

(* Characters that plain_text never contains. *)
markup_char = "*" | "`" | "_" | "|" | "[" | "]" | ":" | "." | ">" | "<" ;
(* Utilities *)
(* A line that contains nothing but its line ending. *)
blank_line = newline ;

(* Roman numerals one to ten, lower case and upper case. *)
roman = "i" | "ii" | "iii" | "iv" | "v" | "vi" | "vii" | "viii" | "ix" | "x"
      | "I" | "II" | "III" | "IV" | "V" | "VI" | "VII" | "VIII" | "IX" | "X" ;

(* Unix or Windows line ending. *)
newline = "\n" | "\r\n" ;

(* Any single character that may appear within a line.  The tab is included
   because `indent` already accepts a tab, so lines built from `character`
   must be able to contain one as well.
   NOTE(review): only ASCII is enumerated; non-ASCII text is not covered. *)
character = letter | digit | symbol | " " | "\t" ;
(* ASCII letters, lower case then upper case. *)
letter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m"
| "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
| "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M"
| "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" ;
(* Decimal digits. *)
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
(* Punctuation accepted as a `character`; this set also contains every
   member of markup_char. *)
symbol = "!" | "@" | "#" | "$" | "%" | "^" | "&" | "*" | "(" | ")" | "-" | "_" | "="
| "+" | "[" | "]" | "{" | "}" | "\\" | "|" | ";" | ":" | "'" | '"' | "," | "."
| "<" | ">" | "/" | "?" | "~" | "`" ;