bunch of work on djot
This commit is contained in:
parent
cc4f0b7672
commit
1a2feb95e9
3 changed files with 133 additions and 13 deletions
138
src/Djot.hs
138
src/Djot.hs
|
|
@ -1,3 +1,4 @@
|
|||
{-# LANGUAGE BlockArguments #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
|
@ -10,14 +11,17 @@ module Djot
|
|||
where
|
||||
|
||||
import Control.Applicative (many, optional, some, (<|>))
|
||||
import Control.Monad.ST (runST)
|
||||
import Data.Foldable (for_)
|
||||
import Data.Functor (void, (<$>))
|
||||
import Data.List (elemIndex)
|
||||
import qualified Data.List.NonEmpty as NE
|
||||
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import IR
|
||||
import Logger (Logger (logCallStack, logDebug, logError))
|
||||
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset, someTill)
|
||||
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
|
||||
import Text.Megaparsec.Char (char, lowerChar, newline, numberChar, space, string, tab, upperChar)
|
||||
import Utilities.Parsing
|
||||
|
||||
|
|
@ -46,10 +50,10 @@ blockElement accumulated_attributes =
|
|||
try $ thematicBreak accumulated_attributes,
|
||||
lookAhead (string ":::") *> containerBlock accumulated_attributes,
|
||||
-- try used due to table having a non-trivial structure at the start
|
||||
try tableBlock accumulated_attributes,
|
||||
try $ tableBlock accumulated_attributes,
|
||||
-- using try due to ambiguity between these and normal text until we've already done some amount of parsing
|
||||
try footnoteDefinition accumulated_attributes,
|
||||
try referenceDef accumulated_attributes,
|
||||
try $ footnoteDefinition accumulated_attributes,
|
||||
try $ referenceDef accumulated_attributes,
|
||||
lookAhead (char '{') *> blockAttribute accumulated_attributes,
|
||||
paragraph accumulated_attributes
|
||||
]
|
||||
|
|
@ -86,7 +90,9 @@ blockQuote attrs = do
|
|||
parseError $ remap $ NE.head errs
|
||||
where
|
||||
blockQuote' = do
|
||||
pure $ BlockQuote (error "todo") attrs
|
||||
-- using document for convenience
|
||||
(Doc contents) <- document
|
||||
pure $ BlockQuote (Q contents) attrs
|
||||
bq_line = do
|
||||
char '>'
|
||||
space
|
||||
|
|
@ -98,7 +104,7 @@ listBlock attrs = do
|
|||
startOffset <- getOffset
|
||||
list_type <- listMarker
|
||||
first_item <- list_item_content
|
||||
rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator
|
||||
rem_items <- manyTill (listMarker' list_type *> list_item_content) $ lookAhead blockSeparator
|
||||
let items = first_item : rem_items
|
||||
case parse listBlock' "" $ error "todo: figure out how to take our list of items and put them toegether for easy parsing" of
|
||||
Right ret -> pure ret
|
||||
|
|
@ -127,6 +133,41 @@ lowerAlphabet = ['a' .. 'z']
|
|||
upperAlphabet :: [Char]
|
||||
upperAlphabet = ['A' .. 'Z']
|
||||
|
||||
-- inner parsing of roman numerals done via Gemini due to lazy
|
||||
romanValue :: Char -> Int
|
||||
romanValue 'I' = 1
|
||||
romanValue 'V' = 5
|
||||
romanValue 'X' = 10
|
||||
romanValue 'L' = 50
|
||||
romanValue 'C' = 100
|
||||
romanValue 'D' = 500
|
||||
romanValue 'M' = 1000
|
||||
romanValue _ = 0
|
||||
|
||||
romanToInt :: Text -> Int
|
||||
romanToInt s = runST $ do
|
||||
total <- newSTRef 0
|
||||
prevVal <- newSTRef 0
|
||||
|
||||
for_ (T.unpack s) $ \char -> do
|
||||
let curr = romanValue char
|
||||
p <- readSTRef prevVal
|
||||
|
||||
-- If current > previous (e.g., IV), we subtract the previous twice
|
||||
if curr > p
|
||||
then modifySTRef' total (\t -> t + curr - 2 * p)
|
||||
else modifySTRef' total (+ curr)
|
||||
|
||||
writeSTRef prevVal curr
|
||||
|
||||
readSTRef total
|
||||
|
||||
upperRoman :: [Char]
|
||||
upperRoman = "IVXLCDM"
|
||||
|
||||
lowerRoman :: [Char]
|
||||
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
|
||||
|
||||
-- consumes whitespace as well for convenience in element parser
|
||||
listMarker :: (Logger m, Characters s) => Parser s m ListType
|
||||
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
|
||||
|
|
@ -145,7 +186,14 @@ listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lo
|
|||
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = elemIndex letter upperAlphabet
|
||||
pure $ Ordered {start_number, style = Just "A"}
|
||||
roman_numeral = error "todo"
|
||||
lower_roman_numeral = do
|
||||
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||
pure $ Ordered {start_number, style = Just "i"}
|
||||
upper_roman_numeral = do
|
||||
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
|
||||
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||
pure $ Ordered {start_number, style = Just "I"}
|
||||
|
||||
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||
surroundParen parser = do
|
||||
|
|
@ -180,7 +228,7 @@ codeBlock attrs = do
|
|||
else
|
||||
Just $ toText language'
|
||||
code <- toText <$> manyTill anySingle codeFence
|
||||
pure $ Code $ C {language, code}
|
||||
pure $ Code (C {language, code}) attrs
|
||||
|
||||
blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||
blockAttribute attrs = do
|
||||
|
|
@ -310,4 +358,76 @@ inlineContent' opened = someTill (inlineElement opened) $ lookAhead blockSeparat
|
|||
inlineElement :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
inlineElement opened =
|
||||
choice
|
||||
[lookAhead (string "![") *> image, lookAhead]
|
||||
-- spamming try because backtracking is easier than having state for partial parses
|
||||
[ try $ image opened,
|
||||
try $ Djot.link opened,
|
||||
try $ autolink opened,
|
||||
try $ verbatim opened,
|
||||
try $ emphasis opened,
|
||||
try $ highlight opened,
|
||||
try $ superscript opened,
|
||||
try $ subscript opened,
|
||||
try $ insert opened,
|
||||
try $ math opened,
|
||||
try $ footnoteRef opened,
|
||||
try $ linebreak opened,
|
||||
try $ symbol opened,
|
||||
try $ rawInline opened,
|
||||
try $ Djot.span opened,
|
||||
try $ inlineAttribute opened,
|
||||
plainText
|
||||
]
|
||||
|
||||
image :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
image opened = error "todo"
|
||||
|
||||
link :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
link opened = error "todo"
|
||||
|
||||
autolink :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
autolink opened = error "todo"
|
||||
|
||||
verbatim :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
verbatim opened = error "todo"
|
||||
|
||||
emphasis :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
emphasis opened = error "todo"
|
||||
|
||||
highlight :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
highlight opened = error "todo"
|
||||
|
||||
superscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
superscript opene = error "todo"
|
||||
|
||||
subscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
subscript opened = error "todo"
|
||||
|
||||
insert :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
insert opened = error "todo"
|
||||
|
||||
delete :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
delete = error "todo"
|
||||
|
||||
math :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
math opened = error "todo"
|
||||
|
||||
footnoteRef :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
footnoteRef opened = error "todo"
|
||||
|
||||
linebreak :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
linebreak opened = error "todo"
|
||||
|
||||
symbol :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
symbol opened = error "todo"
|
||||
|
||||
rawInline :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
rawInline opened = error "todo"
|
||||
|
||||
span :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
span opened = error "todo"
|
||||
|
||||
inlineAttribute :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||
inlineAttribute opened = error "todo"
|
||||
|
||||
plainText :: (Logger m, Characters s) => Parser s m InlineText
|
||||
plainText = error "todo"
|
||||
|
|
|
|||
|
|
@ -49,16 +49,16 @@ spaceSep = T.intercalate " "
|
|||
elementToHTML :: Element -> T.Text
|
||||
elementToHTML (Heading header attrs) = T.concat ["<h", tshow header.level, headerAttrs header attrs, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
|
||||
--
|
||||
elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
|
||||
elementToHTML (Code code_block attrs) = T.concat ["<pre class=\"sourceCode ", language, "\" ", handleAttrs attrs, "><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
|
||||
where
|
||||
language = fromMaybe "" code_block.language
|
||||
elementToHTML (BlockQuote (Q elems)) = T.concat ["<blockquote>", elementsToHTML elems, "</blockquote>"]
|
||||
elementToHTML (BlockQuote (Q elems) attrs) = T.concat ["<blockquote ", handleAttrs attrs, ">", elementsToHTML elems, "</blockquote>"]
|
||||
elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = spaceSep ["<ol", maybe "" handleStart start_number, maybe "" handleStyle style, ">", generateLiElems items, "</ol>"]
|
||||
elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = spaceSep ["<ul", maybe "" handleStyle style, ">", generateLiElems items, "</ul>"]
|
||||
elementToHTML (HTML (HTMLTag {html_content})) = html_content
|
||||
elementToHTML (Paragraph (P snippets) attrs) = spaceSep ["<p", handleAttrs attrs, ">", serializeInlineToHTML snippets, "</p>"]
|
||||
elementToHTML (Transparent snippets) = serializeInlineToHTML snippets
|
||||
elementToHTML HorizontalRule = "<hr>"
|
||||
elementToHTML (HorizontalRule attrs) = "<hr " <> handleAttrs attrs <> ">"
|
||||
elementToHTML (Table _ _) = error "TODO"
|
||||
elementToHTML (Container _ _) = error "TODO"
|
||||
elementToHTML (Footnote _ _) = error "TODO"
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ fencedCodeBlock = between (string "```") (string "```") $ do
|
|||
lineEnding
|
||||
code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle))
|
||||
let language = if language' == "" then Just language' else Nothing
|
||||
pure $ Code $ C {language, code}
|
||||
pure $ Code (C {language, code}) mempty
|
||||
|
||||
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
|
||||
blockquoteBlock = do
|
||||
|
|
|
|||
Loading…
Reference in a new issue