bunch of work on djot
This commit is contained in:
parent
cc4f0b7672
commit
1a2feb95e9
3 changed files with 133 additions and 13 deletions
138
src/Djot.hs
138
src/Djot.hs
|
|
@ -1,3 +1,4 @@
|
||||||
|
{-# LANGUAGE BlockArguments #-}
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE RankNTypes #-}
|
{-# LANGUAGE RankNTypes #-}
|
||||||
|
|
@ -10,14 +11,17 @@ module Djot
|
||||||
where
|
where
|
||||||
|
|
||||||
import Control.Applicative (many, optional, some, (<|>))
|
import Control.Applicative (many, optional, some, (<|>))
|
||||||
|
import Control.Monad.ST (runST)
|
||||||
|
import Data.Foldable (for_)
|
||||||
import Data.Functor (void, (<$>))
|
import Data.Functor (void, (<$>))
|
||||||
import Data.List (elemIndex)
|
import Data.List (elemIndex)
|
||||||
import qualified Data.List.NonEmpty as NE
|
import qualified Data.List.NonEmpty as NE
|
||||||
|
import Data.STRef (modifySTRef', newSTRef, readSTRef, writeSTRef)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import IR
|
import IR
|
||||||
import Logger (Logger (logCallStack, logDebug, logError))
|
import Logger (Logger (logCallStack, logDebug, logError))
|
||||||
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, sepBy, setErrorOffset, someTill)
|
import Text.Megaparsec (MonadParsec (lookAhead, notFollowedBy, parseError, try), ParseErrorBundle (ParseErrorBundle, bundleErrors), SourcePos (sourceColumn), anySingle, choice, errorOffset, getInput, getOffset, manyTill, parse, satisfy, sepBy, setErrorOffset, someTill)
|
||||||
import Text.Megaparsec.Char (char, lowerChar, newline, numberChar, space, string, tab, upperChar)
|
import Text.Megaparsec.Char (char, lowerChar, newline, numberChar, space, string, tab, upperChar)
|
||||||
import Utilities.Parsing
|
import Utilities.Parsing
|
||||||
|
|
||||||
|
|
@ -46,10 +50,10 @@ blockElement accumulated_attributes =
|
||||||
try $ thematicBreak accumulated_attributes,
|
try $ thematicBreak accumulated_attributes,
|
||||||
lookAhead (string ":::") *> containerBlock accumulated_attributes,
|
lookAhead (string ":::") *> containerBlock accumulated_attributes,
|
||||||
-- try used due to table having a non-trivial structure at the start
|
-- try used due to table having a non-trivial structure at the start
|
||||||
try tableBlock accumulated_attributes,
|
try $ tableBlock accumulated_attributes,
|
||||||
-- using try due to ambiguity between these and normal text until we've already done some amount of parsing
|
-- using try due to ambiguity between these and normal text until we've already done some amount of parsing
|
||||||
try footnoteDefinition accumulated_attributes,
|
try $ footnoteDefinition accumulated_attributes,
|
||||||
try referenceDef accumulated_attributes,
|
try $ referenceDef accumulated_attributes,
|
||||||
lookAhead (char '{') *> blockAttribute accumulated_attributes,
|
lookAhead (char '{') *> blockAttribute accumulated_attributes,
|
||||||
paragraph accumulated_attributes
|
paragraph accumulated_attributes
|
||||||
]
|
]
|
||||||
|
|
@ -86,7 +90,9 @@ blockQuote attrs = do
|
||||||
parseError $ remap $ NE.head errs
|
parseError $ remap $ NE.head errs
|
||||||
where
|
where
|
||||||
blockQuote' = do
|
blockQuote' = do
|
||||||
pure $ BlockQuote (error "todo") attrs
|
-- using document for convenience
|
||||||
|
(Doc contents) <- document
|
||||||
|
pure $ BlockQuote (Q contents) attrs
|
||||||
bq_line = do
|
bq_line = do
|
||||||
char '>'
|
char '>'
|
||||||
space
|
space
|
||||||
|
|
@ -98,7 +104,7 @@ listBlock attrs = do
|
||||||
startOffset <- getOffset
|
startOffset <- getOffset
|
||||||
list_type <- listMarker
|
list_type <- listMarker
|
||||||
first_item <- list_item_content
|
first_item <- list_item_content
|
||||||
rem_items <- manyTill (listMarker *> list_item_content) $ lookAhead blockSeparator
|
rem_items <- manyTill (listMarker' list_type *> list_item_content) $ lookAhead blockSeparator
|
||||||
let items = first_item : rem_items
|
let items = first_item : rem_items
|
||||||
case parse listBlock' "" $ error "todo: figure out how to take our list of items and put them toegether for easy parsing" of
|
case parse listBlock' "" $ error "todo: figure out how to take our list of items and put them toegether for easy parsing" of
|
||||||
Right ret -> pure ret
|
Right ret -> pure ret
|
||||||
|
|
@ -127,6 +133,41 @@ lowerAlphabet = ['a' .. 'z']
|
||||||
upperAlphabet :: [Char]
|
upperAlphabet :: [Char]
|
||||||
upperAlphabet = ['A' .. 'Z']
|
upperAlphabet = ['A' .. 'Z']
|
||||||
|
|
||||||
|
-- inner parsing of roman numerals done via Gemini due to lazy
|
||||||
|
romanValue :: Char -> Int
|
||||||
|
romanValue 'I' = 1
|
||||||
|
romanValue 'V' = 5
|
||||||
|
romanValue 'X' = 10
|
||||||
|
romanValue 'L' = 50
|
||||||
|
romanValue 'C' = 100
|
||||||
|
romanValue 'D' = 500
|
||||||
|
romanValue 'M' = 1000
|
||||||
|
romanValue _ = 0
|
||||||
|
|
||||||
|
romanToInt :: Text -> Int
|
||||||
|
romanToInt s = runST $ do
|
||||||
|
total <- newSTRef 0
|
||||||
|
prevVal <- newSTRef 0
|
||||||
|
|
||||||
|
for_ (T.unpack s) $ \char -> do
|
||||||
|
let curr = romanValue char
|
||||||
|
p <- readSTRef prevVal
|
||||||
|
|
||||||
|
-- If current > previous (e.g., IV), we subtract the previous twice
|
||||||
|
if curr > p
|
||||||
|
then modifySTRef' total (\t -> t + curr - 2 * p)
|
||||||
|
else modifySTRef' total (+ curr)
|
||||||
|
|
||||||
|
writeSTRef prevVal curr
|
||||||
|
|
||||||
|
readSTRef total
|
||||||
|
|
||||||
|
upperRoman :: [Char]
|
||||||
|
upperRoman = "IVXLCDM"
|
||||||
|
|
||||||
|
lowerRoman :: [Char]
|
||||||
|
lowerRoman = T.unpack $ T.toLower $ T.pack upperRoman
|
||||||
|
|
||||||
-- consumes whitespace as well for convenience in element parser
|
-- consumes whitespace as well for convenience in element parser
|
||||||
listMarker :: (Logger m, Characters s) => Parser s m ListType
|
listMarker :: (Logger m, Characters s) => Parser s m ListType
|
||||||
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
|
listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lower_roman_numeral, try upper_roman_numeral] <* space
|
||||||
|
|
@ -145,7 +186,14 @@ listMarker = choice [bullet, decimal, try lower_letter, try upper_letter, try lo
|
||||||
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
|
letter <- choice $ map ($ upperChar) [try . surroundParen, try . rightParen, point]
|
||||||
let start_number = elemIndex letter upperAlphabet
|
let start_number = elemIndex letter upperAlphabet
|
||||||
pure $ Ordered {start_number, style = Just "A"}
|
pure $ Ordered {start_number, style = Just "A"}
|
||||||
roman_numeral = error "todo"
|
lower_roman_numeral = do
|
||||||
|
numeral <- choice $ map ($ some (satisfy (`elem` lowerRoman))) [try . surroundParen, try . rightParen, point]
|
||||||
|
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||||
|
pure $ Ordered {start_number, style = Just "i"}
|
||||||
|
upper_roman_numeral = do
|
||||||
|
numeral <- choice $ map ($ some (satisfy (`elem` upperRoman))) [try . surroundParen, try . rightParen, point]
|
||||||
|
let start_number = Just $ romanToInt $ T.toUpper $ toText numeral
|
||||||
|
pure $ Ordered {start_number, style = Just "I"}
|
||||||
|
|
||||||
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
surroundParen :: (Logger m, Characters s) => Parser s m a -> Parser s m a
|
||||||
surroundParen parser = do
|
surroundParen parser = do
|
||||||
|
|
@ -180,7 +228,7 @@ codeBlock attrs = do
|
||||||
else
|
else
|
||||||
Just $ toText language'
|
Just $ toText language'
|
||||||
code <- toText <$> manyTill anySingle codeFence
|
code <- toText <$> manyTill anySingle codeFence
|
||||||
pure $ Code $ C {language, code}
|
pure $ Code (C {language, code}) attrs
|
||||||
|
|
||||||
blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
blockAttribute :: (Logger m, Characters s) => Attrs -> Parser s m Element
|
||||||
blockAttribute attrs = do
|
blockAttribute attrs = do
|
||||||
|
|
@ -310,4 +358,76 @@ inlineContent' opened = someTill (inlineElement opened) $ lookAhead blockSeparat
|
||||||
inlineElement :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
inlineElement :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
inlineElement opened =
|
inlineElement opened =
|
||||||
choice
|
choice
|
||||||
[lookAhead (string "![") *> image, lookAhead]
|
-- spamming try because backtracking is easier than having state for partial parses
|
||||||
|
[ try $ image opened,
|
||||||
|
try $ Djot.link opened,
|
||||||
|
try $ autolink opened,
|
||||||
|
try $ verbatim opened,
|
||||||
|
try $ emphasis opened,
|
||||||
|
try $ highlight opened,
|
||||||
|
try $ superscript opened,
|
||||||
|
try $ subscript opened,
|
||||||
|
try $ insert opened,
|
||||||
|
try $ math opened,
|
||||||
|
try $ footnoteRef opened,
|
||||||
|
try $ linebreak opened,
|
||||||
|
try $ symbol opened,
|
||||||
|
try $ rawInline opened,
|
||||||
|
try $ Djot.span opened,
|
||||||
|
try $ inlineAttribute opened,
|
||||||
|
plainText
|
||||||
|
]
|
||||||
|
|
||||||
|
image :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
image opened = error "todo"
|
||||||
|
|
||||||
|
link :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
link opened = error "todo"
|
||||||
|
|
||||||
|
autolink :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
autolink opened = error "todo"
|
||||||
|
|
||||||
|
verbatim :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
verbatim opened = error "todo"
|
||||||
|
|
||||||
|
emphasis :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
emphasis opened = error "todo"
|
||||||
|
|
||||||
|
highlight :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
highlight opened = error "todo"
|
||||||
|
|
||||||
|
superscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
superscript opene = error "todo"
|
||||||
|
|
||||||
|
subscript :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
subscript opened = error "todo"
|
||||||
|
|
||||||
|
insert :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
insert opened = error "todo"
|
||||||
|
|
||||||
|
delete :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
delete = error "todo"
|
||||||
|
|
||||||
|
math :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
math opened = error "todo"
|
||||||
|
|
||||||
|
footnoteRef :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
footnoteRef opened = error "todo"
|
||||||
|
|
||||||
|
linebreak :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
linebreak opened = error "todo"
|
||||||
|
|
||||||
|
symbol :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
symbol opened = error "todo"
|
||||||
|
|
||||||
|
rawInline :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
rawInline opened = error "todo"
|
||||||
|
|
||||||
|
span :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
span opened = error "todo"
|
||||||
|
|
||||||
|
inlineAttribute :: (Logger m, Characters s) => [OpenInline] -> Parser s m InlineText
|
||||||
|
inlineAttribute opened = error "todo"
|
||||||
|
|
||||||
|
plainText :: (Logger m, Characters s) => Parser s m InlineText
|
||||||
|
plainText = error "todo"
|
||||||
|
|
|
||||||
|
|
@ -49,16 +49,16 @@ spaceSep = T.intercalate " "
|
||||||
elementToHTML :: Element -> T.Text
|
elementToHTML :: Element -> T.Text
|
||||||
elementToHTML (Heading header attrs) = T.concat ["<h", tshow header.level, headerAttrs header attrs, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
|
elementToHTML (Heading header attrs) = T.concat ["<h", tshow header.level, headerAttrs header attrs, ">", serializeInlineToHTML header.text, "</h", tshow header.level, ">"]
|
||||||
--
|
--
|
||||||
elementToHTML (Code code_block) = T.concat ["<pre class=\"sourceCode ", language, "\"><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
|
elementToHTML (Code code_block attrs) = T.concat ["<pre class=\"sourceCode ", language, "\" ", handleAttrs attrs, "><code class=\"sourceCode ", language, "\">", escapeText code_block.code, "</code>", "</pre>"]
|
||||||
where
|
where
|
||||||
language = fromMaybe "" code_block.language
|
language = fromMaybe "" code_block.language
|
||||||
elementToHTML (BlockQuote (Q elems)) = T.concat ["<blockquote>", elementsToHTML elems, "</blockquote>"]
|
elementToHTML (BlockQuote (Q elems) attrs) = T.concat ["<blockquote ", handleAttrs attrs, ">", elementsToHTML elems, "</blockquote>"]
|
||||||
elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = spaceSep ["<ol", maybe "" handleStart start_number, maybe "" handleStyle style, ">", generateLiElems items, "</ol>"]
|
elementToHTML (List (L {list_type = Ordered {start_number, style}, items}) attrs) = spaceSep ["<ol", maybe "" handleStart start_number, maybe "" handleStyle style, ">", generateLiElems items, "</ol>"]
|
||||||
elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = spaceSep ["<ul", maybe "" handleStyle style, ">", generateLiElems items, "</ul>"]
|
elementToHTML (List (L {list_type = Unordered {style}, items}) attrs) = spaceSep ["<ul", maybe "" handleStyle style, ">", generateLiElems items, "</ul>"]
|
||||||
elementToHTML (HTML (HTMLTag {html_content})) = html_content
|
elementToHTML (HTML (HTMLTag {html_content})) = html_content
|
||||||
elementToHTML (Paragraph (P snippets) attrs) = spaceSep ["<p", handleAttrs attrs, ">", serializeInlineToHTML snippets, "</p>"]
|
elementToHTML (Paragraph (P snippets) attrs) = spaceSep ["<p", handleAttrs attrs, ">", serializeInlineToHTML snippets, "</p>"]
|
||||||
elementToHTML (Transparent snippets) = serializeInlineToHTML snippets
|
elementToHTML (Transparent snippets) = serializeInlineToHTML snippets
|
||||||
elementToHTML HorizontalRule = "<hr>"
|
elementToHTML (HorizontalRule attrs) = "<hr " <> handleAttrs attrs <> ">"
|
||||||
elementToHTML (Table _ _) = error "TODO"
|
elementToHTML (Table _ _) = error "TODO"
|
||||||
elementToHTML (Container _ _) = error "TODO"
|
elementToHTML (Container _ _) = error "TODO"
|
||||||
elementToHTML (Footnote _ _) = error "TODO"
|
elementToHTML (Footnote _ _) = error "TODO"
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ fencedCodeBlock = between (string "```") (string "```") $ do
|
||||||
lineEnding
|
lineEnding
|
||||||
code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle))
|
code <- T.pack <$> (many ((notFollowedBy $ string "```") *> anySingle))
|
||||||
let language = if language' == "" then Just language' else Nothing
|
let language = if language' == "" then Just language' else Nothing
|
||||||
pure $ Code $ C {language, code}
|
pure $ Code (C {language, code}) mempty
|
||||||
|
|
||||||
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
|
blockquoteBlock :: (Logger m, Characters s) => Parser s m Element
|
||||||
blockquoteBlock = do
|
blockquoteBlock = do
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue