{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}

-- (document, metadata)
module Markdown where

import Control.Applicative (many, optional, some, (<|>))
import Control.Monad (guard, void)
import Data.Char (isAlpha)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Void (Void)
import IR
import Text.Megaparsec (Parsec, anySingle, anySingleBut, between, choice, count, eof, manyTill, notFollowedBy, satisfy, skipSome, try, (<?>))
import qualified Text.Megaparsec as MP
import Text.Megaparsec.Char (alphaNumChar, char, digitChar, string)

type Parser = Parsec Void String

anyChar :: Parser Char
anyChar = anySingle

alphaNum :: Parser Char
alphaNum = alphaNumChar

digit :: Parser Char
digit = digitChar

noneOf :: [Char] -> Parser Char
noneOf = MP.noneOf

oneOf :: [Char] -> Parser Char
oneOf = MP.oneOf

optionMaybe :: Parser a -> Parser (Maybe a)
optionMaybe = optional

skipMany1 :: Parser a -> Parser ()
skipMany1 = skipSome

metadata :: Parser Text
metadata = bound *> fmap T.pack (many $ try (char '-' <* notFollowedBy (string "--")) <|> anySingleBut '-') <* bound
  where
    bound = string "---"

document :: Parser Document
document = Doc <$> many element <* eof

element :: Parser Element
element =
  choice
    [ try headingBlock <?> "Element Heading",
      try fencedCodeBlock <?> "Fenced Code Block",
      try indentedCodeBlock <?> "Indented Code Block",
      try blockquoteBlock <?> "BlockQuote",
      try unorderedListBlock <?> "Unordered List",
      try orderedListBlock <?> "Ordered List",
      try horizontalRuleBlock <?> "Horizontal Rule",
      try htmlBlock <?> "HTML Block",
      try blankLines <?> "Blank Lines", -- Consume blank lines but don't add to AST
      paragraphBlock <?> "Paragarph"
    ]

-- Blank lines (consumed but not stored)
blankLines :: Parser Element
blankLines = do
  skipMany1 blankLine
  element <|> fmap (const $ HTML HTMLTag {html_content = ""}) eof -- Parse the next element (or handle eof)

blankLine :: Parser ()
blankLine = do
  many (char ' ' <|> char '\t')
  lineEnding
  pure ()

-- Heading Block
headingBlock :: Parser Element
headingBlock = do
  hashes <- some (char '#') <?> "Heading Hashes"
  let level = length hashes
  guard (level <= 6) <?> "Higher than level 6"
  many (char ' ' <|> char '\t') <?> "Pre-Text Whitespace"
  content <- manyTill (inlineElement <?> "Header Text") (try lineEnding <?> "Header Ending")
  pure $ Heading $ H level content

-- Fenced Code Block
fencedCodeBlock :: Parser Element
fencedCodeBlock = do
  fence <- string "```" <|> string "~~~"
  lang <- optionMaybe languageInfo
  lineEnding
  codeLines <- manyTill codeLine (try $ string fence)
  pure $ Code $ C lang (T.pack $ unlines codeLines)

languageInfo :: Parser Text
languageInfo = T.pack <$> some (alphaNum <|> char '-' <|> char '+' <|> char '.')

codeLine :: Parser String
codeLine = do
  line <- many $ noneOf "\n\r"
  lineEnding
  pure line

-- Indented Code Block
indentedCodeBlock :: Parser Element
indentedCodeBlock = do
  lines' <- some indentedLine
  pure $ Code $ C Nothing (T.pack $ unlines lines')
  where
    indentedLine = do
      count 4 (char ' ' <|> char '\t')
      line <- many $ noneOf "\n\r"
      lineEnding
      pure line

-- Blockquote Block
blockquoteBlock :: Parser Element
blockquoteBlock = do
  lines' <- some blockquoteLine
  pure $ BlockQuote $ Q (concat lines')
  where
    blockquoteLine = do
      char '>'
      optional (char ' ')
      content <- manyTill inlineElement (try lineEnding)
      pure content

-- Horizontal Rule Block
horizontalRuleBlock :: Parser Element
horizontalRuleBlock = do
  choice
    [ try (count 3 (char '*') >> many (char ' ' <|> char '*')),
      try (count 3 (char '-') >> many (char ' ' <|> char '-')),
      try (count 3 (char '_') >> many (char ' ' <|> char '_'))
    ]
  lineEnding
  pure HorizontalRule

-- Unordered List Block
unorderedListBlock :: Parser Element
unorderedListBlock = do
  items <- some unorderedListItem
  pure $ List $ L Unordered items

unorderedListItem :: Parser ListItem
unorderedListItem = do
  oneOf "*-+"
  char ' ' <|> char '\t'
  content <- manyTill inlineElement (try lineEnding)
  -- continuations <- many listContinuation
  children <- many (try indentedList)
  pure $ LI content children

listContinuation :: Parser [InlineText]
listContinuation = do
  count 2 (char ' ' <|> char '\t')
  many (char ' ' <|> char '\t')
  notFollowedBy (oneOf "*-+")
  notFollowedBy (digit >> char '.')
  content <- manyTill inlineElement (try lineEnding)
  pure content

-- TODO: handle list indentation at all levels
indentedList :: Parser List
indentedList = do
  let n = 1
  void $ count (4 * n) (char ' ') <|> count n (char '\t')
  choice [try indentedUnorderedList, indentedOrderedList]

indentedUnorderedList :: Parser List
indentedUnorderedList = do
  items <- some (try $ indentedListItem (oneOf "*-+" >> void (char ' ' <|> char '\t')))
  pure $ L Unordered items

indentedOrderedList :: Parser List
indentedOrderedList = do
  items <- some (try $ indentedListItem (some digit >> char '.' >> void (char ' ' <|> char '\t')))
  pure $ L Ordered items

indentedListItem :: Parser () -> Parser ListItem
indentedListItem marker = do
  marker
  content <- manyTill inlineElement (try $ lineEnding <|> eof)
  pure $ LI content []

-- Ordered List Block
orderedListBlock :: Parser Element
orderedListBlock = do
  items <- some orderedListItem
  pure $ List $ L Ordered items

orderedListItem :: Parser ListItem
orderedListItem = do
  some digit
  char '.'
  char ' ' <|> char '\t'
  content <- manyTill inlineElement (try lineEnding)
  continuations <- many listContinuation
  children <- many (try indentedList)
  pure $ LI (content ++ concat continuations) children

-- HTML Block
htmlBlock :: Parser Element
htmlBlock = do
  char '<'
  -- Capture the entire HTML block as raw text
  rest <- manyTill anyChar (try $ char '>' >> lineEnding)
  let content = '<' : rest
  return $ HTML $ HTMLTag (T.pack content)

tagName :: Parser String
tagName = do
  first <- satisfy isAlpha
  rest <- many (alphaNum <|> char '-' <|> char ':')
  pure (first : rest)

attribute :: Parser (Text, Maybe Text)
attribute = do
  name <- attributeName
  value <- optionMaybe (char '=' >> attributeValue)
  pure (T.pack name, fmap T.pack value)

attributeName :: Parser String
attributeName = do
  first <- satisfy isAlpha
  rest <- many (alphaNum <|> char '-' <|> char ':')
  pure (first : rest)

attributeValue :: Parser String
attributeValue =
  choice
    [ between (char '"') (char '"') (many $ anySingleBut '"'),
      between (char '\'') (char '\'') (many $ anySingleBut '\''),
      some $ noneOf " \t\n\r>\"'=<`"
    ]

-- Paragraph Block
paragraphBlock :: Parser Element
paragraphBlock = do
  content <- some inlineElement
  lineEnding <|> eof
  pure $ Paragraph $ P content

-- Inline Elements
inlineElement :: Parser InlineText
inlineElement =
  choice
    [ try strong <?> "Inline Strong Text",
      try emphasis <?> "Inline Italic Text",
      try crossedText <?> "Inline Crossed Text",
      try codeSpan <?> "Inline Code",
      try image <?> "Inline Image",
      try link <?> "Inline Link",
      try htmlInline <?> "Inline HTML",
      try escapedChar <?> "Escaped Character",
      plainText <?> "Inline Plain Text"
    ]

-- Strong (Bold)
strong :: Parser InlineText
strong = strongAsterisk <|> strongUnderscore

strongAsterisk :: Parser InlineText
strongAsterisk = do
  string "**"
  content <- some (notFollowedBy (string "**") >> inlineElement)
  string "**"
  pure $ Bold content

strongUnderscore :: Parser InlineText
strongUnderscore = do
  string "__"
  content <- some (notFollowedBy (string "__") >> inlineElement)
  string "__"
  pure $ Bold content

crossedText :: Parser InlineText
crossedText = do
  string "~~"
  content <- some (notFollowedBy (string "~~") >> inlineElement)
  string "~~"
  pure $ Crossed content

-- Emphasis (Italic)
emphasis :: Parser InlineText
emphasis = emphasisAsterisk <|> emphasisUnderscore

emphasisAsterisk :: Parser InlineText
emphasisAsterisk = do
  char '*'
  content <- some (notFollowedBy (char '*') >> inlineElementNoAsterisk)
  char '*'
  pure $ Italic content

emphasisUnderscore :: Parser InlineText
emphasisUnderscore = do
  char '_'
  content <- some inlineElementNoUnderscore
  char '_'
  pure $ Italic content

inlineElementNo :: Char -> Parser InlineText
inlineElementNo c =
  choice
    [ try strong,
      try codeSpan,
      try image,
      try link,
      try htmlInline,
      try escapedChar,
      plainTextNo [c]
    ]

plainTextNo :: [Char] -> Parser InlineText
plainTextNo disallow = do
  firstChar <- noneOf disallow <?> "Plain Text Initial Disallow"
  remChars <- manyTill (plainTextCharNo disallow) lineEnding <?> "Remaining Characters"
  pure $ Text $ T.map wspHandler $ T.pack $ firstChar : remChars
  where
    wspHandler '\n' = ' '
    wspHandler c = c

inlineElementNoAsterisk :: Parser InlineText
inlineElementNoAsterisk = inlineElementNo '*'

inlineElementNoUnderscore :: Parser InlineText
inlineElementNoUnderscore = inlineElementNo '_'

-- Code Span
codeSpan :: Parser InlineText
codeSpan =
  choice
    [ try tripleBacktick,
      try doubleBacktick,
      singleBacktick
    ]
  where
    singleBacktick = do
      char '`'
      content <- many $ noneOf "`\n\r"
      char '`'
      pure $ InlineCode (T.pack content)
    doubleBacktick = do
      string "``"
      content <- manyTill anyChar (try $ string "``")
      pure $ InlineCode (T.pack content)
    tripleBacktick = do
      string "```"
      content <- manyTill anyChar (try $ string "```")
      pure $ InlineCode (T.pack content)

-- Image
image :: Parser InlineText
image = do
  char '!'
  char '['
  alt <- T.pack <$> many (noneOf "]\n\r")
  char ']'
  (url, title) <- linkDestination
  return $ Image {altText = alt, url = url, title = title}

-- Link
link :: Parser InlineText
link = do
  char '['
  content <- some (notFollowedBy (char ']') >> inlineElementNoBracket)
  char ']'
  (url, title) <- linkDestination
  pure $ Link content url title

inlineElementNoBracket :: Parser InlineText
inlineElementNoBracket =
  choice
    [ try strong,
      try emphasis,
      try codeSpan,
      try htmlInline,
      try escapedChar,
      plainTextNoBracket
    ]

linkDestination :: Parser (Text, Maybe Text)
linkDestination = directLink <|> referenceLink
  where
    directLink = do
      char '('
      url <- many $ noneOf " \t\n\r)"
      title <- optionMaybe (try $ some (char ' ' <|> char '\t') >> titleParser)
      char ')'
      pure (T.pack url, title)
    referenceLink = do
      char '['
      ref <- some (alphaNum <|> char ' ' <|> char '\t')
      char ']'
      -- For simplicity, we're not resolving references here
      -- In a real implementation, you'd look up the reference
      pure (T.pack $ "[" ++ ref ++ "]", Nothing)

titleParser :: Parser Text
titleParser =
  T.pack
    <$> choice
      [ between (char '"') (char '"') (many $ anySingleBut '"'),
        between (char '\'') (char '\'') (many $ anySingleBut '\''),
        between (char '(') (char ')') (many $ anySingleBut ')')
      ]

-- HTML Inline
htmlInline :: Parser InlineText
htmlInline = do
  start <- char '<'
  content <- manyTill anyChar (try $ char '>')
  return $ HTMLInline (T.pack (start : content ++ ">"))

-- Escaped Character
escapedChar :: Parser InlineText
escapedChar = do
  char '\\'
  c <- satisfy (\x -> x >= '!' && x <= '~')
  pure $ Text (T.singleton c)

-- Plain Text
plainText :: Parser InlineText
plainText = plainTextNo [] <?> "Baseline Plain Text"

plainTextBaseDisallow :: [Char]
plainTextBaseDisallow = "[~`_*<"

plainTextCharNo :: [Char] -> Parser Char
plainTextCharNo additional = noneOf $ additional <> plainTextBaseDisallow

plainTextNoAsterisk :: Parser InlineText
plainTextNoAsterisk = plainTextNo "*"

plainTextNoUnderscore :: Parser InlineText
plainTextNoUnderscore = plainTextNo "_"

plainTextNoBracket :: Parser InlineText
plainTextNoBracket = plainTextNo "[]"

-- Helper Parsers
lineEnding :: Parser ()
lineEnding = void (try $ count 2 (try (string "\r\n") <|> try (string "\n") <|> string "\r")) <|> eof

wsParser :: Parser ()
wsParser = void $ some (char ' ' <|> char '\t')