From 5e528a1484533e64815db07608dc7fb1613fa36f Mon Sep 17 00:00:00 2001
From: jwansek <eddie.atten.ea29@gmail.com>
Date: Mon, 15 Mar 2021 16:14:47 +0000
Subject: switched markdown parser, added greentexts

---
 Dockerfile       |   2 +-
 parser.py        | 159 ++++++++++---------------------------------------------
 requirements.txt |   3 ++
 shark.md         |   3 --
 static/index.md  |   4 +-
 static/style.css |   8 +++
 6 files changed, 41 insertions(+), 138 deletions(-)
 delete mode 100755 shark.md

diff --git a/Dockerfile b/Dockerfile
index e9d0fa4..7ad7f42 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 FROM ubuntu:latest
 MAINTAINER Eden Attenborough "eda@e.email"
 RUN apt-get update -y
-RUN apt-get install -y python3-pip python-dev build-essential
+RUN apt-get install -y python3-pip python-dev build-essential clang libffi libffi-dev
 COPY . /app
 WORKDIR /app
 RUN pip3 install -r requirements.txt
diff --git a/parser.py b/parser.py
index 8fb7408..1ba94cd 100755
--- a/parser.py
+++ b/parser.py
@@ -1,26 +1,40 @@
 #!/usr/bin/env python3
 
 from urllib.parse import urlparse
+from pygments import highlight
+from pygments.formatters import HtmlFormatter, ClassNotFound
+from pygments.lexers import get_lexer_by_name
 import webbrowser
 import database
 import argparse
 import getpass
+import houdini
+import misaka
 import app
 import sys
 import re
 import os
 
-# DISCLAIMER
-# There is almost certainly a python package to
-# do this better. I wanted to do it myself as a challenge.
-
-# TODO:
-#   - Add table formatting
-#   - Fix <br>s with newlines
-#   - Fix nested markdown elements
-
-HEADER_INCREMENTER = 1
-IMAGE_TYPES = [".png", ".jpg"]
+class HighlighterRenderer(misaka.SaferHtmlRenderer):
+    """Markdown renderer with pygments code highlighting and greentext-style quotes."""
+
+    def blockcode(self, text, lang):
+        """Return ``text`` highlighted by the pygments lexer for ``lang``, or escaped in a plain <pre>."""
+        try:
+            lexer = get_lexer_by_name(lang, stripall=True)
+        except ClassNotFound:
+            # no lexer for this language name: emit the code HTML-escaped, unhighlighted
+            return '\n<pre><code>{}</code></pre>\n'.format(houdini.escape_html(text.strip()))
+        return highlight(text, lexer, HtmlFormatter())
+
+    def blockquote(self, content):
+        """Return a <blockquote> holding one escaped <span class="quote"> per line of ``content``."""
+        # NOTE(review): presumably drops the '<p>' / '</p>\n' wrapper around the quote body -- confirm
+        content = content[3:-5]
+        spans = ''.join(
+            '\n<span class="quote">{}</span><br>'.format(line)
+            for line in houdini.escape_html(content.strip()).split("\n"))
+        return '\n<blockquote>' + spans + '\n</blockquote>'
 
 def get_thought_from_id(db, id_):
     category_name, title, dt, markdown = db.get_thought(id_)
@@ -33,127 +47,10 @@ def parse_file(path):
     return parse_text(unformatted)    
 
 def parse_text(unformatted):
-    formatted = parse_headers(unformatted)
-    formatted = parse_asteriscs(formatted)
-    formatted = parse_links(formatted)
-    formatted = parse_code(formatted)
-    formatted = parse_lists(formatted)
-    formatted = add_linebreaks(formatted)
-
-    return formatted
-
-def parse_headers(test_str):
-    regex = r"^#{1,5}\s\w.*$"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        # work out if its h2, h3 etc. from the number of #s
-        headerNo = len(match.group().split(" ")[0]) + HEADER_INCREMENTER
-        
-        replacement = "<h%i>%s</h%i>" % (headerNo, " ".join(match.group().split(" ")[1:]), headerNo)
-
-        #don't use .replace() in the unlikely case the the regex hit appears in a block
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        #replacing the hits fucks up the indexes, accommodate for this
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
-
-def parse_asteriscs(test_str):
-    regex = r"(?<!\\)\*{1,3}.*?\*{1,3}"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        if len(re.findall(r"\*{1,3}.*?\\\*{1,3}", match.group())) == 0:     #need to find a way of doing this with regexes
-            if match.group().startswith(re.findall(r"\w\*{1,3}", match.group())[0][1:]):    #this too
-                if match.group().startswith("***"):
-                    replacement = "<b><i>%s</i></b>" % (match.group()[3:-3])
-                elif match.group().startswith("**"):
-                    replacement = "<b>%s</b>" % (match.group()[2:-2])
-                else:
-                    replacement = "<i>%s</i>" % (match.group()[1:-1])
-        
-                test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-                offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
-
-def parse_links(test_str):
-    regex = r"(?<!\\)\[.*?\]\(.*?\)"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        s = match.group().split("(")
-        label = s[0][1:-1]
-        url = s[1][:-1]
-
-        if os.path.splitext(urlparse(url).path)[1] in IMAGE_TYPES:
-            replacement = "<img alt='%s' src=%s>" % (label, url)
-        else:
-            replacement = "<a href=%s>%s</a>" % (url, label)
-
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
-
-def parse_code(test_str):
-    regex = r"(?<!\\)`\w{1,}?`"
-    # this only matches single words, but escaping is less complicated
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        replacement = "<em class=inlineCode style='font-family: monospace;font-style: normal;'>%s</em>" % match.group()[1:-1]
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    out = ""
-    inBlock = 0
-    for line in test_str.split("\n"):
-        if line == "```":
-            if inBlock % 2 == 0:
-                out += "<p class=codeBlock style='font-family: monospace;font-style: normal;white-space: pre-wrap;'>\n"
-            else:
-                out += "</p>\n"
-            inBlock += 1
-        else:
-            out += line + "\n"
-
-    return out
-
-def parse_lists(test_str):
-    regex = r"^[1-9][.)] .*$|- .*$"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-    theFirstOne = True
-
-    for match in matches:
-        if theFirstOne:
-            if match.group()[0].isdigit():
-                listType = "ol"
-                cutoff = 3
-            else:
-                listType = "ul"
-                cutoff = 2
-            replacement = "<%s>\n<li>%s</li>" % (listType, match.group()[cutoff:])
-            theFirstOne = False
-        else:
-            if re.match(regex, [i for i in test_str[match.end()+offset:].split("\n") if i != ''][0]) is None:
-                theFirstOne = True
-                replacement = "<li>%s</li>\n</%s>" % (match.group()[cutoff:], listType)
-            else:
-                replacement = "<li>%s</li>" % match.group()[cutoff:]
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
+    # build a markdown callable around the custom renderer defined in this module
+    md = misaka.Markdown(HighlighterRenderer(), extensions=('fenced-code', 'quote'))
 
-def add_linebreaks(test_str):
-    return re.sub(r"^$", "<br><br>", test_str, 0, re.MULTILINE)
+    return md(unformatted)
 
 def preview_markdown(path, title, category):
     def startBrowser():
diff --git a/requirements.txt b/requirements.txt
index 07cf1bc..617191e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,6 @@ lxml
 requests
 PasteScript==3.2.0
 waitress
+houdini.py
+Pygments
+misaka
diff --git a/shark.md b/shark.md
deleted file mode 100755
index 285f78e..0000000
--- a/shark.md
+++ /dev/null
@@ -1,3 +0,0 @@
-[i love him](https://eda.gay/img/shark1.jpg)
-
-i love him
diff --git a/static/index.md b/static/index.md
index 5caaa92..bc970b7 100644
--- a/static/index.md
+++ b/static/index.md
@@ -5,8 +5,6 @@ i'll post my thoughts on here sometimes, and use this site to link to other stuf
 [click here for a random image of lio fotia](/random?tags=lio_fotia)
 
 ## FOSS alternative services
-- [nextcloud - dropbox + much more alternative](https://nc.eda.gay)
+- [nextcloud - dropbox (+ much more!) alternative](https://nc.eda.gay)
 - [invidious - youtube alternative](https://invidious.eda.gay)
 - [nitter - alternative twitter frontend](https://nitter.eda.gay)
-
-more coming! (hopefully)
diff --git a/static/style.css b/static/style.css
index 0e77ec0..a169e55 100644
--- a/static/style.css
+++ b/static/style.css
@@ -115,6 +115,14 @@ aside {
     font-size: xx-small;
 }
 
+blockquote span {
+    color: #789922;
+}
+
+blockquote span::before {
+    content: ">";
+}
+
 .running {
     background-color: green;
     padding: 1, 1, 1, 1;
-- 
cgit v1.2.3