6 files changed, 153 insertions, 2 deletions
diff --git a/app.py b/app.py
index c7d8756..b187f3e 100644
--- a/app.py
+++ b/app.py
@@ -132,6 +132,36 @@ def serve_image(filename):
     else:
         flask.abort(404)
 
+@app.route("/random")
+def serve_random():
+    """Render a page showing a random Safebooru image for the requested tags.
+
+    Reads the space-separated ``tags`` query parameter. Aborts with 400 when
+    the parameter is missing and with 404 when the image lookup raises
+    ``ConnectionError``, which services.get_random_image does when the
+    search yields no images.
+    """
+    try:
+        # split() with no argument drops empty strings, so "?tags=" or stray
+        # spaces never yield an empty tag that would render as a blank link.
+        tags = flask.request.args['tags'].split()
+    except KeyError:
+        flask.abort(400)
+
+    try:
+        # Scrape before opening the database so the handle is not held open
+        # while waiting on several outbound HTTP requests.
+        sbi = services.get_random_image(tags)
+    except ConnectionError:
+        flask.abort(404)
+
+    with database.Database() as db:
+        return flask.render_template(
+            "random.html",
+            **get_template_items("random image", db),
+            sbi = sbi
+        )
+
 @app.route("/api/<infoRequest>")
 def serve_api_request(infoRequest):
     if infoRequest == "commits":
diff --git a/parser.py b/parser.py
index d8dedea..6b5ac18 100644
--- a/parser.py
+++ b/parser.py
@@ -77,7 +77,7 @@ def parse_asteriscs(test_str):
     return test_str
 
 def parse_links(test_str):
-    regex = r"(?<!\\)\[.*?\]\(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\)"
+    regex = r"(?<!\\)\[.*?\]\(.*?\)"
     matches = re.finditer(regex, test_str, re.MULTILINE)
     offset = 0
 
diff --git a/services.py b/services.py
index 4f014d8..0423a3e 100644
--- a/services.py
+++ b/services.py
@@ -1,17 +1,22 @@
 from dataclasses import dataclass
 from io import StringIO
+from lxml import html, etree
 import multiprocessing
 import pihole as ph
 import qbittorrent
 import requests
 import datetime
+import urllib.parse
 import docker
 import clutch
+import random
 import queue
 import json
 import time
 import app
 
+theLastId = 0  # id of the post get_random_image picked last; lets it skip an immediate repeat
+
 def humanbytes(B):
    'Return the given bytes as a human friendly KB, MB, GB, or TB string'
    B = float(B)
@@ -111,4 +116,197 @@ def get_pihole_stats():
         "domains": pihole.domain_count,
         "last_updated": str(datetime.datetime.fromtimestamp(pihole.gravity_last_updated["absolute"]))
     }
+
+# Safebooru shows 5*8 posts per results page; the "pid" query parameter
+# advances by this much from one page to the next.
+POSTS_PER_PAGE = 5 * 8
+
+# Cap on picks per get_random_image call, so dropped picks cannot retry forever.
+MAX_ATTEMPTS = 10
+
+@dataclass
+class SafebooruImage:
+    """Details of one Safebooru post, as handed to the random.html template."""
+    id_: int          # post id parsed from the post URL
+    url: str          # address of the post page on safebooru.org
+    searchTags: list  # tags the search was run with
+    tags: list        # the post's own tags, split from the thumbnail's alt text
+    source: str       # artwork source link after fix_source_url
+    imurl: str        # "src" of the post page's #image element
+
+    def remove_tag(self, tag):
+        """Return the search tags without ``tag``, in their original order."""
+        # A list comprehension keeps the order stable between requests; going
+        # through a set would shuffle the tags with every interpreter restart.
+        return [t for t in self.searchTags if t != tag]
+
+def get_num_pages(tags):
+    """Return the page number encoded in the search paginator's 12th link.
+
+    That link is presumably the "last page" link (TODO confirm); its ``pid``
+    is divided by POSTS_PER_PAGE. Returns 1 when the link is missing.
+    """
+    pages_url = "https://safebooru.org/index.php?page=post&s=list&tags=%s" % "+".join(tags)
+    tree = html.fromstring(requests.get(pages_url).content)
+    try:
+        finalpage_element = tree.xpath("/html/body/div[6]/div/div[2]/div[2]/div/a[12]")[0]
+    except IndexError:
+        return 1
+    # Parse only the query part of the link; feeding the whole href to
+    # parse_qs would glue the path onto the first parameter name.
+    query = urllib.parse.urlparse(finalpage_element.get("href")).query
+    return int(urllib.parse.parse_qs(query)["pid"][0]) // POSTS_PER_PAGE
+
+def get_id_from_url(url):
+    """Return the integer ``id`` query parameter of a post URL.
+
+    Raises KeyError when the URL has no ``id`` parameter.
+    """
+    return int(urllib.parse.parse_qs(urllib.parse.urlparse(url).query)["id"][0])
+
+def get_random_image(tags):
+    """Pick a random Safebooru post matching ``tags``.
+
+    Each attempt chooses a random results page and a random post link on
+    it. A pick is dropped and another attempt made when it repeats the
+    previously picked post, when its source or image cannot be extracted,
+    or when its source page looks deleted.
+
+    Args:
+        tags: list of tag strings to search for.
+
+    Returns:
+        A SafebooruImage describing the chosen post.
+
+    Raises:
+        ConnectionError: if the results page holds no post links, or if
+            MAX_ATTEMPTS picks in a row were all dropped.
+    """
+    global theLastId
+    # A bounded loop replaces unbounded self-recursion: a search whose only
+    # hit was just served would otherwise retry until RecursionError.
+    for _ in range(MAX_ATTEMPTS):
+        # NOTE(review): if pid 0 is the first results page, starting the range
+        # at 1 never selects it -- confirm against the site before changing.
+        searchPage = random.randint(1, get_num_pages(tags)) * POSTS_PER_PAGE
+        url = "https://safebooru.org/index.php?page=post&s=list&tags=%s&pid=%i" % ("+".join(tags), searchPage)
+        tree = html.fromstring(requests.get(url).content)
+
+        containers = tree.xpath("/html/body/div[6]/div/div[2]/div[1]")
+        imageElements = list(containers[0].iter(tag = "a")) if containers else []
+        if not imageElements:
+            raise ConnectionError("Couldn't find any images")
+
+        element = random.choice(imageElements)
+        url = "https://safebooru.org/" + element.get("href")
+        postId = get_id_from_url(url)
+        if postId == theLastId:
+            continue
+        theLastId = postId
+
+        # A link without a thumbnail or alt text simply yields no tags.
+        thumbnail = element.find("img")
+        altText = thumbnail.get("alt") if thumbnail is not None else None
+
+        try:
+            sbi = SafebooruImage(
+                id_ = postId,
+                url = url,
+                tags = (altText or "").split(),
+                searchTags = tags,
+                source = fix_source_url(get_source(url)),
+                imurl = get_imurl(url)
+            )
+        except (ConnectionError, KeyError) as e:
+            print("[ERROR]", e)
+            continue
+
+        # Check the source link, not the Safebooru post URL: the titles
+        # link_deleted looks for belong to nitter and pixiv pages.
+        if link_deleted(sbi.source):
+            print("Retried since the source was deleted...")
+            continue
+
+        return sbi
+
+    raise ConnectionError("Couldn't find a usable image after %i attempts" % MAX_ATTEMPTS)
+
+def get_source(url):
+    """Return the source link shown in the stats list of a post page.
+
+    Raises:
+        ConnectionError: if the page lists no source.
+    """
+    tree = html.fromstring(requests.get(url).content)
+    # Slicing instead of indexing lets a page without a #stats element fall
+    # through to the ConnectionError below rather than raise IndexError.
+    for stats in tree.xpath('//*[@id="stats"]')[:1]:
+        for element in stats.iter("li"):
+            # lxml gives text = None for an <li> that starts with a child tag.
+            text = element.text or ""
+            if text.startswith("Source: h"):
+                return text[8:]
+            elif text.startswith("Source:"):
+                for child in element.iter():
+                    if child.get("href") is not None:
+                        return child.get("href")
+    raise ConnectionError("Couldn't find source image for id %i" % get_id_from_url(url))
+
+def fix_source_url(url):
+    """Rewrite a source link into the form the site links to.
+
+    Pixiv links become ``/en/artworks/`` pages and twitter.com links are
+    pointed at nitter.eda.gay; any other link is returned unchanged.
+
+    Raises:
+        ConnectionError: for hosts whose links are rejected as sources.
+        KeyError: for a www.pixiv.net link without an ``illust_id`` parameter.
+    """
+    parsed = urllib.parse.urlparse(url)
+    if parsed.netloc == "www.pixiv.net":
+        return "https://www.pixiv.net/en/artworks/" + urllib.parse.parse_qs(parsed.query)["illust_id"][0]
+    elif parsed.netloc in ["bishie.booru.org", "www.secchan.net"]:
+        # Raise rather than return the exception object, so the caller's
+        # except clause sees it and moves on to another image.
+        raise ConnectionError("Couldn't get source")
+    elif "pximg.net" in parsed.netloc or "pixiv.net" in parsed.netloc:
+        return "https://www.pixiv.net/en/artworks/" + parsed.path.split("/")[-1][:8]
+    elif parsed.netloc == "twitter.com":
+        return url.replace("twitter.com", "nitter.eda.gay")
+    return url
+
+def get_imurl(url):
+    """Return the ``src`` of the #image element on a post page.
+
+    Raises:
+        ConnectionError: if the page has no such element.
+    """
+    tree = html.fromstring(requests.get(url).content)
+    images = tree.xpath('//*[@id="image"]')
+    if not images:
+        raise ConnectionError("Couldn't find image for id %i" % get_id_from_url(url))
+    return images[0].get("src")
+
+def link_deleted(url):
+    """Return True when the page at ``url`` looks deleted or cannot be fetched.
+
+    A page counts as deleted when its <title> matches one of the known
+    titles listed below.
+    """
+    try:
+        # Sources are arbitrary third-party sites, so bound the wait and
+        # treat a failed request like a dead link.
+        text = requests.get(url, timeout = 10).text
+    except requests.RequestException:
+        return True
+    return text[text.find("<title>") + 7 : text.find("</title>")] in ["Error | nitter", "イラストコミュニケーションサービス[pixiv]"]
+
+if __name__ == "__main__":
+    # Manual smoke test: fetch one image and print what was scraped.
+    sbi = get_random_image(["lio_fotia", "promare"])
+    print(sbi.tags)
+    print(sbi.source)
+    print(sbi.imurl)
+    print(sbi.remove_tag("promare"))
+
  
diff --git a/static/index.md b/static/index.md
index 080a648..e217a94 100644
--- a/static/index.md
+++ b/static/index.md
@@ -1,3 +1,5 @@
 # haii
 my name is eden and im a 19yo computer science undergraduate. i made my own website to encourage others to do so too.
-i'll post my thoughts on here sometimes, and use this site to link to other stuff i host.
-\ No newline at end of file
+i'll post my thoughts on here sometimes, and use this site to link to other stuff i host.
+
+[click here for a random image of lio fotia](/random?tags=lio_fotia)
+\ No newline at end of file
diff --git a/static/style.css b/static/style.css
index 1ac4ec3..26b337c 100644
--- a/static/style.css
+++ b/static/style.css
@@ -96,6 +96,10 @@ article section table td {
     text-align: right;
 }
 
+#randomImage img {
+    max-width: 65%; /* presumably leaves room for the floated aside (width: 30%) -- confirm */
+}
+
 aside {
     width: 30%;
     padding-left: 15px;
@@ -103,6 +107,10 @@ aside {
     float: right;
 }
 
+#tags {
+    font-size: xx-small; /* #tags is the tag-list <aside> in templates/random.html */
+}
+
 .running {
     background-color: green;
     padding: 1, 1, 1, 1;
diff --git a/templates/random.html b/templates/random.html
new file mode 100644
index 0000000..e36bb87
--- /dev/null
+++ b/templates/random.html
@@ -0,0 +1,25 @@
+{% extends "template.html" %}
+{% block content %}
+    {# Every attribute value is quoted and each tag is URL-encoded on its own:
+       tags arrive from the query string and from scraped pages, and an
+       unquoted value containing a space would spill into new attributes. #}
+    <aside id="tags">
+        <h1>current search tags: (click to remove)</h1>
+        <ul>
+            {% for tag in sbi.searchTags %}
+            <li><a href="/random?tags={{ sbi.remove_tag(tag) | map('urlencode') | join('+') }}">{{ tag }}</a></li>
+            {% endfor %}
+        </ul>
+        <h1>this image's tags:</h1>
+        <ul>
+            {% for tag in sbi.tags %}
+                <li><a href="/random?tags={{ (sbi.searchTags + [tag]) | map('urlencode') | join('+') }}">{{ tag }}</a></li>
+            {% endfor %}
+        </ul>
+    </aside>
+    <section id="randomImage">
+        <a href="{{ sbi.source }}"><img src="{{ sbi.imurl }}"></a>
+        <h1><a href="/random?tags={{ sbi.searchTags | map('urlencode') | join('+') }}">generate another</a></h1>
+        <h2><a href="{{ sbi.source }}">artist link</a></h2>
+    </section>
+{% endblock %}
+\ No newline at end of file