diff options
| author | jwansek <eddie.atten.ea29@gmail.com> | 2026-02-21 17:17:12 +0000 |
|---|---|---|
| committer | jwansek <eddie.atten.ea29@gmail.com> | 2026-02-21 17:17:12 +0000 |
| commit | b504077c13ab79c1e899b8402d3afdbf4d4da3f6 (patch) | |
| tree | 69b28b49b74be04f64fe198d872ee0546bc44d1b /edaweb/services.py | |
| parent | 91ba9a69867153480610bff7a4434d0697e37015 (diff) | |
| parent | acaa3aa8204941e1a08059d5bf2ad6c4aca602a6 (diff) | |
| download | boymoder.blog-b504077c13ab79c1e899b8402d3afdbf4d4da3f6.tar.gz boymoder.blog-b504077c13ab79c1e899b8402d3afdbf4d4da3f6.zip | |
pls merge
Diffstat (limited to 'edaweb/services.py')
| -rw-r--r-- | edaweb/services.py | 818 |
1 files changed, 409 insertions, 409 deletions
diff --git a/edaweb/services.py b/edaweb/services.py
index 50eed45..11f21fc 100644
--- a/edaweb/services.py
+++ b/edaweb/services.py
@@ -1,409 +1,409 @@
+from dataclasses import dataclass
+from io import StringIO
+from lxml import html, etree
+from github import Github
+import multiprocessing
+import paramiko.client
+from APiHole import PiHole
+import transmission_rpc
+import configparser
+import math as maths
+import requests
+import datetime
+import urllib.request
+import urllib.parse
+import docker
+import random
+import subprocess
+import fabric
+import pickle
+import queue
+import json
+import time
+import os
+
+theLastId = 0
+config_path = os.path.join(os.path.dirname(__file__), "..", "edaweb.conf")
+if not os.path.exists(config_path):
+ raise FileNotFoundError("Could not find edaweb.conf config file")
+CONFIG = configparser.ConfigParser(interpolation = None)
+CONFIG.read(config_path)
+
+def humanbytes(B):
+ 'Return the given bytes as a human friendly KB, MB, GB, or TB string'
+ B = float(B)
+ KB = float(1024)
+ MB = float(KB ** 2) # 1,048,576
+ GB = float(KB ** 3) # 1,073,741,824
+ TB = float(KB ** 4) # 1,099,511,627,776
+
+ if B < KB:
+ return '{0} {1}'.format(B, 'Bytes' if B != 1 else 'Byte')
+ elif KB <= B < MB:
+ return '{0:.2f} KB'.format(B/KB)
+ elif MB <= B < GB:
+ return '{0:.2f} MB'.format(B/MB)
+ elif GB <= B < TB:
+ return '{0:.2f} GB'.format(B/GB)
+ elif TB <= B:
+ return '{0:.2f} TB'.format(B/TB)
+
+@dataclass
+class SafebooruImage:
+ id_: int
+ url: str
+ searchTags: list
+ tags: list
+ source: str
+ imurl: str
+
+ def remove_tag(self, tag):
+ return list(set(self.searchTags).difference(set([tag])))
+
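+# context manager that downloads imurl to a local file and deletes it again on exit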
+@dataclass
+class DownloadedImage:
+ imurl: str
+
+ def __enter__(self):
+ self.filename = os.path.join("static", "images", "random.jpg")
+
+ req = urllib.request.Request(self.imurl, headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.50.2 (KHTML, like Gecko) Version/5.0.6 Safari/533.22.3'})
+ mediaContent = urllib.request.urlopen(req).read()
+ with open(self.filename, "wb") as f:
+ f.write(mediaContent)
+ return self.filename
+
+ def __exit__(self, type, value, traceback):
+ os.remove(self.filename)
+
+def get_num_pages(tags):
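+ # read the final pagination link on the Safebooru results page and turn its pid offset into a page count (40 posts per page)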
+ pages_url = "https://safebooru.org/index.php?page=post&s=list&tags=%s" % "+".join(tags)
+ tree = html.fromstring(requests.get(pages_url).content)
+ try:
+ finalpage_element = tree.xpath("/html/body/div[6]/div/div[2]/div[2]/div/a[12]")[0]
+ except IndexError:
+ return 1
+ else:
+ return int(int(urllib.parse.parse_qs(finalpage_element.get("href"))["pid"][0]) / (5*8))
+
+def get_id_from_url(url):
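+ # pull the numeric id parameter out of a Safebooru post URL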
+ return int(urllib.parse.parse_qs(url)["id"][0])
+
+def get_random_image(tags):
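+ # choose a random results page for the given tags, then a random post on it; retry if the post repeats the last id, has no usable source, or has been deleted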
+ global theLastId
+ searchPage = random.randint(1, get_num_pages(tags)) * 5 * 8
+ url = "https://safebooru.org/index.php?page=post&s=list&tags=%s&pid=%i" % ("+".join(tags), searchPage)
+ tree = html.fromstring(requests.get(url).content)
+
+ imageElements = [e for e in tree.xpath("/html/body/div[6]/div/div[2]/div[1]")[0].iter(tag = "a")]
+ try:
+ element = random.choice(imageElements)
+ except IndexError:
+ # raise ConnectionError("Couldn't find any images")
+ return get_random_image(tags)
+
+ url = "https://safebooru.org/" + element.get("href")
+ if get_id_from_url(url) == theLastId:
+ return get_random_image(tags)
+ theLastId = get_id_from_url(url)
+
+ try:
+ sbi = SafebooruImage(
+ id_ = get_id_from_url(url),
+ url = url,
+ tags = element.find("img").get("alt").split(),
+ searchTags = tags,
+ source = fix_source_url(get_source(url)),
+ imurl = get_imurl(url)
+ )
+ except (ConnectionError, KeyError) as e:
+ print("[ERROR]", e)
+ return get_random_image(tags)
+
+ if link_deleted(sbi.url):
+ print("Retried since the source was deleted...")
+ return get_random_image(tags)
+
+ return sbi
+
+def get_source(url):
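+ # scrape the "Source:" entry from the post's #stats sidebar, either as plain text or as a nested link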
+ tree = html.fromstring(requests.get(url).content)
+ for element in tree.xpath('//*[@id="stats"]')[0].iter("li"):
+ if element.text.startswith("Source: h"):
+ return element.text[8:]
+ elif element.text.startswith("Source:"):
+ for child in element.iter():
+ if child.get("href") is not None:
+ return child.get("href")
+ raise ConnectionError("Couldn't find source image for id %i" % get_id_from_url(url))
+
+def fix_source_url(url):
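+ # normalise source URLs: pixiv query-string and pximg links become artwork URLs, twitter links are rewritten to the nitter mirror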
+ parsed = urllib.parse.urlparse(url)
+ if parsed.netloc == "www.pixiv.net":
+ return "https://www.pixiv.net/en/artworks/" + urllib.parse.parse_qs(parsed.query)["illust_id"][0]
+ elif parsed.netloc in ["bishie.booru.org", "www.secchan.net"]:
+ raise ConnectionError("Couldn't get source")
+ elif "pximg.net" in parsed.netloc or "pixiv.net" in parsed.netloc:
+ return "https://www.pixiv.net/en/artworks/" + parsed.path.split("/")[-1][:8]
+ elif parsed.netloc == "twitter.com":
+ return url.replace("twitter.com", "nitter.eda.gay")
+ return url
+
+def get_imurl(url):
+ tree = html.fromstring(requests.get(url).content)
+ return tree.xpath('//*[@id="image"]')[0].get("src")
+
+def link_deleted(url):
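+ # treat the source as deleted if its <title> matches nitter's error page or pixiv's generic landing page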
+ text = requests.get(url).text
+ return text[text.find("<title>") + 7 : text.find("</title>")] in ["Error | nitter", "イラストコミュニケーションサービス[pixiv]"]
+
+def request_recent_commits(since = None):
+ # default to commits from the last week, computed at call time rather than at import time
+ if since is None:
+ since = datetime.datetime.now() - datetime.timedelta(days = 7)
+ g = Github(CONFIG.get("github", "access_code"))
+ out = []
+ for repo in g.get_user().get_repos():
+ # print(repo.name, list(repo.get_branches()))
+ try:
+ for commit in repo.get_commits(since = since):
+ out.append({
+ "repo": repo.name,
+ "message": commit.commit.message,
+ "url": commit.html_url,
+ "datetime": commit.commit.author.date,
+ "stats": {
+ "additions": commit.stats.additions,
+ "deletions": commit.stats.deletions,
+ "total": commit.stats.total
+ }
+ })
+ except Exception as e:
+ print(repo, e)
+
+ return sorted(out, key = lambda a: a["datetime"], reverse = True)
+
+def scrape_nitter(username, get_until:int):
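+ # walk the user's nitter timeline page by page, following the "show-more" cursor, until the tweet with id get_until is reached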
+ new_tweets = []
+ nitter_url = CONFIG.get("nitter", "internalurl")
+ nitter_port = CONFIG.getint("nitter", "internalport")
+ scrape_new_pages = True
+ url = "http://%s:%d/%s" % (nitter_url, nitter_port, username)
+
+ while scrape_new_pages:
+ tree = html.fromstring(requests.get(url).content)
+ for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
+ if i > 0 and tweetUrlElement.get("href").split("/")[1] == username:
+ id_ = int(urllib.parse.urlparse(tweetUrlElement.get("href")).path.split("/")[-1])
+ tweet_link = "http://%s:%d%s" % (nitter_url, nitter_port, tweetUrlElement.get("href"))
+
+ if id_ == get_until:
+ scrape_new_pages = False
+ break
+
+ try:
+ dt, replying_to, text, images = parse_tweet(tweet_link)
+ new_tweets.append((id_, dt, replying_to, text, username, images))
+ print(dt, "'%s'" % text)
+ except IndexError:
+ print("Couldn't get any more tweets")
+ scrape_new_pages = False
+ break
+ except ConnectionError:
+ print("Rate limited, try again later")
+ return []
+
+
+ try:
+ cursor = tree.xpath('//*[@class="show-more"]/a')[0].get("href")
+ except IndexError:
+ # no more elements
+ break
+ url = "http://%s:%d/%s%s" % (nitter_url, nitter_port, username, cursor)
+
+ return new_tweets
+
+def parse_tweet(tweet_url):
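+ # extract the timestamp, reply target, text and image URLs from a single nitter tweet page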
+ # print(tweet_url)
+ tree = html.fromstring(requests.get(tweet_url).content)
+ # with open("2images.html", "r") as f:
+ # tree = html.fromstring(f.read())
+
+ rate_limited_elem = tree.xpath("/html/body/div/div/div/span")
+ if rate_limited_elem != []:
+ if rate_limited_elem[0].text == "Instance has been rate limited.":
+ raise ConnectionError("Instance has been rate limited.")
+
+ main_tweet_elem = tree.xpath('//*[@class="main-tweet"]')[0]
+
+ dt_str = main_tweet_elem.xpath('//*[@class="tweet-published"]')[0].text
+ dt = datetime.datetime.strptime(dt_str.replace("Â", ""), "%b %d, %Y · %I:%M %p UTC")
+ text = tree.xpath('//*[@class="main-tweet"]/div/div/div[2]')[0].text_content()
+ if text == "":
+ text = "[Image only]"
+ replying_to_elems = tree.xpath('//*[@class="before-tweet thread-line"]/div/a')
+ if replying_to_elems != []:
+ replying_to = int(urllib.parse.urlparse(replying_to_elems[-1].get("href")).path.split("/")[-1])
+ else:
+ replying_to = None
+
+ images = []
+ images_elems = tree.xpath('//*[@class="main-tweet"]/div/div/div[3]/div/div/a/img')
+ for image_elem in images_elems:
+ images.append("https://" + CONFIG.get("nitter", "outsideurl") + urllib.parse.urlparse(image_elem.get("src")).path)
+
+ return dt, replying_to, text, images
+
+def scrape_whispa(whispa_url, since = None):
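+ # collect Q&A pairs from a whispa.sh profile, stopping once entries older than `since` (with two weeks of slack) are seen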
+ def query_answer(answer_url, max_retries = 10):
+ for i in range(max_retries):
+ try:
+ return requests.get(answer_url)
+ except requests.exceptions.ConnectionError:
+ s = 5.05 * (i + 1)
+ print("Connection timed out, retrying in %.2fs" % s)
+ time.sleep(s)
+ continue
+
+ # add a bit of wiggle room in case i don't answer the questions in order (i often do this)
+ if since is None:
+ stop_at = datetime.datetime(year = 2001, month = 8, day = 12)
+ else:
+ stop_at = since - datetime.timedelta(days = 14)
+ print("The newest Q&A timestamp in the database was %s, we will stop looking at %s." % (since.astimezone().isoformat(), stop_at.astimezone().isoformat()))
+
+ html_ = requests.get(whispa_url).content.decode()
+ # with open("temp.html", "w") as f:
+ # f.write(html_)
+
+ tree = html.fromstring(html_)
+ qnas = []
+ # we're not doing proper HTML scraping here really... since the site uses client side rendering
+ # we rather parse the JS scripts to get the JSON payload of useful information... sadly this looks horrible
+ for i, script in enumerate(tree.xpath("/html/body/script"), 0):
+ js = str(script.text)
+ if "receivedFeedback" in js:
+ # my god this is horrible...
+ parsed_json = json.loads(json.loads(js[19:-1])[1][2:])[0][3]["loadedUser"]["receivedFeedback"]
+ # print(json.dumps(parsed_json, indent = 4))
+ # with open("whispas_%i.json" % i, "w") as f:
+ # json.dump(parsed_json, f, indent = 4)
+ for j in parsed_json:
+ if j["_count"]["childFeedback"] < 0:
+ continue
+
+ answer_url = "https://apiv4.whispa.sh/feedbacks/%s/children/public" % j["id"]
+ req = query_answer(answer_url)
+ try:
+ firstanswer = req.json()["data"][0]
+ except IndexError:
+ continue
+ dt = datetime.datetime.fromisoformat(firstanswer["createdAt"][:-1])
+
+ qna = {
+ # "id": int(j["id"], base = 16),
+ "id": int(dt.timestamp()),
+ "link": answer_url,
+ "datetime": dt,
+ "question": j["content"],
+ "answer": firstanswer["content"],
+ "host": "whispa.sh"
+ }
+ print(qna)
+ qnas.append(qna)
+ time.sleep(2.03)
+ if dt <= stop_at:
+ print("Met the threshold for oldest Q&A, so stopped looking.")
+ break
+ return qnas
+
+def get_docker_containers(host, ssh_key_path):
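+ # SSH into the host as root and list every container, returning [name, status, image] rows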
+ result = fabric.Connection(
+ host = host,
+ user = "root",
+ connect_kwargs = {
+ "key_filename": ssh_key_path,
+ "look_for_keys": False
+ }
+ ).run('docker ps -a -s --format "table {{.Names}};{{.Status}};{{.Image}}"', hide = True)
+ return [line.split(";") for line in result.stdout.split("\n")[1:-1]]
+
+def cache_all_docker_containers(ssh_key_path):
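+ # query every host listed in the [docker_hosts] config section and pickle the results to /tmp/docker-cache.json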
+ containers = {}
+ containers["containers"] = {}
+ for host, name in CONFIG["docker_hosts"].items():
+ print(host)
+ containers["containers"][(host, name)] = get_docker_containers(host, ssh_key_path)
+
+ containers["cachetime"] = "Docker information last updated at %s" % str(datetime.datetime.now())
+ with open("/tmp/docker-cache.json", "wb") as f:
+ pickle.dump(containers, f)
+
+def get_all_docker_containers():
+ if not os.path.exists("/tmp/docker-cache.json"):
+ return {"containers": {}, "cachetime": "No cached docker information"}
+
+ with open("/tmp/docker-cache.json", "rb") as f:
+ return pickle.load(f)
+
+def timeout(func):
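+ # decorator: run func in a separate process and give up (returning None) if it takes longer than the configured service timeout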
+ # cant get this to work with queue.Queue() for some reason?
+ # this works but Manager() uses an extra thread than Queue()
+ manager = multiprocessing.Manager()
+ returnVan = manager.list()
+ # ti = time.time()
+
+ def runFunc(q, func):
+ q.append(func())
+
+ def beginTimeout():
+ # drop any result left over from a previous call so we never return stale data
+ del returnVan[:]
+ t = multiprocessing.Process(target = runFunc, args = (returnVan, func))
+ t.start()
+
+ t.join(timeout = CONFIG["servicetimeout"].getint("seconds"))
+
+ # print("Request took:", time.time() - ti)
+ try:
+ return returnVan[0]
+ except IndexError:
+ if t.is_alive():
+ t.terminate()
+
+ return beginTimeout
+
+@timeout
+def get_torrent_stats():
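+ # pull cumulative session statistics from the Transmission RPC daemon and format them for display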
+ client = transmission_rpc.client.Client(
+ host = CONFIG.get("transmission", "host")
+ )
+ s = vars(client.session_stats())["fields"]
+ return {
+ "Active torrents:": s["activeTorrentCount"],
+ "Downloaded:": humanbytes(s["cumulative-stats"]["downloadedBytes"]),
+ "Uploaded:": humanbytes(s["cumulative-stats"]["uploadedBytes"]),
+ "Active time:": str(datetime.timedelta(seconds = s["cumulative-stats"]["secondsActive"])),
+ "Files added:": s["cumulative-stats"]["filesAdded"],
+ "Current upload speed:": humanbytes(s["uploadSpeed"]) + "s/S",
+ "Current download speed:": humanbytes(s["downloadSpeed"]) + "s/S"
+ }
+
+@timeout
+def get_pihole_stats():
+ return PiHole.GetSummary(CONFIG.get("pihole", "url"), CONFIG.get("pihole", "key"), True)
+
+def get_recent_commits(db, max_per_repo = 3):
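+ # return the newest cached commits, keeping at most max_per_repo commits per repository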
+ cache = db.get_cached_commits()
+ num_per_repo = {}
+ out = []
+ for commit in cache:
+ if commit["repo"] not in num_per_repo.keys():
+ num_per_repo[commit["repo"]] = 0
+
+ num_per_repo[commit["repo"]] += 1
+ if num_per_repo[commit["repo"]] <= max_per_repo:
+ out.append(commit)
+
+ return sorted(out, key = lambda a: a["datetime"], reverse = True)
+
+if __name__ == "__main__":
+ print(scrape_whispa(CONFIG.get("qnas", "url")))
+ # import database
+
+ # with database.Database() as db:
+ # print(json.dumps(get_recent_commits(db), indent=4))
