aboutsummaryrefslogtreecommitdiffstats
path: root/insinuations.py
diff options
context:
space:
mode:
authorjwansek <eddie.atten.ea29@gmail.com>2023-05-16 14:11:51 +0100
committerjwansek <eddie.atten.ea29@gmail.com>2023-05-16 14:11:51 +0100
commit953dc5ef8652a0b7e8ae2c7db3535f1bd157cdd4 (patch)
tree08a4c9cd4f10a12927f7c57056617672369ed9b6 /insinuations.py
parent0faf95d56815d310290d3533d81d888deb7731f0 (diff)
downloadUKGenderPayGap-953dc5ef8652a0b7e8ae2c7db3535f1bd157cdd4.tar.gz
UKGenderPayGap-953dc5ef8652a0b7e8ae2c7db3535f1bd157cdd4.zip
Added alt text, docker
Diffstat (limited to 'insinuations.py')
-rw-r--r--insinuations.py86
1 files changed, 0 insertions, 86 deletions
diff --git a/insinuations.py b/insinuations.py
deleted file mode 100644
index 107c84e..0000000
--- a/insinuations.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from lxml import html
-import database
-import datetime
-import requests
-import os
-
def get_sics(db: database.PayGapDatabase, url = "https://resources.companieshouse.gov.uk/sic/"):
    """Scrape the SIC code table at ``url`` and store its rows through ``db``.

    The table is walked row by row. A row whose first cell contains a child
    element is treated as a section heading and passed to
    ``db.append_sic_sections``; every other row is treated as an individual
    SIC code and passed to ``db.append_sic`` together with the code of the
    most recently seen section.

    Args:
        db: Database wrapper providing ``append_sic_sections(code, description)``
            and ``append_sic(code, description, section_code)``.
        url: Page holding the SIC table.

    Raises:
        requests.RequestException: If the page cannot be fetched, the request
            times out, or the server answers with an error status.
        ValueError: If a SIC code row appears before any section heading.
    """
    # Without a timeout requests can block forever on an unresponsive server.
    req = requests.get(url, timeout = 30)
    # Fail loudly on an error page instead of trying to parse it as the table.
    req.raise_for_status()

    tree = html.fromstring(req.content.decode())
    bigtable = tree.xpath("/html/body/main/table/tbody")[0]

    current_section_code = None
    for tr_elem in bigtable.getchildren():
        td_code, td_description = tr_elem

        code_children = td_code.getchildren()
        if code_children:
            # The code cell wraps its text in a child element (a <strong>),
            # which indicates a section heading rather than a SIC code.
            current_section_code = code_children[0].text.replace("Section ", "").strip()
            current_section_description = td_description.getchildren()[0].text.strip()

            db.append_sic_sections(current_section_code, current_section_description)
            continue

        if current_section_code is None:
            raise ValueError("SIC code row found before any section heading at %s" % url)

        sic_code = int(td_code.text)
        sic_desc = td_description.text.rstrip()
        db.append_sic(sic_code, sic_desc, current_section_code)
-
def get_companyinfo_url(company_number, url = "https://find-and-update.company-information.service.gov.uk/company/%s"):
    """Build the company-information page URL for a company number.

    Purely numeric company numbers are left-padded with zeros to eight
    digits; any other value (for example one carrying a letter prefix) is
    substituted as given.

    Args:
        company_number: The company number, as a string or an integer.
            Surrounding whitespace is ignored.
        url: ``%``-style template containing a single ``%s`` placeholder.

    Returns:
        ``url`` with the normalised company number substituted in.
    """
    company_number = str(company_number).strip()

    # isdecimal() rather than isdigit(): the latter also accepts characters
    # such as superscript digits, which int() then rejects with ValueError.
    if company_number.isdecimal():
        company_number = "%08d" % int(company_number)

    return url % company_number
-
def _single_element_text(tree, element_id):
    """Return the stripped text of the only element with ``element_id``.

    Returns ``None`` when the id matches zero or several elements, or when
    the matched element has no text of its own.
    """
    matches = tree.xpath('//*[@id="%s"]' % element_id)
    if len(matches) != 1 or matches[0].text is None:
        return None
    return matches[0].text.strip()

def lookup_company(company_number):
    """Scrape status, incorporation date, type and SIC codes for a company.

    The page addressed by ``get_companyinfo_url(company_number)`` is fetched
    and the elements with ids ``company-status``, ``company-creation-date``,
    ``company-type`` and ``sic0``, ``sic1``, ... are read from it.

    Args:
        company_number: Company number to look up.

    Returns:
        A dict with the keys ``"status"``, ``"incorporated"`` (a
        ``datetime.datetime``), ``"type_"`` and ``"sics"`` (a set of ints).
        A field whose element is missing from the page is ``None``;
        ``"sics"`` is then an empty set. A 404 response is parsed like any
        other page, so it normally yields exactly that empty result.

    Raises:
        ConnectionError: If the response status is neither 200 nor 404.
        requests.RequestException: If the request fails or times out.
    """
    req = requests.get(
        get_companyinfo_url(company_number),
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.50.2 (KHTML, like Gecko) Version/5.0.6 Safari/533.22.3'},
        # Without a timeout requests can block forever on an unresponsive server.
        timeout = 30
    )

    if req.status_code not in [200, 404]:
        raise ConnectionError("Couldn't connect- it %d'd. Was looking for company %s" % (req.status_code, company_number))

    tree = html.fromstring(req.content.decode())

    company = {}
    company["status"] = _single_element_text(tree, "company-status")

    incorporated = _single_element_text(tree, "company-creation-date")
    if incorporated is None:
        company["incorporated"] = None
    else:
        company["incorporated"] = datetime.datetime.strptime(incorporated, "%d %B %Y")

    company["type_"] = _single_element_text(tree, "company-type")

    company["sics"] = set()
    for i in range(9):
        sic_text = _single_element_text(tree, "sic%d" % i)
        if sic_text is None:
            # SIC ids are consecutive, so the first gap ends the list.
            break
        # The element text has the form "<code> - <description>".
        company["sics"].add(int(sic_text.split(" - ")[0]))

    return company
-
if __name__ == "__main__":
    # Ad-hoc script entry point: choose a database host, then print the
    # feature names found in app.UK_GEOJSON followed by how many there are.
    if os.path.exists(".docker"):
        # NOTE(review): presumably the ".docker" marker means we are running
        # inside the container, where the database host is "db" - confirm.
        host = "db"
    else:
        import dotenv
        dotenv.load_dotenv(dotenv_path = "db.env")
        host = "localhost"

    # `host` is not used below; it is kept for manual experiments against
    # database.PayGapDatabase(host = host).

    import app
    counties = []
    for feature in app.UK_GEOJSON["features"]:
        counties.append(feature["properties"]["name"])

    print(counties)
    print(len(counties))