diff options
Diffstat (limited to 'insinuations.py')
-rw-r--r-- | insinuations.py | 86 |
1 files changed, 0 insertions, 86 deletions
diff --git a/insinuations.py b/insinuations.py deleted file mode 100644 index 107c84e..0000000 --- a/insinuations.py +++ /dev/null @@ -1,86 +0,0 @@ -from lxml import html -import database -import datetime -import requests -import os - -def get_sics(db: database.PayGapDatabase, url = "https://resources.companieshouse.gov.uk/sic/"): - req = requests.get(url) - tree = html.fromstring(req.content.decode()) - bigtable = tree.xpath("/html/body/main/table/tbody")[0] - for tr_elem in bigtable.getchildren(): - td_code, td_description = tr_elem - - if td_code.getchildren() != []: - # if contains a <strong> element which indicates a section - current_section_code = td_code.getchildren()[0].text.replace("Section ", "").strip() - current_section_description = td_description.getchildren()[0].text.strip() - - db.append_sic_sections(current_section_code, current_section_description) - - else: - sic_code = int(td_code.text) - sic_desc = td_description.text.rstrip() - db.append_sic(sic_code, sic_desc, current_section_code) - -def get_companyinfo_url(company_number, url = "https://find-and-update.company-information.service.gov.uk/company/%s"): - if company_number.isdigit(): - company_number = "%08d" % int(company_number) - - return url % company_number - -def lookup_company(company_number): - company = {} - req = requests.get( - get_companyinfo_url(company_number), - headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.50.2 (KHTML, like Gecko) Version/5.0.6 Safari/533.22.3'} - ) - - if req.status_code not in [200, 404]: - raise ConnectionError("Couldn't connect- it %d'd. Was looking for company %s" % (req.status_code, company_number)) - - tree = html.fromstring(req.content.decode()) - - status_elem = tree.xpath('//*[@id="company-status"]') - if len(status_elem) == 1: - company["status"] = status_elem[0].text.strip() - else: - company["status"] = None - - incorp_elem = tree.xpath('//*[@id="company-creation-date"]') - if len(incorp_elem) == 1: - company["incorporated"] = datetime.datetime.strptime(incorp_elem[0].text.strip(), "%d %B %Y") - else: - company["incorporated"] = None - - type_elem = tree.xpath('//*[@id="company-type"]') - if len(type_elem) == 1: - company["type_"] = type_elem[0].text.strip() - else: - company["type_"] = None - - company["sics"] = set() - for i in range(9): - sic_elem = tree.xpath('//*[@id="sic%d"]' % i) - if len(sic_elem) == 1: - company["sics"].add(int(sic_elem[0].text.strip().split(" - ")[0])) - else: - break - - return company - -if __name__ == "__main__": - if not os.path.exists(".docker"): - import dotenv - dotenv.load_dotenv(dotenv_path = "db.env") - host = "localhost" - else: - host = "db" - - # with database.PayGapDatabase(host = host) as db: - # print(db.search_company("University")) - - import app - counties = [feature["properties"]["name"] for feature in app.UK_GEOJSON["features"]] - print(counties) - print(len(counties))
\ No newline at end of file |