diff options
| author | jwansek <eddie.atten.ea29@gmail.com> | 2023-05-16 00:05:27 +0100 | 
|---|---|---|
| committer | jwansek <eddie.atten.ea29@gmail.com> | 2023-05-16 00:05:27 +0100 | 
| commit | 0faf95d56815d310290d3533d81d888deb7731f0 (patch) | |
| tree | ef9fd281f8b85aab569944eb712235aa321ba768 | |
| parent | 469de094098c5cf149b9aeab9c2a7d23aa22c11d (diff) | |
| download | UKGenderPayGap-0faf95d56815d310290d3533d81d888deb7731f0.tar.gz UKGenderPayGap-0faf95d56815d310290d3533d81d888deb7731f0.zip  | |
Added heatmap
| -rw-r--r-- | README.md | 6 | ||||
| -rw-r--r-- | app.py | 25 | ||||
| -rw-r--r-- | charts.json | 11 | ||||
| -rw-r--r-- | database.py | 86 | ||||
| -rw-r--r-- | parser.py | 6 | ||||
| -rw-r--r-- | requirements.txt | 1 | ||||
| -rw-r--r-- | static/scripts.js | 74 | 
7 files changed, 201 insertions, 8 deletions
@@ -9,4 +9,8 @@ Visualisations of the UK gender pay gap data. Publically avaliable, not often se  python3 parser.py ../RawData/UK\ Gender\ Pay\ Gap\ Data\ -\ 2019\ to\ 2020.csv  ``` - - [ONS data](https://www.ons.gov.uk/aboutus/transparencyandgovernance/freedomofinformationfoi/ukpostcodestownsandcounties) is required to turn the address postcode to a county or unitary authority for the map. This data is rather complicated, thankfully the [Borough of Camden](https://www.data.gov.uk/dataset/7ec10db7-c8f4-4a40-8d82-8921935b4865/national-statistics-postcode-lookup-uk) has processed this data nicely for us. + - [ONS data](https://www.ons.gov.uk/aboutus/transparencyandgovernance/freedomofinformationfoi/ukpostcodestownsandcounties) is required to turn the address postcode to a county or unitary authority for the map. This data is rather complicated, thankfully the [Borough of Camden](https://www.data.gov.uk/dataset/7ec10db7-c8f4-4a40-8d82-8921935b4865/national-statistics-postcode-lookup-uk) has processed this data nicely for us. To append the county or local authority data, give `parser.py` a file downloaded from the above with the exact name: + +``` +python3 parser.py ../RawData/National_Statistics_Postcode_Lookup_UK.csv +``` @@ -86,6 +86,20 @@ def api_get_sic_section_pay():          return flask.jsonify(db.get_pay_by_sic_section(pay_type, year)) +@app.route("/api/heatmap") +def api_get_heatmap_data(): +    # pay_type = flask.request.args.get("Pay Type") +    year = flask.request.args.get("year") +    # print("year: '%s'" % year) +    # if pay_type is None or pay_type.lower() not in {'hourly', 'bonuses'}: +    #     return flask.abort(400, "The key `pay type` must be equal to 'hourly' or 'bonuses'") +    with database.PayGapDatabase(host = host) as db: +        if year is not None: +            if year not in db.get_years(): +                return flask.abort(400, "Unrecognised year '%s'. The year option must be in %s" % (year, ", ".join(db.get_years()))) +         +        return flask.jsonify(db.get_heatmap_data("hourly", year)) +  @app.route("/api/getyears")  def api_get_year_options():      with database.PayGapDatabase(host = host) as db: @@ -107,17 +121,25 @@ def search():  def get_chart_elem(url):      for i in get_charts()["index"]: -        print(i["url"], url) +        # print(i["url"], url)          # if i["url"] == url:          #     return i          if url.startswith(i["url"]):              return i +def get_chart_elem_strict(url): +    for i in get_charts()["index"]: +        print(urllib.parse.urlsplit(i["url"]).path, urllib.parse.urlsplit(url).path) +        if urllib.parse.urlsplit(i["url"]).path == urllib.parse.urlsplit(url).path: +            return i +  @app.route("/plot/<name>")  def serve_large_plot(name):      with database.PayGapDatabase(host = host) as db:          # print(flask.request.full_path)          elem = get_chart_elem(flask.request.full_path) +        # if elem is None: +        #     elem = get_chart_elem_strict(flask.request.full_path)          filters = elem["filters"]          for k, v in filters.items():              if v == "<SICType>": @@ -132,6 +154,7 @@ def serve_large_plot(name):      elem["url"] = flask.request.full_path      # print("elem", elem)      current_filters = dict(flask.request.args) +    print("filters", filters)      print("current_filters", current_filters)      return flask.render_template(          "plot.html.j2", diff --git a/charts.json b/charts.json index 91f2ab7..4b266aa 100644 --- a/charts.json +++ b/charts.json @@ -1,6 +1,13 @@  {      "index": [          { +            "title": "Median Hourly Pay by County or Local Authority: Choropleth Map", +            "url": "/plot/heatmap", +            "filters": { +                "Year": "<Years>" +            } +        }, +        {              "title": "Median Hourly Pay Difference by Year",              "url": "/plot/years?Pay+Type=Hourly",              "filters": { @@ -33,7 +40,7 @@              }          },          { -            "title": "Median Hourly pay Difference by SIC Section", +            "title": "Median Hourly Pay Difference by SIC Section",              "url": "/plot/sic_sec?Pay+Type=Hourly",              "filters": {                  "Pay Type": { @@ -47,7 +54,7 @@              }          },          { -            "title": "Median Bonus pay Difference by SIC Section", +            "title": "Median Bonus Pay Difference by SIC Section",              "url": "/plot/sic_sec?Pay+Type=Bonuses",              "filters": {                  "Pay Type": { diff --git a/database.py b/database.py index 80edd3c..d67b96c 100644 --- a/database.py +++ b/database.py @@ -2,11 +2,14 @@ from dataclasses import dataclass  import operator  import datetime  import pymysql +import pandas +import app  import os  @dataclass  class PayGapDatabase: +    postcode_lookup_obj = None      host: str = "db"      user: str = "root"      passwd: str = None @@ -336,6 +339,83 @@ class PayGapDatabase:          return sorted(pay, key = operator.itemgetter(1), reverse = True) +    def get_heatmap_data(self, pay_type, year = None): +        sql = "SELECT  insinuated_loc, COUNT(insinuated_loc), -AVG(" +        if pay_type.lower() == "hourly": +            sql += "DiffMedianHourlyPercent" +        elif pay_type.lower() == "bonuses": +            sql += "DiffMedianBonusPercent" +        sql += """ +        ) FROM employer INNER JOIN pay ON pay.company_number = employer.company_number +        WHERE insinuated_loc_type IS NOT NULL +        """ +        if year is not None: +            sql += " AND source LIKE %s" + +        sql += " GROUP BY insinuated_loc;" + +        with self.__connection.cursor() as cursor: +            if year is None: +                cursor.execute(sql) +            else: +                cursor.execute(sql, ("%" + year.replace("-", "to") + "%", )) + +            return [[i[0], i[1], float(i[2])] for i in cursor.fetchall()] + +    def _get_postcode_lookup_obj(self, path_): +        return pandas.read_csv(path_) + +    def _get_counties(self): +        return {feature["properties"]["name"] for feature in app.UK_GEOJSON["features"]} + +    def append_counties(self, path_): +        if self.postcode_lookup_obj is None: +            self.postcode_lookup_obj = self._get_postcode_lookup_obj(path_) + +        counties = self._get_counties() +        postcodes = self._get_postcodes() + +        with self.__connection.cursor() as cursor: + +            cursor.execute("ALTER TABLE employer ADD COLUMN IF NOT EXISTS insinuated_loc VARCHAR(69) DEFAULT NULL;") +            cursor.execute("ALTER TABLE employer ADD COLUMN IF NOT EXISTS insinuated_loc_type VARCHAR(25) DEFAULT NULL;") +         +            for i, j in enumerate(postcodes, 1): +                id_, postcode = j +                found_locations = self.postcode_lookup_obj[ +                    (self.postcode_lookup_obj["Postcode 1"] == postcode) |  +                    (self.postcode_lookup_obj["Postcode 2"] == postcode) |  +                    (self.postcode_lookup_obj["Postcode 3"] == postcode) +                ] +                if len(found_locations) == 1: +                    county, la = found_locations[["County Name", "Local Authority Name"]].values[0] +                    if la in counties: +                        cursor.execute("UPDATE employer SET insinuated_loc = %s, insinuated_loc_type = 'Local Authority' WHERE company_number = %s", (la, id_)) + +                        print("[%d/%d] Using local authority '%s' for postcode '%s'" % (i, len(postcodes), la, postcode)) +                    elif county in counties: +                        cursor.execute("UPDATE employer SET insinuated_loc = %s, insinuated_loc_type = 'County' WHERE company_number = %s", (county, id_)) + +                        print("[%d/%d] Using county '%s' for postcode '%s'" % (i, len(postcodes), county, postcode)) +                    elif "Northamptonshire" in la: +                        print("Manually fixing Northamptonshire...") +                        cursor.execute("UPDATE employer SET insinuated_loc = %s, insinuated_loc_type = 'County' WHERE company_number = %s", ("Northamptonshire", id_)) +                    elif "Bournemouth" in la: +                        print("Manually fixing Bournemouth...") +                        cursor.execute("UPDATE employer SET insinuated_loc = %s, insinuated_loc_type = 'County' WHERE company_number = %s", ("Bournemouth", id_)) +                    else: +                        print("[%d/%d] Didn't recoginse the local authority '%s' or the county '%s'" % (i, len(postcodes), la, county)) +                else: +                    print("[%d/%d] Couldn't find a county for postcode '%s' (company id '%s')" % (i, len(postcodes), postcode, id_)) + +                # break +        self.__connection.commit() + +    def _get_postcodes(self): +        with self.__connection.cursor() as cursor: +            cursor.execute("SELECT company_number, TRIM(SUBSTRING_INDEX(address, ',', -1)) FROM employer;") +            return cursor.fetchall() +  if __name__ == "__main__": @@ -347,6 +427,8 @@ if __name__ == "__main__":          host = "db"      with PayGapDatabase(host = host) as db: -        print(db.get_years()) -        print(db.get_pay_by_sic_section("bonuses", None)) +        # print(db.get_years()) +        # print(db.get_pay_by_sic_section("bonuses", None)) +        print(db.get_heatmap_data("hourly", db.get_years()[0])) +        # print(db.append_counties()) @@ -78,4 +78,8 @@ if __name__ == "__main__":          host = "db"      with database.PayGapDatabase(host = host) as db: -        parse_csv(db, sys.argv[1])
\ No newline at end of file +        p = sys.argv[1] +        if os.path.basename(p) == "National_Statistics_Postcode_Lookup_UK.csv": +            db.append_counties(p) +        else: +            parse_csv(db, p)
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5e5339e..0dad989 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ lxml  python-dotenv  flask  requests +pandas diff --git a/static/scripts.js b/static/scripts.js index 2b1e47e..9b52215 100644 --- a/static/scripts.js +++ b/static/scripts.js @@ -11,6 +11,7 @@ function collapseTogglePress(elem, a_elem, num_hidden) {  const PLOT_FUNC_MAPPINGS = {      "years": draw_plot_years,      "sic_sec": draw_plot_sic_sections, +    "heatmap": draw_heatmap  }  $(document).ready(function() { @@ -83,7 +84,7 @@ function form_api_url(containerName, filters) {      //         }      //     }             // } -    // console.log("fetching ", url.toString()); +    console.log("fetching ", url.toString());      return url.toString();  } @@ -244,4 +245,75 @@ function draw_plot_sic_sections(containerName, filters) {              })          })      }) +} + +function draw_heatmap(containerName, filters) { +    fetch(form_api_url(containerName, filters)).then(resp => { + +        const isPreview = (containerName.substring(1, 6) === "chart"); + +        resp.json().then(data => { + +            var data2 = []; +            data.forEach(row => { +                data2.push([row[0], row[2]]); +            }); +            console.log(data2); + +            $.getJSON("/static/ukcounties.json", function(geojson) { + +                console.log(geojson); + +                Highcharts.mapChart(containerName, { +                    chart: { +                        map: geojson +                    }, +             +                    title: { +                        text: null +                    }, +             +                    accessibility: { +                        typeDescription: 'Map of the United Kingdom.' +                    }, +             +                    mapNavigation: { +                        enabled: isPreview, +                        buttonOptions: { +                            verticalAlign: 'bottom' +                        } +                    }, + +                    legend: { +                        enabled: isPreview, +                        layout: 'vertical', +                        align: 'right', +                        verticalAlign: 'middle', +                        itemMarginTop: 10, +                        itemMarginBottom: 10 +                    }, +             +                    colorAxis: { +                        stops: [ +                            [0, '#c4463a'], +                            [0.5, '#e6ffee'], +                            [0.9, '#009933'] +                        ], +                        min: -15 +                    }, +             +                    series: [{ +                        data: data2, +                        keys: ['name', 'value'], +                        joinBy: 'name', +                        name: 'Pay Gap', +                        color: 'Green', +                        tooltip: { +                            valueSuffix: '%' +                        } +                    }] +                }); +            });        +        }); +    })  }
\ No newline at end of file  | 
