summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndrej Mihajlov <and@mullvad.net>2019-04-02 20:26:52 +0200
committerAndrej Mihajlov <and@mullvad.net>2019-04-04 11:52:34 +0200
commitc6973b13cc0a235e0a306575b2ee2c6a0302742b (patch)
tree05aab9e4e81d0971ab69e03b383f1c82ee0ce519
parent5ea3c63fb2c174502bc0dfff96e655b499286474 (diff)
downloadmullvadvpn-c6973b13cc0a235e0a306575b2ee2c6a0302742b.tar.xz
mullvadvpn-c6973b13cc0a235e0a306575b2ee2c6a0302742b.zip
Filter places dataset by >=50k population
-rw-r--r--gui/geo-data/extract-geo-data.py71
-rw-r--r--gui/geo-data/requirements.txt3
2 files changed, 63 insertions, 11 deletions
diff --git a/gui/geo-data/extract-geo-data.py b/gui/geo-data/extract-geo-data.py
index 0c3d7481e5..6e575caaed 100644
--- a/gui/geo-data/extract-geo-data.py
+++ b/gui/geo-data/extract-geo-data.py
@@ -2,23 +2,26 @@
This module forms a geo json of highly populated cities in the world
"""
-import os
+from os import path, makedirs
import json
+from polib import POFile, POEntry
from subprocess import Popen, PIPE
# import order is important, see https://github.com/Toblerity/Shapely/issues/553
from shapely.geometry import shape, mapping
import fiona
-SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
-OUT_DIR = os.path.join(SCRIPT_DIR, "out")
+SCRIPT_DIR = path.dirname(path.realpath(__file__))
+OUT_DIR = path.join(SCRIPT_DIR, "out")
+
+POPULATION_MAX_FILTER = 50000
def get_shape_path(dataset_name):
- return os.path.join(SCRIPT_DIR, dataset_name, dataset_name + ".shp")
+ return path.join(SCRIPT_DIR, dataset_name, dataset_name + ".shp")
def extract_cites():
input_path = get_shape_path("ne_50m_populated_places_simple")
- output_path = os.path.join(OUT_DIR, "cities.json")
+ output_path = path.join(OUT_DIR, "cities.json")
props_to_keep = frozenset(["scalerank", "name", "latitude", "longitude"])
@@ -26,7 +29,7 @@ def extract_cites():
with fiona.collection(input_path, "r") as source:
for feat in source:
props = feat["properties"]
- if props["scalerank"] < 8:
+ if props["pop_max"] >= POPULATION_MAX_FILTER:
for k in frozenset(props) - props_to_keep:
del props[k]
features.append(feat)
@@ -44,7 +47,7 @@ def extract_cites():
def extract_countries():
input_path = get_shape_path("ne_50m_admin_0_countries")
- output_path = os.path.join(OUT_DIR, "countries.json")
+ output_path = path.join(OUT_DIR, "countries.json")
props_to_keep = frozenset(["name"])
@@ -79,7 +82,7 @@ def extract_countries():
def extract_geometry():
input_path = get_shape_path("ne_50m_admin_0_countries")
- output_path = os.path.join(OUT_DIR, "geometry.json")
+ output_path = path.join(OUT_DIR, "geometry.json")
features = []
with fiona.open(input_path) as source:
@@ -107,7 +110,7 @@ def extract_geometry():
def extract_provinces_and_states_lines():
input_path = get_shape_path("ne_50m_admin_1_states_provinces_lines")
- output_path = os.path.join(OUT_DIR, "states-provinces-lines.json")
+ output_path = path.join(OUT_DIR, "states-provinces-lines.json")
features = []
with fiona.open(input_path) as source:
@@ -132,13 +135,59 @@ def extract_provinces_and_states_lines():
else:
print "geo2topo exited with {}. {}".format(p.returncode, errors.decode('utf-8').strip())
+def extract_countries_pot():
+ input_path = get_shape_path("ne_50m_admin_0_countries")
+ input_basename = path.basename(input_path)
+ output_path = path.join(OUT_DIR, "countries.pot")
+
+ pot = POFile(encoding='UTF-8')
+
+ with fiona.open(input_path) as source:
+ for feat in source:
+ # lowercase all keys
+ props = dict((k.lower(), v) for k, v in feat["properties"].iteritems())
+
+ entry = POEntry(
+ msgid=props["name"],
+ msgstr=u"",
+ occurrences=[(input_basename, feat["id"])]
+ )
+ pot.append(entry)
+
+ pot.save(output_path)
+ print "Extracted {} countries to {}".format(len(pot), output_path)
+
+def extract_cities_pot():
+ input_path = get_shape_path("ne_50m_populated_places_simple")
+ input_basename = path.basename(input_path)
+ output_path = path.join(OUT_DIR, "cities.pot")
+
+ pot = POFile(encoding='UTF-8')
+
+ with fiona.open(input_path) as source:
+ for feat in source:
+ props = feat["properties"]
+ if props["pop_max"] >= POPULATION_MAX_FILTER:
+ entry = POEntry(
+ msgid=props["name"],
+ msgstr=u"",
+ comment=u"{} {}".format(props["adm0name"], props["adm0_a3"]),
+ occurrences=[(input_basename, feat["id"])]
+ )
+ pot.append(entry)
+
+ pot.save(output_path)
+ print "Extracted {} cities to {}".format(len(pot), output_path)
+
# ensure output path exists
-if not os.path.exists(OUT_DIR):
- os.makedirs(OUT_DIR)
+if not path.exists(OUT_DIR):
+ makedirs(OUT_DIR)
# extract all data
extract_cites()
extract_countries()
extract_geometry()
extract_provinces_and_states_lines()
+extract_countries_pot()
+extract_cities_pot()
diff --git a/gui/geo-data/requirements.txt b/gui/geo-data/requirements.txt
index 0b8f4a3025..f7cf53c2fc 100644
--- a/gui/geo-data/requirements.txt
+++ b/gui/geo-data/requirements.txt
@@ -23,3 +23,6 @@ Shapely==1.6.4.post2 \
--hash=sha256:ba58b21b9cf3c33725f7f530febff9ed6a6846f9d0bf8a120fc74683ff919f89 \
--hash=sha256:c4b87bb61fc3de59fc1f85e71a79b0c709dc68364d9584473697aad4aa13240f \
--hash=sha256:ebb4d2bee7fac3f6c891fcdafaa17f72ab9c6480f6d00de0b2dc9a5137dfe342
+polib==1.1.0 \
+ --hash=sha256:93b730477c16380c9a96726c54016822ff81acfa553977fdd131f2b90ba858d7 \
+ --hash=sha256:fad87d13696127ffb27ea0882d6182f1a9cf8a5e2b37a587751166c51e5a332a