From 3e3235b24d6560c0599faa19d513c90fbaa661d2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Apr 2023 18:48:49 +0200 Subject: [PATCH 1/4] [pre-commit.ci] pre-commit autoupdate (#68) --- .pre-commit-config.yaml | 8 ++++---- pgeocode.py | 2 -- test_pgeocode.py | 1 - 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 36c148a..6e146bf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 + rev: v4.4.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -8,7 +8,7 @@ repos: - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.3.0 hooks: - id: black @@ -19,7 +19,7 @@ repos: args: ["--py38-plus"] - repo: https://github.com/hadialqattan/pycln - rev: "v2.1.2" + rev: "v2.1.3" hooks: - id: pycln args: [--config=pyproject.toml] @@ -27,7 +27,7 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.991 + rev: v1.1.1 hooks: - id: mypy files: ".+.py" diff --git a/pgeocode.py b/pgeocode.py index 2670580..69f0911 100644 --- a/pgeocode.py +++ b/pgeocode.py @@ -202,7 +202,6 @@ class Nominatim: """ def __init__(self, country: str = "fr", unique: bool = True): - country = country.upper() if country not in COUNTRIES_VALID: raise ValueError( @@ -259,7 +258,6 @@ def _index_postal_codes(self) -> pd.DataFrame: data_path_unique, dtype={"postal_code": str} ) else: - # group together places with the same postal code df_unique_cp_group = self._data.groupby("postal_code") data_unique = df_unique_cp_group[["latitude", "longitude"]].mean() diff --git a/test_pgeocode.py b/test_pgeocode.py index 72f5003..7ba37bb 100644 --- a/test_pgeocode.py +++ b/test_pgeocode.py @@ -133,7 +133,6 @@ def test_nominatim_all_countries(country): def test_nominatim_distance_postal_code(): - gdist = GeoDistance("fr") dist = gdist.query_postal_code("91120", 
"91120") From 143ee338423921cc0232cc45f3eed9fdc118f081 Mon Sep 17 00:00:00 2001 From: Alessandro Lorenzi Date: Sun, 7 May 2023 09:32:04 +0200 Subject: [PATCH 2/4] fix: NA county_code is valid, not Nan (#74) refs: https://github.com/symerio/pgeocode/issues/73 --- pgeocode.py | 36 +++++++++++++++++++++++++++++++++--- test_pgeocode.py | 8 ++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/pgeocode.py b/pgeocode.py index 69f0911..85e3427 100644 --- a/pgeocode.py +++ b/pgeocode.py @@ -141,6 +141,27 @@ "ZA", ] +NA_VALUES = [ + "", + "#N/A", + "#N/A N/A", + "#NA", + "-1.#IND", + "-1.#QNAN", + "-NaN", + "-nan", + "1.#IND", + "1.#QNAN", + "<NA>", + "N/A", + # "NA", # NA is a valid county code for Naples, Italy + "NULL", + "NaN", + "n/a", + "nan", + "null", +] + @contextlib.contextmanager def _open_extract_url(url: str, country: str) -> Any: @@ -231,7 +252,12 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]: data_path = os.path.join(STORAGE_DIR, country.upper() + ".txt") if os.path.exists(data_path): - data = pd.read_csv(data_path, dtype={"postal_code": str}) + data = pd.read_csv( + data_path, + dtype={"postal_code": str}, + na_values=NA_VALUES, + keep_default_na=False, + ) else: download_urls = [ val.format(country=country) for val in DOWNLOAD_URL @@ -243,10 +269,11 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]: header=None, names=DATA_FIELDS, dtype={"postal_code": str}, + na_values=NA_VALUES, + keep_default_na=False, ) os.makedirs(STORAGE_DIR, exist_ok=True) data.to_csv(data_path, index=None) - return data_path, data def _index_postal_codes(self) -> pd.DataFrame: @@ -255,7 +282,10 @@ def _index_postal_codes(self) -> pd.DataFrame: if os.path.exists(data_path_unique): data_unique = pd.read_csv( - data_path_unique, dtype={"postal_code": str} + data_path_unique, + dtype={"postal_code": str}, + na_values=NA_VALUES, + keep_default_na=False, ) else: # group together places with the same postal code diff --git a/test_pgeocode.py 
b/test_pgeocode.py index 7ba37bb..3fa3c12 100644 --- a/test_pgeocode.py +++ b/test_pgeocode.py @@ -278,6 +278,14 @@ def test_query_location_exact(): assert res["state_name"].unique().tolist() == ["Île-de-France"] +def test_location_naples(): + # https://github.com/symerio/pgeocode/issues/73 + nomi = Nominatim("it") + res = nomi.query_location("Napoli") + assert res["county_name"].unique().tolist() == ["Napoli"] + assert res["county_code"].unique().tolist() == ["NA"] + + def test_query_location_fuzzy(): pytest.importorskip("thefuzz") nomi = Nominatim("fr") From e49388ef814701ecf974a87594dbfd1e180240ec Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 7 Sep 2023 20:10:10 +0200 Subject: [PATCH 3/4] Version 0.4.1 (#77) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99f9ce7..b81a9b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Release notes +## Version 0.4.1 + +*September 7, 2023* + + - NA county_code is valid, not Nan + [#74](https://github.com/symerio/pgeocode/pull/74) + ## Version 0.4.0 *December 13, 2022* From 8da3a878ddb6b3675b639ac216a40e05e3202b6c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 7 Sep 2023 20:15:20 +0200 Subject: [PATCH 4/4] Fix version in the source code --- pgeocode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgeocode.py b/pgeocode.py index 85e3427..342d411 100644 --- a/pgeocode.py +++ b/pgeocode.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd -__version__ = "0.4.0" +__version__ = "0.4.1" STORAGE_DIR = os.environ.get( "PGEOCODE_DATA_DIR",