Skip to content

Commit

Permalink
Merge branch 'main' into update-to-ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
rth authored Apr 12, 2024
2 parents 7f6a948 + 8da3a87 commit 520f786
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 9 deletions.
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ repos:


- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.3.0
hooks:
- id: black

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.991
rev: v1.1.1
hooks:
- id: mypy
files: ".+.py"
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Release notes

## Version 0.4.1

*September 7, 2023*

- The `NA` county code (Naples, Italy) is now kept as a valid value instead of being parsed as NaN
[#74](https://github.com/symerio/pgeocode/pull/74)

## Version 0.4.0

*December 13, 2022*
Expand Down
40 changes: 34 additions & 6 deletions pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import numpy as np
import pandas as pd

__version__ = "0.4.0"
__version__ = "0.4.1"

STORAGE_DIR = os.environ.get(
"PGEOCODE_DATA_DIR",
Expand Down Expand Up @@ -141,6 +141,27 @@
"ZA",
]

# Strings treated as missing values when the postal-code CSV files are read.
# The list is passed to ``pd.read_csv`` together with ``keep_default_na=False``
# so that only these markers become NaN.
#
# NOTE: the plain string "NA" is deliberately absent from this list, because
# "NA" is a valid county code (Naples, Italy) and must survive parsing.
NA_VALUES = [
    # empty field and spreadsheet-style markers
    "", "#N/A", "#N/A N/A", "#NA",
    # indeterminate / quiet-NaN spellings
    "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", "1.#QNAN",
    # textual markers ("NA" itself intentionally left out, see note above)
    "<NA>", "N/A", "NULL", "NaN", "n/a", "nan", "null",
]


@contextlib.contextmanager
def _open_extract_url(url: str, country: str) -> Any:
Expand Down Expand Up @@ -202,7 +223,6 @@ class Nominatim:
"""

def __init__(self, country: str = "fr", unique: bool = True):

country = country.upper()
if country not in COUNTRIES_VALID:
raise ValueError(
Expand Down Expand Up @@ -232,7 +252,12 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]:

data_path = os.path.join(STORAGE_DIR, country.upper() + ".txt")
if os.path.exists(data_path):
data = pd.read_csv(data_path, dtype={"postal_code": str})
data = pd.read_csv(
data_path,
dtype={"postal_code": str},
na_values=NA_VALUES,
keep_default_na=False,
)
else:
download_urls = [
val.format(country=country) for val in DOWNLOAD_URL
Expand All @@ -244,10 +269,11 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]:
header=None,
names=DATA_FIELDS,
dtype={"postal_code": str},
na_values=NA_VALUES,
keep_default_na=False,
)
os.makedirs(STORAGE_DIR, exist_ok=True)
data.to_csv(data_path, index=None)

return data_path, data

def _index_postal_codes(self) -> pd.DataFrame:
Expand All @@ -256,10 +282,12 @@ def _index_postal_codes(self) -> pd.DataFrame:

if os.path.exists(data_path_unique):
data_unique = pd.read_csv(
data_path_unique, dtype={"postal_code": str}
data_path_unique,
dtype={"postal_code": str},
na_values=NA_VALUES,
keep_default_na=False,
)
else:

# group together places with the same postal code
df_unique_cp_group = self._data.groupby("postal_code")
data_unique = df_unique_cp_group[["latitude", "longitude"]].mean()
Expand Down
9 changes: 8 additions & 1 deletion test_pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ def test_nominatim_all_countries(country):


def test_nominatim_distance_postal_code():

gdist = GeoDistance("fr")

dist = gdist.query_postal_code("91120", "91120")
Expand Down Expand Up @@ -279,6 +278,14 @@ def test_query_location_exact():
assert res["state_name"].unique().tolist() == ["Île-de-France"]


def test_location_naples():
    """Check that the "NA" county code of Napoli is not parsed as missing.

    Regression test for https://github.com/symerio/pgeocode/issues/73
    """
    napoli = Nominatim("it").query_location("Napoli")

    county_names = napoli["county_name"].unique().tolist()
    county_codes = napoli["county_code"].unique().tolist()

    assert county_names == ["Napoli"]
    assert county_codes == ["NA"]


def test_query_location_fuzzy():
pytest.importorskip("thefuzz")
nomi = Nominatim("fr")
Expand Down

0 comments on commit 520f786

Please sign in to comment.