From a2a8cdb710f737e1b4ca79adf46a68e599c61ff9 Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Mon, 15 May 2023 16:51:10 +0100 Subject: [PATCH 01/14] feat: added get_dataset_params method --- src/osdatahub/PlacesAPI/places_api.py | 30 ++++++++++++++++++++++++--- src/osdatahub/__init__.py | 2 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/osdatahub/PlacesAPI/places_api.py b/src/osdatahub/PlacesAPI/places_api.py index 32b451e..f23c138 100644 --- a/src/osdatahub/PlacesAPI/places_api.py +++ b/src/osdatahub/PlacesAPI/places_api.py @@ -9,6 +9,7 @@ from osdatahub import Extent from osdatahub.grow_list import GrowList from osdatahub.utils import addresses_to_geojson, validate_in_range +from osdatahub.codes import DATASET class PlacesAPI: @@ -36,14 +37,29 @@ def __init__(self, key: str): def __endpoint(self, api_name: str) -> str: return self.__ENDPOINT + api_name + f"?key={self.key}" + + @staticmethod + def __get_dataset_param(self, dataset: Union[str, Iterable] ) -> str: + if not isinstance(dataset, str): + dataset_unique = set(dataset) + shared_datasets = dataset_unique & DATASET + + if len(shared_datasets) == len(dataset_unique): + return ",".join(dataset_unique) + + elif dataset in DATASET: + return dataset + + raise ValueError(f"Unrecognised dataset, expected 'LPI', 'DPA' or ['LPI', 'DPA'], got {dataset}") def query( self, extent: Extent, output_crs: str = None, limit: int = 100, - classification_code: Union[str, Iterable] = None, - logical_status_code: Union[str, int] = None, + classification_code: Union[str, Iterable, None] = None, + logical_status_code: Union[str, int, None] = None, + dataset: Union[str, Iterable, None] = None ) -> FeatureCollection: """Run a query of the OS Places API within a given extent @@ -72,6 +88,10 @@ def query( params["params"].update( {"fq": self.__format_fq(classification_code, logical_status_code)} ) + + if dataset is not None: + + try: n_required = min(limit, 100) @@ -92,7 +112,8 @@ def find( classification_code: Union[str, Iterable] = None, logical_status_code: Union[str, int] = None, minmatch: float = None, - matchprecision: int = None + matchprecision: int = None, + dataset: Union[str, Iterable, None] = None ) -> FeatureCollection: """A free text query of the OS Places API @@ -139,6 +160,7 @@ def postcode( limit: int = 100, classification_code: Union[str, Iterable] = None, logical_status_code: Union[str, int] = None, + dataset: Union[str, Iterable, None] = None ) -> FeatureCollection: """A query based on a property’s postcode. The minimum for the resource is the area and district @@ -182,6 +204,7 @@ def uprn( output_crs: str = "EPSG:27700", classification_code: Union[str, Iterable] = None, logical_status_code: Union[str, int] = None, + dataset: Union[str, Iterable, None] = None ) -> FeatureCollection: """A query that takes a UPRN as the search parameter @@ -217,6 +240,7 @@ def nearest( output_crs: str = "EPSG:27700", classification_code: Union[str, Iterable] = None, logical_status_code: Union[str, int] = None, + dataset: Union[str, Iterable, None] = None ) -> FeatureCollection: """Takes a pair of coordinates (X, Y)/(Lon, Lat) as an input to determine the closest address. 
diff --git a/src/osdatahub/__init__.py b/src/osdatahub/__init__.py index e4526d2..534789c 100644 --- a/src/osdatahub/__init__.py +++ b/src/osdatahub/__init__.py @@ -18,4 +18,4 @@ def get_proxies(): from osdatahub.LinkedIdentifiersAPI import LinkedIdentifiersAPI from osdatahub.DownloadsAPI import OpenDataDownload, DataPackageDownload from osdatahub.NGD import NGD -from osdatahub.requests_wrapper import get \ No newline at end of file +from osdatahub.requests_wrapper import get, post \ No newline at end of file From 2fb9aafb3995cded78d745796feac08d2831bb8a Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Mon, 15 May 2023 16:53:34 +0100 Subject: [PATCH 02/14] docs: added TODO to places --- src/osdatahub/PlacesAPI/places_api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/osdatahub/PlacesAPI/places_api.py b/src/osdatahub/PlacesAPI/places_api.py index f23c138..914fd34 100644 --- a/src/osdatahub/PlacesAPI/places_api.py +++ b/src/osdatahub/PlacesAPI/places_api.py @@ -12,6 +12,14 @@ from osdatahub.codes import DATASET +""" +TODO: +- Add dataset to parameters +- Add test for dataset +- Remove "DPA" key from output +- Check for DPA in url +""" + class PlacesAPI: """Main class for querying the OS Places API (https://osdatahub.os.uk/docs/places/overview) From eb7f4c95250a8456c2e03f6d308a8b02ee43620e Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Tue, 16 May 2023 15:39:35 +0100 Subject: [PATCH 03/14] feat: added dataset type specification --- src/osdatahub/PlacesAPI/places_api.py | 46 ++++++++++++++++++--------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/osdatahub/PlacesAPI/places_api.py b/src/osdatahub/PlacesAPI/places_api.py index 914fd34..44f44cd 100644 --- a/src/osdatahub/PlacesAPI/places_api.py +++ b/src/osdatahub/PlacesAPI/places_api.py @@ -11,15 +11,6 @@ from osdatahub.utils import addresses_to_geojson, validate_in_range from osdatahub.codes import DATASET - -""" -TODO: -- Add dataset to parameters -- Add test for dataset -- Remove "DPA" key from output -- Check for DPA in url -""" - class PlacesAPI: """Main class for querying the OS Places API (https://osdatahub.os.uk/docs/places/overview) @@ -45,9 +36,9 @@ def __init__(self, key: str): def __endpoint(self, api_name: str) -> str: return self.__ENDPOINT + api_name + f"?key={self.key}" - + @staticmethod - def __get_dataset_param(self, dataset: Union[str, Iterable] ) -> str: + def __get_dataset_param(dataset: Union[str, Iterable] ) -> str: if not isinstance(dataset, str): dataset_unique = set(dataset) shared_datasets = dataset_unique & DATASET @@ -78,6 +69,7 @@ def query( Defaults to 100. classification_code (str|Iterable[str], optional): Classification codes to filter query by logical_status_code (str|int, optional): logical status codes to filter query by + dataset (str|Iterable, optional): The dataset to return. Multiple values can be sent, separated by a comma. Default: DPA. 
Returns: FeatureCollection: The results of the query in GeoJSON format @@ -96,10 +88,11 @@ def query( params["params"].update( {"fq": self.__format_fq(classification_code, logical_status_code)} ) - + if dataset is not None: - - + params["params"].update( + {"dataset": self.__get_dataset_param(dataset)} + ) try: n_required = min(limit, 100) @@ -134,6 +127,7 @@ def find( logical_status_code (str|int, optional): logical status codes to filter query by minmatch (float, optional): The minimum match score a result has to have to be returned matchprecision (int, optional): The decimal point position at which the match score value is to be truncated + dataset (str|Iterable, optional): The dataset to return. Multiple values can be sent, separated by a comma. Default: DPA. Returns: FeatureCollection: The results of the query in GeoJSON format @@ -149,6 +143,11 @@ def find( params.update( {"fq": self.__format_fq(classification_code, logical_status_code)} ) + + if dataset is not None: + params.update( + {"dataset": self.__get_dataset_param(dataset)} + ) try: n_required = min(limit, 100) @@ -184,6 +183,7 @@ def postcode( Defaults to 100. classification_code (str|Iterable[str], optional): Classification codes to filter query by logical_status_code (str|int, optional): logical status codes to filter query by + dataset (str|Iterable, optional): The dataset to return. Multiple values can be sent, separated by a comma. Default: DPA. Returns: FeatureCollection: The results of the query in GeoJSON format @@ -195,6 +195,11 @@ def postcode( params.update( {"fq": self.__format_fq(classification_code, logical_status_code)} ) + if dataset is not None: + params.update( + {"dataset": self.__get_dataset_param(dataset)} + ) + try: n_required = min(limit, 100) while n_required > 0 and data.grown: @@ -222,6 +227,7 @@ def uprn( Defaults to "EPSG:27700". classification_code (str|Iterable[str], optional): Classification codes to filter query by logical_status_code (str|int, optional): logical status codes to filter query by + dataset (str|Iterable, optional): The dataset to return. Multiple values can be sent, separated by a comma. Default: DPA. Returns: FeatureCollection: The results of the query in GeoJSON format @@ -233,6 +239,10 @@ def uprn( params.update( {"fq": self.__format_fq(classification_code, logical_status_code)} ) + if dataset is not None: + params.update( + {"dataset": self.__get_dataset_param(dataset)} + ) try: response = osdatahub.get(self.__endpoint("uprn"), params=params, proxies=osdatahub.get_proxies()) data.extend(self.__format_response(response)) @@ -262,6 +272,7 @@ def nearest( Defaults to "EPSG:27700". classification_code (str|Iterable[str], optional): Classification codes to filter query by logical_status_code (str|int, optional): logical status codes to filter query by + dataset (str|Iterable, optional): The dataset to return. Multiple values can be sent, separated by a comma. Default: DPA. 
Returns: FeatureCollection: The results of the query in GeoJSON format @@ -279,6 +290,10 @@ def nearest( params.update( {"fq": self.__format_fq(classification_code, logical_status_code)} ) + if dataset is not None: + params.update( + {"dataset": self.__get_dataset_param(dataset)} + ) try: response = osdatahub.get(self.__endpoint("nearest"), params=params, proxies=osdatahub.get_proxies()) data.extend(self.__format_response(response)) @@ -288,7 +303,8 @@ def nearest( @staticmethod def __format_response(response: requests.Response) -> list: - return [result["DPA"] for result in response.json()["results"]] + results = response.json()["results"] + return [result[list(result.keys())[0]] for result in results] @staticmethod def __format_fq( From a92e3b10858b05eb20a85072687a9101d548507f Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 17 May 2023 08:59:31 +0100 Subject: [PATCH 04/14] chore: update requirements and bump version --- requirements.txt | 10 +++++----- setup.cfg | 2 +- src/osdatahub/__init__.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 11aca18..a976cff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -geojson~=2.5.0 +geojson~=3.0.1 requests~=2.25.0 -typeguard~=2.13.0 -shapely~=1.8.0 -tqdm~=4.62.3 -setuptools>=65.5.1 \ No newline at end of file +typeguard~=4.0.0 +shapely~=2.0.0 +tqdm~=4.65.0 +setuptools>=67.7.2 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 340f842..d441337 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = osdatahub -version = 1.2.4 +version = 1.2.5 author = OS Rapid Prototyping author_email = rapidprototyping@os.uk classifiers = diff --git a/src/osdatahub/__init__.py b/src/osdatahub/__init__.py index 534789c..aeb1109 100644 --- a/src/osdatahub/__init__.py +++ b/src/osdatahub/__init__.py @@ -9,7 +9,7 @@ def set_proxies(proxies): def get_proxies(): return json.loads(os.environ["_OSDATAHUB_PROXIES"]) -__version__ = "1.2.4" +__version__ = "1.2.5" from osdatahub.extent import Extent from osdatahub.FeaturesAPI import FeaturesAPI From 77d12472495c64399181a4e607dd1e2339dcedd3 Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 17 May 2023 09:03:17 +0100 Subject: [PATCH 05/14] docs: update CHANGELOG --- CHANGELOG.md | 10 ++++++++++ requirements.txt | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9563ba8..9c14c55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## [1.2.5] - 2023/05/17 + +### Fixed + +- Import error for osdatahub.post in PlacesAPI [FHunt-OS] [JEPooley] + +### Features + +- Added support for the dataset parameter within the PlacesAPI wrapper [FHunt-OS] [JEPooley] + ## [1.2.4] - 2023/04/20 ### Fixed diff --git a/requirements.txt b/requirements.txt index a976cff..f3934d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ requests~=2.25.0 typeguard~=4.0.0 shapely~=2.0.0 tqdm~=4.65.0 -setuptools>=67.7.2 \ No newline at end of file +setuptools~=67.7.2 \ No newline at end of file From ccb21694371c336fd3d1c69d6e72be474810027f Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 17 May 2023 09:31:19 +0100 Subject: [PATCH 06/14] chore: revert typeguard back to 2.13.0 --- requirements.txt | 2 +- setup.cfg | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index f3934d8..cc7b3b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ geojson~=3.0.1 requests~=2.25.0 
-typeguard~=4.0.0 +typeguard~=2.13.0 shapely~=2.0.0 tqdm~=4.65.0 setuptools~=67.7.2 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index d441337..374dd15 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,11 +33,12 @@ url = https://github.com/OrdnanceSurvey/osdatahub [options] include_package_data = True install_requires = - geojson~=2.5.0 + geojson~=3.0.1 requests~=2.25.0 typeguard~=2.13.0 - shapely~=1.8.0 - tqdm~=4.62.3 + shapely~=2.0.0 + tqdm~=4.65.0 + setuptools~=67.7.2 python_requires = >=3.7 package_dir= =src From 1eb70ea1ca315c5845c338fb32583114336d209c Mon Sep 17 00:00:00 2001 From: Josh <43497823+JEPooley@users.noreply.github.com> Date: Wed, 17 May 2023 09:51:41 +0100 Subject: [PATCH 07/14] Update index.rst --- docs/index.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index ccf851a..2002a56 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,8 +1,3 @@ -.. osdatahub documentation master file, created by -sphinx-quickstart on Tue Nov 2 16:09:27 2021. -You can adapt this file completely to your liking, but it should at least -contain the root `toctree` directive. - Welcome to ``osdatahub's`` documentation! ========================================== From b74c7a973beacb342566a02c90b3b6993be096f7 Mon Sep 17 00:00:00 2001 From: gwionap <32736223+gwionap@users.noreply.github.com> Date: Tue, 13 Jun 2023 13:39:27 +0100 Subject: [PATCH 08/14] Update requirements.txt Updating requests version requirments. --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index cc7b3b7..9db1b48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ geojson~=3.0.1 -requests~=2.25.0 +requests~=2.31.0 typeguard~=2.13.0 shapely~=2.0.0 tqdm~=4.65.0 -setuptools~=67.7.2 \ No newline at end of file +setuptools~=67.7.2 From b99c657ab11daae6ed44209f137c9c41784ce7d5 Mon Sep 17 00:00:00 2001 From: gwionap <32736223+gwionap@users.noreply.github.com> Date: Tue, 13 Jun 2023 13:40:58 +0100 Subject: [PATCH 09/14] Update setup.cfg Updated requests in setup requirements --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 374dd15..c8cb6e1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,7 +34,7 @@ url = https://github.com/OrdnanceSurvey/osdatahub include_package_data = True install_requires = geojson~=3.0.1 - requests~=2.25.0 + requests~=2.31.0 typeguard~=2.13.0 shapely~=2.0.0 tqdm~=4.65.0 From 6d6c491d2c2df9b7d57914f4b8ca59ada7cfaa42 Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 28 Jun 2023 14:37:54 +0100 Subject: [PATCH 10/14] feat: add check for response chunk size in download stream --- src/osdatahub/DownloadsAPI/downloads_api.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/osdatahub/DownloadsAPI/downloads_api.py b/src/osdatahub/DownloadsAPI/downloads_api.py index a2099b2..e203d16 100644 --- a/src/osdatahub/DownloadsAPI/downloads_api.py +++ b/src/osdatahub/DownloadsAPI/downloads_api.py @@ -45,18 +45,25 @@ def download(self, output_dir: Union[str, Path], overwrite: bool = False, pbar: response = requests.get(self.url, stream=True, proxies=osdatahub.get_proxies()) response.raise_for_status() - size = int(response.headers.get('content-length')) - chunk_size = 1024 + expected_size = int(response.headers.get('content-length')) + current_size = 0 + chunk_size = 1048576 # 1024 ** 2 -> 1MB if response.status_code == 200: with open(output_path, 'wb') as f: if not pbar: - pbar = 
tqdm(total=size, desc=self.file_name, unit="B", unit_scale=True, leave=True) + pbar = tqdm(total=expected_size, desc=self.file_name, unit="B", unit_scale=True, leave=True) for chunk in response.iter_content(chunk_size=chunk_size): + current_size += len(chunk) f.write(chunk) f.flush() pbar.update(chunk_size) - # pbar.write(f"Finished downloading {self.file_name} to {output_path}") + if expected_size != current_size: + deficit = expected_size - current_size + raise IOError( + f'incomplete read ({current_size} bytes read, {deficit} more expected)' + ) + pbar.write(f"Finished downloading {self.file_name} to {output_path}") return output_path From b5a9aff331c81ee9b632d11fa75589e709209e09 Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 28 Jun 2023 14:45:39 +0100 Subject: [PATCH 11/14] chore: update CHANGELOG and version number --- CHANGELOG.md | 5 +++++ setup.cfg | 2 +- src/osdatahub/__init__.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c14c55..960e80c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## [1.2.6] - 2023/06/28 + +### Features +- Added check for chunk size when straming data. Program should error if file download is incomplete + ## [1.2.5] - 2023/05/17 ### Fixed diff --git a/setup.cfg b/setup.cfg index c8cb6e1..c7bf17a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = osdatahub -version = 1.2.5 +version = 1.2.6 author = OS Rapid Prototyping author_email = rapidprototyping@os.uk classifiers = diff --git a/src/osdatahub/__init__.py b/src/osdatahub/__init__.py index aeb1109..6cf5ccf 100644 --- a/src/osdatahub/__init__.py +++ b/src/osdatahub/__init__.py @@ -9,7 +9,7 @@ def set_proxies(proxies): def get_proxies(): return json.loads(os.environ["_OSDATAHUB_PROXIES"]) -__version__ = "1.2.5" +__version__ = "1.2.6" from osdatahub.extent import Extent from osdatahub.FeaturesAPI import FeaturesAPI From ed70a977b3a482059c984c5c784efd4b12674cf7 Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 28 Jun 2023 14:46:07 +0100 Subject: [PATCH 12/14] chore: update CHANGELOG and version number --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 960e80c..54cef88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## [1.2.6] - 2023/06/28 ### Features -- Added check for chunk size when straming data. Program should error if file download is incomplete +- Added check for chunk size when straming data. Program should error if file download is incomplete [JEPooley] ## [1.2.5] - 2023/05/17 From aecde6eb8aae800200ed3896db4a225b4af3a3ea Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 28 Jun 2023 14:46:51 +0100 Subject: [PATCH 13/14] chore: update CHANGELOG and version number --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54cef88..5356e83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## [1.2.6] - 2023/06/28 ### Features -- Added check for chunk size when straming data. Program should error if file download is incomplete [JEPooley] +- Added check for chunk size when streaming data. 
Program should error if file download is incomplete [JEPooley] ## [1.2.5] - 2023/05/17 From 2e173b1680f6e3f6b4506f94ea30af15a35b2dbe Mon Sep 17 00:00:00 2001 From: Josh Pooley Date: Wed, 28 Jun 2023 15:11:55 +0100 Subject: [PATCH 14/14] chore: update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5356e83..5c3d13f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ ### Features - Added check for chunk size when streaming data. Program should error if file download is incomplete [JEPooley] + +### Changed +- Upgrade requests version in dependencies to 2.31.0 [gwionap] + ## [1.2.5] - 2023/05/17 ### Fixed
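The preceding patches (01-03) add an optional `dataset` keyword to every PlacesAPI query method (query, find, postcode, uprn, nearest), validated against the codes in osdatahub.codes.DATASET before any request is sent. The sketch below is illustrative only and is not part of the patch series; it assumes a valid OS Data Hub project key exported as OS_API_KEY (a made-up variable name) and uses the library's existing Extent.from_bbox helper.

    # Illustrative usage of the new `dataset` keyword (patches 01-03).
    # OS_API_KEY is a made-up environment variable holding a real project key;
    # a live key and network access are needed for these calls to succeed.
    import os

    from osdatahub import Extent, PlacesAPI

    places = PlacesAPI(os.environ["OS_API_KEY"])

    # A single dataset: return only LPI records for a postcode search.
    lpi_only = places.postcode("SO16 0AS", dataset="LPI", limit=10)

    # Both datasets: DPA and LPI records for addresses inside a bounding box.
    extent = Extent.from_bbox((437000, 115500, 437500, 116000), "EPSG:27700")
    both = places.query(extent, dataset=["DPA", "LPI"], limit=10)

    # Anything outside DATASET is rejected before a request is sent.
    try:
        places.postcode("SO16 0AS", dataset="UPRN")
    except ValueError as error:
        print(error)

When an iterable is passed, __get_dataset_param de-duplicates it and joins the values into a single comma-separated query parameter, which matches the API's documented "multiple values separated by a comma" behaviour.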
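Patch 03 also generalises __format_response: instead of the hard-coded result["DPA"] lookup, it reads each result's first key, so records returned under the LPI key are no longer dropped. A standalone sketch of that unwrapping step, using an invented payload in the shape the Places API returns (each result wraps the address record under its dataset key):

    # Invented sample in the shape of a Places API "results" array: each entry
    # wraps the address record under its dataset key ("DPA" or "LPI").
    sample_results = [
        {"DPA": {"UPRN": "200010019924", "ADDRESS": "10 Example Street"}},
        {"LPI": {"UPRN": "200010019925", "ADDRESS": "12 Example Street"}},
    ]

    # Equivalent to the patched comprehension
    # [result[list(result.keys())[0]] for result in results].
    def unwrap(results: list) -> list:
        return [next(iter(result.values())) for result in results]

    print(unwrap(sample_results))  # both records, regardless of dataset key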
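Patch 10 protects streamed downloads by counting the bytes actually received and comparing them with the Content-Length header, raising IOError on a short read. Below is a condensed, standalone sketch of the same pattern; the URL is a placeholder, and the tqdm progress bar and proxy handling used by the real download method are omitted.

    import requests

    # Condensed sketch of the short-read check added in patch 10. The URL is a
    # placeholder; progress reporting and proxies are omitted for brevity.
    def download_with_size_check(url: str, output_path: str,
                                 chunk_size: int = 1048576) -> str:
        response = requests.get(url, stream=True)
        response.raise_for_status()

        expected_size = int(response.headers.get("content-length", 0))
        current_size = 0

        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                current_size += len(chunk)  # bytes received, not chunks requested
                f.write(chunk)

        if expected_size and current_size != expected_size:
            deficit = expected_size - current_size
            raise IOError(
                f"incomplete read ({current_size} bytes read, {deficit} more expected)"
            )
        return output_path

Counting len(chunk) rather than the requested chunk_size is what makes the check exact, since the final chunk is normally smaller than the configured chunk size.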