From e4b02ab08d75c578b88f1476a0a46e994ac6c326 Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Wed, 20 Mar 2024 20:40:52 +0545
Subject: [PATCH 1/3] refactor(logging): adds tqdm for better logging and upgrades hdx python lib

---
 requirements.txt | 17 +++++++++++------
 src/app.py       | 30 ++++++++++++++++++++++--------
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c3f7956c..aa30b3bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,11 +6,11 @@ geojson-pydantic==1.0.1
 geojson==3.1.0
 
-# Testing
+## Testing
 pytest==7.4.3
 httpx==0.26.0
 
-# # Used for new relic monitoring
+## Used for new relic monitoring
 newrelic==7.2.4.171
 sentry-sdk==1.5.12
 
@@ -26,20 +26,21 @@ osm-login-python==1.0.2
 humanize==4.9.0
 python-slugify==8.0.1
 geomet==1.1.0
-#''' required for generating documentations '''
+
+## documentation
 # mkdocs-material==8.5.11
 # mkdocs-jupyter==0.22.0
 # neoteroi-mkdocs==0.1.2
 # pdocs==1.0.1
 
-# flower
+## flower
 flower==2.0.1
 
 ##duckdb
 duckdb==0.9.2
 
 ##hdx
-hdx-python-api==6.2.0
+hdx-python-api==6.2.2
 
 ## only needed if postgres is used as celery backend
 SQLAlchemy==2.0.25
@@ -48,4 +49,8 @@ SQLAlchemy==2.0.25
 sozipfile==0.3.2
 ## zip memory optimization
 zipfly==6.0.5
-psutil==5.9.8
\ No newline at end of file
+psutil==5.9.8
+
+
+## logging
+tqdm==4.66.2
\ No newline at end of file
diff --git a/src/app.py b/src/app.py
index e070b5da..9424b04b 100644
--- a/src/app.py
+++ b/src/app.py
@@ -45,6 +45,7 @@
 from psycopg2 import OperationalError, connect, sql
 from psycopg2.extras import DictCursor
 from slugify import slugify
+from tqdm import tqdm
 
 # Reader imports
 from src.config import (
@@ -1245,16 +1246,18 @@ def __init__(self, params):
         if not self.params.dataset.dataset_prefix:
             self.params.dataset.dataset_prefix = dataset_prefix
         if not self.params.dataset.dataset_locations:
-            self.params.dataset.dataset_locations = dataset_locations
+            self.params.dataset.dataset_locations = json.loads(dataset_locations)
 
         self.uuid = str(uuid.uuid4().hex)
         self.parallel_process_state = False
-
+        self.default_export_base_name = (
+            self.iso3.upper() if self.iso3 else self.params.dataset.dataset_prefix
+        )
         self.default_export_path = os.path.join(
             export_path,
             self.uuid,
             self.params.dataset.dataset_folder,
-            self.iso3.upper() if self.iso3 else self.params.dataset.dataset_prefix,
+            self.default_export_base_name,
         )
         if os.path.exists(self.default_export_path):
             shutil.rmtree(self.default_export_path, ignore_errors=True)
@@ -1263,7 +1266,7 @@ def __init__(self, params):
         if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True:
             self.duck_db_db_path = os.path.join(
                 self.default_export_path,
-                f"{self.iso3 if self.iso3 else self.params.dataset.dataset_prefix}.db",
+                f"{self.default_export_base_name}.db",
             )
             self.duck_db_instance = DuckDB(self.duck_db_db_path)
 
@@ -1477,6 +1480,14 @@ def process_export_format(export_format):
                     future.result()
                     for future in concurrent.futures.as_completed(futures)
                 ]
+                resources = [
+                    future.result()
+                    for future in tqdm(
+                        concurrent.futures.as_completed(futures),
+                        total=len(futures),
+                        desc=f"{category_name.lower()}: Processing Export Formats",
+                    )
+                ]
         else:
             for exf in export_formats:
                 resource = process_export_format(exf)
@@ -1493,7 +1504,7 @@ def process_category_result(self, category_result):
         Returns:
         - Dictionary containing processed category result.
""" - if self.params.hdx_upload and ENABLE_HDX_EXPORTS: + if self.params.hdx_upload and ENABLE_HDX_EXPORTS : return self.resource_to_hdx( uploaded_resources=category_result.uploaded_resources, dataset_config=self.params.dataset, @@ -1607,7 +1618,7 @@ def resource_to_hdx(self, uploaded_resources, dataset_config, category): resource["uploaded_to_hdx"] = True else: non_hdx_resources.append(resource) - category_name, hdx_dataset_info = uploader.upload_dataset(self.params.meta) + category_name, hdx_dataset_info = uploader.upload_dataset(self.params.meta and USE_S3_TO_UPLOAD) hdx_dataset_info["resources"].extend(non_hdx_resources) return {category_name: hdx_dataset_info} @@ -1687,8 +1698,11 @@ def process_custom_categories(self): executor.submit(self.process_category, category): category for category in self.params.categories } - - for future in concurrent.futures.as_completed(futures): + for future in tqdm( + concurrent.futures.as_completed(futures), + total=len(futures), + desc=f"{self.default_export_base_name} : Processing Categories", + ): category = futures[future] uploaded_resources = future.result() category_result = CategoryResult( From 75bf742c54d3704118f89f35024651cbac784f07 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Wed, 20 Mar 2024 20:44:01 +0545 Subject: [PATCH 2/3] style(formatting): format app.py --- src/app.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/app.py b/src/app.py index 9424b04b..e69d3523 100644 --- a/src/app.py +++ b/src/app.py @@ -1504,7 +1504,7 @@ def process_category_result(self, category_result): Returns: - Dictionary containing processed category result. """ - if self.params.hdx_upload and ENABLE_HDX_EXPORTS : + if self.params.hdx_upload and ENABLE_HDX_EXPORTS: return self.resource_to_hdx( uploaded_resources=category_result.uploaded_resources, dataset_config=self.params.dataset, @@ -1618,7 +1618,9 @@ def resource_to_hdx(self, uploaded_resources, dataset_config, category): resource["uploaded_to_hdx"] = True else: non_hdx_resources.append(resource) - category_name, hdx_dataset_info = uploader.upload_dataset(self.params.meta and USE_S3_TO_UPLOAD) + category_name, hdx_dataset_info = uploader.upload_dataset( + self.params.meta and USE_S3_TO_UPLOAD + ) hdx_dataset_info["resources"].extend(non_hdx_resources) return {category_name: hdx_dataset_info} From 1198e6ece91de8b7386aa71759356d10bdd72b4f Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Wed, 20 Mar 2024 20:51:23 +0545 Subject: [PATCH 3/3] ci(pr-template): adds new pr template --- .github/pull_request_template.md | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..295d4e06 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,39 @@ +## What type of PR is this? (check all applicable) + +- [ ] 🍕 Feature +- [ ] 🐛 Bug Fix +- [ ] 📝 Documentation +- [ ] 🧑‍💻 Refactor +- [ ] ✅ Test +- [ ] 🤖 Build or CI +- [ ] ❓ Other (please specify) + +## Related Issue +Example : +- Resolve #123 + +## Describe this PR + +A brief description of how this solves the issue. + +## Screenshots + +Please provide screenshots of the change. + +## Consideration + +Are there any alternatives considered / tried out during development for solution. + +## Review Guide + +Notes for the reviewer. How to test this change? 
+
+## Checklist before requesting a review
+
+- 📖 Read the HOT Code of Conduct:
+- 👷‍♀️ Create small PRs. In most cases, this will be possible.
+- ✅ Provide tests for your changes.
+- 📝 Use descriptive commit messages.
+- 📗 Update any related documentation and include any relevant screenshots.
+
+## [optional] What gif best describes this PR or how it makes you feel?