Enhance: Logging and Upgrade hdx python lib #235

Merged 3 commits on Mar 20, 2024

39 changes: 39 additions & 0 deletions .github/pull_request_template.md
@@ -0,0 +1,39 @@
## What type of PR is this? (check all applicable)

- [ ] 🍕 Feature
- [ ] 🐛 Bug Fix
- [ ] 📝 Documentation
- [ ] 🧑‍💻 Refactor
- [ ] ✅ Test
- [ ] 🤖 Build or CI
- [ ] ❓ Other (please specify)

## Related Issue
Example:
- Resolve #123

## Describe this PR

A brief description of how this solves the issue.

## Screenshots

Please provide screenshots of the change.

## Considerations

Were any alternatives considered or tried out while developing this solution?

## Review Guide

Notes for the reviewer: how can this change be tested?

## Checklist before requesting a review

- 📖 Read the HOT Code of Conduct: <https://docs.hotosm.org/code-of-conduct>
- 👷‍♀️ Create small PRs. In most cases, this will be possible.
- ✅ Provide tests for your changes.
- 📝 Use descriptive commit messages.
- 📗 Update any related documentation and include any relevant screenshots.

## [optional] What gif best describes this PR or how it makes you feel?
17 changes: 11 additions & 6 deletions requirements.txt
@@ -6,11 +6,11 @@ geojson-pydantic==1.0.1

geojson==3.1.0

# Testing
## Testing
pytest==7.4.3
httpx==0.26.0

# # Used for new relic monitoring
## Used for new relic monitoring
newrelic==7.2.4.171
sentry-sdk==1.5.12

@@ -26,20 +26,21 @@ osm-login-python==1.0.2
humanize==4.9.0
python-slugify==8.0.1
geomet==1.1.0
#''' required for generating documentations '''

## documentation
# mkdocs-material==8.5.11
# mkdocs-jupyter==0.22.0
# neoteroi-mkdocs==0.1.2
# pdocs==1.0.1

# flower
## flower
flower==2.0.1

##duckdb
duckdb==0.9.2

##hdx
hdx-python-api==6.2.0
hdx-python-api==6.2.2

## only needed if postgres is used as celery backend
SQLAlchemy==2.0.25
@@ -48,4 +49,8 @@ SQLAlchemy==2.0.25
sozipfile==0.3.2
## zip memory optimization
zipfly==6.0.5
psutil==5.9.8
psutil==5.9.8


## logging
tqdm==4.66.2
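
For context, the `tqdm` dependency added above powers the progress logging this PR introduces in `src/app.py` below. A minimal sketch of the pattern, with an illustrative `export_task` standing in for the project's real export jobs (names and timings are hypothetical, not the project's actual code):

```python
import concurrent.futures
import time

from tqdm import tqdm


def export_task(n):
    """Stand-in for a long-running export job."""
    time.sleep(0.1)
    return n


with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(export_task, i) for i in range(10)]
    # tqdm wraps as_completed so each finished future advances a progress bar;
    # total= is required because as_completed is a plain iterator with no length.
    results = [
        future.result()
        for future in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc="Processing Export Formats",
        )
    ]
```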
30 changes: 23 additions & 7 deletions src/app.py
@@ -45,6 +45,7 @@
from psycopg2 import OperationalError, connect, sql
from psycopg2.extras import DictCursor
from slugify import slugify
from tqdm import tqdm

# Reader imports
from src.config import (
@@ -1245,16 +1246,18 @@ def __init__(self, params):
if not self.params.dataset.dataset_prefix:
self.params.dataset.dataset_prefix = dataset_prefix
if not self.params.dataset.dataset_locations:
self.params.dataset.dataset_locations = dataset_locations
self.params.dataset.dataset_locations = json.loads(dataset_locations)

self.uuid = str(uuid.uuid4().hex)
self.parallel_process_state = False

self.default_export_base_name = (
self.iso3.upper() if self.iso3 else self.params.dataset.dataset_prefix
)
self.default_export_path = os.path.join(
export_path,
self.uuid,
self.params.dataset.dataset_folder,
self.iso3.upper() if self.iso3 else self.params.dataset.dataset_prefix,
self.default_export_base_name,
)
if os.path.exists(self.default_export_path):
shutil.rmtree(self.default_export_path, ignore_errors=True)
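
An aside on the `json.loads` change in this hunk: it implies `dataset_locations` arrives as a JSON-encoded string rather than a Python list, so it is parsed before being stored on the dataset params. A minimal illustration (the location codes are hypothetical values):

```python
import json

# Hypothetical value: HDX locations serialized as a JSON string
dataset_locations = '["npl", "ind"]'

# After this PR the string is parsed into a real list before use
assert json.loads(dataset_locations) == ["npl", "ind"]
```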
@@ -1263,7 +1266,7 @@ def __init__(self, params):
if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True:
self.duck_db_db_path = os.path.join(
self.default_export_path,
f"{self.iso3 if self.iso3 else self.params.dataset.dataset_prefix}.db",
f"{self.default_export_base_name}.db",
)
self.duck_db_instance = DuckDB(self.duck_db_db_path)

@@ -1477,6 +1480,14 @@ def process_export_format(export_format):
future.result()
for future in concurrent.futures.as_completed(futures)
]
resources = [
future.result()
for future in tqdm(
concurrent.futures.as_completed(futures),
total=len(futures),
desc=f"{category_name.lower()}: Processing Export Formats",
)
]
else:
for exf in export_formats:
resource = process_export_format(exf)
@@ -1607,7 +1618,9 @@ def resource_to_hdx(self, uploaded_resources, dataset_config, category):
resource["uploaded_to_hdx"] = True
else:
non_hdx_resources.append(resource)
category_name, hdx_dataset_info = uploader.upload_dataset(self.params.meta)
category_name, hdx_dataset_info = uploader.upload_dataset(
self.params.meta and USE_S3_TO_UPLOAD
)
hdx_dataset_info["resources"].extend(non_hdx_resources)
return {category_name: hdx_dataset_info}
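
A note on the new `upload_dataset` argument above: Python's `and` short-circuits and returns one of its operands, so the argument is truthy only when both `self.params.meta` and `USE_S3_TO_UPLOAD` are truthy, presumably gating the metadata upload on S3 being enabled. A quick illustration (the flag values are hypothetical):

```python
# Python's `and` returns an operand, not a strict bool
print(True and True)    # True: upload path is taken
print(True and False)   # False: upload is skipped
print(False and True)   # False: short-circuits on the first falsy value
```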

@@ -1687,8 +1700,11 @@ def process_custom_categories(self):
executor.submit(self.process_category, category): category
for category in self.params.categories
}

for future in concurrent.futures.as_completed(futures):
for future in tqdm(
concurrent.futures.as_completed(futures),
total=len(futures),
desc=f"{self.default_export_base_name} : Processing Categories",
):
category = futures[future]
uploaded_resources = future.result()
category_result = CategoryResult(