From 88b79cef1f65ab86a32733f4e4742970333e8f8a Mon Sep 17 00:00:00 2001
From: spwoodcock
Date: Tue, 20 Feb 2024 09:30:15 +0000
Subject: [PATCH 1/8] docs: symlink backend install docs for mkdocs

---
 docs/mkdocs.yml                  | 1 +
 docs/src/installation/backend.md | 1 +
 2 files changed, 2 insertions(+)
 create mode 120000 docs/src/installation/backend.md

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 3e018f8f..c3078185 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -19,6 +19,7 @@ extra:
 nav:
   - Raw Data API : "index.md"
   - Installation:
+      - Backend: "installation/backend.md"
       - Docker Installation: "installation/docker.md"
       - Configurations: "installation/configurations.md"
 # - User Guide: 'user_guide/index.md'

diff --git a/docs/src/installation/backend.md b/docs/src/installation/backend.md
new file mode 120000
index 00000000..dca5deb1
--- /dev/null
+++ b/docs/src/installation/backend.md
@@ -0,0 +1 @@
+../../../backend/Readme.md
\ No newline at end of file

From 95f251aeacdb4d01a01fb3ee35bbe9cd8e7fe2de Mon Sep 17 00:00:00 2001
From: spwoodcock
Date: Tue, 20 Feb 2024 09:30:29 +0000
Subject: [PATCH 2/8] docs: update readme links to install docs

---
 README.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 64f498bc..d04214d9 100644
--- a/README.md
+++ b/README.md
@@ -32,11 +32,16 @@

 ## Installation

-#### To setup Backend Follow [Backend_Installation](./backend/Readme.md)
+Raw Data API consists of two elements:
+
+- A **backend** database, tools, and scripts: used to import OSM data into a specific database structure and keep it updated.
+- An **API** that is used to serve data from the backend database.
+
+#### To set up the backend, see [Backend Installation](./installation/backend)

 Raw Data API can be installed through `docker` or locally on your computer.

-- To install with docker see [docker installation](./docs/src/installation/docker.md).
+- To install with docker, see [docker installation](./installation/docker).
+- To install locally, continue below.

 NOTE: The installation guide below is only tested to work on Ubuntu, we recommend using docker for other operating systems.

@@ -90,7 +95,7 @@ pip install -r requirements.txt

 ### Additional required configurations for Raw Data API

-Setup the necessary configurations for Raw Data API from [configurations](./docs/src/installation/configurations.md).
+Set up the necessary configurations for Raw Data API from [configurations](./installation/configurations).

 Setup config.txt in project root.
@@ -202,7 +207,7 @@ py.test -k test function name

 ## Contribution & Development

-Learn about current priorities and work going through Roadmap & see here [CONTRIBUTING](./docs/src/contributing.md)
+Learn about current priorities and ongoing work on the Roadmap, and see [CONTRIBUTING](./contributing)

 ## Roadmap

 https://github.com/orgs/hotosm/projects/29

From d7de667b26f4ca1823952c0a11cece95929a4c48 Mon Sep 17 00:00:00 2001
From: spwoodcock
Date: Tue, 20 Feb 2024 09:31:38 +0000
Subject: [PATCH 3/8] refactor: use f-string for flatgeobuf ogr command

---
 src/app.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/app.py b/src/app.py
index cea0666c..1f4bfa72 100644
--- a/src/app.py
+++ b/src/app.py
@@ -562,14 +562,13 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
         run_ogr2ogr_cmd(cmd)

     if outputtype == RawDataOutputType.FLATGEOBUF.value:
-        cmd = """ogr2ogr -overwrite -f FLATGEOBUF {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress VERIFY_BUFFERS=NO""".format(
-            export_path=dump_temp_path,
-            host=db_items.get("host"),
-            port=db_items.get("port"),
-            username=db_items.get("user"),
-            db=db_items.get("dbname"),
-            password=db_items.get("password"),
-            pg_sql_select=query_path,
+        cmd = (
+            f"ogr2ogr -overwrite -f FLATGEOBUF {dump_temp_path} "
+            f"PG:'host={db_items.get('host')} port={db_items.get('port')} "
+            f"user={db_items.get('user')} dbname={db_items.get('dbname')} "
+            f"password={db_items.get('password')}' "
+            f"-sql @'{query_path}' -lco ENCODING=UTF-8 -progress "
+            f"VERIFY_BUFFERS=NO"
         )
         run_ogr2ogr_cmd(cmd)

From d414d29818a8f8cf7c964a53d1e64ee478faded9 Mon Sep 17 00:00:00 2001
From: spwoodcock
Date: Tue, 20 Feb 2024 09:32:08 +0000
Subject: [PATCH 4/8] refactor: remove ENCODING param for flatgeobuf (does nothing)

---
 src/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app.py b/src/app.py
index 1f4bfa72..19a69aa2 100644
--- a/src/app.py
+++ b/src/app.py
@@ -567,7 +567,7 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
             f"PG:'host={db_items.get('host')} port={db_items.get('port')} "
             f"user={db_items.get('user')} dbname={db_items.get('dbname')} "
             f"password={db_items.get('password')}' "
-            f"-sql @'{query_path}' -lco ENCODING=UTF-8 -progress "
+            f"-sql @'{query_path}' -progress "
             f"VERIFY_BUFFERS=NO"
         )
         run_ogr2ogr_cmd(cmd)

From 4302ef3f6272f2c1cff61534076857f93cd2a607 Mon Sep 17 00:00:00 2001
From: spwoodcock
Date: Tue, 20 Feb 2024 09:32:42 +0000
Subject: [PATCH 5/8] fix: default add spatial index to generated flatgeobuf files

---
 src/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app.py b/src/app.py
index 19a69aa2..0f7b557a 100644
--- a/src/app.py
+++ b/src/app.py
@@ -567,7 +567,7 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
             f"PG:'host={db_items.get('host')} port={db_items.get('port')} "
             f"user={db_items.get('user')} dbname={db_items.get('dbname')} "
             f"password={db_items.get('password')}' "
-            f"-sql @'{query_path}' -progress "
+            f"-sql @'{query_path}' -lco SPATIAL_INDEX=YES -progress "
             f"VERIFY_BUFFERS=NO"
         )
         run_ogr2ogr_cmd(cmd)

From a5f06fd12b09cad6b43c67db401fe8285da86810 Mon Sep 17 00:00:00 2001
From: spwoodcock
Date: Tue, 20 Feb 2024 09:36:31 +0000
Subject: [PATCH 6/8] fix: add optional param to wrap flatgeobuf in geomcollection

---
 src/app.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/app.py b/src/app.py
index 0f7b557a..fe9d6b32 100644
--- a/src/app.py
+++ b/src/app.py
@@ -570,6 +570,8 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
             f"-sql @'{query_path}' -lco SPATIAL_INDEX=YES -progress "
             f"VERIFY_BUFFERS=NO"
         )
+        if params.fgb_wrap_geoms:
+            cmd = cmd + " -nlt GEOMETRYCOLLECTION"
         run_ogr2ogr_cmd(cmd)

     if outputtype == RawDataOutputType.GEOPARQUET.value:

From c57595afdd6e39ac3e1c15686c5422c97d50b96c Mon Sep 17 00:00:00 2001
From: kshitijrajsharma
Date: Wed, 21 Feb 2024 13:32:55 +0545
Subject: [PATCH 7/8] Add api test case with reformat logic of ogr export

---
 src/app.py               | 139 +++++++++++++--------------------------
 src/validation/models.py |  68 ++++++++++---------
 tests/test_API.py        |  75 ++++++++++++++++-----
 3 files changed, 137 insertions(+), 145 deletions(-)

diff --git a/src/app.py b/src/app.py
index fe9d6b32..df6a36d5 100644
--- a/src/app.py
+++ b/src/app.py
@@ -529,111 +529,60 @@ def ogr_export_shp(point_query, line_query, poly_query, working_dir, file_name):

     @staticmethod
     def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
-        """Function written to support ogr type extractions as well , In this way we will be able to support all file formats supported by Ogr , Currently it is slow when dataset gets bigger as compared to our own conversion method but rich in feature and data types even though it is slow"""
         db_items = get_db_connection_params()
-        # format query if it has " in string"
         query_path = os.path.join(working_dir, "export_query.sql")
-        # writing to .sql to pass in ogr2ogr because we don't want to pass too much argument on command with sql
         with open(query_path, "w", encoding="UTF-8") as file:
             file.write(query)
-        # for mbtiles we need additional input as well i.e. minzoom and maxzoom , setting default at max=22 and min=10
-        if ENABLE_TILES:
-            if outputtype == RawDataOutputType.MBTILES.value:
-                if params.min_zoom and params.max_zoom:
-                    cmd = """ogr2ogr -overwrite -f MBTILES -dsco MINZOOM={min_zoom} -dsco MAXZOOM={max_zoom} {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
-                        min_zoom=params.min_zoom,
-                        max_zoom=params.max_zoom,
-                        export_path=dump_temp_path,
-                        host=db_items.get("host"),
-                        username=db_items.get("user"),
-                        db=db_items.get("dbname"),
-                        password=db_items.get("password"),
-                        pg_sql_select=query_path,
-                    )
-                else:
-                    cmd = """ogr2ogr -overwrite -f MBTILES -dsco ZOOM_LEVEL_AUTO=YES {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
-                        export_path=dump_temp_path,
-                        host=db_items.get("host"),
-                        username=db_items.get("user"),
-                        db=db_items.get("dbname"),
-                        password=db_items.get("password"),
-                        pg_sql_select=query_path,
-                    )
-                run_ogr2ogr_cmd(cmd)
-
-        if outputtype == RawDataOutputType.FLATGEOBUF.value:
-            cmd = (
-                f"ogr2ogr -overwrite -f FLATGEOBUF {dump_temp_path} "
-                f"PG:'host={db_items.get('host')} port={db_items.get('port')} "
-                f"user={db_items.get('user')} dbname={db_items.get('dbname')} "
-                f"password={db_items.get('password')}' "
-                f"-sql @'{query_path}' -lco SPATIAL_INDEX=YES -progress "
-                f"VERIFY_BUFFERS=NO"
-            )
-            if params.fgb_wrap_geoms:
-                cmd = cmd + " -nlt GEOMETRYCOLLECTION"
-            run_ogr2ogr_cmd(cmd)
+
+        format_options = {
+            RawDataOutputType.MBTILES.value: {
+                "format": "MBTILES",
+                "extra": (
+                    "-dsco MINZOOM={} -dsco MAXZOOM={} ".format(
+                        params.min_zoom, params.max_zoom
+                    )
+                    if params.min_zoom and params.max_zoom
+                    else "-dsco ZOOM_LEVEL_AUTO=YES"
+                ),
+            },
+            RawDataOutputType.FLATGEOBUF.value: {
+                "format": "FLATGEOBUF",
+                "extra": "-lco SPATIAL_INDEX=YES VERIFY_BUFFERS=NO",
+            },
+            RawDataOutputType.GEOPARQUET.value: {
+                "format": "Parquet",
+                "extra": "",
+            },
+            RawDataOutputType.PGDUMP.value: {
+                "format": "PGDump",
+                "extra": "--config PG_USE_COPY YES -lco SRID=4326",
+            },
+            RawDataOutputType.KML.value: {
+                "format": "KML",
+                "extra": "",
+            },
+            RawDataOutputType.CSV.value: {
+                "format": "CSV",
+                "extra": "",
+            },
+            RawDataOutputType.GEOPACKAGE.value: {
+                "format": "GPKG",
+                "extra": "",
+            },
+        }

-        if outputtype == RawDataOutputType.GEOPARQUET.value:
-            cmd = """ogr2ogr -overwrite -f Parquet {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
-                export_path=dump_temp_path,
-                host=db_items.get("host"),
-                port=db_items.get("port"),
-                username=db_items.get("user"),
-                db=db_items.get("dbname"),
-                password=db_items.get("password"),
-                pg_sql_select=query_path,
-            )
-            run_ogr2ogr_cmd(cmd)
+        file_name_option = (
+            f"-nln {params.file_name if params.file_name else 'raw_export'}"
+        )

-        if outputtype == RawDataOutputType.PGDUMP.value:
-            cmd = """ogr2ogr -overwrite --config PG_USE_COPY YES -f PGDump {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco SRID=4326 -progress""".format(
-                export_path=dump_temp_path,
-                host=db_items.get("host"),
-                port=db_items.get("port"),
-                username=db_items.get("user"),
-                db=db_items.get("dbname"),
-                password=db_items.get("password"),
-                pg_sql_select=query_path,
-            )
-            run_ogr2ogr_cmd(cmd)
+        if outputtype == RawDataOutputType.FLATGEOBUF.value and params.fgb_wrap_geoms:
+            format_options[outputtype]["extra"] += " -nlt GEOMETRYCOLLECTION"

-        if outputtype == RawDataOutputType.KML.value:
-            cmd = """ogr2ogr -overwrite -f KML {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
-                export_path=dump_temp_path,
-                host=db_items.get("host"),
-                port=db_items.get("port"),
-                username=db_items.get("user"),
-                db=db_items.get("dbname"),
-                password=db_items.get("password"),
-                pg_sql_select=query_path,
-            )
-            run_ogr2ogr_cmd(cmd)
+        format_option = format_options.get(outputtype, {"format": "", "extra": ""})

-        if outputtype == RawDataOutputType.CSV.value:
-            cmd = """ogr2ogr -overwrite -f CSV {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
-                export_path=dump_temp_path,
-                host=db_items.get("host"),
-                port=db_items.get("port"),
-                username=db_items.get("user"),
-                db=db_items.get("dbname"),
-                password=db_items.get("password"),
-                pg_sql_select=query_path,
-            )
-            run_ogr2ogr_cmd(cmd)
+        cmd = f"ogr2ogr -overwrite -f {format_option['format']} {dump_temp_path} PG:\"host={db_items.get('host')} port={db_items.get('port')} user={db_items.get('user')} dbname={db_items.get('dbname')} password={db_items.get('password')}\" -sql @{query_path} -lco ENCODING=UTF-8 -progress {format_option['extra']} {file_name_option}"
+        run_ogr2ogr_cmd(cmd)

-        if outputtype == RawDataOutputType.GEOPACKAGE.value:
-            cmd = """ogr2ogr -overwrite -f GPKG {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
-                export_path=dump_temp_path,
-                host=db_items.get("host"),
-                port=db_items.get("port"),
username=db_items.get("user"), - db=db_items.get("dbname"), - password=db_items.get("password"), - pg_sql_select=query_path, - ) - run_ogr2ogr_cmd(cmd) - # clear query file we don't need it anymore os.remove(query_path) @staticmethod diff --git a/src/validation/models.py b/src/validation/models.py index 4a7edf03..6a5ce3ed 100644 --- a/src/validation/models.py +++ b/src/validation/models.py @@ -210,6 +210,10 @@ class RawDataCurrentParams(RawDataCurrentParamsBase): default=True, description="Attaches uid to exports by default , Only disable this if it is recurring export", ) + fgb_wrap_geoms: Optional[bool] = Field( + default=False, + description="Wraps all flatgeobuff output to geometrycollection geometry type", + ) if ALLOW_BIND_ZIP_FILTER: bind_zip: Optional[bool] = True @@ -292,22 +296,22 @@ class StatsRequestParams(BaseModel, GeometryValidatorMixin): max_length=3, example="NPL", ) - geometry: Optional[ - Union[Polygon, MultiPolygon, Feature, FeatureCollection] - ] = Field( - default=None, - example={ - "type": "Polygon", - "coordinates": [ - [ - [83.96919250488281, 28.194446860487773], - [83.99751663208006, 28.194446860487773], - [83.99751663208006, 28.214869548073377], - [83.96919250488281, 28.214869548073377], - [83.96919250488281, 28.194446860487773], - ] - ], - }, + geometry: Optional[Union[Polygon, MultiPolygon, Feature, FeatureCollection]] = ( + Field( + default=None, + example={ + "type": "Polygon", + "coordinates": [ + [ + [83.96919250488281, 28.194446860487773], + [83.99751663208006, 28.194446860487773], + [83.99751663208006, 28.214869548073377], + [83.96919250488281, 28.214869548073377], + [83.96919250488281, 28.194446860487773], + ] + ], + }, + ) ) @validator("geometry", pre=True, always=True) @@ -604,22 +608,22 @@ class DynamicCategoriesModel(BaseModel, GeometryValidatorMixin): } ], ) - geometry: Optional[ - Union[Polygon, MultiPolygon, Feature, FeatureCollection] - ] = Field( - default=None, - example={ - "type": "Polygon", - "coordinates": [ - [ - [83.96919250488281, 28.194446860487773], - [83.99751663208006, 28.194446860487773], - [83.99751663208006, 28.214869548073377], - [83.96919250488281, 28.214869548073377], - [83.96919250488281, 28.194446860487773], - ] - ], - }, + geometry: Optional[Union[Polygon, MultiPolygon, Feature, FeatureCollection]] = ( + Field( + default=None, + example={ + "type": "Polygon", + "coordinates": [ + [ + [83.96919250488281, 28.194446860487773], + [83.99751663208006, 28.194446860487773], + [83.99751663208006, 28.214869548073377], + [83.96919250488281, 28.214869548073377], + [83.96919250488281, 28.194446860487773], + ] + ], + }, + ) ) @validator("geometry", pre=True, always=True) diff --git a/tests/test_API.py b/tests/test_API.py index ff0798e4..24fabe75 100644 --- a/tests/test_API.py +++ b/tests/test_API.py @@ -12,6 +12,32 @@ ## Status +def wait_for_task_completion(track_link, max_attempts=6, interval_seconds=10): + """ + Waits for a task to complete, polling the task status at specified intervals. + + :param track_link: The endpoint to check the task status. + :param max_attempts: Maximum number of polling attempts. + :param interval_seconds: Time to wait between each polling attempt. + :return: The final response JSON on success or raises an AssertionError on failure. 
+ """ + for attempt in range(1, max_attempts + 1): + time.sleep(interval_seconds) # wait for the worker to complete the task + + response = client.get(f"/v1{track_link}") + assert response.status_code == 200, "Task status check failed" + res = response.json() + check_status = res["status"] + + if check_status == "SUCCESS": + return res # Task completed successfully + + if attempt == max_attempts: + raise AssertionError( + f"Task did not complete successfully after {max_attempts} attempts" + ) + + def test_status(): response = client.get("/v1/status/") assert response.status_code == 200 @@ -67,24 +93,7 @@ def test_snapshot(): assert response.status_code == 200 res = response.json() track_link = res["track_link"] - max_attempts = 6 - interval_seconds = 10 - for attempt in range(1, max_attempts + 1): - time.sleep(interval_seconds) # wait for worker to complete task - - response = client.get(f"/v1{track_link}") - assert response.status_code == 200 - res = response.json() - check_status = res["status"] - - if check_status == "SUCCESS": - break # exit the loop if the status is SUCCESS - - if attempt == max_attempts: - # If max_attempts reached and status is not SUCCESS, raise an AssertionError - assert ( - False - ), f"Task did not complete successfully after {max_attempts} attempts" + wait_for_task_completion(client, track_link) def test_snapshot_featurecollection(): @@ -182,6 +191,36 @@ def test_snapshot_feature(): ), f"Task did not complete successfully after {max_attempts} attempts" +def test_snapshot_feature_fgb_wrap_geom(): + response = client.post( + "/v1/snapshot/", + json={ + "fgbWrapGeoms": True, + "outputType": "fgb", + "geometry": { + "type": "Feature", + "properties": {}, + "geometry": { + "coordinates": [ + [ + [83.97346137271688, 28.217525272345284], + [83.97346137271688, 28.192595937414737], + [84.01473909818759, 28.192595937414737], + [84.01473909818759, 28.217525272345284], + [83.97346137271688, 28.217525272345284], + ] + ], + "type": "Polygon", + }, + }, + }, + ) + assert response.status_code == 200 + res = response.json() + track_link = res["track_link"] + wait_for_task_completion(client, track_link) + + def test_snapshot_centroid(): response = client.post( "/v1/snapshot/", From f51bfd36161cc3b6792df6d8e811ed7b2bdb9355 Mon Sep 17 00:00:00 2001 From: kshitijrajsharma Date: Wed, 21 Feb 2024 13:38:40 +0545 Subject: [PATCH 8/8] Remove repettive env variables --- src/app.py | 9 +++++++++ src/config.py | 12 ++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/app.py b/src/app.py index df6a36d5..4177c58f 100644 --- a/src/app.py +++ b/src/app.py @@ -529,6 +529,15 @@ def ogr_export_shp(point_query, line_query, poly_query, working_dir, file_name): @staticmethod def ogr_export(query, outputtype, working_dir, dump_temp_path, params): + """Generates ogr2ogr command based on outputtype and parameters + + Args: + query (_type_): Postgresql query to extract + outputtype (_type_): _description_ + working_dir (_type_): _description_ + dump_temp_path (_type_): temp file path for metadata gen + params (_type_): _description_ + """ db_items = get_db_connection_params() query_path = os.path.join(working_dir, "export_query.sql") with open(query_path, "w", encoding="UTF-8") as file: diff --git a/src/config.py b/src/config.py index fad2bb60..89bc6d68 100644 --- a/src/config.py +++ b/src/config.py @@ -75,14 +75,6 @@ def get_bool_env_var(key, default=False): "API_CONFIG", "LOG_LEVEL", fallback="debug" ) -ALLOW_BIND_ZIP_FILTER = os.environ.get("ALLOW_BIND_ZIP_FILTER") or 
-    "API_CONFIG", "ALLOW_BIND_ZIP_FILTER", fallback=None
-)
-
-ENABLE_TILES = os.environ.get("ENABLE_TILES") or config.get(
-    "API_CONFIG", "ENABLE_TILES", fallback=None
-)
-

 def not_raises(func, *args, **kwargs):
     try:
@@ -166,10 +158,14 @@ def not_raises(func, *args, **kwargs):
 if not os.path.exists(EXPORT_PATH):
     # Create a exports directory because it does not exist
     os.makedirs(EXPORT_PATH)
+
 ALLOW_BIND_ZIP_FILTER = get_bool_env_var(
     "ALLOW_BIND_ZIP_FILTER",
     config.getboolean("API_CONFIG", "ALLOW_BIND_ZIP_FILTER", fallback=False),
 )
+ENABLE_TILES = get_bool_env_var(
+    "ENABLE_TILES", config.getboolean("API_CONFIG", "ENABLE_TILES", fallback=False)
+)

 # check either to use connection pooling or not
 USE_CONNECTION_POOLING = get_bool_env_var(
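
Taken together, patches 3 through 7 collapse the per-format `if` blocks in `ogr_export` into a lookup table plus a single command template. Below is a minimal, self-contained sketch of that dispatch with hypothetical stand-ins for the request params, connection values, and paths; the table keys here are short codes for illustration ("fgb" is the value the test suite uses), while the real keys are `RawDataOutputType` members:

    # Minimal sketch of the format_options dispatch from PATCH 7.
    # All values below (params, db_items, paths) are hypothetical stand-ins.
    from types import SimpleNamespace

    params = SimpleNamespace(
        min_zoom=None, max_zoom=None, fgb_wrap_geoms=True, file_name="pokhara"
    )
    db_items = {
        "host": "localhost", "port": 5432, "user": "postgres",
        "dbname": "raw", "password": "secret",
    }
    outputtype = "fgb"  # stands in for RawDataOutputType.FLATGEOBUF.value
    dump_temp_path = "/tmp/raw_export.fgb"
    query_path = "/tmp/export_query.sql"

    # Table-driven dispatch: one entry per output format.
    format_options = {
        "fgb": {"format": "FLATGEOBUF", "extra": "-lco SPATIAL_INDEX=YES VERIFY_BUFFERS=NO"},
        "gpkg": {"format": "GPKG", "extra": ""},
    }

    # The optional GEOMETRYCOLLECTION wrapper from patch 6 becomes a table tweak.
    if outputtype == "fgb" and params.fgb_wrap_geoms:
        format_options[outputtype]["extra"] += " -nlt GEOMETRYCOLLECTION"

    format_option = format_options.get(outputtype, {"format": "", "extra": ""})
    file_name_option = f"-nln {params.file_name if params.file_name else 'raw_export'}"

    # Single command template shared by every format.
    cmd = (
        f"ogr2ogr -overwrite -f {format_option['format']} {dump_temp_path} "
        f"PG:\"host={db_items.get('host')} port={db_items.get('port')} "
        f"user={db_items.get('user')} dbname={db_items.get('dbname')} "
        f"password={db_items.get('password')}\" "
        f"-sql @{query_path} -lco ENCODING=UTF-8 -progress "
        f"{format_option['extra']} {file_name_option}"
    )
    print(cmd)

Keeping the per-format differences in data rather than control flow is what reduces seven near-identical command builders to the single code path that patch 8 then documents.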
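The `fgb_wrap_geoms` field added in patches 6 and 7 is exposed as `fgbWrapGeoms` in the request body, as exercised by `test_snapshot_feature_fgb_wrap_geom`. A hedged usage sketch against a running instance follows; the base URL is an assumption for a local deployment and is not part of these patches, while the payload and polling mirror the test suite:

    # Hypothetical end-to-end call; only the base URL is assumed here.
    import time

    import requests

    BASE = "http://127.0.0.1:8000"  # assumed local deployment

    payload = {
        "fgbWrapGeoms": True,  # wrap the flatgeobuf layer in a GEOMETRYCOLLECTION
        "outputType": "fgb",
        "geometry": {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "type": "Polygon",
                "coordinates": [
                    [
                        [83.97346137271688, 28.217525272345284],
                        [83.97346137271688, 28.192595937414737],
                        [84.01473909818759, 28.192595937414737],
                        [84.01473909818759, 28.217525272345284],
                        [83.97346137271688, 28.217525272345284],
                    ]
                ],
            },
        },
    }

    res = requests.post(f"{BASE}/v1/snapshot/", json=payload, timeout=30)
    res.raise_for_status()
    track_link = res.json()["track_link"]

    # Poll the task the same way wait_for_task_completion does in the tests.
    for _ in range(6):
        time.sleep(10)
        status = requests.get(f"{BASE}/v1{track_link}", timeout=30).json()["status"]
        if status == "SUCCESS":
            break

Per patch 6's commit message, the flag makes ogr2ogr write the layer with a GEOMETRYCOLLECTION geometry type (`-nlt GEOMETRYCOLLECTION`), which gives mixed-geometry extracts a single uniform type.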