Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional param to wrap flatgeobuf geometries in GeometryCollection #213

Merged
merged 8 commits into from
Feb 21, 2024
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,16 @@

## Installation

#### To setup Backend Follow [Backend_Installation](./backend/Readme.md)
Raw Data API consists of two elements:

- A **backend** database, tools, and scripts: used to import OSM data into a specific database structure and keep it updated.
- An **API** that is used to serve data from the backend database.

#### To set up the backend, see [Backend Installation](./installation/backend)

Raw Data API can be installed through `docker` or locally on your computer.

- To install with docker see [docker installation](./docs/src/installation/docker.md).
- To install with docker see [docker installation](./installation/docker).
- To install locally, continue below.

NOTE: The installation guide below is only tested to work on Ubuntu; we recommend using docker for other operating systems.
Expand Down Expand Up @@ -90,7 +95,7 @@ pip install -r requirements.txt

### Additional required configurations for Raw Data API

Setup the necessary configurations for Raw Data API from [configurations](./docs/src/installation/configurations.md).
Set up the necessary configurations for Raw Data API from [configurations](./installation/configurations).

Set up config.txt in the project root.

Expand Down Expand Up @@ -202,7 +207,7 @@ py.test -k test function name

## Contribution & Development

Learn about current priorities and work going through Roadmap & see here [CONTRIBUTING](./docs/src/contributing.md)
Learn about current priorities and ongoing work on the Roadmap, and see [CONTRIBUTING](./contributing) for how to contribute.

## Roadmap
https://github.com/orgs/hotosm/projects/29
Expand Down
1 change: 1 addition & 0 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ extra:
nav:
- Raw Data API : "index.md"
- Installation:
- Backend: "installation/backend.md"
- Docker Installation: "installation/docker.md"
- Configurations: "installation/configurations.md"
# - User Guide: 'user_guide/index.md'
Expand Down
1 change: 1 addition & 0 deletions docs/src/installation/backend.md
147 changes: 53 additions & 94 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,110 +529,69 @@ def ogr_export_shp(point_query, line_query, poly_query, working_dir, file_name):

@staticmethod
def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
    """Export the result of a PostgreSQL query to a file using ogr2ogr.

    Builds a single ogr2ogr command from a per-format option table and
    runs it. The SQL is first written to a temporary ``.sql`` file and
    passed to ogr2ogr with ``-sql @file`` so the (potentially long and
    quote-laden) query never travels on the command line itself.

    Args:
        query (str): PostgreSQL SELECT statement to extract features.
        outputtype (str): A ``RawDataOutputType`` enum value selecting the
            output driver (MBTILES, FLATGEOBUF, Parquet, PGDump, KML, CSV,
            GPKG).
        working_dir (str): Directory used for the temporary query file.
        dump_temp_path (str): Destination path of the exported dataset.
        params: Request parameters; reads ``min_zoom``/``max_zoom``
            (MBTILES only), ``file_name`` (output layer name) and
            ``fgb_wrap_geoms`` (FlatGeobuf only).
    """
    db_items = get_db_connection_params()
    # Write the SQL to a file and reference it as @file: avoids shell
    # quoting issues and overly long argument lists.
    query_path = os.path.join(working_dir, "export_query.sql")
    with open(query_path, "w", encoding="UTF-8") as file:
        file.write(query)

    # Driver name and driver-specific extra flags per output type.
    format_options = {
        RawDataOutputType.MBTILES.value: {
            "format": "MBTILES",
            # MBTiles needs zoom levels: honour an explicit min/max pair,
            # otherwise let GDAL choose them automatically.
            "extra": (
                "-dsco MINZOOM={} -dsco MAXZOOM={} ".format(
                    params.min_zoom, params.max_zoom
                )
                if params.min_zoom and params.max_zoom
                else "-dsco ZOOM_LEVEL_AUTO=YES"
            ),
        },
        RawDataOutputType.FLATGEOBUF.value: {
            "format": "FLATGEOBUF",
            # Each layer-creation option needs its own -lco flag: a bare
            # "VERIFY_BUFFERS=NO" token would be misparsed by ogr2ogr as a
            # positional dataset argument.
            "extra": "-lco SPATIAL_INDEX=YES -lco VERIFY_BUFFERS=NO",
        },
        RawDataOutputType.GEOPARQUET.value: {
            "format": "Parquet",
            "extra": "",
        },
        RawDataOutputType.PGDUMP.value: {
            "format": "PGDump",
            "extra": "--config PG_USE_COPY YES -lco SRID=4326",
        },
        RawDataOutputType.KML.value: {
            "format": "KML",
            "extra": "",
        },
        RawDataOutputType.CSV.value: {
            "format": "CSV",
            "extra": "",
        },
        RawDataOutputType.GEOPACKAGE.value: {
            "format": "GPKG",
            "extra": "",
        },
    }

    # Layer name inside the output dataset (-nln); default keeps exports
    # predictable when the caller supplies no file name.
    file_name_option = (
        f"-nln {params.file_name if params.file_name else 'raw_export'}"
    )

    # Optionally coerce all FlatGeobuf geometries to GeometryCollection so
    # mixed geometry types fit in a single layer.
    if outputtype == RawDataOutputType.FLATGEOBUF.value and params.fgb_wrap_geoms:
        format_options[outputtype]["extra"] += " -nlt GEOMETRYCOLLECTION"

    format_option = format_options.get(outputtype, {"format": "", "extra": ""})

    cmd = f"ogr2ogr -overwrite -f {format_option['format']} {dump_temp_path} PG:\"host={db_items.get('host')} port={db_items.get('port')} user={db_items.get('user')} dbname={db_items.get('dbname')} password={db_items.get('password')}\" -sql @{query_path} -lco ENCODING=UTF-8 -progress {format_option['extra']} {file_name_option}"
    run_ogr2ogr_cmd(cmd)

    # The temporary query file is no longer needed once ogr2ogr has run.
    os.remove(query_path)

@staticmethod
Expand Down
12 changes: 4 additions & 8 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,6 @@ def get_bool_env_var(key, default=False):
"API_CONFIG", "LOG_LEVEL", fallback="debug"
)

ALLOW_BIND_ZIP_FILTER = os.environ.get("ALLOW_BIND_ZIP_FILTER") or config.get(
"API_CONFIG", "ALLOW_BIND_ZIP_FILTER", fallback=None
)

ENABLE_TILES = os.environ.get("ENABLE_TILES") or config.get(
"API_CONFIG", "ENABLE_TILES", fallback=None
)


def not_raises(func, *args, **kwargs):
try:
Expand Down Expand Up @@ -166,10 +158,14 @@ def not_raises(func, *args, **kwargs):
if not os.path.exists(EXPORT_PATH):
# Create a exports directory because it does not exist
os.makedirs(EXPORT_PATH)

ALLOW_BIND_ZIP_FILTER = get_bool_env_var(
"ALLOW_BIND_ZIP_FILTER",
config.getboolean("API_CONFIG", "ALLOW_BIND_ZIP_FILTER", fallback=False),
)
ENABLE_TILES = get_bool_env_var(
"ENABLE_TILES", config.getboolean("API_CONFIG", "ENABLE_TILES", fallback=False)
)

# check either to use connection pooling or not
USE_CONNECTION_POOLING = get_bool_env_var(
Expand Down
68 changes: 36 additions & 32 deletions src/validation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ class RawDataCurrentParams(RawDataCurrentParamsBase):
default=True,
description="Attaches uid to exports by default , Only disable this if it is recurring export",
)
fgb_wrap_geoms: Optional[bool] = Field(
default=False,
description="Wraps all flatgeobuff output to geometrycollection geometry type",
)
if ALLOW_BIND_ZIP_FILTER:
bind_zip: Optional[bool] = True

Expand Down Expand Up @@ -292,22 +296,22 @@ class StatsRequestParams(BaseModel, GeometryValidatorMixin):
max_length=3,
example="NPL",
)
geometry: Optional[
Union[Polygon, MultiPolygon, Feature, FeatureCollection]
] = Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
geometry: Optional[Union[Polygon, MultiPolygon, Feature, FeatureCollection]] = (
Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
)
)

@validator("geometry", pre=True, always=True)
Expand Down Expand Up @@ -604,22 +608,22 @@ class DynamicCategoriesModel(BaseModel, GeometryValidatorMixin):
}
],
)
geometry: Optional[
Union[Polygon, MultiPolygon, Feature, FeatureCollection]
] = Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
geometry: Optional[Union[Polygon, MultiPolygon, Feature, FeatureCollection]] = (
Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
)
)

@validator("geometry", pre=True, always=True)
Expand Down
Loading
Loading