From bf5a2323910f3b56786fc059916f3f077b689a2b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 19:24:51 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/workflows/release.yml | 2 +- notebooks/BUILT_compare_buildings_wsf.ipynb | 63 ++++++++------ notebooks/FATHOM/CLIP_Flood_data_iso3.ipynb | 27 +++--- .../Transfer_Data_AWS.ipynb | 85 +++++++++++-------- .../generate_fathom_vrts.ipynb | 85 +++++++++++-------- notebooks/FATHOM/Vizualize_Flood_types.ipynb | 33 +++---- notebooks/Overture_Maps_Data_Access.ipynb | 48 +++++------ src/GOSTrocks/dataMisc.py | 47 ++++++---- src/GOSTrocks/fathom_vrts.txt | 2 +- src/GOSTrocks/infra/aggregator.py | 1 + src/GOSTrocks/rasterMisc.py | 2 +- 11 files changed, 231 insertions(+), 164 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8c432c9..5a829d9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,4 +49,4 @@ jobs: name: python-package-distributions path: dist/ - name: Publish distribution 📦 to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/notebooks/BUILT_compare_buildings_wsf.ipynb b/notebooks/BUILT_compare_buildings_wsf.ipynb index 1f519c0..cc02135 100644 --- a/notebooks/BUILT_compare_buildings_wsf.ipynb +++ b/notebooks/BUILT_compare_buildings_wsf.ipynb @@ -15,19 +15,18 @@ } ], "source": [ - "import sys, os\n", + "import sys\n", + "import os\n", "import rasterio\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "\n", "from shapely.wkt import loads\n", "\n", "sys.path.insert(0, \"../src\")\n", "\n", "import GOSTrocks.rasterMisc as rMisc\n", - "import GOSTrocks.dataMisc as dMisc\n", "\n", "%load_ext autoreload\n", "%autoreload 2" @@ -49,19 +48,25 @@ ], "source": [ "# Local/input files\n", - "iso3 = 'KHM'\n", + "iso3 = \"KHM\"\n", "out_folder = \"c:/WBG/Work/KHM_Energy/data\"\n", "wsf_file = os.path.join(out_folder, \"WSF\", \"wsf.tif\")\n", "ghsl_file = os.path.join(out_folder, \"GHSL\", \"ghsl.tif\")\n", - "overture_buildings = os.path.join(out_folder, \"overture\", \"overture_download_2024_03_29.csv\")\n", - "overture_raster = os.path.join(out_folder, \"overture\", \"overture_download_2024_03_29.tif\")\n", - "overture_raster_points = os.path.join(out_folder, \"overture\", \"overture_download_2024_03_29_points.tif\")\n", + "overture_buildings = os.path.join(\n", + " out_folder, \"overture\", \"overture_download_2024_03_29.csv\"\n", + ")\n", + "overture_raster = os.path.join(\n", + " out_folder, \"overture\", \"overture_download_2024_03_29.tif\"\n", + ")\n", + "overture_raster_points = os.path.join(\n", + " out_folder, \"overture\", \"overture_download_2024_03_29_points.tif\"\n", + ")\n", "for file in [wsf_file, ghsl_file]:\n", " if not os.path.exists(os.path.dirname(file)):\n", " os.makedirs(os.path.dirname(file))\n", "\n", "# get country extent from geopandas\n", - "world_filepath = gpd.datasets.get_path('naturalearth_lowres')\n", + "world_filepath = gpd.datasets.get_path(\"naturalearth_lowres\")\n", "world = gpd.read_file(world_filepath)\n", "country = world[world.iso_a3 == iso3]" ] @@ -97,12 +102,14 @@ "metadata": {}, "outputs": [], "source": [ - "#Clip GHSL using local files\n", - "local_version = 
r\"J:\\Data\\GLOBAL\\GHSL\\Built\\GHS_BUILT_S_E2020_GLOBE_R2023A_54009_100_V1_0.tif\"\n", + "# Clip GHSL using local files\n", + "local_version = (\n", + " r\"J:\\Data\\GLOBAL\\GHSL\\Built\\GHS_BUILT_S_E2020_GLOBE_R2023A_54009_100_V1_0.tif\"\n", + ")\n", "if not os.path.exists(ghsl_file):\n", " ghsl_raster = rasterio.open(local_version)\n", " data, profile = rMisc.clipRaster(ghsl_raster, country)\n", - " with rasterio.open(ghsl_file, 'w', **profile) as dst:\n", + " with rasterio.open(ghsl_file, \"w\", **profile) as dst:\n", " dst.write(data)\n", "ghsl_r = rasterio.open(ghsl_file)" ] @@ -203,9 +210,9 @@ "source": [ "# read in and process Overture buildings\n", "ob = pd.read_csv(overture_buildings)\n", - "ob_geoms = ob['wkt'].apply(loads)\n", + "ob_geoms = ob[\"wkt\"].apply(loads)\n", "inB = gpd.GeoDataFrame(ob, geometry=ob_geoms, crs=4326)\n", - "inB.head()\n" + "inB.head()" ] }, { @@ -216,9 +223,11 @@ "source": [ "# attempt to rasterrize the buildings as polygons\n", "if not os.path.exists(overture_raster):\n", - " rasterized_buildings = rMisc.rasterizeDataFrame(inB, templateRaster=ghsl_file, mergeAlg=\"ADD\", re_proj=True, nodata=0.)\n", - " with rasterio.open(overture_raster, 'w', **rasterized_buildings['meta']) as dst:\n", - " dst.write_band(1, rasterized_buildings['vals'])\n", + " rasterized_buildings = rMisc.rasterizeDataFrame(\n", + " inB, templateRaster=ghsl_file, mergeAlg=\"ADD\", re_proj=True, nodata=0.0\n", + " )\n", + " with rasterio.open(overture_raster, \"w\", **rasterized_buildings[\"meta\"]) as dst:\n", + " dst.write_band(1, rasterized_buildings[\"vals\"])\n", "overture_r = rasterio.open(overture_raster)" ] }, @@ -231,11 +240,15 @@ "# attempt to rasterrize the buildings as points\n", "if not os.path.exists(overture_raster_points):\n", " inB_points = inB.copy()\n", - " inB_points['geometry'] = inB_points['geometry'].centroid\n", - " rasterized_buildings = rMisc.rasterizeDataFrame(inB_points, templateRaster=ghsl_file, mergeAlg=\"ADD\", re_proj=True, nodata=0.)\n", - " with rasterio.open(overture_raster_points, 'w', **rasterized_buildings['meta']) as dst:\n", - " dst.write_band(1, rasterized_buildings['vals'])\n", - "overture_r_points = rasterio.open(overture_raster_points) " + " inB_points[\"geometry\"] = inB_points[\"geometry\"].centroid\n", + " rasterized_buildings = rMisc.rasterizeDataFrame(\n", + " inB_points, templateRaster=ghsl_file, mergeAlg=\"ADD\", re_proj=True, nodata=0.0\n", + " )\n", + " with rasterio.open(\n", + " overture_raster_points, \"w\", **rasterized_buildings[\"meta\"]\n", + " ) as dst:\n", + " dst.write_band(1, rasterized_buildings[\"vals\"])\n", + "overture_r_points = rasterio.open(overture_raster_points)" ] }, { @@ -250,10 +263,10 @@ "ghsl_thresh = 3000\n", "\n", "o_data = overture_r_points.read(1)\n", - "o_data = (o_data > o_thresh).astype('uint8')\n", + "o_data = (o_data > o_thresh).astype(\"uint8\")\n", "\n", "ghsl_data = ghsl_r.read(1)\n", - "ghsl_data = (ghsl_data > ghsl_thresh).astype('uint8') * 10\n", + "ghsl_data = (ghsl_data > ghsl_thresh).astype(\"uint8\") * 10\n", "\n", "combo_data = o_data + ghsl_data\n", "\n", @@ -262,8 +275,8 @@ "if not os.path.exists(out_file):\n", " meta = overture_r_points.meta.copy()\n", " meta.update(dtype=rasterio.uint8, nodata=0)\n", - " with rasterio.open(out_file, 'w', **meta) as out_raster:\n", - " out_raster.write_band(1, combo_data)\n" + " with rasterio.open(out_file, \"w\", **meta) as out_raster:\n", + " out_raster.write_band(1, combo_data)" ] }, { diff --git a/notebooks/FATHOM/CLIP_Flood_data_iso3.ipynb 
diff --git a/notebooks/FATHOM/CLIP_Flood_data_iso3.ipynb b/notebooks/FATHOM/CLIP_Flood_data_iso3.ipynb
index ffc3d00..6edb31a 100644
--- a/notebooks/FATHOM/CLIP_Flood_data_iso3.ipynb
+++ b/notebooks/FATHOM/CLIP_Flood_data_iso3.ipynb
@@ -19,7 +19,6 @@
    "source": [
     "import sys\n",
     "import os\n",
-    "import boto3\n",
     "import rasterio\n",
     "\n",
     "import geopandas as gpd\n",
@@ -63,11 +62,11 @@
     "out_folder = f\"/home/wb411133/temp/FATHOM/{iso3}\"\n",
     "if not os.path.exists(out_folder):\n",
     "    os.makedirs(out_folder)\n",
-    "    \n",
+    "\n",
     "# This demo uses the default national boundaries included with GeoPandas, but this can be changed here\n",
-    "world_filepath = gpd.datasets.get_path('naturalearth_lowres')\n",
+    "world_filepath = gpd.datasets.get_path(\"naturalearth_lowres\")\n",
     "world = gpd.read_file(world_filepath)\n",
-    "inB = world.loc[world['iso_a3'] == iso3].copy()"
+    "inB = world.loc[world[\"iso_a3\"] == iso3].copy()"
    ]
   },
   {
@@ -78,17 +77,21 @@
    "outputs": [],
    "source": [
     "# Select layer to download\n",
-    "flood_type = [\"COASTAL\",\"FLUVIAL\",\"PLUVIAL\"]\n",
+    "flood_type = [\"COASTAL\", \"FLUVIAL\", \"PLUVIAL\"]\n",
     "defence = [\"DEFENDED\"]\n",
-    "return_period = ['1in5','1in10','1in50']\n",
+    "return_period = [\"1in5\", \"1in10\", \"1in50\"]\n",
     "climate_model = [\"PERCENTILE50\"]\n",
     "year = [\"2020\"]\n",
     "\n",
     "# all_vrts is a pandas dataframe with all the vrt paths to the global datasets, with columns defining\n",
     "# the various models' defining attributes\n",
     "all_vrts = dMisc.get_fathom_vrts(True)\n",
-    "sel_images = all_vrts.loc[(all_vrts['FLOOD_TYPE'].isin(flood_type)) & (all_vrts['DEFENCE'].isin(defence)) & \n",
-    "                          (all_vrts['RETURN'].isin(return_period)) & (all_vrts['CLIMATE_MODEL'].isin(climate_model))]"
+    "sel_images = all_vrts.loc[\n",
+    "    (all_vrts[\"FLOOD_TYPE\"].isin(flood_type))\n",
+    "    & (all_vrts[\"DEFENCE\"].isin(defence))\n",
+    "    & (all_vrts[\"RETURN\"].isin(return_period))\n",
+    "    & (all_vrts[\"CLIMATE_MODEL\"].isin(climate_model))\n",
+    "]"
    ]
   },
   {
@@ -349,12 +352,12 @@
     "# For each image in the selected images dataframe, we clip out the area of interest\n",
     "# which is defined by the iso3 code, but could be any GeoDataFrame\n",
     "\n",
-    "for idx, row in sel_images.iterrows(): \n",
-    "    out_file = os.path.join(out_folder, os.path.basename(row['PATH']))\n",
+    "for idx, row in sel_images.iterrows():\n",
+    "    out_file = os.path.join(out_folder, os.path.basename(row[\"PATH\"]))\n",
     "    if not os.path.exists(out_file):\n",
-    "        cur_r = rasterio.open(row['PATH'])\n",
+    "        cur_r = rasterio.open(row[\"PATH\"])\n",
     "        rMisc.clipRaster(cur_r, inB, out_file)\n",
-    "        tPrint(os.path.basename(row['PATH']))"
+    "        tPrint(os.path.basename(row[\"PATH\"]))"
    ]
   }
  ],
diff --git a/notebooks/FATHOM/PROCESSING_NOTEBOOKS/Transfer_Data_AWS.ipynb b/notebooks/FATHOM/PROCESSING_NOTEBOOKS/Transfer_Data_AWS.ipynb
index 224d7e0..5bfab08 100644
--- a/notebooks/FATHOM/PROCESSING_NOTEBOOKS/Transfer_Data_AWS.ipynb
+++ b/notebooks/FATHOM/PROCESSING_NOTEBOOKS/Transfer_Data_AWS.ipynb
@@ -6,7 +6,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import sys, os\n",
+    "import os\n",
     "import boto3\n",
     "\n",
     "import pandas as pd"
    ]
   },
@@ -39,14 +39,14 @@
    }
   ],
   "source": [
-    "in_file_list = '/home/wb411133/temp/World_Bank_Global_3_Complete.csv'\n",
+    "in_file_list = \"/home/wb411133/temp/World_Bank_Global_3_Complete.csv\"\n",
     "out_folder = os.path.join(os.path.dirname(in_file_list), \"FATHOM\")\n",
-    "s3_bucket = 'wbg-geography01'\n",
-    "s3_prefix = 'FATHOM/v2023/'\n",
-    "s3_out = os.path.join('s3://',s3_bucket, s3_prefix)\n",
+    "s3_bucket = \"wbg-geography01\"\n",
+    "s3_prefix = 
\"FATHOM/v2023/\"\n", + "s3_out = os.path.join(\"s3://\", s3_bucket, s3_prefix)\n", "\n", "in_files = pd.read_csv(in_file_list)\n", - "s3 = boto3.resource('s3')\n", + "s3 = boto3.resource(\"s3\")\n", "my_bucket = s3.Bucket(s3_bucket)" ] }, @@ -59,7 +59,7 @@ "# Find all files already copied\n", "all_folders = []\n", "for obj in my_bucket.objects.filter(Prefix=s3_prefix):\n", - " all_folders.append(obj.key.split(\"/\")[-2])\n" + " all_folders.append(obj.key.split(\"/\")[-2])" ] }, { @@ -69,9 +69,9 @@ "outputs": [], "source": [ "processed_folders = list(set(all_folders))\n", - "delivered_folders = in_files['Layer'].values\n", - "sel_folders = [x for x in delivered_folders if not x in processed_folders]\n", - "sel_files = in_files.loc[in_files['Layer'].isin(sel_folders)].copy()\n", + "delivered_folders = in_files[\"Layer\"].values\n", + "sel_folders = [x for x in delivered_folders if x not in processed_folders]\n", + "sel_files = in_files.loc[in_files[\"Layer\"].isin(sel_folders)].copy()\n", "sel_files" ] }, @@ -83,31 +83,33 @@ }, "outputs": [], "source": [ - "with open(os.path.join(out_folder, \"aaa_download_upload_2.sh\"), 'w') as out_file:\n", - " out_file.write('#!/bin/bash\\n')\n", + "with open(os.path.join(out_folder, \"aaa_download_upload_2.sh\"), \"w\") as out_file:\n", + " out_file.write(\"#!/bin/bash\\n\")\n", " for idx, row in sel_files.iterrows():\n", - " fathom_path = row['AWS_Path']\n", - " local_folder = os.path.join(out_folder, row['Layer'])\n", - " gost_folder = os.path.join(s3_out, row['Layer'])\n", + " fathom_path = row[\"AWS_Path\"]\n", + " local_folder = os.path.join(out_folder, row[\"Layer\"])\n", + " gost_folder = os.path.join(s3_out, row[\"Layer\"])\n", " if not os.path.exists(local_folder):\n", " os.makedirs(local_folder)\n", - " \n", - " cur_out_folder = os.path.join(s3_prefix, row['Layer'])\n", + "\n", + " cur_out_folder = os.path.join(s3_prefix, row[\"Layer\"])\n", " obj_count = 0\n", " for obj in my_bucket.objects.filter(Prefix=cur_out_folder):\n", " obj_count += 1\n", " print(f\"{row['Layer']}: {obj_count}\")\n", " if obj_count == 0:\n", - " download_command = f'aws s3 sync --profile fathom {fathom_path} {local_folder}'\n", - " upload_command = f'aws s3 sync {local_folder} {gost_folder}'\n", - " remove_command = f'rm -R {local_folder}'\n", + " download_command = (\n", + " f\"aws s3 sync --profile fathom {fathom_path} {local_folder}\"\n", + " )\n", + " upload_command = f\"aws s3 sync {local_folder} {gost_folder}\"\n", + " remove_command = f\"rm -R {local_folder}\"\n", "\n", " out_file.write(download_command)\n", - " out_file.write('\\n')\n", + " out_file.write(\"\\n\")\n", " out_file.write(upload_command)\n", - " out_file.write('\\n')\n", + " out_file.write(\"\\n\")\n", " out_file.write(remove_command)\n", - " out_file.write('\\n')" + " out_file.write(\"\\n\")" ] }, { @@ -118,7 +120,7 @@ "source": [ "all_vals = []\n", "for idx, row in in_files.iterrows():\n", - " all_vals.append(row['Layer'].split('-'))" + " all_vals.append(row[\"Layer\"].split(\"-\"))" ] }, { @@ -127,8 +129,23 @@ "metadata": {}, "outputs": [], "source": [ - "xx = pd.DataFrame(all_vals, columns=['GLOBAL', \"Size\",'Offset','return','type','defense','depth','year','projection','v1','v2'])\n", - "xx.head()\n" + "xx = pd.DataFrame(\n", + " all_vals,\n", + " columns=[\n", + " \"GLOBAL\",\n", + " \"Size\",\n", + " \"Offset\",\n", + " \"return\",\n", + " \"type\",\n", + " \"defense\",\n", + " \"depth\",\n", + " \"year\",\n", + " \"projection\",\n", + " \"v1\",\n", + " \"v2\",\n", + " ],\n", + ")\n", + 
"xx.head()" ] }, { @@ -137,7 +154,7 @@ "metadata": {}, "outputs": [], "source": [ - "xx['projection'].value_counts()" + "xx[\"projection\"].value_counts()" ] }, { @@ -146,7 +163,7 @@ "metadata": {}, "outputs": [], "source": [ - "xx.loc[xx['projection'] == 'SSP1_2.6']['year'].value_counts()" + "xx.loc[xx[\"projection\"] == \"SSP1_2.6\"][\"year\"].value_counts()" ] }, { @@ -172,24 +189,24 @@ } ], "source": [ - "sel_scenario = 'GLOBAL-1ARCSEC-NW_OFFSET-1in10-COASTAL-DEFENDED-DEPTH-2030-SSP3_7.0-PERCENTILE50-v3.0'\n", + "sel_scenario = \"GLOBAL-1ARCSEC-NW_OFFSET-1in10-COASTAL-DEFENDED-DEPTH-2030-SSP3_7.0-PERCENTILE50-v3.0\"\n", "bucket = \"fathom-products-global\"\n", "prefix = f\"fathom-global/v3/{sel_scenario}\"\n", "\n", "local_folder = os.path.join(\"/home/wb411133/temp/FATHOM\", sel_scenario)\n", - "fathom_path = f's3://{bucket}/{prefix}'\n", + "fathom_path = f\"s3://{bucket}/{prefix}\"\n", "gost_folder = f\"s3://wbg-geography01/FATHOM/v2023/{sel_scenario}\"\n", "\n", "if not os.path.exists(local_folder):\n", " os.makedirs(local_folder)\n", "\n", - "download_command = f'aws s3 sync --profile fathom {fathom_path} {local_folder}'\n", - "upload_command = f'aws s3 sync {local_folder} {gost_folder}'\n", - "remove_command = f'rm -R {local_folder}'\n", + "download_command = f\"aws s3 sync --profile fathom {fathom_path} {local_folder}\"\n", + "upload_command = f\"aws s3 sync {local_folder} {gost_folder}\"\n", + "remove_command = f\"rm -R {local_folder}\"\n", "\n", "print(download_command)\n", "print(upload_command)\n", - "print(remove_command)\n" + "print(remove_command)" ] }, { diff --git a/notebooks/FATHOM/PROCESSING_NOTEBOOKS/generate_fathom_vrts.ipynb b/notebooks/FATHOM/PROCESSING_NOTEBOOKS/generate_fathom_vrts.ipynb index 60811d6..779a693 100644 --- a/notebooks/FATHOM/PROCESSING_NOTEBOOKS/generate_fathom_vrts.ipynb +++ b/notebooks/FATHOM/PROCESSING_NOTEBOOKS/generate_fathom_vrts.ipynb @@ -15,11 +15,12 @@ } ], "source": [ - "import sys, os, boto3, json\n", - "import rasterio\n", + "import sys\n", + "import os\n", + "import boto3\n", + "import json\n", "\n", "import xml.etree.ElementTree as ET\n", - "import pandas as pd\n", "\n", "sys.path.insert(0, \"../../src\")\n", "import GOSTrocks.dataMisc as dMisc\n", @@ -34,11 +35,11 @@ "metadata": {}, "outputs": [], "source": [ - "s3_bucket = 'wbg-geography01'\n", - "s3_prefix = 'FATHOM/v2023/'\n", - "s3_out = os.path.join('s3://',s3_bucket, s3_prefix)\n", + "s3_bucket = \"wbg-geography01\"\n", + "s3_prefix = \"FATHOM/v2023/\"\n", + "s3_out = os.path.join(\"s3://\", s3_bucket, s3_prefix)\n", "\n", - "s3 = boto3.resource('s3')\n", + "s3 = boto3.resource(\"s3\")\n", "my_bucket = s3.Bucket(s3_bucket)\n", "\n", "# Find all files already copied\n", @@ -56,13 +57,16 @@ "outputs": [], "source": [ "# build list of rasters for generating VRT\n", - "local_path = os.path.join(\"v2023\",\"GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.0\")\n", + "local_path = os.path.join(\n", + " \"v2023\",\n", + " \"GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.0\",\n", + ")\n", "in_folder = os.path.join(template_folder, local_path)\n", "all_tiffs = [f\"{local_path}/{x}\" for x in os.listdir(in_folder)]\n", "\n", - "with open(os.path.join(template_folder, \"s3_tiffs.txt\"), 'w') as out:\n", + "with open(os.path.join(template_folder, \"s3_tiffs.txt\"), \"w\") as out:\n", " for p in all_tiffs:\n", - " out.write(f'{p}\\n')" + " out.write(f\"{p}\\n\")" ] }, { @@ -71,11 +75,18 @@ "metadata": {}, "outputs": [], "source": [ - 
"template_folder = '/home/wb411133/temp'\n", - "coastal_template = os.path.join(template_folder, \"GLOBAL-1ARCSEC-NW_OFFSET-1in10-COASTAL-DEFENDED-DEPTH-2030-SSP3_7.0-PERCENTILE50-v3.0.0.vrt\")\n", - "other_template = os.path.join(template_folder, \"GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.0.vrt\")\n", + "template_folder = \"/home/wb411133/temp\"\n", + "coastal_template = os.path.join(\n", + " template_folder,\n", + " \"GLOBAL-1ARCSEC-NW_OFFSET-1in10-COASTAL-DEFENDED-DEPTH-2030-SSP3_7.0-PERCENTILE50-v3.0.0.vrt\",\n", + ")\n", + "other_template = os.path.join(\n", + " template_folder,\n", + " \"GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.0.vrt\",\n", + ")\n", "\n", - "class generate_vrt_from_template():\n", + "\n", + "class generate_vrt_from_template:\n", " def __init__(self, template_vrt, new_dataset):\n", " self.template_vrt = template_vrt\n", " self.new_dataset = new_dataset\n", @@ -91,11 +102,12 @@ "\n", " tree.write(self.new_vrt, xml_declaration=False)\n", "\n", - "'''\n", + "\n", + "\"\"\"\n", "new_ds = \"GLOBAL-1ARCSEC-NW_OFFSET-1in10-COASTAL-DEFENDED-DEPTH-2030-SSP3_7.0-PERCENTILE50-v3.0\"\n", "xx = generate_vrt_from_template(template_vrt, new_ds)\n", "xx.update_vrt()\n", - "'''" + "\"\"\"" ] }, { @@ -145,18 +157,18 @@ "metadata": {}, "outputs": [], "source": [ - "s3_bucket = 'wbg-geography01'\n", - "s3_prefix = 'FATHOM/'\n", + "s3_bucket = \"wbg-geography01\"\n", + "s3_prefix = \"FATHOM/\"\n", "\n", - "s3 = boto3.resource('s3')\n", + "s3 = boto3.resource(\"s3\")\n", "my_bucket = s3.Bucket(s3_bucket)\n", "\n", "all_vrts = []\n", "for o in my_bucket.objects.filter(Prefix=s3_prefix):\n", " if o.key.endswith(\".vrt\"):\n", " print(o.key)\n", - " full_vrt_path = f's3://{s3_bucket}/{o.key}'\n", - " all_vrts.append(full_vrt_path)\n" + " full_vrt_path = f\"s3://{s3_bucket}/{o.key}\"\n", + " all_vrts.append(full_vrt_path)" ] }, { @@ -294,7 +306,7 @@ } ], "source": [ - "all_vrts['FLOOD_TYPE'].value_counts()" + "all_vrts[\"FLOOD_TYPE\"].value_counts()" ] }, { @@ -306,23 +318,23 @@ "all_vrts = dMisc.get_fathom_vrts(True)\n", "all_res = {}\n", "for idx, row in all_vrts.iterrows():\n", - " #vrt_path = row['PATH']\n", - " #xx = rasterio.open(vrt_path)\n", - " filename = os.path.basename(row['PATH'])\n", - " year = row['YEAR']\n", + " # vrt_path = row['PATH']\n", + " # xx = rasterio.open(vrt_path)\n", + " filename = os.path.basename(row[\"PATH\"])\n", + " year = row[\"YEAR\"]\n", " climate_model = row[\"CLIMATE_MODEL\"]\n", " if climate_model == \"PERCENTILE50\":\n", " climate_model = \"CURRENT\"\n", - " flood_type = row['FLOOD_TYPE'].lower()\n", - " defence = row['DEFENCE'].lower()\n", - " ret = row['RETURN']\n", + " flood_type = row[\"FLOOD_TYPE\"].lower()\n", + " defence = row[\"DEFENCE\"].lower()\n", + " ret = row[\"RETURN\"]\n", " label = \"_\".join([flood_type, defence, ret, climate_model, year])\n", - " ret=ret.replace(\"in\", \" in \")\n", - " if year == '2020':\n", + " ret = ret.replace(\"in\", \" in \")\n", + " if year == \"2020\":\n", " description = f\"Global {defence} {flood_type} flood model based on current climate. Flood depth is measured in cm expected flood depth, based on a {ret} year return period.\"\n", " else:\n", " description = f\"Global {defence} {flood_type} flood model based on {climate_model} climate model for year {year}. 
Flood depth is measured in cm expected flood depth, based on a {ret} year return period.\"\n",
-    "    all_res[label] = {\"description\":description,'filename':row['PATH']}"
+    "    all_res[label] = {\"description\": description, \"filename\": row[\"PATH\"]}"
    ]
   },
   {
@@ -1390,7 +1402,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open(\"fathom_file_descriptions.json\", 'w') as out_f:\n",
+    "with open(\"fathom_file_descriptions.json\", \"w\") as out_f:\n",
     "    json.dump(all_res, out_f)"
    ]
   },
@@ -1418,8 +1430,11 @@
    "source": [
     "# Generate a list of files for gdalbuildvrt\n",
     "folder = \"/home/wb411133/temp/v2023/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.0\"\n",
-    "all_files = [f'v2023/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.0/{x}' for x in os.listdir(folder)]\n",
-    "all_files\n"
+    "all_files = [\n",
+    "    f\"v2023/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.0/{x}\"\n",
+    "    for x in os.listdir(folder)\n",
+    "]\n",
+    "all_files"
    ]
   },
   {
diff --git a/notebooks/FATHOM/Vizualize_Flood_types.ipynb b/notebooks/FATHOM/Vizualize_Flood_types.ipynb
index 07d1f94..5119921 100644
--- a/notebooks/FATHOM/Vizualize_Flood_types.ipynb
+++ b/notebooks/FATHOM/Vizualize_Flood_types.ipynb
@@ -19,10 +19,8 @@
    "source": [
     "import sys\n",
     "import os\n",
-    "import boto3\n",
     "import rasterio\n",
     "\n",
-    "import pandas as pd\n",
     "import geopandas as gpd\n",
     "\n",
     "sys.path.insert(0, \"../../src\")\n",
@@ -44,11 +42,11 @@
     "out_folder = f\"/home/wb411133/temp/FATHOM/{iso3}\"\n",
     "if not os.path.exists(out_folder):\n",
     "    os.makedirs(out_folder)\n",
-    "    \n",
+    "\n",
     "# This demo uses the default national boundaries included with GeoPandas, but this can be changed here\n",
-    "world_filepath = gpd.datasets.get_path('naturalearth_lowres')\n",
+    "world_filepath = gpd.datasets.get_path(\"naturalearth_lowres\")\n",
     "world = gpd.read_file(world_filepath)\n",
-    "inB = world.loc[world['iso_a3'] == iso3].copy()"
+    "inB = world.loc[world[\"iso_a3\"] == iso3].copy()"
    ]
   },
   {
@@ -59,17 +57,21 @@
    "outputs": [],
    "source": [
     "# Select layer to download\n",
-    "flood_type = [\"COASTAL\",\"FLUVIAL\",\"PLUVIAL\"]\n",
+    "flood_type = [\"COASTAL\", \"FLUVIAL\", \"PLUVIAL\"]\n",
     "defence = [\"DEFENDED\"]\n",
-    "return_period = ['1in50']\n",
+    "return_period = [\"1in50\"]\n",
     "climate_model = [\"PERCENTILE50\"]\n",
     "year = [\"2020\"]\n",
     "\n",
     "# all_vrts is a pandas dataframe with all the vrt paths to the global datasets, with columns defining\n",
     "# the various models' defining attributes\n",
     "all_vrts = dMisc.get_fathom_vrts(True)\n",
-    "sel_images = all_vrts.loc[(all_vrts['FLOOD_TYPE'].isin(flood_type)) & (all_vrts['DEFENCE'].isin(defence)) & \n",
-    "                          (all_vrts['RETURN'].isin(return_period)) & (all_vrts['CLIMATE_MODEL'].isin(climate_model))]"
+    "sel_images = all_vrts.loc[\n",
+    "    (all_vrts[\"FLOOD_TYPE\"].isin(flood_type))\n",
+    "    & (all_vrts[\"DEFENCE\"].isin(defence))\n",
+    "    & (all_vrts[\"RETURN\"].isin(return_period))\n",
+    "    & (all_vrts[\"CLIMATE_MODEL\"].isin(climate_model))\n",
+    "]"
    ]
   },
   {
@@ -183,11 +185,10 @@
    "source": [
     "flood_data = {}\n",
     "for idx, row in sel_images.iterrows():\n",
-    "    fluvialR = rasterio.open(row['PATH'])\n",
+    "    fluvialR = rasterio.open(row[\"PATH\"])\n",
     "    rData, profile = rMisc.clipRaster(fluvialR, inB)\n",
-    "    flood_data[row['FLOOD_TYPE']] = [rData, profile]\n",
-    "    tPrint(row['PATH'])\n",
-    "    "
+    "    flood_data[row[\"FLOOD_TYPE\"]] = [rData, profile]\n",
+    "    tPrint(row[\"PATH\"])"
    ]
   },
   {
@@ -212,7 +213,7 @@
    }
   ],
"source": [ - "rData, profile = flood_data['FLUVIAL']\n", + "rData, profile = flood_data[\"FLUVIAL\"]\n", "with rMisc.create_rasterio_inmemory(profile, rData) as floodR:\n", " mapMisc.static_map_raster(\n", " floodR, colormap=\"Blues\", thresh=[0, 0.01, 0.1, 0.2, 0.5, 1, 5]\n", @@ -241,7 +242,7 @@ } ], "source": [ - "rData, profile = flood_data['PLUVIAL']\n", + "rData, profile = flood_data[\"PLUVIAL\"]\n", "with rMisc.create_rasterio_inmemory(profile, rData) as floodR:\n", " mapMisc.static_map_raster(\n", " floodR, colormap=\"Blues\", thresh=[0, 0.01, 0.1, 0.2, 0.5, 1, 5]\n", @@ -268,7 +269,7 @@ } ], "source": [ - "rData, profile = flood_data['COASTAL']\n", + "rData, profile = flood_data[\"COASTAL\"]\n", "with rMisc.create_rasterio_inmemory(profile, rData) as floodR:\n", " mapMisc.static_map_raster(\n", " floodR, colormap=\"Blues\", thresh=[0, 0.01, 0.1, 0.2, 0.5, 1, 5]\n", diff --git a/notebooks/Overture_Maps_Data_Access.ipynb b/notebooks/Overture_Maps_Data_Access.ipynb index 47a94bc..9bff57e 100644 --- a/notebooks/Overture_Maps_Data_Access.ipynb +++ b/notebooks/Overture_Maps_Data_Access.ipynb @@ -17,12 +17,9 @@ "outputs": [], "source": [ "import duckdb\n", - "from typing import Any\n", - "import json\n", "import geopandas as gpd\n", "\n", - "from osgeo import ogr\n", - "from shapely import wkb\n" + "from shapely import wkb" ] }, { @@ -40,13 +37,13 @@ ], "source": [ "con = duckdb.connect()\n", - "print(con.execute('SELECT 42').fetchall())\n", + "print(con.execute(\"SELECT 42\").fetchall())\n", "# To install an extension (this is usually done only once)\n", "try:\n", - " con.install_extension('httpfs')\n", - " con.install_extension('spatial')\n", - " con.load_extension('httpfs')\n", - " con.load_extension('spatial')\n", + " con.install_extension(\"httpfs\")\n", + " con.install_extension(\"spatial\")\n", + " con.load_extension(\"httpfs\")\n", + " con.load_extension(\"spatial\")\n", " con.execute(\"SET s3_region='us-west-2'\")\n", "except Exception as e:\n", " print(f\"Failed to install extension: {e}\")" @@ -59,7 +56,7 @@ "outputs": [], "source": [ "# Define the query to read from S3 and filter the data\n", - "query = '''\n", + "query = \"\"\"\n", "SELECT\n", " type,\n", " subType,\n", @@ -77,7 +74,7 @@ "WHERE adminLevel = 2\n", " AND ST_GeometryType(ST_GeomFromWkb(geometry)) IN ('POLYGON','MULTIPOLYGON')\n", "LIMIT 5\n", - "'''" + "\"\"\"" ] }, { @@ -129,9 +126,10 @@ " except Exception as e:\n", " print(f\"Failed to convert geometry: {e}\")\n", " return None\n", - " \n", - "df['geometry'] = df['geometry'].apply(try_wkb_loads)\n", - "gdf = gpd.GeoDataFrame(df, geometry='geometry', crs=4326)" + "\n", + "\n", + "df[\"geometry\"] = df[\"geometry\"].apply(try_wkb_loads)\n", + "gdf = gpd.GeoDataFrame(df, geometry=\"geometry\", crs=4326)" ] }, { @@ -158,7 +156,7 @@ "source": [ "import dask.dataframe as dd\n", "import geopandas as gpd\n", - "import dask_geopandas as dgpd \n", + "import dask_geopandas as dgpd\n", "\n", "from shapely.geometry import box" ] @@ -169,7 +167,7 @@ "metadata": {}, "outputs": [], "source": [ - "iso3 = 'KHM'" + "iso3 = \"KHM\"" ] }, { @@ -179,11 +177,11 @@ "outputs": [], "source": [ "df = dd.read_parquet(\n", - " 's3://overturemaps-us-west-2/release/2023-07-26-alpha.0/theme=places/type=place/*',\n", - " columns=['bbox', 'geometry'],\n", - " engine='pyarrow',\n", - " index='id',\n", - " dtype_backend='pyarrow',\n", + " \"s3://overturemaps-us-west-2/release/2023-07-26-alpha.0/theme=places/type=place/*\",\n", + " columns=[\"bbox\", \"geometry\"],\n", + " 
engine=\"pyarrow\",\n", + " index=\"id\",\n", + " dtype_backend=\"pyarrow\",\n", " storage_options={\"anon\": True},\n", " parquet_file_extensions=False,\n", ")" @@ -207,10 +205,10 @@ ], "source": [ "# Get extent of selected country as a bounding box polygon\n", - "world_filepath = gpd.datasets.get_path('naturalearth_lowres')\n", + "world_filepath = gpd.datasets.get_path(\"naturalearth_lowres\")\n", "world = gpd.read_file(world_filepath)\n", "\n", - "sel_country = world[world['iso_a3'] == iso3]\n", + "sel_country = world[world[\"iso_a3\"] == iso3]\n", "country_box = box(*sel_country.total_bounds)\n", "str(country_box)" ] @@ -244,7 +242,9 @@ } ], "source": [ - "geometry = df[\"geometry\"].map_partitions(gpd.GeoSeries.from_wkt, meta=gpd.GeoSeries(name=\"geometry\")) #.set_crs(4326)\n", + "geometry = df[\"geometry\"].map_partitions(\n", + " gpd.GeoSeries.from_wkt, meta=gpd.GeoSeries(name=\"geometry\")\n", + ") # .set_crs(4326)\n", "gdf = dgpd.from_dask_dataframe(df, geometry=geometry)\n", "\n", "clipped_gdf = gdf[gdf.geometry.within(country_box)]\n", diff --git a/src/GOSTrocks/dataMisc.py b/src/GOSTrocks/dataMisc.py index ed6aec2..36c213c 100644 --- a/src/GOSTrocks/dataMisc.py +++ b/src/GOSTrocks/dataMisc.py @@ -1,4 +1,4 @@ -import sys, os +import os import json import urllib import boto3 @@ -13,8 +13,11 @@ from . import rasterMisc as rMisc -def download_WSF(extent, wsf_url="https://download.geoservice.dlr.de/WSF2019/files/WSF2019_cog.tif", - out_file=""): +def download_WSF( + extent, + wsf_url="https://download.geoservice.dlr.de/WSF2019/files/WSF2019_cog.tif", + out_file="", +): """_summary_ Parameters @@ -29,9 +32,10 @@ def download_WSF(extent, wsf_url="https://download.geoservice.dlr.de/WSF2019/fil wsf_raster = rasterio.open(wsf_url) data, profile = rMisc.clipRaster(raster=wsf_raster, bounds=extent) if out_file != "": - with rasterio.open(out_file, 'w', **profile) as dst: + with rasterio.open(out_file, "w", **profile) as dst: dst.write(data) - return(data, profile) + return (data, profile) + def aws_search_ntl( bucket="globalnightlight", @@ -112,20 +116,33 @@ def get_geoboundaries( ) ) -def get_fathom_vrts(return_df = False): - """ Get a list of VRT files of Fathom data from the GOST S3 bucket. Note that the - VRT files are not searched dynamically but are stored in a text file in the same - folder as the function. - return_df: if True, return a pandas dataframe with the VRT files and their components, defaults to False which returns just the list of VRT files - """ - vrt_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fathom_vrts.txt") +def get_fathom_vrts(return_df=False): + """Get a list of VRT files of Fathom data from the GOST S3 bucket. Note that the + VRT files are not searched dynamically but are stored in a text file in the same + folder as the function. 
+ + return_df: if True, return a pandas dataframe with the VRT files and their components, defaults to False which returns just the list of VRT files + """ + vrt_file = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "fathom_vrts.txt" + ) all_vrts = [] with open(vrt_file, "r") as f: for line in f: all_vrts.append(line.strip()) if return_df: - vrt_pd = pd.DataFrame([x.split("-")[4:10] for x in all_vrts], columns=['RETURN', 'FLOOD_TYPE', 'DEFENCE', 'DEPTH', 'YEAR', 'CLIMATE_MODEL']) - vrt_pd['PATH'] = all_vrts + vrt_pd = pd.DataFrame( + [x.split("-")[4:10] for x in all_vrts], + columns=[ + "RETURN", + "FLOOD_TYPE", + "DEFENCE", + "DEPTH", + "YEAR", + "CLIMATE_MODEL", + ], + ) + vrt_pd["PATH"] = all_vrts return vrt_pd - return all_vrts \ No newline at end of file + return all_vrts diff --git a/src/GOSTrocks/fathom_vrts.txt b/src/GOSTrocks/fathom_vrts.txt index 6429632..4dfe3b9 100644 --- a/src/GOSTrocks/fathom_vrts.txt +++ b/src/GOSTrocks/fathom_vrts.txt @@ -517,4 +517,4 @@ s3://wbg-geography01/FATHOM/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEP s3://wbg-geography01/FATHOM/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2080-SSP1_2.6-PERCENTILE50-v3.0.vrt s3://wbg-geography01/FATHOM/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2080-SSP2_4.5-PERCENTILE50-v3.0.vrt s3://wbg-geography01/FATHOM/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2080-SSP3_7.0-PERCENTILE50-v3.0.vrt -s3://wbg-geography01/FATHOM/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2080-SSP5_8.5-PERCENTILE50-v3.0.vrt \ No newline at end of file +s3://wbg-geography01/FATHOM/GLOBAL-1ARCSEC-NW_OFFSET-1in500-PLUVIAL-DEFENDED-DEPTH-2080-SSP5_8.5-PERCENTILE50-v3.0.vrt diff --git a/src/GOSTrocks/infra/aggregator.py b/src/GOSTrocks/infra/aggregator.py index 7664317..202f5d5 100644 --- a/src/GOSTrocks/infra/aggregator.py +++ b/src/GOSTrocks/infra/aggregator.py @@ -1,6 +1,7 @@ """ The following module contains a number of functions to aggregate geospatial outputs into tables for InfraSAP analytics. """ + import geopandas as gpd import pandas as pd import rasterio as rio diff --git a/src/GOSTrocks/rasterMisc.py b/src/GOSTrocks/rasterMisc.py index 819441c..d587e2a 100644 --- a/src/GOSTrocks/rasterMisc.py +++ b/src/GOSTrocks/rasterMisc.py @@ -170,7 +170,7 @@ def clipRaster(inR, inD, outFile=None, crop=True): :rtype: array """ if isinstance(inR, str): - inR = rasterio.open(inR) + inR = rasterio.open(inR) if isinstance(inD, str): inD = gpd.read_file(inD) if inD.crs != inR.crs:
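For reference, a minimal usage sketch of the reorganized helpers, mirroring the CLIP notebook earlier in this patch. With return_df=True, get_fathom_vrts returns a dataframe with columns RETURN, FLOOD_TYPE, DEFENCE, DEPTH, YEAR, CLIMATE_MODEL, and PATH; the AOI file name below is a placeholder, not part of the commit:

import rasterio
import geopandas as gpd

import GOSTrocks.dataMisc as dMisc
import GOSTrocks.rasterMisc as rMisc

# Filter the global Fathom VRTs to one scenario, then clip each layer to an AOI
vrts = dMisc.get_fathom_vrts(return_df=True)
sel = vrts.loc[(vrts["FLOOD_TYPE"] == "PLUVIAL") & (vrts["RETURN"] == "1in50")]
aoi = gpd.read_file("aoi.geojson")  # placeholder area of interest
for _, row in sel.iterrows():
    data, profile = rMisc.clipRaster(rasterio.open(row["PATH"]), aoi)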