From 3e94569e5f61033dd6b67e161a70869798448234 Mon Sep 17 00:00:00 2001
From: rush42
Date: Tue, 21 Nov 2023 17:15:07 +0100
Subject: [PATCH 1/6] count category length per boundary areas

---
 app/analysis/bikelanes.sql | 74 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 app/analysis/bikelanes.sql

diff --git a/app/analysis/bikelanes.sql b/app/analysis/bikelanes.sql
new file mode 100644
index 00000000..a815295f
--- /dev/null
+++ b/app/analysis/bikelanes.sql
@@ -0,0 +1,74 @@
+-- TODO: Wrap into table returning function and parametrize on `res`, `table` and `tag`
+-- STEP 1: create a table containing the homogeneously split lines as points
+BEGIN;
+
+-- for each segment which is longer than `res` calculate `n` which is rounded `len` / `res`
+CREATE temp TABLE _res ON COMMIT DROP AS
+SELECT
+  *,
+  st_length(geom) AS len,
+  greatest(round(st_length(geom) / 100), 1) :: int AS n
+FROM
+  bikelanes;
+
+-- split those segments into `n` pieces with equal length `len` where `len` = `res` +/- `res`/4
+CREATE temp TABLE _bikelanes_splitted AS
+SELECT
+  osm_id,
+  tags,
+  len / n AS len,
+  ST_LineInterpolatePoint(
+    geom,
+    (generate_series(1, n) - 0.5) / n :: numeric
+  ) AS geom
+FROM
+  _res;
+
+COMMIT;
+
+-- STEP 2: calculate the distribution of our `category` for each area of the `boundary` data set
+--
+-- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
+CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist (geom);
+
+-- make `osm_id` the primary key of the `boundaries` table to allow selecting non-group-by columns
+ALTER TABLE
+  boundaries DROP CONSTRAINT IF EXISTS osm_id_key;
+
+ALTER TABLE
+  boundaries
+ADD
+  CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
+
+-- spatially join `_bikelanes_splitted` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
+CREATE TABLE boundary_stats AS
+SELECT
+  osm_id,
+  geom,
+  region,
+  jsonb_object_agg(category, len)
+FROM
+  (
+    SELECT
+      boundaries.osm_id AS osm_id,
+      boundaries.geom AS geom,
+      boundaries.tags ->> 'name' AS region,
+      _bikelanes_splitted.tags ->> 'category' AS category,
+      sum(_bikelanes_splitted.len) AS len
+    FROM
+      boundaries
+      JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
+    GROUP BY
+      boundaries.osm_id,
+      _bikelanes_splitted.tags ->> 'category'
+  ) AS sq
+GROUP BY
+  osm_id,
+  geom,
+  region;
+
+DROP TABLE _bikelanes_splitted;
+
+-- this line is only for making pg_tileserve display the `_bikelanes_splitted` table (can be deleted later on)
+SELECT
+  UpdateGeometrySRID('boundary_stats', 'geom', 3857);
\ No newline at end of file

From a41c6bb8042f1b920f23d0c34830b2f6a61a7ddc Mon Sep 17 00:00:00 2001
From: rush42
Date: Tue, 21 Nov 2023 18:32:22 +0100
Subject: [PATCH 2/6] define custom function for quantizing LineStrings

---
 app/analysis/bikelanes.sql | 91 ++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 48 deletions(-)

diff --git a/app/analysis/bikelanes.sql b/app/analysis/bikelanes.sql
index a815295f..f04421d1 100644
--- a/app/analysis/bikelanes.sql
+++ b/app/analysis/bikelanes.sql
@@ -1,44 +1,40 @@
--- TODO: Wrap into table returning function and parametrize on `res`, `table` and `tag`
--- STEP 1: create a table containing the homogeneously split lines as points
-BEGIN;
+-- STEP 1: create a function for "quantizing" LineStrings into homogeneous parts of length `res` +/- 25% (Note: segments shorter than `res` keep their length)
+CREATE OR REPLACE FUNCTION QuantizeLineString(ls Geometry(LineString), res numeric)
+  RETURNS TABLE(
+    len numeric,
+    geom Geometry(point)
+  )
+  AS $$
+DECLARE
+  len numeric := st_length(ls);
+  n int := greatest(round(len / res), 1)::int;
+BEGIN
+  RETURN query
+  SELECT
+    len / n,
+    ST_LineInterpolatePoint(ls,(generate_series(1, n) - 0.5) / n::numeric) AS geom;
+END;
+$$
+LANGUAGE plpgsql;
 
--- for each segment which is longer than `res` calculate `n` which is rounded `len` / `res`
-CREATE temp TABLE _res ON COMMIT DROP AS
-SELECT
-  *,
-  st_length(geom) AS len,
-  greatest(round(st_length(geom) / 100), 1) :: int AS n
-FROM
-  bikelanes;
-
--- split those segments into `n` pieces with equal length `len` where `len` = `res` +/- `res`/4
+-- create a table containing osm_id and tags for all quantized bikelanes geometries
 CREATE temp TABLE _bikelanes_splitted AS
 SELECT
   osm_id,
   tags,
-  len / n AS len,
-  ST_LineInterpolatePoint(
-    geom,
-    (generate_series(1, n) - 0.5) / n :: numeric
-  ) AS geom
+(QuantizeLineString(geom, 100)).* AS geom
 FROM
-  _res;
-
-COMMIT;
+  bikelanes
+  -- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
+  CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist(geom);
 
 -- STEP 2: calculate the distribution of our `category` for each area of the `boundary` data set
--
--- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
-CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist (geom);
-
 -- make `osm_id` the primary key of the `boundaries` table to allow selecting non-group-by columns
-ALTER TABLE
-  boundaries DROP CONSTRAINT IF EXISTS osm_id_key;
+ALTER TABLE boundaries
+  DROP CONSTRAINT IF EXISTS osm_id_key;
 
-ALTER TABLE
-  boundaries
-ADD
-  CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
+ALTER TABLE boundaries
+  ADD CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
 
 -- spatially join `_bikelanes_splitted` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
 CREATE TABLE boundary_stats AS
 SELECT
@@ -47,21 +43,19 @@ SELECT
   geom,
   region,
   jsonb_object_agg(category, len)
-FROM
-  (
-    SELECT
-      boundaries.osm_id AS osm_id,
-      boundaries.geom AS geom,
-      boundaries.tags ->> 'name' AS region,
-      _bikelanes_splitted.tags ->> 'category' AS category,
-      sum(_bikelanes_splitted.len) AS len
-    FROM
-      boundaries
-      JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
-    GROUP BY
-      boundaries.osm_id,
-      _bikelanes_splitted.tags ->> 'category'
-  ) AS sq
+FROM (
+  SELECT
+    boundaries.osm_id AS osm_id,
+    boundaries.geom AS geom,
+    boundaries.tags ->> 'name' AS region,
+    _bikelanes_splitted.tags ->> 'category' AS category,
+    sum(_bikelanes_splitted.len) AS len
+  FROM
+    boundaries
+    JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
+  GROUP BY
+    boundaries.osm_id,
+    _bikelanes_splitted.tags ->> 'category') AS sq
 GROUP BY
   osm_id,
   geom,
@@ -69,6 +63,7 @@ GROUP BY
 
 DROP TABLE _bikelanes_splitted;
 
--- this line is only for making pg_tileserve display the `_bikelanes_splitted` table (can be deleted later on)
+-- this line is only for making pg_tileserve display the table
 SELECT
-  UpdateGeometrySRID('boundary_stats', 'geom', 3857);
\ No newline at end of file
+  UpdateGeometrySRID('boundary_stats', 'geom', 3857);
+

From 9805b959c4d17e95f057489380229eb7fe6750e8 Mon Sep 17 00:00:00 2001
From: rush42
Date: Thu, 23 Nov 2023 21:33:41 +0100
Subject: [PATCH 3/6] enable pgFormatter extension

---
 .vscode/extensions.json | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index 99f00cbb..b31d8c04 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -1,3 +1,6 @@
 {
-  "recommendations": ["sumneko.lua"]
+  "recommendations": [
+    "sumneko.lua",
+    "adpyke.vscode-sql-formatter"
+  ]
 }

From c5bb0714edecc736eebffabd6c1269dea19f8072 Mon Sep 17 00:00:00 2001
From: rush42
Date: Mon, 27 Nov 2023 11:05:41 +0100
Subject: [PATCH 4/6] introduce filter on `admin_level`

prefix functions with `atlas_`
rename column `stats` -> `bikelanes_category` to reference source data
---
 app/analysis/bikelanes.sql | 62 ++++++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/app/analysis/bikelanes.sql b/app/analysis/bikelanes.sql
index f04421d1..dcc330ea 100644
--- a/app/analysis/bikelanes.sql
+++ b/app/analysis/bikelanes.sql
@@ -1,5 +1,5 @@
--- STEP 1: create a function for "quantizing" LineStrings into homogeneous parts of length `res` +/- 25% (Note: segments shorter than `res` keep their length)
-CREATE OR REPLACE FUNCTION QuantizeLineString(ls Geometry(LineString), res numeric)
+-- this function quantizes `LineStrings` into `Points` where each point represents a homogeneous segment of length `res` +/- 25% (Note: segments shorter than `res` keep their length)
+CREATE OR REPLACE FUNCTION atlas_QuantizeLineString(ls Geometry(LineString), res numeric)
   RETURNS TABLE(
     len numeric,
     geom Geometry(point)
@@ -17,16 +17,18 @@ END;
 $$
 LANGUAGE plpgsql;
 
--- create a table containing osm_id and tags for all quantized bikelanes geometries
-CREATE temp TABLE _bikelanes_splitted AS
+
+-- STEP 1: quantize bikelane data into points
+CREATE temp TABLE _bikelanesQuantized AS
 SELECT
   osm_id,
   tags,
-(QuantizeLineString(geom, 100)).* AS geom
+(atlas_QuantizeLineString(geom, 100)).* AS geom
 FROM
-  bikelanes
-  -- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
-  CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist(geom);
+  bikelanes;
+
+-- create a geospatial index on `_bikelanesQuantized` to speed up the spatial join;
+CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanesQuantized USING gist(geom);
 
 -- STEP 2: calculate the distribution of our `category` for each area of the `boundary` data set
 -- make `osm_id` the primary key of the `boundaries` table to allow selecting non-group-by columns
@@ -36,34 +38,56 @@ ALTER TABLE boundaries
 ALTER TABLE boundaries
   ADD CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
 
--- spatially join `_bikelanes_splitted` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
-CREATE TABLE boundary_stats AS
+-- spatially join `_bikelanesQuantized` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
+DROP TABLE IF EXISTS "boundaryStats";
+
+CREATE TABLE "boundaryStats" AS
 SELECT
   osm_id,
   geom,
   region,
-  jsonb_object_agg(category, len)
+  jsonb_object_agg(category, len) AS bikelanes_category
 FROM (
   SELECT
     boundaries.osm_id AS osm_id,
     boundaries.geom AS geom,
    boundaries.tags ->> 'name' AS region,
-    _bikelanes_splitted.tags ->> 'category' AS category,
-    sum(_bikelanes_splitted.len) AS len
+    _bikelanesQuantized.tags ->> 'category' AS category,
+    sum(_bikelanesQuantized.len) AS len
  FROM
    boundaries
-    JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
+    JOIN _bikelanesQuantized ON ST_Intersects(boundaries.geom, _bikelanesQuantized.geom)
+  WHERE (boundaries.tags ->> 'admin_level')::int IN (4, 6, 7, 8)
+GROUP BY
-
boundaries.osm_id, - _bikelanes_splitted.tags ->> 'category') AS sq + JOIN _bikelanesQuantized ON ST_Intersects(boundaries.geom, _bikelanesQuantized.geom) + WHERE (boundaries.tags ->> 'admin_level')::int IN (4, 6, 7, 8) +GROUP BY + boundaries.osm_id, + _bikelanesQuantized.tags ->> 'category') AS sq GROUP BY osm_id, geom, region; -DROP TABLE _bikelanes_splitted; +DROP TABLE _bikelanesQuantized; -- this line is only for making pg_tileserve display the table SELECT - UpdateGeometrySRID('boundary_stats', 'geom', 3857); + UpdateGeometrySRID('boundaryStats', 'geom', 3857); + + +-- -- for `jsonb` with numeric values return a normalized object (all values sum to 100) which also includes the total value +-- CREATE OR REPLACE FUNCTION atlas_NormalizeDistribution(dist jsonb) +-- RETURNS jsonb +-- AS $$ +-- DECLARE +-- total numeric := sum(value) +-- FROM ( +-- SELECT +-- (jsonb_each(dist)).value::numeric) AS sq; +-- BEGIN +-- RETURN jsonb_object_agg(key, round((100 * value::numeric) / total, 2)) || jsonb_build_object('total', total) +-- FROM ( +-- SELECT +-- (jsonb_each(dist)).*) AS sq; +-- END; +-- $$ +-- LANGUAGE plpgsql; \ No newline at end of file From 7581d83ee89773c7e35103f2f004f8e8d617b6bf Mon Sep 17 00:00:00 2001 From: rush42 Date: Tue, 28 Nov 2023 10:48:06 +0100 Subject: [PATCH 5/6] add run-5-analysis.sh and rename analysis file to `boundaryStats` --- .../{bikelanes.sql => boundaryStats.sql} | 0 app/run-5-analysis.sh | 8 ++++++++ ...run-5-postprocess.sh => run-6-postprocess.sh} | 0 app/{run-6-api.sh => run-7-api.sh} | 0 app/{run-7-metadata.sh => run-8-metadata.sh} | 0 app/run.sh | 16 ++++++++++------ 6 files changed, 18 insertions(+), 6 deletions(-) rename app/analysis/{bikelanes.sql => boundaryStats.sql} (100%) create mode 100644 app/run-5-analysis.sh rename app/{run-5-postprocess.sh => run-6-postprocess.sh} (100%) rename app/{run-6-api.sh => run-7-api.sh} (100%) rename app/{run-7-metadata.sh => run-8-metadata.sh} (100%) diff --git a/app/analysis/bikelanes.sql b/app/analysis/boundaryStats.sql similarity index 100% rename from app/analysis/bikelanes.sql rename to app/analysis/boundaryStats.sql diff --git a/app/run-5-analysis.sh b/app/run-5-analysis.sh new file mode 100644 index 00000000..8ac04ce3 --- /dev/null +++ b/app/run-5-analysis.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -e + +echo -e "\e[1m\e[7m Analysis – START \e[27m\e[21m\e[0m" + +psql -q -f "./analysis/boundaryStats.sql" + +echo -e "\e[1m\e[7m Analysis – END \e[27m\e[21m\e[0m" \ No newline at end of file diff --git a/app/run-5-postprocess.sh b/app/run-6-postprocess.sh similarity index 100% rename from app/run-5-postprocess.sh rename to app/run-6-postprocess.sh diff --git a/app/run-6-api.sh b/app/run-7-api.sh similarity index 100% rename from app/run-6-api.sh rename to app/run-7-api.sh diff --git a/app/run-7-metadata.sh b/app/run-8-metadata.sh similarity index 100% rename from app/run-7-metadata.sh rename to app/run-8-metadata.sh diff --git a/app/run.sh b/app/run.sh index ead19969..a4390df3 100755 --- a/app/run.sh +++ b/app/run.sh @@ -45,14 +45,18 @@ fi process_end_time=$(date +%s) export PROCESS_RUN_TIME_DIFF=$((process_end_time - process_start_time)) # used by metadata.sh -if ! ./run-5-postprocess.sh; then - alert '*ERROR*: #run-5-postprocess exited with non-zero status code' +if ! ./run-5-analysis.sh; then + alert '*ERROR*: #run-5-analysis exited with non-zero status code' fi -if ! ./run-6-api.sh; then - alert '*ERROR*: #run-6-api exited with non-zero status code' +if ! 
./run-6-postprocess.sh; then + alert '*ERROR*: #run-6-postprocess exited with non-zero status code' fi -if ! ./run-7-metadata.sh; then - alert '*ERROR*: #run-7-metadata exited with non-zero status code' +if ! ./run-7-api.sh; then + alert '*ERROR*: #run-7-api exited with non-zero status code' +fi + +if ! ./run-8-metadata.sh; then + alert '*ERROR*: #run-8-metadata exited with non-zero status code' fi From 65bd90c65c6efffbb1589833cb333f0f8484eb59 Mon Sep 17 00:00:00 2001 From: rush42 Date: Tue, 28 Nov 2023 11:06:53 +0100 Subject: [PATCH 6/6] transform to `EPSG:25833` to calculate length in km --- app/analysis/boundaryStats.sql | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/app/analysis/boundaryStats.sql b/app/analysis/boundaryStats.sql index dcc330ea..2cc0c6c8 100644 --- a/app/analysis/boundaryStats.sql +++ b/app/analysis/boundaryStats.sql @@ -6,7 +6,7 @@ CREATE OR REPLACE FUNCTION atlas_QuantizeLineString(ls Geometry(LineString), res ) AS $$ DECLARE - len numeric := st_length(ls); + len numeric := st_length(ST_Transform(ls, 25833)); n int := greatest(round(len / res), 1)::int; BEGIN RETURN query @@ -17,7 +17,6 @@ END; $$ LANGUAGE plpgsql; - -- STEP 1: quantize bikelane data into points CREATE temp TABLE _bikelanesQuantized AS SELECT @@ -46,14 +45,14 @@ SELECT osm_id, geom, region, - jsonb_object_agg(category, len) AS bikelanes_category + jsonb_object_agg(CONCAT(category, '_km'), len) AS bikelanes_category FROM ( SELECT boundaries.osm_id AS osm_id, boundaries.geom AS geom, boundaries.tags ->> 'name' AS region, _bikelanesQuantized.tags ->> 'category' AS category, - sum(_bikelanesQuantized.len) AS len + round(sum(_bikelanesQuantized.len) / 1000, 1) AS len FROM boundaries JOIN _bikelanesQuantized ON ST_Intersects(boundaries.geom, _bikelanesQuantized.geom) @@ -72,8 +71,6 @@ DROP TABLE _bikelanesQuantized; SELECT UpdateGeometrySRID('boundaryStats', 'geom', 3857); - - -- -- for `jsonb` with numeric values return a normalized object (all values sum to 100) which also includes the total value -- CREATE OR REPLACE FUNCTION atlas_NormalizeDistribution(dist jsonb) -- RETURNS jsonb @@ -90,4 +87,4 @@ SELECT -- (jsonb_each(dist)).*) AS sq; -- END; -- $$ --- LANGUAGE plpgsql; \ No newline at end of file +-- LANGUAGE plpgsql;
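
Note (reviewer sketch, not part of the patch series): a minimal sanity check for `atlas_QuantizeLineString` as it stands after PATCH 6/6. The input line is made-up test data, built in EPSG:4326 and transformed to EPSG:3857 (the SRID the final `UpdateGeometrySRID` call suggests for the source data); it is roughly 1 km long near Berlin, so with `res = 100` the call should return about ten points whose `len` values sum to roughly 1000 m.

-- hypothetical sanity check; coordinates and SRIDs are assumptions for illustration only
SELECT
  count(*) AS points,
  round(sum(len)) AS total_m
FROM
  atlas_QuantizeLineString(
    ST_Transform(
      ST_SetSRID(ST_MakeLine(ST_MakePoint(13.4000, 52.5200), ST_MakePoint(13.4147, 52.5200)), 4326),
      3857),
    100);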
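
A second hedged sketch, also not part of the patches: after PATCH 6/6 the `bikelanes_category` column of "boundaryStats" is a jsonb object with one `<category>_km` key per bikelane category found in a region, so the per-category kilometres can be unpacked with `jsonb_each_text`. The region name 'Berlin' is an illustrative assumption, not a value guaranteed to exist in the boundaries data.

-- hypothetical query: kilometres per bikelane category for one region
SELECT
  region,
  key AS category,
  value::numeric AS km
FROM
  "boundaryStats",
  jsonb_each_text(bikelanes_category)
WHERE
  region = 'Berlin'
ORDER BY
  km DESC;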