From 3e94569e5f61033dd6b67e161a70869798448234 Mon Sep 17 00:00:00 2001
From: rush42
Date: Tue, 21 Nov 2023 17:15:07 +0100
Subject: [PATCH 1/6] count category length per boundary areas

---
 app/analysis/bikelanes.sql | 74 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 app/analysis/bikelanes.sql

diff --git a/app/analysis/bikelanes.sql b/app/analysis/bikelanes.sql
new file mode 100644
index 00000000..a815295f
--- /dev/null
+++ b/app/analysis/bikelanes.sql
@@ -0,0 +1,74 @@
+-- TODO: Wrap into table returning function and parametrize on `res`, `table` and `tag`
+-- STEP 1: create a table containing the homogeneously split lines as points
+BEGIN;
+
+-- for each segment which is longer than `res` calculate `n` which is rounded `len` / `res`
+CREATE temp TABLE _res ON COMMIT DROP AS
+SELECT
+  *,
+  st_length(geom) AS len,
+  greatest(round(st_length(geom) / 100), 1) :: int AS n
+FROM
+  bikelanes;
+
+-- split those segments into `n` pieces with equal length `len` where `len` = `res` +/- `res`/4
+CREATE temp TABLE _bikelanes_splitted AS
+SELECT
+  osm_id,
+  tags,
+  len / n AS len,
+  ST_LineInterpolatePoint(
+    geom,
+    (generate_series(1, n) - 0.5) / n :: numeric
+  ) AS geom
+FROM
+  _res;
+
+COMMIT;
+
+-- STEP 2: calculate the distribution of our `category` for each area of the `boundary` data set
+--
+-- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
+CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist (geom);
+
+-- make `osm_id` the primary key of the `boundaries` table to allow selecting non-group-by columns
+ALTER TABLE
+  boundaries DROP CONSTRAINT IF EXISTS osm_id_key;
+
+ALTER TABLE
+  boundaries
+ADD
+  CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
+
+-- spatially join `_bikelanes_splitted` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
+CREATE TABLE boundary_stats AS
+SELECT
+  osm_id,
+  geom,
+  region,
+  jsonb_object_agg(category, len)
+FROM
+  (
+    SELECT
+      boundaries.osm_id AS osm_id,
+      boundaries.geom AS geom,
+      boundaries.tags ->> 'name' AS region,
+      _bikelanes_splitted.tags ->> 'category' AS category,
+      sum(_bikelanes_splitted.len) AS len
+    FROM
+      boundaries
+      JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
+    GROUP BY
+      boundaries.osm_id,
+      _bikelanes_splitted.tags ->> 'category'
+  ) AS sq
+GROUP BY
+  osm_id,
+  geom,
+  region;
+
+DROP TABLE _bikelanes_splitted;
+
+-- this line is only for making pg_tileserve display the `_bikelanes_splitted` table (can be deleted later on)
+SELECT
+  UpdateGeometrySRID('boundary_stats', 'geom', 3857);
\ No newline at end of file

From a41c6bb8042f1b920f23d0c34830b2f6a61a7ddc Mon Sep 17 00:00:00 2001
From: rush42
Date: Tue, 21 Nov 2023 18:32:22 +0100
Subject: [PATCH 2/6] define custom function for quantizing LineStrings

---
 app/analysis/bikelanes.sql | 91 ++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 48 deletions(-)

diff --git a/app/analysis/bikelanes.sql b/app/analysis/bikelanes.sql
index a815295f..f04421d1 100644
--- a/app/analysis/bikelanes.sql
+++ b/app/analysis/bikelanes.sql
@@ -1,44 +1,40 @@
--- TODO: Wrap into table returning function and parametrize on `res`, `table` and `tag`
--- STEP 1: create a table containing the homogeneously split lines as points
-BEGIN;
+-- STEP 1: create a function for "quantizing" LineStrings into homogeneous parts of length `res` +/- 25% (Note: segments shorter than `res` keep their length)
+CREATE OR REPLACE FUNCTION QuantizeLineString(ls Geometry(LineString), res numeric)
+  RETURNS TABLE(
+    len numeric,
+    geom Geometry(point)
+  )
+  AS $$
+DECLARE
+  len numeric := st_length(ls);
+  n int := greatest(round(len / res), 1)::int;
+BEGIN
+  RETURN query
+  SELECT
+    len / n,
+    ST_LineInterpolatePoint(ls,(generate_series(1, n) - 0.5) / n::numeric) AS geom;
+END;
+$$
+LANGUAGE plpgsql;
 
--- for each segment which is longer than `res` calculate `n` which is rounded `len` / `res`
-CREATE temp TABLE _res ON COMMIT DROP AS
-SELECT
-  *,
-  st_length(geom) AS len,
-  greatest(round(st_length(geom) / 100), 1) :: int AS n
-FROM
-  bikelanes;
-
--- split those segments into `n` pieces with equal length `len` where `len` = `res` +/- `res`/4
+-- create a table containing osm_id and tags for all quantized bikelanes geometries
 CREATE temp TABLE _bikelanes_splitted AS
 SELECT
   osm_id,
   tags,
-  len / n AS len,
-  ST_LineInterpolatePoint(
-    geom,
-    (generate_series(1, n) - 0.5) / n :: numeric
-  ) AS geom
+(QuantizeLineString(geom, 100)).* AS geom
 FROM
-  _res;
-
-COMMIT;
+  bikelanes
+  -- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
+  CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist(geom);
 
 -- STEP 2: calculate the distribution of our `category` for each area of the `boundary` data set
--
--- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
-CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist (geom);
-
 -- make `osm_id` the primary key of the `boundaries` table to allow selecting non-group-by columns
-ALTER TABLE
-  boundaries DROP CONSTRAINT IF EXISTS osm_id_key;
+ALTER TABLE boundaries
+  DROP CONSTRAINT IF EXISTS osm_id_key;
 
-ALTER TABLE
-  boundaries
-ADD
-  CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
+ALTER TABLE boundaries
+  ADD CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
 
 -- spatially join `_bikelanes_splitted` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
 CREATE TABLE boundary_stats AS
 SELECT
@@ -47,21 +43,19 @@ SELECT
   geom,
   region,
   jsonb_object_agg(category, len)
-FROM
-  (
-    SELECT
-      boundaries.osm_id AS osm_id,
-      boundaries.geom AS geom,
-      boundaries.tags ->> 'name' AS region,
-      _bikelanes_splitted.tags ->> 'category' AS category,
-      sum(_bikelanes_splitted.len) AS len
-    FROM
-      boundaries
-      JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
-    GROUP BY
-      boundaries.osm_id,
-      _bikelanes_splitted.tags ->> 'category'
-  ) AS sq
+FROM (
+  SELECT
+    boundaries.osm_id AS osm_id,
+    boundaries.geom AS geom,
+    boundaries.tags ->> 'name' AS region,
+    _bikelanes_splitted.tags ->> 'category' AS category,
+    sum(_bikelanes_splitted.len) AS len
+  FROM
+    boundaries
+    JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
+  GROUP BY
+    boundaries.osm_id,
+    _bikelanes_splitted.tags ->> 'category') AS sq
 GROUP BY
   osm_id,
   geom,
@@ -69,6 +63,7 @@ GROUP BY
 
 DROP TABLE _bikelanes_splitted;
 
--- this line is only for making pg_tileserve display the `_bikelanes_splitted` table (can be deleted later on)
+-- this line is only for making pg_tileserve display the table
 SELECT
-  UpdateGeometrySRID('boundary_stats', 'geom', 3857);
\ No newline at end of file
+  UpdateGeometrySRID('boundary_stats', 'geom', 3857);
+

From 9805b959c4d17e95f057489380229eb7fe6750e8 Mon Sep 17 00:00:00 2001
From: rush42
Date: Thu, 23 Nov 2023 21:33:41 +0100
Subject: [PATCH 3/6] enable pgFormatter extension

---
 .vscode/extensions.json | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index 99f00cbb..b31d8c04 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -1,3 +1,6 @@
 {
-  "recommendations": ["sumneko.lua"]
+  "recommendations": [
+    "sumneko.lua",
+    "adpyke.vscode-sql-formatter"
+  ]
 }

From c5bb0714edecc736eebffabd6c1269dea19f8072 Mon Sep 17 00:00:00 2001
From: rush42
Date: Mon, 27 Nov 2023 11:05:41 +0100
Subject: [PATCH 4/6] introduce filter on `admin_level`

prefix functions with `atlas_`
rename column `stats` -> `bikelanes_category` to reference source data
---
 app/analysis/bikelanes.sql | 62 ++++++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/app/analysis/bikelanes.sql b/app/analysis/bikelanes.sql
index f04421d1..dcc330ea 100644
--- a/app/analysis/bikelanes.sql
+++ b/app/analysis/bikelanes.sql
@@ -1,5 +1,5 @@
--- STEP 1: create a function for "quantizing" LineStrings into homogeneous parts of length `res` +/- 25% (Note: segments shorter than `res` keep their length)
-CREATE OR REPLACE FUNCTION QuantizeLineString(ls Geometry(LineString), res numeric)
+-- this function quantizes `LineStrings` into `Points` where each point represents a homogeneous segment of length `res` +/- 25% (Note: segments shorter than `res` keep their length)
+CREATE OR REPLACE FUNCTION atlas_QuantizeLineString(ls Geometry(LineString), res numeric)
   RETURNS TABLE(
     len numeric,
     geom Geometry(point)
@@ -17,16 +17,18 @@ END;
 $$
 LANGUAGE plpgsql;
 
--- create a table containing osm_id and tags for all quantized bikelanes geometries
-CREATE temp TABLE _bikelanes_splitted AS
+
+-- STEP 1: quantize bikelane data into points
+CREATE temp TABLE _bikelanesQuantized AS
 SELECT
   osm_id,
   tags,
-(QuantizeLineString(geom, 100)).* AS geom
+(atlas_QuantizeLineString(geom, 100)).* AS geom
 FROM
-  bikelanes
-  -- create a geospatial index on `_bikelanes_splitted` to speed up the spatial join;
-  CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanes_splitted USING gist(geom);
+  bikelanes;
+
+-- create a geospatial index on `_bikelanesQuantized` to speed up the spatial join;
+CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanesQuantized USING gist(geom);
 
 -- STEP 2: calculate the distribution of our `category` for each area of the `boundary` data set
 -- make `osm_id` the primary key of the `boundaries` table to allow selecting non-group-by columns
@@ -36,34 +38,56 @@ ALTER TABLE boundaries
 ALTER TABLE boundaries
   ADD CONSTRAINT osm_id_key PRIMARY KEY (osm_id);
 
--- spatially join `_bikelanes_splitted` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
-CREATE TABLE boundary_stats AS
+-- spatially join `_bikelanesQuantized` with `boundaries` then group by category and boundaries.osm_id and aggregate the results in a single json object per area
+DROP TABLE IF EXISTS "boundaryStats";
+
+CREATE TABLE "boundaryStats" AS
 SELECT
   osm_id,
   geom,
   region,
-  jsonb_object_agg(category, len)
+  jsonb_object_agg(category, len) AS bikelanes_category
 FROM (
   SELECT
     boundaries.osm_id AS osm_id,
     boundaries.geom AS geom,
    boundaries.tags ->> 'name' AS region,
-    _bikelanes_splitted.tags ->> 'category' AS category,
-    sum(_bikelanes_splitted.len) AS len
+    _bikelanesQuantized.tags ->> 'category' AS category,
+    sum(_bikelanesQuantized.len) AS len
  FROM
    boundaries
-    JOIN _bikelanes_splitted ON ST_Intersects(boundaries.geom, _bikelanes_splitted.geom)
+    JOIN _bikelanesQuantized ON ST_Intersects(boundaries.geom, _bikelanesQuantized.geom)
+  WHERE (boundaries.tags ->> 'admin_level')::int IN (4, 6, 7, 8)
+GROUP BY
-
boundaries.osm_id, - _bikelanes_splitted.tags ->> 'category') AS sq + JOIN _bikelanesQuantized ON ST_Intersects(boundaries.geom, _bikelanesQuantized.geom) + WHERE (boundaries.tags ->> 'admin_level')::int IN (4, 6, 7, 8) +GROUP BY + boundaries.osm_id, + _bikelanesQuantized.tags ->> 'category') AS sq GROUP BY osm_id, geom, region; -DROP TABLE _bikelanes_splitted; +DROP TABLE _bikelanesQuantized; -- this line is only for making pg_tileserve display the table SELECT - UpdateGeometrySRID('boundary_stats', 'geom', 3857); + UpdateGeometrySRID('boundaryStats', 'geom', 3857); + + +-- -- for `jsonb` with numeric values return a normalized object (all values sum to 100) which also includes the total value +-- CREATE OR REPLACE FUNCTION atlas_NormalizeDistribution(dist jsonb) +-- RETURNS jsonb +-- AS $$ +-- DECLARE +-- total numeric := sum(value) +-- FROM ( +-- SELECT +-- (jsonb_each(dist)).value::numeric) AS sq; +-- BEGIN +-- RETURN jsonb_object_agg(key, round((100 * value::numeric) / total, 2)) || jsonb_build_object('total', total) +-- FROM ( +-- SELECT +-- (jsonb_each(dist)).*) AS sq; +-- END; +-- $$ +-- LANGUAGE plpgsql; \ No newline at end of file From 7581d83ee89773c7e35103f2f004f8e8d617b6bf Mon Sep 17 00:00:00 2001 From: rush42 Date: Tue, 28 Nov 2023 10:48:06 +0100 Subject: [PATCH 5/6] add run-5-analysis.sh and rename analysis file to `boundaryStats` --- .../{bikelanes.sql => boundaryStats.sql} | 0 app/run-5-analysis.sh | 8 ++++++++ ...run-5-postprocess.sh => run-6-postprocess.sh} | 0 app/{run-6-api.sh => run-7-api.sh} | 0 app/{run-7-metadata.sh => run-8-metadata.sh} | 0 app/run.sh | 16 ++++++++++------ 6 files changed, 18 insertions(+), 6 deletions(-) rename app/analysis/{bikelanes.sql => boundaryStats.sql} (100%) create mode 100644 app/run-5-analysis.sh rename app/{run-5-postprocess.sh => run-6-postprocess.sh} (100%) rename app/{run-6-api.sh => run-7-api.sh} (100%) rename app/{run-7-metadata.sh => run-8-metadata.sh} (100%) diff --git a/app/analysis/bikelanes.sql b/app/analysis/boundaryStats.sql similarity index 100% rename from app/analysis/bikelanes.sql rename to app/analysis/boundaryStats.sql diff --git a/app/run-5-analysis.sh b/app/run-5-analysis.sh new file mode 100644 index 00000000..8ac04ce3 --- /dev/null +++ b/app/run-5-analysis.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -e + +echo -e "\e[1m\e[7m Analysis – START \e[27m\e[21m\e[0m" + +psql -q -f "./analysis/boundaryStats.sql" + +echo -e "\e[1m\e[7m Analysis – END \e[27m\e[21m\e[0m" \ No newline at end of file diff --git a/app/run-5-postprocess.sh b/app/run-6-postprocess.sh similarity index 100% rename from app/run-5-postprocess.sh rename to app/run-6-postprocess.sh diff --git a/app/run-6-api.sh b/app/run-7-api.sh similarity index 100% rename from app/run-6-api.sh rename to app/run-7-api.sh diff --git a/app/run-7-metadata.sh b/app/run-8-metadata.sh similarity index 100% rename from app/run-7-metadata.sh rename to app/run-8-metadata.sh diff --git a/app/run.sh b/app/run.sh index ead19969..a4390df3 100755 --- a/app/run.sh +++ b/app/run.sh @@ -45,14 +45,18 @@ fi process_end_time=$(date +%s) export PROCESS_RUN_TIME_DIFF=$((process_end_time - process_start_time)) # used by metadata.sh -if ! ./run-5-postprocess.sh; then - alert '*ERROR*: #run-5-postprocess exited with non-zero status code' +if ! ./run-5-analysis.sh; then + alert '*ERROR*: #run-5-analysis exited with non-zero status code' fi -if ! ./run-6-api.sh; then - alert '*ERROR*: #run-6-api exited with non-zero status code' +if ! 
./run-6-postprocess.sh; then + alert '*ERROR*: #run-6-postprocess exited with non-zero status code' fi -if ! ./run-7-metadata.sh; then - alert '*ERROR*: #run-7-metadata exited with non-zero status code' +if ! ./run-7-api.sh; then + alert '*ERROR*: #run-7-api exited with non-zero status code' +fi + +if ! ./run-8-metadata.sh; then + alert '*ERROR*: #run-8-metadata exited with non-zero status code' fi From 65bd90c65c6efffbb1589833cb333f0f8484eb59 Mon Sep 17 00:00:00 2001 From: rush42 Date: Tue, 28 Nov 2023 11:06:53 +0100 Subject: [PATCH 6/6] transform to `EPSG:25833` to calculate length in km --- app/analysis/boundaryStats.sql | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/app/analysis/boundaryStats.sql b/app/analysis/boundaryStats.sql index dcc330ea..2cc0c6c8 100644 --- a/app/analysis/boundaryStats.sql +++ b/app/analysis/boundaryStats.sql @@ -6,7 +6,7 @@ CREATE OR REPLACE FUNCTION atlas_QuantizeLineString(ls Geometry(LineString), res ) AS $$ DECLARE - len numeric := st_length(ls); + len numeric := st_length(ST_Transform(ls, 25833)); n int := greatest(round(len / res), 1)::int; BEGIN RETURN query @@ -17,7 +17,6 @@ END; $$ LANGUAGE plpgsql; - -- STEP 1: quantize bikelane data into points CREATE temp TABLE _bikelanesQuantized AS SELECT @@ -46,14 +45,14 @@ SELECT osm_id, geom, region, - jsonb_object_agg(category, len) AS bikelanes_category + jsonb_object_agg(CONCAT(category, '_km'), len) AS bikelanes_category FROM ( SELECT boundaries.osm_id AS osm_id, boundaries.geom AS geom, boundaries.tags ->> 'name' AS region, _bikelanesQuantized.tags ->> 'category' AS category, - sum(_bikelanesQuantized.len) AS len + round(sum(_bikelanesQuantized.len) / 1000, 1) AS len FROM boundaries JOIN _bikelanesQuantized ON ST_Intersects(boundaries.geom, _bikelanesQuantized.geom) @@ -72,8 +71,6 @@ DROP TABLE _bikelanesQuantized; SELECT UpdateGeometrySRID('boundaryStats', 'geom', 3857); - - -- -- for `jsonb` with numeric values return a normalized object (all values sum to 100) which also includes the total value -- CREATE OR REPLACE FUNCTION atlas_NormalizeDistribution(dist jsonb) -- RETURNS jsonb @@ -90,4 +87,4 @@ SELECT -- (jsonb_each(dist)).*) AS sq; -- END; -- $$ --- LANGUAGE plpgsql; \ No newline at end of file +-- LANGUAGE plpgsql;
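
Note (reviewer sketch, not part of the patch series): a minimal sanity check for `atlas_QuantizeLineString` as it stands after PATCH 6/6. The input line is made-up test data, built in EPSG:4326 and transformed to EPSG:3857 (the SRID the final `UpdateGeometrySRID` call suggests for the source data); it is roughly 1 km long near Berlin, so with `res = 100` the call should return about ten points whose `len` values sum to roughly 1000 m.

-- hypothetical sanity check; coordinates and SRIDs are assumptions for illustration only
SELECT
  count(*) AS points,
  round(sum(len)) AS total_m
FROM
  atlas_QuantizeLineString(
    ST_Transform(
      ST_SetSRID(ST_MakeLine(ST_MakePoint(13.4000, 52.5200), ST_MakePoint(13.4147, 52.5200)), 4326),
      3857),
    100);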
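
A second hedged sketch, also not part of the patches: after PATCH 6/6 the `bikelanes_category` column of "boundaryStats" is a jsonb object with one `<category>_km` key per bikelane category found in a region, so the per-category kilometres can be unpacked with `jsonb_each_text`. The region name 'Berlin' is an illustrative assumption, not a value guaranteed to exist in the boundaries data.

-- hypothetical query: kilometres per bikelane category for one region
SELECT
  region,
  key AS category,
  value::numeric AS km
FROM
  "boundaryStats",
  jsonb_each_text(bikelanes_category)
WHERE
  region = 'Berlin'
ORDER BY
  km DESC;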