diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 99f00cbb..b31d8c04 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -1,3 +1,6 @@ { - "recommendations": ["sumneko.lua"] + "recommendations": [ + "sumneko.lua", + "adpyke.vscode-sql-formatter" + ] } diff --git a/app/analysis/boundaryStats.sql b/app/analysis/boundaryStats.sql new file mode 100644 index 00000000..2cc0c6c8 --- /dev/null +++ b/app/analysis/boundaryStats.sql @@ -0,0 +1,90 @@ +-- this function quantizes `LineStrings` into `Points` where each point represents a homogenous segment each of length `res` +/- 25% (Note: segments shorter than `res` keep their length) +CREATE OR REPLACE FUNCTION atlas_QuantizeLineString(ls Geometry(LineString), res numeric) + RETURNS TABLE( + len numeric, + geom Geometry(point) + ) + AS $$ +DECLARE + len numeric := st_length(ST_Transform(ls, 25833)); + n int := greatest(round(len / res), 1)::int; +BEGIN + RETURN query + SELECT + len / n, + ST_LineInterpolatePoint(ls,(generate_series(1, n) - 0.5) / n::numeric) AS geom; +END; +$$ +LANGUAGE plpgsql; + +-- STEP 1: quantize bikelane data into points +CREATE temp TABLE _bikelanesQuantized AS +SELECT + osm_id, + tags, +(atlas_QuantizeLineString(geom, 100)).* AS geom +FROM + bikelanes; + +-- create a geospatial index on `_bikelanesQuantized` to speed up the spatial join; +CREATE INDEX "_bikelanes_stats_geom_idx" ON _bikelanesQuantized USING gist(geom); + +-- STEP 2: calculate the distribution of our `category` for each area of the `boundary` data set +-- make `osm_id` the primary of the `boundaries` table to allow selecting non-group-by columns +ALTER TABLE boundaries + DROP CONSTRAINT IF EXISTS osm_id_key; + +ALTER TABLE boundaries + ADD CONSTRAINT osm_id_key PRIMARY KEY (osm_id); + +-- spatialy join `_bikelanesQuantized` with `boundaries` then group by category and boundaries.osm_id aggreagate the results in a single json object per area +DROP TABLE IF EXISTS "boundaryStats"; + +CREATE TABLE "boundaryStats" AS +SELECT + osm_id, + geom, + region, + jsonb_object_agg(CONCAT(category, '_km'), len) AS bikelanes_category +FROM ( + SELECT + boundaries.osm_id AS osm_id, + boundaries.geom AS geom, + boundaries.tags ->> 'name' AS region, + _bikelanesQuantized.tags ->> 'category' AS category, + round(sum(_bikelanesQuantized.len) / 1000, 1) AS len + FROM + boundaries + JOIN _bikelanesQuantized ON ST_Intersects(boundaries.geom, _bikelanesQuantized.geom) + WHERE (boundaries.tags ->> 'admin_level')::int IN (4, 6, 7, 8) +GROUP BY + boundaries.osm_id, + _bikelanesQuantized.tags ->> 'category') AS sq +GROUP BY + osm_id, + geom, + region; + +DROP TABLE _bikelanesQuantized; + +-- this line is only for making pg_tileserve display the table +SELECT + UpdateGeometrySRID('boundaryStats', 'geom', 3857); + +-- -- for `jsonb` with numeric values return a normalized object (all values sum to 100) which also includes the total value +-- CREATE OR REPLACE FUNCTION atlas_NormalizeDistribution(dist jsonb) +-- RETURNS jsonb +-- AS $$ +-- DECLARE +-- total numeric := sum(value) +-- FROM ( +-- SELECT +-- (jsonb_each(dist)).value::numeric) AS sq; +-- BEGIN +-- RETURN jsonb_object_agg(key, round((100 * value::numeric) / total, 2)) || jsonb_build_object('total', total) +-- FROM ( +-- SELECT +-- (jsonb_each(dist)).*) AS sq; +-- END; +-- $$ +-- LANGUAGE plpgsql; diff --git a/app/run-5-analysis.sh b/app/run-5-analysis.sh new file mode 100644 index 00000000..8ac04ce3 --- /dev/null +++ b/app/run-5-analysis.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -e + +echo -e "\e[1m\e[7m Analysis – START \e[27m\e[21m\e[0m" + +psql -q -f "./analysis/boundaryStats.sql" + +echo -e "\e[1m\e[7m Analysis – END \e[27m\e[21m\e[0m" \ No newline at end of file diff --git a/app/run-5-postprocess.sh b/app/run-6-postprocess.sh similarity index 100% rename from app/run-5-postprocess.sh rename to app/run-6-postprocess.sh diff --git a/app/run-6-api.sh b/app/run-7-api.sh similarity index 100% rename from app/run-6-api.sh rename to app/run-7-api.sh diff --git a/app/run-7-metadata.sh b/app/run-8-metadata.sh similarity index 100% rename from app/run-7-metadata.sh rename to app/run-8-metadata.sh diff --git a/app/run.sh b/app/run.sh index ead19969..a4390df3 100755 --- a/app/run.sh +++ b/app/run.sh @@ -45,14 +45,18 @@ fi process_end_time=$(date +%s) export PROCESS_RUN_TIME_DIFF=$((process_end_time - process_start_time)) # used by metadata.sh -if ! ./run-5-postprocess.sh; then - alert '*ERROR*: #run-5-postprocess exited with non-zero status code' +if ! ./run-5-analysis.sh; then + alert '*ERROR*: #run-5-analysis exited with non-zero status code' fi -if ! ./run-6-api.sh; then - alert '*ERROR*: #run-6-api exited with non-zero status code' +if ! ./run-6-postprocess.sh; then + alert '*ERROR*: #run-6-postprocess exited with non-zero status code' fi -if ! ./run-7-metadata.sh; then - alert '*ERROR*: #run-7-metadata exited with non-zero status code' +if ! ./run-7-api.sh; then + alert '*ERROR*: #run-7-api exited with non-zero status code' +fi + +if ! ./run-8-metadata.sh; then + alert '*ERROR*: #run-8-metadata exited with non-zero status code' fi