From 47c06de4da96e6746132f0cf45a9303e977a6a15 Mon Sep 17 00:00:00 2001 From: pszufe Date: Sun, 7 Jul 2024 14:04:13 +0200 Subject: [PATCH] Redesign and expend API --- .gitignore | 3 +- Project.toml | 6 +- README.md | 153 ++++++++++++++++++++++++++++---------- docs/src/reference.md | 2 +- docs/src/visualize.md | 4 +- src/OSMToolset.jl | 3 +- src/attractiveness.jl | 26 +++---- src/poi.jl | 166 +++++++++++++++++++++++++++++------------- test/runtests.jl | 32 ++++++-- 9 files changed, 281 insertions(+), 114 deletions(-) diff --git a/.gitignore b/.gitignore index b13f4e6..644b1d7 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ /docs/build/ /docs/site/ /.vscode - +.ipynb_checkpoints/* +/.ipynb_checkpoints/* diff --git a/Project.toml b/Project.toml index 6dc518b..9d823bb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "OSMToolset" uuid = "a1c25ae6-0f93-4b3a-bddf-c248cb99b9fa" authors = ["pszufe and contributors"] -version = "0.1.2" +version = "0.2.0" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" @@ -16,11 +16,11 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] CSV = "0.10" DataFrames = "1.1, 1.2, 1.3, 1.4, 1.5, 1.6" -EzXML = "1.0, 1.1" +EzXML = "1.0, 1.1, 1.2" NamedTupleTools = "0.14" OpenStreetMapX = "0.2, 0.3, 0.4" Parsers = "2" -SpatialIndexing = "0.1.5" +SpatialIndexing = "0.1.5, 0.1.6" StatsBase = "0.28, 0.29, 0.30, 0.31, 0.32, 0.33, 0.34" julia = "1.6" diff --git a/README.md b/README.md index 51cf4d2..54d86ed 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ `OSMToolset` package provides the tools for efficient extraction of [point-of-interest](https://en.wikipedia.org/wiki/Point_of_interest) from maps and building various custom [walkability](https://en.wikipedia.org/wiki/Walkability) indexes in [Julia](https://julialang.org/). 
-**Documentation**: [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://pszufe.github.io/OSMToolset.jl/dev/) +**Documentation**: [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://pszufe.github.io/OSMToolset.jl/dev/)
[![DOI](https://zenodo.org/badge/637564645.svg)](https://zenodo.org/doi/10.5281/zenodo.10016849) @@ -53,66 +53,140 @@ julia> df1 = find_poi(file) 4 columns and 76 rows omitted ``` The default configuration file can be founds in `OSMToolset.__builtin_config_path`. This configuration has meta-data columns that can be seen in results of the parsing process. You could create on base on that your own configuration and use it from scratch. -``` -myconfig = ScrapePOIConfig{AttractivenessMetaPOI}(OSMToolset.__builtin_config_path) -df1 = find_poi(file;scrape_config=myconfig) -``` Suppose that rather you want to configure manually what is scraped. Perhaps we just wanted parking spaces that can be either defined in an OSM file as `amenity=parking` or as `parking` key value: ``` -julia> config = DataFrame(key=["parking", "amenity"], values=["*", "parking"]) -2×2 DataFrame - Row │ key values - │ String String -─────┼────────────────── - 1 │ parking * - 2 │ amenity parking +julia> config = ScrapePOIConfig("parking",("amenity","parking")) +ScrapePOIConfig{NoneMetaPOI} with 2 keys: + No │ key values +────┼────────────────── + 1 │ amenity parking + 2 │ parking * ``` -Note that contrary to the previous example this time we do not have meta data columns and hence we will use the `NoneMetaPOI` configuration. +Note that the scraping configuration can be extracted to a data frame by executing `config |> DataFrame`. Such dataframe can also be used to create a new configuration by executing `ScrapePOIConfig{NoneMetaPOI}(DataFrame(key=["amenity","parking"],values=["parking","*"]))`. + +Note that since we do not use meta data yet we use parameter: `NoneMetaPOI`. 
Now this can be scraped as : ``` -julia> df2 = find_poi(file; scrape_config=ScrapePOIConfig{NoneMetaPOI}(config)) +julia> df2 = find_poi(file, config) 12×7 DataFrame Row │ elemtype elemid nodeid lat lon key value │ Symbol Int64 Int64 Float64 Float64 String String ─────┼─────────────────────────────────────────────────────────────────────── 1 │ way 187565434 1982207088 42.3603 -71.0866 amenity parking ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ - 12 │ way 1052438049 9672086211 42.3624 -71.0878 parking surface - 10 rows omitted + 12 │ way 1052438049 9672086211 42.3624 -71.0878 parking surface 10 rows omitted +``` + +It is also possible to extract adjacent tags within the same node - this can be achieved via the `all_tags` option. +For example, we could get the information on parking place metadata. + +``` +find_poi(file, ScrapePOIConfig("parking",("amenity","parking")); all_tags=true) +25×7 DataFrame + Row │ elemtype elemid nodeid lat lon key value + │ Symbol Int64 Int64 Float64 Float64 String String +─────┼──────────────────────────────────────────────────────────────────────────────── + 1 │ way 187565434 1982207088 42.3603 -71.0866 amenity parking + 2 │ way 187565434 1982207088 42.3603 -71.0866 access private + 3 │ way 187565434 1982207088 42.3603 -71.0866 parking surface + 4 │ way 187565434 1982207088 42.3603 -71.0866 surface asphalt + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ + 25 │ way 1052438049 9672086211 42.3624 -71.0878 parking surface + 20 rows omitted +``` +It can be seen that the same nodeid is repeated for different tags. + +The data that we extract can be decorated with additional information, such as range and influence of the POI. 
+ +``` +julia> config2 = ScrapePOIConfig(("amenity","cafe")=>AttractivenessMetaPOI(:food,1,500), ("amenity","restaurant")=>AttractivenessMetaPOI(:food,2,1000)) +ScrapePOIConfig{AttractivenessMetaPOI} with 2 keys: + No │ key values group influence range +────┼─────────────────────────────────────────────── + 1 │ amenity cafe food 1.0 500.0 + 2 │ amenity restaurant food 2.0 1000.0 +``` +Here we assume that the importance of a restaurant is larger than that of a cafe and that people are more likely to walk a larger distance to visit a restaurant. + ``` -This data can be further processed in many ways. For example [here](https://pszufe.github.io/OSMToolset.jl/dev/visualize/) is a sample code that performs POI vizualisation +julia> filter!(r->r.nodeid in [1884055322, 11173231405], # select two places + find_poi(file, config2, all_tags=true)) +5×10 DataFrame + Row │ elemtype elemid nodeid lat lon key value group influence range + │ Symbol Int64 Int64 Float64 Float64 String String Symbol? Float64? Float64? +─────┼───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + 1 │ node 1884055322 1884055322 42.3617 -71.09 amenity cafe food 1.0 500.0 + 2 │ node 1884055322 1884055322 42.3617 -71.09 name Forbes Family Cafe missing missing missing + 3 │ node 1884055322 1884055322 42.3617 -71.09 opening_hours Mo-Fr 11:00-15:00 missing missing missing + 4 │ node 11173231405 11173231405 42.3622 -71.0864 amenity cafe food 1.0 500.0 + 5 │ node 11173231405 11173231405 42.3622 -71.0864 name Ripple Cafe missing missing missing +``` + + +The data can be further processed in many ways. For example [here](https://pszufe.github.io/OSMToolset.jl/dev/visualize/) is a sample code that performs POI visualization ## Spatial attractiveness processing -Suppose we have the `df1` data from the previous example. 
Now we can do a spatial attractiveness index in the following way: +Let's consider more complex attractiveness information: +``` + config3 = ScrapePOIConfig(("amenity","cafe")=>AttractivenessMetaPOI(:food,1,500), ("amenity","restaurant")=>AttractivenessMetaPOI(:food,2,1000), (["parking",("amenity","parking")] .=> Ref(AttractivenessMetaPOI(:car,1,500)))... ) +ScrapePOIConfig{AttractivenessMetaPOI} with 4 keys: + No │ key values group influence range +────┼─────────────────────────────────────────────── + 1 │ amenity cafe food 1.0 500.0 + 2 │ amenity parking car 1.0 500.0 + 3 │ amenity restaurant food 2.0 1000.0 + 4 │ parking * car 1.0 500.0 +``` + +Note that in this demo we assume attractiveness configuration defined as `AttractivenessMetaPOI`. If you want a different structure of data for this index you need to create a subtype of `AbstractMetaPOI` and use it in the constructor. + +We search for such locations: ``` -ix = AttractivenessSpatIndex(df1) +julia> df3 = find_poi(file, config3) +18×10 DataFrame + Row │ elemtype elemid nodeid lat lon key value group influence range + │ Symbol Int64 Int64 Float64 Float64 String String Symbol Float64 Float64 +─────┼──────────────────────────────────────────────────────────────────────────────────────────────────────── + 1 │ node 1884054889 1884054889 42.3621 -71.0892 amenity cafe food 1.0 500.0 + 2 │ node 1884055322 1884055322 42.3617 -71.09 amenity cafe food 1.0 500.0 + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ + 17 │ way 1052438049 9672086211 42.3624 -71.0878 amenity parking car 1.0 500.0 + 18 │ way 1052438049 9672086211 42.3624 -71.0878 parking surface car 1.0 500.0 + 14 rows omitted ``` -Note that the default configuration works with the `AttractivenessMetaPOI` data format. If you want a different structure of data for this index you need to crate a subtype of `MetaPOI` and use it in the constructor. 
-Let us consider some point on the map: +Now with this data we create a spatial attractiveness index in the following way: ``` -lat, lon = mean(df1.lat), mean(df1.lon) +ix = AttractivenessSpatIndex(df3); +``` + +Let us consider a point on the map: +``` +using Statistics +lat, lon = mean(df3.lat), mean(df3.lon) ``` We can use the API to calculate attractiveness of that location: ``` julia> attractiveness(ix, lat, lon) -(education = 42.73746118854219, entertainment = 30.385266049775055, healthcare = 12.491783858701343, leisure = 134.5949900134078, parking = 7.310719949554132, restaurants = 25.200347106553586, shopping = 6.89416203789267, transport = 12.090409181473555) +(car = 8.595822085195946, food = 5.151440338789913) ``` -If, for the debugging purposes, we want to understand what data has been used to calculate that attractiveness use the `explain=true` parameter: +For this location we can see it is easy to find food and park your car nearby. + +If, for some debugging purposes, we want to understand what data has been used to calculate that attractiveness use the `explain=true` parameter: ``` -julia> attractiveness(ix, lat, lon ;explain=true).explanation -68×7 DataFrame - Row │ group influence range attractiveness poidistance lat lon - │ Symbol Float64 Float64 Float64 Float64 Float64 Float64 -─────┼───────────────────────────────────────────────────────────────────────────────── - 1 │ education 20.0 10000.0 16.9454 1527.31 42.3553 -71.105 - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ - 68 │ shopping 5.0 500.0 0.618922 438.108 42.3625 -71.0834 - 66 rows omitted +julia> attractiveness(ix, lat, lon; explain=true) +(car = 8.595822085195946, food = 5.151440338789913, explanation = 18×7 DataFrame + Row │ group influence range attractiveness poidistance lat lon + │ Symbol Float64 Float64 Float64 Float64 Float64 Float64 +─────┼──────────────────────────────────────────────────────────────────────────── + 1 │ food 1.0 500.0 0.183414 408.293 42.3599 -71.0913 + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ + 18 │ food 2.0 
1000.0 1.44716 276.42 42.3627 -71.084 + 16 rows omitted) ``` The attractiveness function is fully configurable on how the attractiveness is actually calculated. The available parameters can be used to define attractiveness dimension, aggreagation function, @@ -121,19 +195,22 @@ attractivess function and how the distance is on map is calculated. Let us for an example take maximum influence values rather than summing them: ``` julia> att = attractiveness(ix, lat, lon, aggregator = x -> length(x)==0 ? 0 : maximum(x)) -(education = 19.245381074958622, entertainment = 17.69295158791498, healthcare = 6.245891929350671, leisure = 4.723681042516024, parking = 2.9623334286775806, restaurants = 4.596901824773207, shopping = 2.0103741801865715, transport = 6.407028429850689) +(car = 0.8840868352005442, food = 1.747669233262405) ``` -We could also used the custom scraped `df2` for the attractiveness: + +We could also use a DataFrame without meta data columns for the attractiveness: ``` -ix2 = AttractivenessSpatIndex{NoneMetaPOI}(df2; get_range=a->300, get_group=a->:parking); +df4 = find_poi(file, ScrapePOIConfig(("amenity","parking"), "parking")) + +ix4 = AttractivenessSpatIndex{NoneMetaPOI}(df4; get_range=a->300, get_group=a->:parking); ``` Note that since we did not have metadata we have manually provided `300` meters for the range and `:parking` for the group. Now we can use this custom scraper to query the attractiveness: ``` -julia> attractiveness(ix2, lat, lon; aggregator = sum, calculate_attractiveness = (a,dist) -> dist > 300 ? 0 : 300/dist ) -(parking = 13.200370032301507,) +julia> attractiveness(ix4, lat, lon; aggregator = sum, calculate_attractiveness = (a,dist) -> dist > 300 ? 0 : 300/dist ) +(parking = 30.235559263812686,) ``` Note that for this code to work we needed to provide the way the attractiveness is calculated with the respect of metadata a (now an empty `struct` as this is NoneMetaPOI). 
diff --git a/docs/src/reference.md b/docs/src/reference.md index 291ff9b..4157d78 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -13,7 +13,7 @@ Scraping points-of-interest (POI) ```@docs find_poi ScrapePOIConfig -MetaPOI +AbstractMetaPOI NoneMetaPOI AttractivenessMetaPOI ``` diff --git a/docs/src/visualize.md b/docs/src/visualize.md index d258fad..a3c8e89 100644 --- a/docs/src/visualize.md +++ b/docs/src/visualize.md @@ -112,8 +112,8 @@ function getplotdata(md, dfpoi) end md = get_map_data("Boston.osm"; use_cache=false, only_intersections=false); -config = DataFrame(key="amenity", values=["restaurant", "fast_food", "food_court", "pub", "bar", "cafe", "ice_cream"]) -dfpoi = find_poi("Boston.osm"; scrape_config=ScrapePOIConfig{NoneMetaPOI}(config)) +configdf = DataFrame(key="amenity", values=["restaurant", "fast_food", "food_court", "pub", "bar", "cafe", "ice_cream"]) +dfpoi = find_poi("Boston.osm", ScrapePOIConfig{NoneMetaPOI}(configdf)) attdf = getplotdata(md, dfpoi) diff --git a/src/OSMToolset.jl b/src/OSMToolset.jl index abb46ae..2bff691 100644 --- a/src/OSMToolset.jl +++ b/src/OSMToolset.jl @@ -7,6 +7,7 @@ using StatsBase using NamedTupleTools using Parsers using EzXML +import DataFrames: DataFrame import OpenStreetMapX import OpenStreetMapX: LLA, ENU, distance, MapData, center @@ -24,7 +25,7 @@ export find_poi export calc_tiling export getbounds, Bounds export ScrapePOIConfig -export MetaPOI +export AbstractMetaPOI export NoneMetaPOI export AttractivenessMetaPOI export sample_osm_file diff --git a/src/attractiveness.jl b/src/attractiveness.jl index 8c7c49e..dbbac2b 100644 --- a/src/attractiveness.jl +++ b/src/attractiveness.jl @@ -3,19 +3,19 @@ Internal data structure used to store data in the `AttractivenessSpatIndex` spatial index. 
""" -struct AttractivenessData{T <: MetaPOI} +struct AttractivenessData{T <: AbstractMetaPOI} data::T enu::ENU lla::LLA end -AttractivenessData{T}(row::DataFrameRow, enu, lla) where T <: MetaPOI = +AttractivenessData{T}(row::DataFrameRow, enu, lla) where T <: AbstractMetaPOI = AttractivenessData(T(row), enu, lla) AttractivenessData{NoneMetaPOI}(::DataFrameRow, enu, lla) = AttractivenessData(NoneMetaPOI(), enu, lla) -struct AttractivenessSpatIndex{T <: MetaPOI, F <: Function} +struct AttractivenessSpatIndex{T <: AbstractMetaPOI, F <: Function} tree::RTree{Float64, 2, SpatialElem{Float64, 2, Int64, AttractivenessData{T}}} df::DataFrame refLLA::LLA @@ -30,8 +30,8 @@ struct NodeSpatIndex end """ - AttractivenessSpatIndex{T <: MetaPOI}(filename::AbstractString, get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) - AttractivenessSpatIndex{T <: MetaPOI}(df::AbstractDataFrame, get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) + AttractivenessSpatIndex{T <: AbstractMetaPOI}(filename::AbstractString, get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) + AttractivenessSpatIndex{T <: AbstractMetaPOI}(df::AbstractDataFrame, get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) Builds an attractivness spatial index basing on data in some CSV file or a DataFrame @@ -50,11 +50,11 @@ If `T` is not provided `AttractivenessMetaPOI` will be used as the default metad The type `F` represents the attractiveness group function provided as `get_group = (a::T) -> :somegroup`. 
""" -AttractivenessSpatIndex{T}(filename::AbstractString;get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) where T <: MetaPOI = AttractivenessSpatIndex(CSV.read(filename, DataFrame);get_range,get_group) +AttractivenessSpatIndex{T}(filename::AbstractString;get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) where T <: AbstractMetaPOI = AttractivenessSpatIndex{T}(CSV.read(filename, DataFrame);get_range,get_group) AttractivenessSpatIndex(filename::AbstractString;get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) = AttractivenessSpatIndex{AttractivenessMetaPOI}(filename::AbstractString;get_range,get_group) AttractivenessSpatIndex(df::AbstractDataFrame, refLLA::LLA = LLA(mean(df.lat), mean(df.lon));get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) = AttractivenessSpatIndex{AttractivenessMetaPOI}(df, refLLA;get_range, get_group) -function AttractivenessSpatIndex{T}(df::AbstractDataFrame, refLLA::LLA = LLA(mean(df.lat), mean(df.lon));get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) where T <: MetaPOI +function AttractivenessSpatIndex{T}(df::AbstractDataFrame, refLLA::LLA = LLA(mean(df.lat), mean(df.lon));get_range::Function=get_attractiveness_range, get_group::Function=get_attractiveness_group) where T <: AbstractMetaPOI data = SpatialElem{Float64, 2, Int64, AttractivenessData{T}}[] groups = Symbol[] id = 0 @@ -170,7 +170,7 @@ end """ - attractiveness(sindex::AttractivenessSpatIndex{T}, lattitude::Number, longitude::Number; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: MetaPOI + attractiveness(sindex::AttractivenessSpatIndex{T}, lattitude::Number, longitude::Number; aggregator::Function=sum, 
calculate_attractiveness::Function=calculate_attractiveness, 
distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: AbstractMetaPOI Returns the multidimensional attractiveness measure for the given spatial index `sindex` and `lattitude` and `longitude`. @@ -185,12 +185,12 @@ calculate the distance between point pairs. If `explain` is set to true the result will additionally contain details about POIs used to calculate the attractiveness. """ -function attractiveness(sindex::AttractivenessSpatIndex{T}, lattitude::Number, longitude::Number; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: MetaPOI +function attractiveness(sindex::AttractivenessSpatIndex{T}, lattitude::Number, longitude::Number; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: AbstractMetaPOI attractiveness(sindex, OpenStreetMapX.LLA(lattitude,longitude); aggregator, calculate_attractiveness, distance, explain) end """ - attractiveness(sindex::AttractivenessSpatIndex{T}, lla::LLA; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: MetaPOI + attractiveness(sindex::AttractivenessSpatIndex{T}, lla::LLA; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: AbstractMetaPOI Returns the multidimensional attractiveness measure for the given spatial index `sindex` and `LLA` coordinates. @@ -205,7 +205,7 @@ calculate the distance between point pairs. If `explain` is set to true the result will additionally contain details about POIs used to calculate the attractiveness. 
""" -function attractiveness(sindex::AttractivenessSpatIndex{T}, lla::LLA; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: MetaPOI +function attractiveness(sindex::AttractivenessSpatIndex{T}, lla::LLA; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: AbstractMetaPOI enu = ENU(lla,sindex.refLLA) attractiveness(sindex, enu; aggregator, calculate_attractiveness, distance, explain) end @@ -213,7 +213,7 @@ end """ - attractiveness(sindex::AttractivenessSpatIndex{T}, enu::ENU; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: MetaPOI + attractiveness(sindex::AttractivenessSpatIndex{T}, enu::ENU; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: AbstractMetaPOI Returns the multidimensional attractiveness measure for the given spatial index `sindex` and `enu` cooridanates. @@ -230,7 +230,7 @@ calculate the distance between point pairs. If `explain` is set to true the result will additionally contain details about POIs used to calculate the attractiveness. 
""" -function attractiveness(sindex::AttractivenessSpatIndex{T}, enu::ENU; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: MetaPOI +function attractiveness(sindex::AttractivenessSpatIndex{T}, enu::ENU; aggregator::Function=sum, calculate_attractiveness::Function=calculate_attractiveness, distance::Function=OpenStreetMapX.distance, explain::Bool=false) where T <: AbstractMetaPOI res = Dict(sindex.measures .=> [Float64[] for _ in 1:length(sindex.measures)]) p = SpatialIndexing.Point((enu.east, enu.north)) explanation = DataFrame() diff --git a/src/poi.jl b/src/poi.jl index 607c8ff..a182025 100644 --- a/src/poi.jl +++ b/src/poi.jl @@ -1,26 +1,27 @@ - """ - abstract type MetaPOI end + abstract type AbstractMetaPOI end A base time for representing metadata related to a POI location. """ -abstract type MetaPOI end +abstract type AbstractMetaPOI end """ - struct NoneMetaPOI <: MetaPOI; end + struct NoneMetaPOI <: AbstractMetaPOI; end -A subtype of `MetaPOI` that does not contain any metadata. +Scraping configuration when no attractiveness metadata is attached. """ -struct NoneMetaPOI <: MetaPOI; end +struct NoneMetaPOI <: AbstractMetaPOI; end """ - struct AttractivenessMetaPOI <: MetaPOI + struct AttractivenessMetaPOI <: AbstractMetaPOI + +Container for metadata for attractiveness (the default configuration of scraping). -A subtype of `MetaPOI` that contains metadata for attractiveness -(the default configuration of scraping). -This assumes that the metadata is stored in a CSV file with the following columns: -`key`, `values`, `group`, `influence`, `range`. +The attractiveness is defined by the following fields: +- `group` - the group of the POI (e.g. 
`:parking` or `:food`) +- `influence` - the power of the POI on the attractiveness of the location +- `range` - the range of the POI influence (measured in meters) """ -struct AttractivenessMetaPOI <: MetaPOI +struct AttractivenessMetaPOI <: AbstractMetaPOI group::Symbol influence::Float64 range::Float64 @@ -28,30 +29,30 @@ end """ get_attractiveness_group(a::AttractivenessMetaPOI) - + Default group for AttractivenessMetaPOI which is `a.group`. """ get_attractiveness_group(a::AttractivenessMetaPOI) = a.group """ get_attractiveness_range(a::AttractivenessMetaPOI) - + Default range for AttractivenessMetaPOI whic is the `a.range`. """ get_attractiveness_range(a::AttractivenessMetaPOI) = a.range """ - get_attractiveness_group(a::NoneMetaPOI) - + get_attractiveness_group(a::NoneMetaPOI) + Default group for NoneMetaPOI (`NoneMetaPOI`). """ get_attractiveness_group(a::NoneMetaPOI) = :NoneMetaPOI """ - get_attractiveness_range(a::MetaPOI) -You can create own subtypes of `MetaPOI` but than range needs to be provided. + get_attractiveness_range(a::AbstractMetaPOI) +You can create your own subtypes of `AbstractMetaPOI` but then the range needs to be provided. """ -get_attractiveness_range(a::MetaPOI) = throw(ArgumentError("`get_attractiveness_range` not implemented for type $(typeof(a)). You can also just provide a custom function via the `get_range` parameter such as `get_range= a -> 100`")) +get_attractiveness_range(a::AbstractMetaPOI) = throw(ArgumentError("`get_attractiveness_range` not implemented for type $(typeof(a)). 
You can also just provide a custom function via the `get_range` parameter such as `get_range= a -> 100`")) AttractivenessMetaPOI(row::DataFrameRow) = AttractivenessMetaPOI(Symbol(row.group), Float64(row.influence), Float64(row.range)) @@ -68,36 +69,64 @@ The configuration is defined in a DataFrame with the following columns: Instead of the DataFrame a paths to a CSV file can be provided. 
* Constructors * -- `ScrapePOIConfig()` - default inbuilt configuration for data scraping. +- `ScrapePOIConfig()` - default inbuilt configuration for data scraping. Note that the default configuration can change with library updates. This will use `AttractivenessMetaPOI` as meta data. -- `ScrapePOIConfig{T <: MetaPOI}(filename::AbstractString)` - use a CSV file with configuration -- `ScrapePOIConfig{T <: MetaPOI}(df::DataFrame)` - use a `DataFrame` +- `ScrapePOIConfig{T <: AbstractMetaPOI}(CSV.read(filename, DataFrame))` - use a CSV file with configuration (read it into a `DataFrame` first; a plain `String` argument is interpreted as an OSM key, not as a file name) +- `ScrapePOIConfig{T <: AbstractMetaPOI}(df::DataFrame)` - use a `DataFrame` as configuration +- `ScrapePOIConfig{T <: AbstractMetaPOI}(meta::Dict{<:Union{String, Tuple{String,String}}, T})` - `meta` dictionary explaining how a single `k="keyname"` value or tuple of values (paired with `v="valuename"`) should be mapped for attractiveness metadata. + When the `T` parameter is not provided `AttractivenessMetaPOI` will be used. When you do not want to use metadata provide `NoneMetaPOI` as `T` """ -struct ScrapePOIConfig{T <: MetaPOI} - dkeys::Set{String} +struct ScrapePOIConfig{T <: AbstractMetaPOI} meta::Dict{Union{String, Tuple{String,String}}, T} + dkeys::Set{String} #helper field for efficient searching end -""" -Default built-in configuration for data scraping from OSM XML. -The default configuration will use AttractivenessMetaPOI -""" -const __builtin_config_path = joinpath(@__DIR__, "..", "config", "ScrapePOIconfig.csv") +function ScrapePOIConfig(pairs::Pair{<:Union{Tuple{String,String}, String}, T}...) where T <: AbstractMetaPOI + ScrapePOIConfig(Dict{Union{String, Tuple{String,String}}, T}(pairs)) +end + + +function ScrapePOIConfig(keys::Union{Tuple{String,String}, String}...) 
+ ScrapePOIConfig(Dict{Union{String, Tuple{String,String}}, NoneMetaPOI}(keys .=> Ref(NoneMetaPOI()))) +end + + +function ScrapePOIConfig(meta::Union{Dict{Tuple{String,String}, T},Dict{String, T}}) where T <: AbstractMetaPOI + ScrapePOIConfig(Dict{Union{String, Tuple{String,String}}, T}(meta)) +end + +function ScrapePOIConfig(meta::Dict{Union{String, Tuple{String,String}}, T}) where T <: AbstractMetaPOI + dkeyfirst(k::String) = k + dkeyfirst(k::Tuple{String,String}) = k[1] + ScrapePOIConfig{T}(meta, Set(dkeyfirst.(keys(meta)))) +end + +function DataFrames.DataFrame(sp::ScrapePOIConfig{T}) where T <: AbstractMetaPOI + df = DataFrame(;key=String[], values=String[], + (NamedTupleTools.fieldnames(T) .=> [Vector{ftype}() for ftype in NamedTupleTools.fieldtypes(T)])... ) + for kv in keys(sp.meta) + key = kv isa Tuple ? kv[1] : kv + values = kv isa Tuple ? kv[2] : "*" + push!(df, (;key, values, ntfromstruct(sp.meta[kv])...)) + end + df2 = combine(groupby(df, Not(:values)), :values => (val -> join(sort(val), ",")) => :values) + DataFrames.select!(df2, :key, :values,Not([:key, :values])) + sort!(df2, [:key, :values]) + df2 +end -function ScrapePOIConfig{T}(df::DataFrame) where T <: MetaPOI +function ScrapePOIConfig{T}(df::DataFrame) where T <: AbstractMetaPOI colnames = ["key", "values"] @assert all(colnames .∈ Ref(names(df))) - - dkeys = Set(String.(df.key)) meta = Dict{Union{String, Tuple{String,String}}, T}() for row in eachrow(df) - a = T(row) + a = T(row) for value in string.(split(String(row.values),',')) if value == "*" meta[String(row.key)] = a @@ -106,22 +135,31 @@ function ScrapePOIConfig{T}(df::DataFrame) where T <: MetaPOI end end end - ScrapePOIConfig{T}(dkeys, meta) + ScrapePOIConfig(meta) end ScrapePOIConfig(df::DataFrame) = ScrapePOIConfig{AttractivenessMetaPOI}(df) -function ScrapePOIConfig{T}(filename::AbstractString = __builtin_config_path) where T <: MetaPOI - ScrapePOIConfig{T}(CSV.read(filename, DataFrame,types=Dict( - :key => String, :values =>String) 
)) + +ScrapePOIConfig() = ScrapePOIConfig{AttractivenessMetaPOI}(CSV.read(__builtin_config_path, DataFrame )) + +function Base.show(io::IO, sp::ScrapePOIConfig{T}) where T <: AbstractMetaPOI + println(io, "ScrapePOIConfig{$T} with $(length(sp.meta)) keys:") + show(io, DataFrame(sp);summary=false,eltypes=false,allrows=true,rowlabel=:No) end -ScrapePOIConfig(filename::AbstractString = __builtin_config_path) = ScrapePOIConfig{AttractivenessMetaPOI}(filename) + +""" +Default built-in configuration for data scraping from OSM XML. +The default configuration will use AttractivenessMetaPOI +""" +const __builtin_config_path = joinpath(@__DIR__, "..", "config", "ScrapePOIconfig.csv") const __builtin_poiconfig = ScrapePOIConfig() + """ - find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T <: MetaPOI}=__builtin_poiconfig) + find_poi(filename::AbstractString, scrape_config::ScrapePOIConfig{T <: AbstractMetaPOI}=__builtin_poiconfig; all_tags::Bool=false) Generates a `DataFrame` with points of interests and from a given XML `filename`. The data frame will also contain the metadata from `T` for each POI. @@ -130,9 +168,13 @@ The `DataFrame` can be later used with `AttractivenessSpatIndex` to build an att The attractiveness values for the index will be used ones from the `scrape_config` file. By default `__builtin_poiconfig` from `__builtin_config_path` will be used but you can define your own index. + +Setting the `all_tags` parameter to `true` will cause that once the tag is matched, other tags within the same +`id` will be included in the resulting DataFrame. """ -function find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T}=__builtin_poiconfig) where T <: MetaPOI +function find_poi(filename::AbstractString, scrape_config::ScrapePOIConfig{T}=__builtin_poiconfig; all_tags::Bool=false) where T <: AbstractMetaPOI dkeys = scrape_config.dkeys + dkeys_has_star = ("*" in dkeys) meta = scrape_config.meta EMPTY_NODE = Node(0,0.,0.) 
nodes = Dict{Int,Node}() @@ -140,11 +182,17 @@ function find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T}=__ relations_firstnode = Dict{Int, Node}() elemtype = :X elemid = -1 + + # Buffer for collecting state when all_tags==true + all_tags_buffer::Vector{Tuple{String,String}} = Vector{Tuple{String,String}}() + all_tags_good_tag::Base.RefValue{Bool} = Ref(false) + alltags_clear = all_tags ? () -> begin;empty!(all_tags_buffer);all_tags_good_tag[]=false;end : ()->nothing + # creates an empty data frame df = DataFrame(;elemtype=Symbol[], elemid=Int[],nodeid=Int[],lat=Float64[],lon=Float64[], - key=String[], value=String[], - (NamedTupleTools.fieldnames(T) .=> [Vector{ftype}() for ftype in NamedTupleTools.fieldtypes(T)])... ) - + key=String[], value=String[], + (NamedTupleTools.fieldnames(T) .=> [Vector{Union{ftype, all_tags ? Missing : ftype}}() for ftype in NamedTupleTools.fieldtypes(T)])... ) + io = open(filename, "r") sr = EzXML.StreamReader(io) i = 0 @@ -160,11 +208,12 @@ function find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T}=__ if hasnodeattributes(sr) attrs = nodeattributes(sr) elemid = parse(Int, attrs["id"]) - curnode = Node(elemid, parse(Float64,attrs["lat"]), parse(Float64,attrs["lon"])) + curnode = Node(elemid, parse(Float64,attrs["lat"]), parse(Float64,attrs["lon"])) nodes[elemid] = curnode else @warn " $nname, $i, no attribs?" end + alltags_clear() elseif nname == "way" elemtype = :way curnode = EMPTY_NODE @@ -175,6 +224,7 @@ function find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T}=__ else @warn " $nname, $i, no attribs?" end + alltags_clear() elseif waylookforfirstnd && nname == "nd" if hasnodeattributes(sr) attrs = nodeattributes(sr) @@ -184,6 +234,7 @@ function find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T}=__ else @warn "/ $nname, $i, no attribs?" 
end + alltags_clear() elseif nname == "relation" elemtype = :relation curnode = EMPTY_NODE @@ -195,6 +246,7 @@ function find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T}=__ else @warn " $nname, $i, no attribs?" end + alltags_clear() elseif relationlookforfirstmember && nname == "member" if hasnodeattributes(sr) attrs = nodeattributes(sr) @@ -218,30 +270,46 @@ function find_poi(filename::AbstractString; scrape_config::ScrapePOIConfig{T}=__ else @warn "/ $nname, $i, no attribs?" end + alltags_clear() elseif nname == "tag" attrs = nodeattributes(sr) key = string(get(attrs,"k","")) + keysearch::Union{String,Nothing} = nothing if key in dkeys + keysearch = key + elseif dkeys_has_star + keysearch = "*" + end + if !isnothing(keysearch) value = string(get(attrs,"v","")) # get either first key if it was of * type # otherwise try to get attractiveness for the tuple - a = get(meta, key, get(meta, (key, value), nothing)) + a = get(meta, keysearch, get(meta, (keysearch, value), nothing)) if !isnothing(a) # we are interested only in attractive POIs push!(df, (;elemtype,elemid,nodeid=curnode.id, lat=curnode.lat, lon=curnode.lon, key, value, ntfromstruct(a)...) ) + all_tags_good_tag[] = true + end + elseif all_tags + push!(all_tags_buffer, (key, string(get(attrs,"v","")))) + end + if all_tags_good_tag[] + for (key, value) in all_tags_buffer + push!(df, (;elemtype,elemid,nodeid=curnode.id, lat=curnode.lat, lon=curnode.lon, key, value, (NamedTupleTools.fieldnames(T) .=>missing)...) ) end + empty!(all_tags_buffer) end end end unique!(df,[:lat,:lon,:key,:value]) df end - + """ clean_pois_by_group(df::DataFrame) For data imported via AttractivenessMetaPOI the function will return only the most attractive POI for each group. This is useful when you want to remove duplicate entries for the same node. 
-""" +""" function clean_pois_by_group(df::DataFrame) DataFrame(g[findmax(g.influence)[2], :] for g in groupby(df, [:nodeid, :group])) end @@ -249,12 +317,12 @@ end #= """ - find_poi(osm::OpenStreetMapX.OSMData; scrape_config::ScrapePOIConfig=__builtin_poiconfig) + find_poi(osm::OpenStreetMapX.OSMData,scrape_config::ScrapePOIConfig=__builtin_poiconfig) Finds POIs on the data from OSM parser. Please note that the OSM parser might not parse all the data from the XML file, hence the results might be different than from `find_poi(filename::AbstractString)`. Generally, usage of `find_poi(filename::AbstractString)` is stronlgy recommended. """ -function find_poi(osm::OpenStreetMapX.OSMData; scrape_config::ScrapePOIConfig=__builtin_poiconfig) +function find_poi(osm::OpenStreetMapX.OSMData,scrape_config::ScrapePOIConfig=__builtin_poiconfig) dkeys = scrape_config.dkeys meta = scrape_config.meta diff --git a/test/runtests.jl b/test/runtests.jl index 12cb327..f051bbf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,19 +11,19 @@ test_poi_config = joinpath(dirname(pathof(OSMToolset)),"..","test","data","Scrap #test_map = "test/data/boston.osm" #test_poi_config = "test/data/ScrapePOIconfig.csv" -poiconf = ScrapePOIConfig(test_poi_config) +poiconf = ScrapePOIConfig{AttractivenessMetaPOI}(CSV.read(test_poi_config,DataFrame)) poiconf_default = ScrapePOIConfig() bounds = getbounds(test_map) -df = find_poi(test_map,scrape_config=poiconf) +df = find_poi(test_map,poiconf) sindex = AttractivenessSpatIndex(df); csvfile = tempname() CSV.write(csvfile, df) sindex2 = AttractivenessSpatIndex(csvfile); -rm(csvfile) +atexit(() -> rm(csvfile)) lla = OpenStreetMapX.LLA((bounds.minlat+bounds.maxlat)/2, (Float64(bounds.minlon)+Float64(bounds.maxlon))/2) @testset "AttractivenessSpatIndex" begin @@ -41,16 +41,36 @@ lla = OpenStreetMapX.LLA((bounds.minlat+bounds.maxlat)/2, (Float64(bounds.minlon end +scdf1 = ScrapePOIConfig(("amenity","parking"), "parking") |> DataFrame +sc1 = 
ScrapePOIConfig{NoneMetaPOI}(scdf1) + +sc2 = ScrapePOIConfig(("amenity","parking")=>AttractivenessMetaPOI(:parking,10,1000)) +scdf2 = sc2 |> DataFrame + +dfpois_all = find_poi(test_map, sc2; all_tags=true) + +@testset "DataFramesSC" begin + @test sc1 |> DataFrame == scdf1 + @test ScrapePOIConfig{AttractivenessMetaPOI}(scdf2) |> DataFrame == scdf2 + @test length(findall(ismissing, dfpois_all.group)) > 0 +end + + config = DataFrame(key=["parking", "amenity"], values=["*", "parking"]) -df2 = find_poi(test_map; scrape_config=ScrapePOIConfig{NoneMetaPOI}(config)) -df3 = find_poi(test_map; scrape_config=ScrapePOIConfig{NoneMetaPOI}(DataFrame(key=String[], values=String[]))) -df4 = find_poi(test_map; scrape_config=ScrapePOIConfig{NoneMetaPOI}(DataFrame(key=["does not exist300, get_group=a->:parking); @testset "CustomConfig" begin @test nrow(df2) > 0 + @test nrow(df2) == nrow(df2_api2) + @test sort(df2,:nodeid) == sort(df2_api2, :nodeid) @test nrow(df3) == 0 @test nrow(df4) == 0 @test df2[1:0,:] == df3