diff --git a/.Rbuildignore b/.Rbuildignore index 2132181..09affc2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,4 @@ ^\.github$ ^README\.Rmd$ ^cran-comments\.md$ +^CRAN-SUBMISSION$ diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 0000000..238bd58 --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 0.1.0 +Date: 2024-12-01 15:31:37 UTC +SHA: c5ec154ec555c730740c2b80151ef5894cb64caa diff --git a/DESCRIPTION b/DESCRIPTION index f96ff88..a7e682a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,7 @@ Authors@R: email = "lyu.geosocial@gmail.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0009-0002-6003-3800")) -Description: Spatial stratified heterogeneity (SSH) denotes the coexistence of within-strata homogeneity and between-strata heterogeneity. Information consistency-based methods provide a rigorous approach to quantify SSH and evaluate its role in spatial processes, grounded in principles of geographical stratification and information theory (Bai et al., 2023 ; Wang et al., 2024 ). +Description: Spatial stratified heterogeneity (SSH) denotes the coexistence of within-strata homogeneity and between-strata heterogeneity. Information consistency-based methods provide a rigorous approach to quantify SSH and evaluate its role in spatial processes, grounded in principles of geographical stratification and information theory (Bai, H. et al. (2023) ; Wang, J. et al. (2024) ). 
License: GPL-3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/R/sshicm.R b/R/sshicm.R index 61e0a4e..4e7f141 100644 --- a/R/sshicm.R +++ b/R/sshicm.R @@ -12,11 +12,13 @@ #' @export #' #' @examples +#' \donttest{ +#' # This code may take a bit longer to execute: #' baltim = sf::read_sf(system.file("extdata/baltim.gpkg",package = "sshicm")) #' sshicm(PRICE ~ .,baltim,type = "IC") #' cinc = sf::read_sf(system.file("extdata/cinc.gpkg",package = "sshicm")) #' sshicm(THEFT_D ~ .,cinc,type = "IN") -#' +#' } sshicm = \(formula, data, type = 'IC', seed = 42, permutation_number = 999, bin_method = "Sturges"){ formulavar = sdsfun::formula_varname(formula,data) diff --git a/man/sshicm.Rd b/man/sshicm.Rd index 689f731..91dab4b 100644 --- a/man/sshicm.Rd +++ b/man/sshicm.Rd @@ -34,9 +34,11 @@ A \code{tibble}. Information Consistency-Based Measures for Spatial Stratified Heterogeneity } \examples{ +\donttest{ +# This code may take a bit longer to execute: baltim = sf::read_sf(system.file("extdata/baltim.gpkg",package = "sshicm")) sshicm(PRICE ~ .,baltim,type = "IC") cinc = sf::read_sf(system.file("extdata/cinc.gpkg",package = "sshicm")) sshicm(THEFT_D ~ .,cinc,type = "IN") - +} } diff --git a/vignettes/.gitignore b/vignettes/.gitignore index 097b241..2d19fc7 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -1,2 +1 @@ *.html -*.R diff --git a/vignettes/precompile.R b/vignettes/precompile.R new file mode 100644 index 0000000..fa4caa2 --- /dev/null +++ b/vignettes/precompile.R @@ -0,0 +1,4 @@ +devtools::load_all() + +knitr::knit("vignettes/sshicm.Rmd.orig", + "vignettes/sshicm.Rmd") diff --git a/vignettes/sshicm.Rmd b/vignettes/sshicm.Rmd index ed41c54..bd212c4 100644 --- a/vignettes/sshicm.Rmd +++ b/vignettes/sshicm.Rmd @@ -1,7 +1,7 @@ --- title: "Information Consistency-Based Measures for Spatial Stratified Heterogeneity" author: "Wenbo Lv" -date: "`r Sys.Date()`" +date: "2024-12-01" output: rmarkdown::html_vignette vignette: > 
%\VignetteIndexEntry{sshicm} @@ -9,13 +9,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "##", - fig.path = "man/figures/sshicm/" -) -``` +   @@ -86,24 +80,50 @@ install.packages("sshicm", dep = TRUE) ``` -```{r} + +``` r library(sshicm) ``` -```{r} + +``` r baltim = sf::read_sf(system.file("extdata/baltim.gpkg",package = "sshicm")) sshicm(PRICE ~ .,baltim,type = "IC") +## # A tibble: 5 × 3 +## Variable Ic Pv +## +## 1 DWELL 0.648 0.00801 +## 2 AC 0.223 0.0591 +## 3 PATIO 0.168 0.556 +## 4 FIREPL 0.135 0.667 +## 5 CITCOU 0.0898 0.988 ``` -```{r} + +``` r cinc = sf::read_sf(system.file("extdata/cinc.gpkg",package = "sshicm")) sshicm(THEFT_D ~ .,cinc,type = "IN") +## # A tibble: 5 × 3 +## Variable In Pv +## +## 1 DENSITY 0.776 0.0681 +## 2 MEDIAN_AGE 0.228 0.0230 +## 3 MALE 0.0367 0 +## 4 AVG_FAMSIZ 0.0205 0.00300 +## 5 FEMALE 0.00584 0.0200 ``` -```{r} + +``` r ntds = gdverse::NTDs sshicm(incidence ~ watershed + elevation + soiltype,data = ntds) +## # A tibble: 3 × 3 +## Variable Ic Pv +## +## 1 elevation 0.293 0.0250 +## 2 watershed 0.177 0.0521 +## 3 soiltype 0.117 0.0671 ``` diff --git a/vignettes/sshicm.Rmd.orig b/vignettes/sshicm.Rmd.orig new file mode 100644 index 0000000..ed41c54 --- /dev/null +++ b/vignettes/sshicm.Rmd.orig @@ -0,0 +1,134 @@ +--- +title: "Information Consistency-Based Measures for Spatial Stratified Heterogeneity" +author: "Wenbo Lv" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{sshicm} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "##", + fig.path = "man/figures/sshicm/" +) +``` + +  + +## 1. 
Introduction to `sshicm` package + +### 1.1 The `sshicm` package can be used to address the following issues: + +- Information consistency-based measures of spatial stratified heterogeneity intensity for continuous and nominal variables. + +- Strength of spatial pattern associations based on information consistency measures. + +### 1.2 Example data in the `sshicm` package + +#### baltim data + +"baltim" consists of [Baltimore home sale prices and hedonics][5]. In total, there are 221 instances in "baltim" data. The explanatory variables are whether it is a detached unit (DWELL), whether it has a patio (PATIO), whether it has a fireplace (FIREPL), whether it has air conditioning (AC), and whether the dwelling is in Baltimore County (CITCOU), while the target variable is the sale price of the home (PRICE). + + +#### cinc data + +"cinc" is derived from [the 2008 Cincinnati Crime + Socio-Demographics dataset][6]. It includes spatial data on 457 objects located on an irregular lattice. The explanatory variables are male population (MALE), female population (FEMALE), median age (MEDIAN_AGE), average family size (AVG_FAMSIZ), and population density (DENSITY), while the target variable is the existence of theft (THEFT_D). + +![**Figure 1**. Maps of the baltim and cinc data sets. ([Bai et al. 2023][2])](../man/figures/sshicm/sshicm_example_data.jpg){width=500px} + +### 1.3 Functions in the `sshicm` package + +#### Two functions for vector-type inputs of dependent and independent variables. + +- `sshic()` for continuous dependent variable + +- `sshin()` for nominal dependent variable + +#### Regression-style data frame modeling function + +A function `sshicm()` that yields all results in a single line, with the `type` parameter set to `IC` (Continuous) or `IN` (Nominal) to specify whether the dependent variable is a continuous or nominal variable. + +## 2. 
The principle of measuring spatial stratified heterogeneity based on information consistency + +**Note: All explanatory variables must be discretized in advance or inherently be discrete nominal variables.** + +### 2.1 When the dependent variable is a continuous variable: + +$$ +I_{C}\left(d,s\right) = \sum_{s_{i} \in S}p\left(s_{i}\right)\frac{ \arctan \left(\textbf{RelE} \left( f_{d_{i}} \mid \mid f \right) \right)}{\pi / 2} +$$ + +where $d_i$ is the random variable corresponding to the target variable in stratum $s_i$ , and $f_{d_i}$ and $f$ are the density functions of $d_i$ and $d$, respectively. Additionally, $\textbf{RelE} \left( f_{d_{i}} \mid \mid f \right)$ is the relative entropy of $f_{d_i}$ and $f$. + +$$ +\textbf{RelE} \left( f_{d_{i}} \mid \mid f \right) = H \left(f_{d_{i}} , f\right) - H \left(f_{d_{i}}\right) = \sum_{i = 1}^{n} f_{d_{i}} \log \frac{1}{f} - \sum_{i = 1}^{n} f_{d_{i}} \log \frac{1}{f_{d_{i}}} = \sum_{i = 1}^{n} f_{d_{i}} \log \frac{f_{d_{i}}}{f} +$$ + +### 2.2 When the dependent variable is a nominal variable: + +$$ +I_{N}\left(d,s\right) = \frac{I \left(d,s\right)}{I \left(d\right)} = +\frac{I \left(d\right) - I \left(d \mid s\right)}{I \left(d\right)} = +1 - \frac{\sum_{s_i \in S}\sum_{x \in V_d} p\left(s_i,x\right) \log p\left(x \mid s_i\right)}{\sum_{x \in V_d} p\left(x\right) \log p\left(x\right)} +$$ + +where $p\left(x\right)$ is the probability of observing $x$ in $U$, $p\left(s_i,x\right)$ is the probability of observing $s_i$ and $x$ in $U$, and $p\left(x \mid s_i\right)$ is the probability of observing $x$ given that the stratum is $s_i$. + + +## 3. 
Examples of the `sshicm` package + +```r +install.packages("sshicm", dep = TRUE) +``` + + +```{r} +library(sshicm) +``` + + +```{r} +baltim = sf::read_sf(system.file("extdata/baltim.gpkg",package = "sshicm")) +sshicm(PRICE ~ .,baltim,type = "IC") +``` + +```{r} +cinc = sf::read_sf(system.file("extdata/cinc.gpkg",package = "sshicm")) +sshicm(THEFT_D ~ .,cinc,type = "IN") +``` + +```{r} +ntds = gdverse::NTDs +sshicm(incidence ~ watershed + elevation + soiltype,data = ntds) +``` + + +## Reference + + + +Wang, J., Haining, R., Zhang, T., Xu, C., Hu, M., Yin, Q., … Chen, H. (2024). Statistical Modeling of Spatially Stratified Heterogeneous Data. Annals of the American Association of Geographers, 114(3), 499–519. [https://doi.org/10.1080/24694452.2023.2289982][1]. + +Bai, H., Wang, H., Li, D., & Ge, Y. (2023). Information Consistency-Based Measures for Spatial Stratified Heterogeneity. Annals of the American Association of Geographers, 113(10), 2512–2524. [https://doi.org/10.1080/24694452.2023.2223700][2]. + +Wang, J., Li, X., Christakos, G., Liao, Y., Zhang, T., Gu, X., & Zheng, X. (2010). Geographical Detectors‐Based Health Risk Assessment and its Application in the Neural Tube Defects Study of the Heshun Region, China. International Journal of Geographical Information Science, 24(1), 107–127. [https://doi.org/10.1080/13658810802443457][3]. + +Wang, J. F., Zhang, T. L., & Fu, B. J. A measure of spatial stratified heterogeneity. Ecological indicators, 2016. 67, 250-256. [https://doi.org/10.1016/j.ecolind.2016.02.052][4]. + + + +  + +[1]: https://doi.org/10.1080/24694452.2023.2289982 +[2]: https://doi.org/10.1080/24694452.2023.2223700 +[3]: https://doi.org/10.1080/13658810802443457 +[4]: https://doi.org/10.1016/j.ecolind.2016.02.052 +[5]: https://geodacenter.github.io/data-and-lab/baltim/ +[6]: https://geodacenter.github.io/data-and-lab/walnut_hills/ + +  +