-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwrangle_data_clean.R
144 lines (107 loc) · 5.85 KB
/
wrangle_data_clean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
## ---------------------------
##
## Script name: Wrangle BpS and Sclass data inputs
##
## Purpose of script: Clean and merge BpS-Sclass combine, BpS reference percents and sclass descriptions to calculate and make charts for historical vs. current sclass charts, and for exploring different stages of BpSs (e.g., late classes). This script is an evolution of "wrangle_data" originally written for Mary Kelly's MZ2 late succession project.
##
## Author: Randy Swaty
##
## Date Created: December 18, 2023
##
##
## ---------------------------
##
## Notes:
## Challenging to "keep" all sclass combinations
## There are issues with LANDFIRE data including a duplicate of Laurentian-Acadian Northern Hardwoods-Hemlock.
##
## ---------------------------
## ---------------------------
## load packages
library(janitor) # for cleaning column names
library(tidyverse) # for reading in and wrangling data
## ---------------------------
## read in raw data and sclass descriptions
# output from BpS-Sclass data combine in ArcGIS pro
raw_bps_scls <- read.csv("data/bpsScls2.csv")
# output from SyncroSim that has labels and age-categories of all succession classes except AK
sclass_descriptions <- read.csv("data/scls_descriptions.csv")
# reference percents per sclass per BpS for all BpSs except AK. Modified for this purpose from original LF ref con table.
reference_percents <- read.csv("data/ref_con_long.csv")
# bps attributes for landscape of interest. Output from clipping BpS data with shapefile in GIS.
bps_atts <- read.csv("data/bps_aoi_attributes.csv")
## ---------------------------
## clean and prep raw combined data
clean_bps_scls_cmbn <- raw_bps_scls %>%
rename("join_field" = "model_label") %>%
# select(-c(OID_,
# Value,
# LC16_BPS_2,
# LC20_SCla_,
# GROUPVEG)) %>%
# unite("join_field", BPS_MODEL,LABEL, sep = "_", remove = FALSE ) %>%
group_by(join_field, Model_Code, BpS_Name, refLabel) %>% # I want only one row per unique BpS-sclass combo
summarize(count = sum(Freq)) %>%
clean_names()
# note-some BpSs did not have sclasses A-E in mapping rules and/or may not have any pixels of a particular s-class mapped today so there will not be all 10 possible sclasses for each BpS (i.e., A-E, UN, UE, etc.) from the combine which represents current data.
## clean and prep sclass descriptions
sclass_descriptions_clean <- sclass_descriptions %>%
dplyr::select(-c(Description)) %>% # remove column
rename("model_code" = "StratumID",
"scls_label" = "ClassLabelID",
"state_class_id" = "StateClassID" ) %>% # rename columns
unite("join_field", model_code:scls_label, sep = "_", remove = FALSE ) %>%
separate(state_class_id, into = c("age_category", "canopy_category"), sep = ":", remove = FALSE)
## clean and prep reference percents
# get unique s-class labels from modified ref_con so we can have 'authoritative' list
unique_sclass_labels_ref <- unique(reference_percents$refLabel)
print(unique_sclass_labels_ref)
# there may be some differences in the mapped sclass labels in the cleaned bps_sclass combine
unique_sclass_lables_cmbn <- unique(clean_bps_scls_cmbn$ref_label)
print(unique_sclass_lables_cmbn)
# there are differences, e.g., Urban-Developed between this and sclass label
# will assume Barren/Sparse, NoData and Snow/Ice is minimal; will change "Developed" to "Urban" in reference df cleaning code
clean_ref_percents <- reference_percents %>%
mutate(across('refLabel', str_replace, 'Developed', 'Urban')) %>%
mutate(across('model_label', str_replace, 'Developed', 'Urban')) %>%
rename("join_field" = "model_label" ) %>%
clean_names()
## winnow this df to only the bps model codes in area of interest. Used BPS_MODEL, thought note this may result in duplicate BpS names if the AoI is across multiple variants of a BpS. There should be 10x the number of unique BPS_MODEL values. Note, this will not be 10x the number of rows in the bps_atts since that is parsed by the "VALUE" field in the clipped BpS dataset, i.e., there may be multiple rows for a single BPS_MODEL because each Map Zone gets a unique "VALUE". We are working with the BPS_MODEL field as it will have the unique ref conditions we need. Also, we use the a newly created 'join-field' for joining in GIS.
# first see how many bpss there are
length(unique(bps_atts$BPS_MODEL))
clean_ref_percents <- clean_ref_percents %>%
filter(model_code %in% bps_atts$BPS_MODEL)
# should be 10x number of unique BPS_Models (minus one for NAs that are 'water' etc)
## create 'final' dataframe with reference and current sclass percents, acres and labels
## first ref con and sclass descriptions, remove BPS_NAME column to avoid duplication below
final_ref_con <- left_join(clean_ref_percents, sclass_descriptions_clean) %>%
dplyr::select(-c(model_code,
ref_label,
bp_s_name))
# looks OK, now full join to add reference percents then clean a bit
ref_con_slcs_count <- full_join(final_ref_con, clean_bps_scls_cmbn, by = "join_field") %>%
rename("cur_scls_count" = "count",
"bps_name" = "bp_s_name")
# now for the math: need count/acres per bps, cur sclass percents and differences
final_df_full <- ref_con_slcs_count %>%
group_by(model_code) %>%
mutate(bps_count = sum(cur_scls_count, na.rm = TRUE)) %>%
ungroup() %>%
mutate(bps_acres = bps_count*0.2223945,
ref_scls_acres = bps_acres*(ref_percent/100),
cur_scls_acres = cur_scls_count*0.2223945,
cur_percent = (cur_scls_acres/bps_acres)*100) %>%
mutate(across(11:14, round, 0))
final_df_clean <- final_df_full %>%
dplyr::select(c(join_field,
bps_name,
model_code,
ref_label,
ref_percent,
cur_percent,
ref_scls_acres,
cur_scls_acres,
bps_acres
))
# save to csv
write.csv(final_df_full, file = "data/final_df_full.csv", row.names=FALSE)