-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04_CreateValidationSample.R
142 lines (114 loc) · 4.87 KB
/
04_CreateValidationSample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
library(terra)
library(sf)
# Set a seed for reproducability
set.seed(13002023)
p_ll <- '+proj=longlat +datum=WGS84 +no_defs'
e <- c(-180,180,-90,90)
# Total number of points
nr_points <- 1000
# -------------------- #
## Load the respective layers ##
# Forest management categorizations. These show across all layers
# any grid cell that has forest gain in one of the 3 datasets and
# falls into a forest management class
# 1 stating natural and 3 planted forest
fmlgain <- rast("extracts/combined_forestgain_2000onwards.tif")
names(fmlgain) <- "fml_gain"
fmlgain[fmlgain==0] <- NA
# Get also the Agreement layer. This layers indicates for each grid cell
# how many of the three datasets agree on whether there is forest gain
# Values rank from 1 to 3
dissim <- rast("resSaves/ForestGainAgreement_2000onwards.tif")
names(dissim) <- "Agreement"
# And the individual raster layers of total forest gain per grid cell
# Convert them to binary
ras1 <- rast("extracts/ESACCI_forestgainsum_2000onwards.tif")
ras1[ras1>0] <- 1
ras2 <- rast("extracts/Hansen_forestgain.tif")
ras2[ras2>0] <- 1
ras3 <- rast("extracts/modis_forestgainsum.tif")
ras3[ras3>0] <- 1
# Ensure that all layers perfectly align. They all have the same grid cell size,
# but are slightly different in extent at the borders (180deg dateline, here resample)
compareGeom(ras1,ras3)
ras2 <- terra::resample(ras2, ras1, method = "near")
compareGeom(ras1,ras2)
compareGeom(ras1,dissim)
fmlgain <- terra::resample(fmlgain, ras1, method = "near")
compareGeom(dissim,ras1)
# Add them all to a joint stack for easier sampling
# and value extraction
# Make a combined version of any treecovergain
o <- sum(ras1, ras2, ras3, na.rm = TRUE)
o[o>0] <- 1; o[o==0] <- NA # Get any grid cell with tree-cover gain
ss <- c(fmlgain, dissim, o)
# Buffer the combined treecover gain dataset
# Load in and extract again
# Then do sampling
o <- rast("CombinedTreeCoverGain.tif")
bb <- rast("BoundaryForestGain.tif")
fmlgain <- resample(fmlgain, bb, method = "near")
bb <- mask(bb, fmlgain)
bb[bb==0] <- NA; bb[bb>0] <- 1
new_sampleboundary <- sum(bb, o, na.rm = TRUE)
new_sampleboundary[new_sampleboundary==0] <- NA
# --------------- #
# Now do a weighted random sample on the remaining layers
ex <- terra::spatSample(x = new_sampleboundary,
size = nr_points,
method = "random",
replace = FALSE, # No replacement
na.rm = TRUE, # No grid cells that fall into the ocean
exhaustive = TRUE, # Try hard to reach the required number
as.df = TRUE, # Return a data.frame
values = FALSE, # Also return cell values
cells = TRUE, # Also return cell numbers (needed for extent)
xy = TRUE, # Also return cell coordinates
warn = TRUE
)
# --- #
# Re-extract all relevant values
df <- terra::extract(c(ras1, ras2, ras3, dissim, bb), y = ex[,c("x", "y")])
ex <- cbind(ex, df)
ex$BoundaryForestGain[is.na(ex$BoundaryForestGain)] <- 0
write.csv(ex, "resSaves/validation_sample_random.csv", row.names = FALSE)
# Make checks of coverage
table(ex$fml_gain)
table(ex$Agreement)
table(ex$ESACCI_forestgainsum)
table(ex$Hansen_forestgain)
table(ex$modis_forestgainsum)
# --- #
ss <- c( rast("/mnt/idrive/jung/forMartina/ESACCI_forestgainsum.tif"),
rast("/mnt/idrive/jung/forMartina/Hansen_forestgain.tif"),
rast("/mnt/idrive/jung/forMartina/modis_forestgainsum.tif")
)
ex <- read.csv("resSaves/validation_sample_random.csv")
if(!hasName(ex, "X")) ex$X <- 1:nrow(ex)
template <- ss[[1]]
template[] <- NA
# Buffer each grid cell by a small amount
sub <- ex |> sf::st_as_sf(coords = c("x", "y"),crs = st_crs(ss))
sub$x <- st_coordinates(sub)[,1]
sub$y <- st_coordinates(sub)[,2]
template[terra::cellFromXY(template, ex[,c("x","y")])] <- ex$X
# Vectorize the specific grid cell
poly_rast <- as.polygons(template, values = TRUE, na.rm = TRUE)
names(poly_rast) <- "value"
poly_rast <- poly_rast |> sf::st_as_sf()
# Loop through each observation
for(i in 1:nrow(sub)){
subs <- poly_rast |> dplyr::filter(value == i)
sub[i,"xmin"] <- st_bbox(subs)["xmin"]
sub[i,"ymin"] <- st_bbox(subs)["ymin"]
sub[i,"xmax"] <- st_bbox(subs)["xmax"]
sub[i,"ymax"] <- st_bbox(subs)["ymax"]
}
sub <- sub |> st_drop_geometry()
sub$cell <- NULL
# Save the outputs to a csv file
write.csv(sub, file = "resSaves/validation_sample_random.csv", row.names = FALSE)
# Also save a binary layer of the forestgain layers
# writeRaster(ras1, "resSaves/ESACCI_forestgainsum.tif", datatype = "INT1U", gdal=c("COMPRESS=DEFLATE"), overwrite = TRUE)
# writeRaster(ras2, "resSaves/Hansen_forestgain.tif", datatype = "INT1U", gdal=c("COMPRESS=DEFLATE"), overwrite = TRUE)
# writeRaster(ras3, "resSaves/modis_forestgainsum.tif", datatype = "INT1U", gdal=c("COMPRESS=DEFLATE"), overwrite = TRUE)