-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1.Pyromes_analysis.Rmd
1627 lines (1300 loc) · 70.9 KB
/
1.Pyromes_analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "Analysis and Figures for Modern pyromes: Biogeographical patterns of fire characteristics across the contiguous United States"
author: Megan E. Cattau, Adam Mahood, Jennifer K. Balch, and Carol Wessman
date: "`r Sys.Date()`"
output:
pdf_document:
toc: false
toc_depth: 2
number_sections: false
keep_tex: yes
extra_dependencies: "subfig"
latex_engine: pdflatex
header-includes:
\usepackage{helvet}
\renewcommand\familydefault{\sfdefault}
\usepackage{placeins}
\usepackage{caption}
\captionsetup[figure]{labelformat=empty}
\captionsetup[table]{labelformat=empty}
\pagenumbering{gobble} % Suppress page number on the title page
---
# setwd("/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data")
```{r, setup, include = FALSE}
# Global knitr configuration for the whole document.
knitr::opts_chunk$set(
echo = FALSE,
warning = FALSE,
message = FALSE
)
# Working directory for all subsequent chunks. NOTE: this and the hook below
# were previously passed *inside* opts_chunk$set(); they ran as argument side
# effects but also registered bogus chunk options. They belong in their own
# top-level calls (opts_knit, not opts_chunk).
knitr::opts_knit$set(root.dir = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data")
# Append \FloatBarrier after every figure so LaTeX keeps floats near the
# chunk that produced them.
knitr::knit_hooks$set(plot = function(x, options) {
paste0(knitr::hook_plot_tex(x, options), "\n\\FloatBarrier\n")
})
```
```{r knitr-NA}
# Render missing values as blank cells (instead of the text "NA") in every
# knitr::kable table produced by this document.
options(knitr.kable.NA = '')
```
```{r load-packages, results="hide"}
# Load all packages used by this analysis.
# NOTE(review): rgdal and rgeos were retired from CRAN (archived in 2023);
# re-running this document on a current R install will require either pinned
# legacy versions or migrating the sp/rgdal/rgeos calls to sf/terra — confirm
# the environment before re-running.
# Spatial data handling
library(rgdal)
library(sp)
library(sf)
library(dplyr)
library(raster)
library(sendmailR)
library(tidyr)
library(lubridate)
library(rgeos)
library(ggplot2)
library(assertthat)
# NOTE(review): plyr is loaded after dplyr; plyr masks several dplyr verbs
# (e.g. summarise) — the code below uses dplyr:: prefixes in places,
# presumably for this reason.
library(plyr)
# Clustering and PCA helpers
library(factoextra)
library(dendextend)
library(ggdendro)
library(pvclust)
library(clValid)
library(mclust)
# Plotting and table output
library(ggthemes)
library(ggmap)
library(RColorBrewer)
library(ggpubr)
library(knitr)
library(gtsummary)
library(kableExtra)
```
```{r import data, cache=TRUE, results="hide"}
# Import and preprocess all input layers: US state boundaries, a 50 km
# analysis grid, MTBS fire perimeters, MODIS active-fire detections,
# FPA-FOD fire records, and EPA Level I ecoregions. Everything is projected
# to a common CRS (UTM zone 13N / WGS84).
# Projection for layers
#EPSG:32613
data_crs<- " +proj=utm +zone=13 +datum=WGS84 +units=m +no_defs +ellps=WGS84 "
### US States
# Contiguous US only: drop Alaska, Hawaii, Puerto Rico.
States <- st_read(dsn = 'States', layer = "cb_2016_us_state_20m", quiet = TRUE) %>%
filter(!(NAME %in% c("Alaska", "Hawaii", "Puerto Rico"))) %>%
dplyr::select(STATEFP, STUSPS) %>%
setNames(tolower(names(.))) %>%
st_transform(., data_crs)
# Create a raster that's extent of States and 50km resolution and write into Data folder
Fishnet<- raster(ext=extent(States), resolution=50000)
projection(Fishnet)<-crs(data_crs)
writeRaster(Fishnet,"Fishnet.grd", format="raster", overwrite=TRUE)
### MTBS fire perimeters, 1984-2020
MTBS2 <- st_read(dsn = 'MTBS', layer = "mtbs_perims_DD", quiet = TRUE) %>%
st_transform(., data_crs)
MTBS<-as(MTBS2, 'Spatial')
#MTBS preprocessing
# names(MTBS)
MTBS$JD<-yday(MTBS$Ig_Date) # add JD col
MTBS$FireYear<-year(MTBS$Ig_Date) # add year column
MTBS$FireID<-1:nrow(MTBS)
# Acres -> hectares (1 acre = 0.4046856 ha)
MTBS$ha<-MTBS$BurnBndAc*0.4046856
# convert to points
MTBS_point1<-gCentroid(MTBS, byid=TRUE, id=MTBS$FireID)
MTBS_point<-SpatialPointsDataFrame(MTBS_point1, MTBS@data)
## MODIS active fire data
MODIS2 <- st_read(dsn = 'MODIS', layer = "fire_archive_M-C61_275705", quiet = TRUE) %>%
st_transform(., data_crs)
#limit to 2020 because dec of 2021 still nrt rather than archive data
class(MODIS2$ACQ_DATE)
MODIS3 <-MODIS2[year(MODIS2$ACQ_DATE)<=2020,]
MODIS<-as(MODIS3, 'Spatial')
# MODIS preprocessing
# names(MODIS)
MODIS$JD<-yday(MODIS$ACQ_DATE) # add JD col
MODIS$FireYear<-year(MODIS$ACQ_DATE) # add year column
MODIS$FireID<-1:nrow(MODIS)
### FPA-FOD
FOD2 <- st_read("FOD/Data/FPA_FOD_20210617.gdb", layer = "Fires") %>%
st_transform(., data_crs)
FOD<-as(FOD2, 'Spatial')
# FOD preprocessing
# names(FOD)
FOD$ha <- FOD$FIRE_SIZE*0.4046856 #size in hectares
FOD <- subset(FOD, is.na(FOD$NWCG_CAUSE_CLASSIFICATION) == FALSE) #cleaning up one NA
# explore percent each ignition type
data.frame((table(FOD$NWCG_CAUSE_CLASSIFICATION))/nrow(FOD)*100)
# area by ignition type
#aggregate(FOD$ha, by=list(Category=FOD$NWCG_CAUSE_CLASSIFICATION), FUN=sum)
####
FOD$JD<-FOD$DISCOVERY_DOY # add JD col, ddoy = discovery date
FOD$FireYear<-FOD$FIRE_YEAR # add year column
# Just human ones
FOD_human<-FOD[FOD$NWCG_CAUSE_CLASSIFICATION=="Human",]
### EPA Level I Ecoregions
# clipped to States in QGIS
Ecoregion_clip <- st_read(dsn = 'Ecoregion', layer = "ecoregion_proj_fix_clip", quiet = TRUE) %>%
st_transform(., data_crs)
# Repair invalid geometries before converting to sp objects.
Ecoregion_fix<-st_make_valid(Ecoregion_clip)
Ecoregion<-as(Ecoregion_fix, 'Spatial')
Ecoregion_simple<-st_simplify(Ecoregion_fix)
Ecoregion2<-as(Ecoregion_simple, 'Spatial')
```
```{r list of rasters, cache=TRUE, results="hide"}
# parse all the fire layers by year into a list of vector objects, calling them "xxxxx_parsed", each vector in the list called xxxx_yyyy
parse_vector <- function(all_data, prefix, year_seq) {
  # Split one fire layer into a list of per-year layers.
  #
  # Args:
  #   all_data: a vector layer (points or polygons) with a FireYear column
  #   prefix:   stem used to name each yearly element ("prefix_year")
  #   year_seq: the years to extract, in order
  #
  # Returns: a named list with one element per year in year_seq; a year with
  # no fires yields a zero-row subset of all_data.
  yearly <- lapply(year_seq, function(yr) all_data[all_data$FireYear == yr, ])
  names(yearly) <- paste(prefix, year_seq, sep = "_")
  yearly
}
# Split each fire dataset into one vector object per year over that
# dataset's full record length.
# range(MTBS$FireYear) #1984-2020
MTBS_parsed<-parse_vector(MTBS_point, "MTBS", (min(MTBS$FireYear):max(MTBS$FireYear)))
# range(MODIS$FireYear) # 2003 - 2020
MODIS_parsed<-parse_vector(MODIS, "MODIS", (min(MODIS$FireYear):max(MODIS$FireYear)))
# range(FOD$FireYear) # 1992 - 2018
FOD_parsed<-parse_vector(FOD, "FOD", (min(FOD$FireYear):max(FOD$FireYear)))
# Human-caused subset uses its own year range, which may differ from FOD's.
FOD_human_parsed<-parse_vector(FOD_human, "FOD_human", (min(FOD_human$FireYear):max(FOD_human$FireYear)))
```
```{r generate variables, cache=TRUE, results="hide"}
######### convert a list of vector objects into the relevant rasters
# Generate annual rasters of:
# Number of fires - count of MODIS FRP points, MTBS, FOD
## 1. Number_fires_MODIS
## 2. Number_fires_MTBS
## 3. Number_fires_FOD
# Max, mean, std FRP, MODIS
## 4. Mean_FRP_MODIS
## 5. Max_FRP_MODIS
# Mean, max fire event size MTBS, FOD
## 6. Mean_area_MTBS
## 7. Max_area_MTBS
## 8. Mean_area_FOD
## 9. Max_area_FOD
# Burned area MTBS, FOD
## 10. Sum_area_MTBS
## 11. Sum_area_FOD
# Season Length (std * 2 JD), MODIS, MTBS, FOD
## 12. Std_JD_MTBS
## 13. Std_JD_MODIS
## 14. Std_JD_FOD
# Ignition type (Perc human ignitions) FOD
## 15. Perc human ignitions FOD
annual_rasters <- function(vector_data, template, prefix, year_seq, field, fun, background) {
  # Rasterize each annual vector layer onto the grid defined by `template`,
  # summarising `field` with `fun` within each cell.
  #
  # Args:
  #   vector_data: list of annual vector layers (one element per year)
  #   template:    raster supplying extent / resolution / projection of outputs
  #   prefix:      stem used to name the output rasters ("prefix_year")
  #   year_seq:    years corresponding to the elements of vector_data
  #   field:       attribute to summarise (e.g. "FRP")
  #   fun:         summary function applied per cell (e.g. max, "count")
  #   background:  cell value where no features fall
  #
  # Returns: a named list of rasters, one per year.
  rasterize_one_year <- function(layer) {
    # Blank grid matching the template's geometry.
    grid <- raster(ncol = ncol(template), nrow = nrow(template))
    extent(grid) <- extent(template)
    projection(grid) <- projection(template)
    if (length(layer) == 0) {
      # Year with no features: all-zero grid.
      # NOTE(review): zero is used here even when `background` is NA —
      # confirm this asymmetry is intended for the empty-year case.
      setValues(grid, 0)
    } else {
      # Deliberately NOT zero-filling NAs afterwards: cells without fires
      # keep `background` so they can be excluded from later means.
      rasterize(layer, grid, field = field, fun = fun, background = background)
    }
  }
  out <- lapply(vector_data, rasterize_one_year)
  names(out) <- paste(prefix, year_seq, sep = "_")
  out
}
# Build the annual rasters for every fire characteristic, then collapse each
# to an across-years mean. background=0 keeps "no fires" as a real count of
# zero; background=NA excludes empty cells from later means.
# args: (vector_data, template, prefix, year_seq, field, fun, background)
# Fire frequency
Number_fires_MODIS<-annual_rasters(MODIS_parsed, Fishnet, "MODIS_Numfires", 2003:2020, "FireID", fun="count", background=0) # for count, it doesn't matter what field you use if points
Number_fires_MTBS<-annual_rasters(MTBS_parsed, Fishnet, "MTBS_Numfires", 1984:2020, "FireID", fun="count", background=0)
Number_fires_FOD<-annual_rasters(FOD_parsed, Fishnet, "FOD_Numfires", 1992:2018, "FOD_ID", fun="count", background=0)
# Fire intensity - give background of NA so that not included in means if no fire in them
Mean_FRP_MODIS<-annual_rasters(MODIS_parsed, Fishnet, "MODIS_meanFRP", 2003:2020, "FRP", fun=mean, background=NA)
Max_FRP_MODIS<-annual_rasters(MODIS_parsed, Fishnet, "MODIS_maxFRP", 2003:2020, "FRP", fun=max, background=NA)
# Fire event size
Mean_area_MTBS<-annual_rasters(MTBS_parsed, Fishnet, "MTBS_meanArea", 1984:2020, "ha", fun=mean, background=NA)
Max_area_MTBS<-annual_rasters(MTBS_parsed, Fishnet, "MTBS_maxArea", 1984:2020, field="ha", fun=max, background=NA)
Mean_area_FOD<-annual_rasters(FOD_parsed, Fishnet, "FOD_meanArea", 1992:2018, "ha", fun=mean, background=NA)
Max_area_FOD<-annual_rasters(FOD_parsed, Fishnet, "FOD_maxArea", 1992:2018, "ha", fun=max, background=NA)
# Burned area
Sum_area_MTBS<-annual_rasters(MTBS_parsed, Fishnet, "MTBS_sumArea", 1984:2020, "ha", fun=sum, background=NA)
Sum_area_FOD<-annual_rasters(FOD_parsed, Fishnet, "FOD_sumArea", 1992:2018, "ha", fun=sum, background=NA)
# Fire seasonality
Std_JD_MTBS<-annual_rasters(MTBS_parsed, Fishnet, "MTBS_stdJD", 1984:2020, "JD", fun=sd, background=NA)
Std_JD_MODIS<-annual_rasters(MODIS_parsed, Fishnet, "MODIS_stdJD", 2003:2020, "JD", fun=sd, background=NA)
Std_JD_FOD<-annual_rasters(FOD_parsed, Fishnet, "FOD_stdJD", 1992:2018, "JD", fun=sd, background=NA)
# Make it Std JD * 2
# Season length is defined as 2 * SD of the Julian day of fires.
Std2_JD_MTBS<-calc(stack(Std_JD_MTBS), function(x) x*2, forceapply=TRUE)
Std2_JD_MODIS<-calc(stack(Std_JD_MODIS), function(x) x*2, forceapply=TRUE)
Std2_JD_FOD<-calc(stack(Std_JD_FOD), function(x) x*2, forceapply=TRUE)
# Perc human ignitions
Number_fires_FOD2<-annual_rasters(FOD_parsed, Fishnet, "FOD_Numfires", 1992:2018, "FireYear", fun="count", background=NA) # background is NA instead of 0 so that if there were no fires in that cell, the perc human ign value won't be calculated
Number_fires_FOD_human<-annual_rasters(FOD_human_parsed, Fishnet, "FOD_Number_fires_human", 1992:2018, "FireYear", fun="count", background=0) #Background = 0 so that if there were fires in that cell but no human fires, the value is still calculated
Perc_fires_FOD_human<-stack(Number_fires_FOD_human) / stack(Number_fires_FOD2)
# One big stack of every annual layer, in a fixed variable order that the
# sampling chunk below depends on.
results_rasterstack<-stack(stack(Number_fires_MODIS), stack(Number_fires_MTBS), stack(Number_fires_FOD), stack(Mean_FRP_MODIS), stack(Max_FRP_MODIS), stack(Mean_area_MTBS), stack(Max_area_MTBS), stack(Mean_area_FOD), stack(Max_area_FOD), stack(Sum_area_MTBS), stack(Sum_area_FOD), stack(Std2_JD_MODIS),stack(Std2_JD_MTBS), stack(Std2_JD_FOD), stack(Perc_fires_FOD_human))
# names(results_rasterstack)
# writeRaster(results_rasterstack,"Results/results_rasterstack.grd", format="raster", overwrite=TRUE)
# results_rasterstack<-stack("results_rasterstack.grd") # Import sampled rasters - annual
# stats on each variable across all years rather than annual
Number_fires_MODIS_mean<-calc(stack(Number_fires_MODIS), mean)
Number_fires_MTBS_mean<-calc(stack(Number_fires_MTBS), mean)
Number_fires_FOD_mean<-calc(stack(Number_fires_FOD), mean)
Mean_FRP_MODIS_mean<-calc(stack(Mean_FRP_MODIS), mean, na.rm=TRUE)
Max_FRP_MODIS_mean<-calc(stack(Max_FRP_MODIS), mean, na.rm=TRUE)
Mean_area_MTBS_mean<-calc(stack(Mean_area_MTBS), mean, na.rm=TRUE)
Max_area_MTBS_mean<-calc(stack(Max_area_MTBS), mean, na.rm=TRUE)
Mean_area_FOD_mean<-calc(stack(Mean_area_FOD), mean, na.rm=TRUE)
Max_area_FOD_mean<-calc(stack(Max_area_FOD), mean, na.rm=TRUE)
Sum_area_MTBS_mean<-calc(stack(Sum_area_MTBS), mean, na.rm=TRUE)
Sum_area_FOD_mean<-calc(stack(Sum_area_FOD), mean, na.rm=TRUE)
# (2 * SD for season length already calculated above)
Std2_JD_MODIS_mean<-calc(stack(Std2_JD_MODIS), mean, na.rm=TRUE)
Std2_JD_MTBS_mean<-calc(stack(Std2_JD_MTBS), mean, na.rm=TRUE)
Std2_JD_FOD_mean<-calc(stack(Std2_JD_FOD), mean, na.rm=TRUE)
Perc_fires_FOD_human_mean<-calc(stack(Perc_fires_FOD_human), mean, na.rm=TRUE)
results_rasterstack_mean<-stack(Number_fires_MODIS_mean, Number_fires_MTBS_mean, Number_fires_FOD_mean, Mean_FRP_MODIS_mean, Max_FRP_MODIS_mean, Mean_area_MTBS_mean, Max_area_MTBS_mean, Mean_area_FOD_mean, Max_area_FOD_mean, Sum_area_MTBS_mean, Sum_area_FOD_mean, Std2_JD_MODIS_mean, Std2_JD_MTBS_mean, Std2_JD_FOD_mean, Perc_fires_FOD_human_mean)
# writeRaster(results_rasterstack_mean,"Results/results_rasterstack_mean.grd", format="raster", overwrite=TRUE)
# results_rasterstack_mean<-stack("Results/results_rasterstack_mean.grd") # Import sampled rasters - annual
```
```{r sample, cache=TRUE, results="hide"}
# Sample every raster layer (means + annual) at the 50 km grid-cell centers
# inside the contiguous US, attach ecoregion labels, and write the resulting
# data frame to disk.
# Get data into shape
# if need to reimport:
# results_rasterstack<-stack("results_rasterstack.grd") # Import sampled rasters - annual
# results_rasterstack_mean<-stack("results_rasterstack_mean.grd") # Import sampled rasters - mean
# States<-readOGR("States","CONUS") # Import States layer
results_rasterstack_all<-stack(results_rasterstack_mean, results_rasterstack) # Combine annual and mean sampled pyromes rasters
results_rasterstack_mask<-mask(results_rasterstack_all, States) # mask combined sampled pyromes rasters
sample_points<-rasterToPoints(results_rasterstack_mask[[1]], spatial=TRUE) # create sample point locations from one of the rasters
samples<-raster::extract(results_rasterstack_mask, sample_points, sp=TRUE) # extract values at sample points
# names(samples)
samples<-samples[-1] # remove repeat layer
# format for the annual layers is datasource_measuredthing_yyyy
# format for the mean layers is layer.x, so rewrite that below
# NOTE(review): this name vector must stay in the same order as
# results_rasterstack_mean was stacked above — confirm if either changes.
names(samples)<-c("Number_fires_MODIS_mean", "Number_fires_MTBS_mean", "Number_fires_FOD_mean", "Mean_FRP_MODIS_mean", "Max_FRP_MODIS_mean", "Mean_area_MTBS_mean", "Max_area_MTBS_mean", "Mean_area_FOD_mean", "Max_area_FOD_mean", "Sum_area_MTBS_mean", "Sum_area_FOD_mean", "Std_JD_MODIS_mean", "Std_JD_MTBS_mean", "Std_JD_FOD_mean", "Perc_human_FOD_mean", names(results_rasterstack))
samples_p <- SpatialPointsDataFrame(samples, data=samples@data, proj4string=crs(MODIS)) # project
samples_p$FID<-1:nrow(samples_p) # put FID in there
samples_df<-data.frame(samples_p)
# NOTE(review): hard-coded column index — valid only for this exact stack
# layout; re-check if the number of layers changes.
samples_df<-samples_df[,-438] # remove 'optional' column
# Point-in-polygon overlay to attach the Level I ecoregion name to each cell.
eco_data<-sp::over(samples_p, Ecoregion[,"NA_L1NAME"], fn=NULL)
samples_df$ecoregion<-eco_data$NA_L1NAME
samples_p$ecoregion<-eco_data
samples_spatial<-samples_df
coordinates(samples_spatial)<-~x+y
proj4string(samples_spatial)<-CRS("+init=epsg:32613")
# all the mean values plus FID and ecoregion
# samples_df_mean<-samples_df[,c(1:15, 376, 379)]
samples_spatial_mean<-samples_spatial[,c(1:15, 435, 436)]
### Write and retrieve samples_df dataframe
write.csv(samples_df, "Results/samples_df.csv")
# samples_df<-read.csv("samples_df.csv")
# names(samples_df)
# samples_df<-samples_df[,-1]
```
```{r samples_process, cache=TRUE, results="hide"}
# Prepare the sampled data for PCA / clustering: zero-fill NAs, subset to the
# across-years mean variables, scale, and define display-name vectors.
# Import data - if needed from previous
# Fire variables sampled at 50km resolution
# samples_df<-read.csv("samples_df.csv")
# names(samples_df)
# samples_df<-samples_df[,-1]
#make NA values 0 for PCA and clustering
samples_df[is.na(samples_df)]<-0
# Prepare the data - just get mean values, x, and y
# NOTE(review): hard-coded column indices (436, 437 = x, y) depend on the
# exact stack layout produced above — confirm before re-running.
samples_df_mean_no_anth<-samples_df[,c(1:14, 436, 437)]
# Prepare the data - just get mean values, x, and y wo location data
# names(samples_df_mean_no_anth)
samples_df_mean_no_anth_no_xy<-samples_df_mean_no_anth[,c(-15:-16)]
# standardize variables (w location data)
samples_df_mean_no_anth_sc<-as.data.frame(scale(samples_df_mean_no_anth))
# Names to pass later to functions
names_vector<-c("Num fires (MODIS)", "Num fires (MTBS)", "Num fires (FPA FOD)", "Mean Intensity (MODIS)", "Max Intensity (MODIS)", "Mean Fire Size (MTBS)", "Max Fire Size (MTBS)", "Mean Fire Size (FPA FOD)", "Max Fire Size (FPA FOD)", "Burned Area (MTBS)", "Burned Area (FPA FOD)", "Season Length (MODIS)", "Season Length (MTBS)", "Season Length (FPA FOD)", "Prop human ign (FPA FOD)")
names_simple<-c("Fire Frequency (n fires)", "Fire Frequency (n fires)", "Fire Frequency (n fires)", "Average Intensity (MW)", "Extreme Intensity(MW)", "Average Fire Size (ha)", "Extreme Fire Size (ha)", "Average Fire Size (ha)", "Extreme Fire Size (ha)", "Burned area (ha)", "Burned area (ha)", "Season Length (days)","Season Length (days)", "Season Length (days)", "Human Ignitions (Proportion)")
names_no_units<-c("Fire Frequency", "Fire Frequency", "Fire Frequency", "Average Intensity", "Extreme Intensity", "Average Fire Size", "Extreme Fire Size", "Average Fire Size", "Extreme Fire Size", "Burned area", "Burned area", "Season Length","Season Length", "Season Length", "Human Ignitions")
units_simple<-c("n fires", "n fires", "n fires", "MW", "MW", "ha", "ha", "ha","ha", "ha", "ha", "days","days", "days", "proportion")
names(samples_df_mean_no_anth_no_xy)<-names_vector[1:14]
```
```{r variables_redundancy, cache=TRUE, results="hide"}
# Test whether the same fire characteristic derived from different data
# sources (MODIS, MTBS, FPA-FOD) differs significantly, to justify keeping
# each source's version as a separate variable. Groups are recovered from the
# variable-name strings by fixed substring positions; non-parametric tests
# (Kruskal-Wallis / Wilcoxon) throughout.
# Number of fires - MODIS, FOD, MTBS
num_fires_long<-gather(samples_df[,c(1:3, 435)], key=variable, value=value, -FID)
num_fires_long$group<-ifelse(substr(num_fires_long$variable, 14, 18)=="MODIS", "MODIS", ifelse(substr(num_fires_long$variable, 14, 17)=="MTBS", "MTBS", ifelse(substr(num_fires_long$variable, 14, 16)=="FOD", "FPA-FOD",-9999)))
num_fires_long$group<-as.factor(num_fires_long$group)
kruskal.test(value ~ group, data = num_fires_long)
# P < 0.001
# Pairwise comparisons using Wilcoxon rank sum test
pairwise.wilcox.test(num_fires_long$value,
num_fires_long$group,
p.adjust.method="none")
# All p < 0.001
# Max Mean and Sum area - MTBS and FOD
## Mean Area
mean_area_long<-gather(samples_df[,c(6, 8, 435)], key=variable, value=value, -FID)
# BUG FIX: the FOD branch previously used substr(variable, 11, 15), which
# yields "FOD_m" for "Mean_area_FOD_mean" and can never equal "FOD", so FOD
# rows fell through to the -9999 label. Positions 11-13 give "FOD".
mean_area_long$group<-ifelse(substr(mean_area_long$variable, 11, 14)=="MTBS", "MTBS", ifelse(substr(mean_area_long$variable, 11, 13)=="FOD", "FPA-FOD",-9999))
mean_area_long$group<-as.factor(mean_area_long$group)
unique(mean_area_long$group)
wilcox.test(mean_area_long$value~mean_area_long$group)
# P < 0.001
## Max Area
max_area_long<-gather(samples_df[,c(7, 9, 435)], key=variable, value=value, -FID)
max_area_long$group<-ifelse(substr(max_area_long$variable, 10, 13)=="MTBS", "MTBS", ifelse(substr(max_area_long$variable, 10, 12)=="FOD", "FPA-FOD",-9999))
max_area_long$group<-as.factor(max_area_long$group)
unique(max_area_long$group)
wilcox.test(max_area_long$value~max_area_long$group)
# P < 0.001
## Sum Area
sum_area_long<-gather(samples_df[,c(10, 11, 435)], key=variable, value=value, -FID)
sum_area_long$group<-ifelse(substr(sum_area_long$variable, 10, 13)=="MTBS", "MTBS", ifelse(substr(sum_area_long$variable, 10, 12)=="FOD", "FPA-FOD",-9999))
sum_area_long$group<-as.factor(sum_area_long$group)
unique(sum_area_long$group)
wilcox.test(sum_area_long$value~sum_area_long$group)
# P < 0.001
# Seasonality - MTBS, MODIS, and FOD
season_long<-gather(samples_df[,c(12, 13, 14, 435)], key=variable, value=value, -FID)
season_long$group<-ifelse(substr(season_long$variable, 8, 12)=="MODIS", "MODIS", ifelse(substr(season_long$variable, 8, 11)=="MTBS", "MTBS", ifelse(substr(season_long$variable, 8, 10)=="FOD", "FPA-FOD",-9999)))
season_long$group<-as.factor(season_long$group)
unique(season_long$group)
kruskal.test(value ~ group, data = season_long)
# P < 0.001
# Pairwise comparisons using Wilcoxon rank sum test
pairwise.wilcox.test(season_long$value,
season_long$group,
p.adjust.method="none")
# All # P < 0.001
# Max and mean FRP = only one source - MODIS
# % Anthro = only one source - FOD
```
```{r PCA, results=FALSE, fig.show="hide", results="hide"}
# NOTE(review): the chunk header above passes `results` twice
# (results=FALSE and results="hide") — only one should be kept.
# Run PCA on the mean fire variables, decide how many components to retain
# (Kaiser criterion + scree plot), inspect loadings, and build the
# supplementary tables/figures (Tables S1-S2, Figures S2-S4).
# apply PCA
set.seed(16233)
samples_pca_no_anth <- prcomp(na.omit(samples_df_mean_no_anth_no_xy),center=TRUE, scale=TRUE) # apply PCA
# How many components to keep?
# 1. Kaiser criterion says that retain components - eigenvalue associated w each component - retain in >1, or reject if <1
round(samples_pca_no_anth$sdev^2, 2)
summary(samples_pca_no_anth)
# 1-4>1
# retain 4 components, explains ~72% of the variance
# The summary method describe again the importance of the PCs. The first row describe again the standard deviation associated with each PC. The second row shows the proportion of the variance in the data explained by each component while the third row describe the cumulative proportion of explained variance. We can see there that the first two PCs accounts for more than half of the variance of the data.
# another way to do above
set.seed(16233)
eig.val <- get_eigenvalue(samples_pca_no_anth)
eig.val<-cbind(rownames(eig.val), eig.val)
rownames(eig.val) <- NULL
eig.val<-eig.val[1:5,]
eig.val[,1]<-c("PCA 1","PCA 2","PCA 3","PCA 4","PCA 5")
table_eig<-eig.val %>%
knitr::kable(
format = "latex",
align = "l",
digits = 2,
booktabs = TRUE,
longtable = TRUE,
caption="Table S1. Principal Components Analysis (PCA) components, with the associated eigenvalues and variance from a PCA analysis using all derived fire characteristics across the contiguous United States. The PCA was conducted to determine which fire variables accounted for most of the variance in the data and thus which variables to use to define the pyromes. Based on the Kaiser Criterion, on which components whose eigenvalues >1 are retained, we retain four components for further analysis.The first four PCA components explained 72 percent of the cumulative variance.",
col.names = c("Component", "Eigenvalue", "Percent variance", "Cumulative percent variance"),
escape = FALSE
) %>%
kableExtra::kable_styling(
position = "left",
latex_options = c("striped", "repeat_header"),
stripe_color = "gray!15"
)
# 2. Scree plot
# The plot method returns a plot of the variances (y-axis) associated with the PCs (x-axis). The Figure below is useful to decide how many PCs to retain for further analysis.
fig_scree<-plot(samples_pca_no_anth, main =" ", type = "l")
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS2.pdf", height=4, width=4)
plot(samples_pca_no_anth, main =" ", type = "l")
dev.off()
# What variables are colinear w components?
# BiPlot
fig_biplot<-biplot(samples_pca_no_anth, cex=c(1, .5), col=c("white", "black"))
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS3.pdf", height=8, width=6)
biplot(samples_pca_no_anth, cex=c(1, .5), col=c("white", "black"))
dev.off()
# DotPlot of loadings - PC1
load <- samples_pca_no_anth$rotation
sorted.loadings1 <- load[order(load[, 1]), 1]
myTitle1 <- "Loadings Plot for PC1"
myXlab <- "Variable Loadings"
# NOTE(review): mid-document library() call; lattice would normally be
# loaded with the other packages at the top.
library(lattice)
fig_dotplota<-dotplot(sorted.loadings1, main=myTitle1, xlab=myXlab, cex=1.5, col="red")
# DotPlot PC2
sorted.loadings2 <- load[order(load[, 2]), 2]
myTitle2 <- "Loadings Plot for PC2"
fig_dotplotb<-dotplot(sorted.loadings2, main=myTitle2, xlab=myXlab, cex=1.5, col="red")
# DotPlot PC3
sorted.loadings3 <- load[order(load[, 3]), 3]
myTitle3 <- "Loadings Plot for PC3"
fig_dotplotc<-dotplot(sorted.loadings3, main=myTitle3, xlab=myXlab, cex=1.5, col="red")
# DotPlot PC4
sorted.loadings4 <- load[order(load[, 4]), 4]
myTitle4 <- "Loadings Plot for PC4"
fig_dotplotd<-dotplot(sorted.loadings4, main=myTitle4, xlab=myXlab, cex=1.5, col="red")
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS4a.pdf", height=5, width=4)
dotplot(sorted.loadings1, main=paste0("(a) ", myTitle1), xlab=myXlab, cex=1.5, col="red")
dev.off()
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS4b.pdf", height=5, width=4)
dotplot(sorted.loadings2, main=paste0("(b) ", myTitle2), xlab=myXlab, cex=1.5, col="red")
dev.off()
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS4c.pdf", height=5, width=4)
dotplot(sorted.loadings3, main=paste0("(c) ", myTitle3), xlab=myXlab, cex=1.5, col="red")
dev.off()
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS4d.pdf", height=5, width=4)
dotplot(sorted.loadings4, main=paste0("(d) ", myTitle4), xlab=myXlab, cex=1.5, col="red")
dev.off()
var_imp<-get_pca_var(samples_pca_no_anth)
round(var_imp$cor[,c(1:4)], 2) # correlations between variables and dimensions
round(var_imp$contrib[,c(1:4)], 2) # Contributions of the variables - The contribution of a variable to a given principal component is (in percentage) : (var.cos2 * 100) / (total cos2 of the component)
round(var_imp$cos2[,c(1:4)], 2) # Cos2 for the variables
var_info<-cbind(names_vector[1:14], round(var_imp$cor[,c(1:4)], 2), round(var_imp$contrib[,c(1:4)], 2))
var_info_df<-data.frame(var_info)
var_info_df2<-var_info_df[c(-1,-6:-9)]
var_info_df2 <- cbind(rownames(var_info_df2), var_info_df2)
rownames(var_info_df2) <- NULL
table_PCAcorr<-var_info_df2 %>%
knitr::kable(
format = "latex",
align = "l",
booktabs = TRUE,
longtable = TRUE,
caption="Table S2. PCA Correlations. Correlations between fire variables and the Principal Components Analysis (PCA) components and the contributions of the fire variables to the components. The PCA analysis used all derived fire characteristics across the contiguous United States and was conducted to determine which fire variables accounted for most of the variance in the data and thus which variables to use to define the pyromes. After determining to keep the first four PCA components based on the Kaiser criterion (Table S1) and by evaluating a scree plot (Figure S2), we determined which of the fire characteristics were associated with these four components using the statistical analysis summarized here and by evaluating a biplot (Figure S3) and dotplots (Figure S4). Every variable in this suite loads significantly onto at least one of the components, so we retain all fire variables for the clustering analysis.",
col.names = c("Fire characteristic", "PCA 1", "PCA 2", "PCA 3", "PCA 4"),
escape = FALSE
) %>%
kableExtra::kable_styling(
position = "left",
latex_options = c("striped", "repeat_header"),
stripe_color = "gray!15"
)
```
```{r cluster, cache=TRUE, results="hide"}
# K-means clustering of the scaled mean fire variables for k = 1..150, then
# choose k via BIC and the Dunn index.
# Cluster - (Divisive method)
# Kmeans option: randomly places specified number of centroids
# kmeans objects for a range of cluster numbers - scaled and centered data
set.seed(16338)
fit_no_anth<-vector("list", 150)
for (i in 1:150) {
fit_no_anth[[i]] <- kmeans(samples_df_mean_no_anth_sc,iter.max=10000, centers=i, nstart=100)
}
# Evaluate these - how many clusters
# 1. BIC / AIC
# Compute AIC and BIC for a range of cluster # s
set.seed(16338)
kmeansAIC_BIC <- function(fit) {
  # AIC and BIC for a kmeans fit, treating the total within-cluster sum of
  # squares as the deviance and (dimensions x clusters) as the parameter
  # count.
  #
  # Args:
  #   fit: a kmeans result (uses $centers, $cluster, $tot.withinss)
  #
  # Returns: a one-row data.frame with columns AIC and BIC.
  n_dims <- ncol(fit$centers)       # m: number of variables
  n_obs <- length(fit$cluster)      # n: number of observations
  n_clusters <- nrow(fit$centers)   # k: number of clusters
  deviance <- fit$tot.withinss      # D: total within-cluster SS
  n_params <- n_dims * n_clusters
  data.frame(
    AIC = deviance + 2 * n_params,
    BIC = deviance + log(n_obs) * n_params
  )
}
# make dataframe with AIC and BIC ~ Number of clusters for no anth
AIC_BIC_df_no_anth<-data.frame()
for (i in 1:150){
AIC_BIC_df_no_anth[i,1]<-kmeansAIC_BIC(fit_no_anth[[i]])[1]
AIC_BIC_df_no_anth[i,2]<-kmeansAIC_BIC(fit_no_anth[[i]])[2]
}
AIC_BIC_df_no_anth$n_clust<-1:150
max_clust<-AIC_BIC_df_no_anth[AIC_BIC_df_no_anth$BIC==min(AIC_BIC_df_no_anth$BIC),]$n_clust
max_clust
# 39
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS5.pdf", height=5, width=4)
plot(AIC_BIC_df_no_anth$n_clust,AIC_BIC_df_no_anth$BIC, pch=".", ylim=c(0,100000), xlab="Number of Clusters", ylab="BIC")
points(AIC_BIC_df_no_anth[AIC_BIC_df_no_anth$BIC==min(AIC_BIC_df_no_anth$BIC),]$n_clust, min(AIC_BIC_df_no_anth$BIC), col="blue")
text(AIC_BIC_df_no_anth[AIC_BIC_df_no_anth$BIC==min(AIC_BIC_df_no_anth$BIC),]$n_clust, min(AIC_BIC_df_no_anth$BIC-4000), paste0("k=", max_clust))
legend(x=10, y=80000, legend=c("Data", "Inflection point (min BIC~k)"), pch=c(20, 1), col=c("black", "blue"))
dev.off()
# 2. Dunn ~ k
# Dunn index for each k up to the BIC-selected maximum; higher Dunn means
# better-separated, more compact clusters. dunn() is presumably
# clValid::dunn (package attached elsewhere in the file) - TODO confirm.
dunn_df_no_anth<-rep(0,max_clust)
for (i in 1:max_clust){
dunn_df_no_anth[i]<-dunn(Data=samples_df_mean_no_anth_sc, clusters=fit_no_anth[[i]]$cluster)
}
# write.csv(dunn_df_no_anth, "Results/dunn_df_no_anth.csv")
# Drop k=1 (Dunn uninformative for a single cluster); row i of the data
# frame below corresponds to k = i + 1.
dunn_df_df_no_anth<-data.frame(cbind(2:max_clust, dunn_df_no_anth[2:max_clust]))
names(dunn_df_df_no_anth)<-c("n_clust", "dunn")
# Figure S6: Dunn index ~ k. Absolute output path - author's machine only.
pdf(file = "/Users/megancattau/Dropbox/0_EarthLab/US_Pyromes/Pyromes/pyromes_code/Data/Figures/FigS6.pdf", height=4, width=4)
plot(dunn_df_df_no_anth$n_clust, dunn_df_df_no_anth$dunn, type="o",xlab="Number of clusters", ylab="Dunn Index")
dev.off()
# local maxima at
# Collect interior local maxima of Dunn ~ k; k=2 is always seeded as the
# first candidate.
local_max_dunn<-numeric(0)
local_max_dunn[1]<-2
for (i in 2:(max(dunn_df_df_no_anth$n_clust)-2)){
if(dunn_df_df_no_anth[i,]$dunn>dunn_df_df_no_anth[i-1,]$dunn & dunn_df_df_no_anth[i,]$dunn>dunn_df_df_no_anth[i+1,]$dunn) {
local_max_dunn<-c(local_max_dunn, dunn_df_df_no_anth[i,]$n_clust)
}
}
# Edge case: the last row has no right neighbor, so compare only to the left
# (single-iteration loop over the final row index).
for (i in (max(dunn_df_df_no_anth$n_clust)-1)){
if(dunn_df_df_no_anth[i,]$dunn>dunn_df_df_no_anth[i-1,]$dunn) {
local_max_dunn<-c(local_max_dunn, dunn_df_df_no_anth[i,]$n_clust)
}
}
# If over a threshold instead of a local max:
# thresh<-mean(dunn_df_df_no_anth[-1,]$dunn)
# abline(h=thresh, col="red")
# dunn_df_df_no_anth[dunn_df_df_no_anth$dunn>thresh,]$n_clust
local_max_dunn
# 2 5 8 14 19 24 28 30 32 35 37 39
### Add it back in to data frame
# Attach the cluster assignment for each candidate k (the Dunn local maxima
# above) back onto the unscaled sample data frame as kmeansK columns.
samples_df_k<-samples_df
samples_df_k$kmeans2<-fit_no_anth[[2]]$cluster
samples_df_k$kmeans5<-fit_no_anth[[5]]$cluster
samples_df_k$kmeans8<-fit_no_anth[[8]]$cluster
samples_df_k$kmeans14<-fit_no_anth[[14]]$cluster
samples_df_k$kmeans19<-fit_no_anth[[19]]$cluster
samples_df_k$kmeans24<-fit_no_anth[[24]]$cluster
samples_df_k$kmeans28<-fit_no_anth[[28]]$cluster
samples_df_k$kmeans30<-fit_no_anth[[30]]$cluster
samples_df_k$kmeans32<-fit_no_anth[[32]]$cluster
samples_df_k$kmeans35<-fit_no_anth[[35]]$cluster
samples_df_k$kmeans37<-fit_no_anth[[37]]$cluster
samples_df_k$kmeans39<-fit_no_anth[[39]]$cluster
# Optional round-trip to disk (disabled):
# write.csv(samples_df_k, "Results/samples_df_k.csv")
# samples_df_k<-read.csv("samples_df_k.csv")
# samples_df_k<-samples_df_k[,-1]
# % Area of each
# Fraction of sample cells assigned to a given pyrome (cluster).
# Generalized from the original, which hard-coded the global samples_df_k
# and its kmeans8 column; defaults preserve the original behavior.
#
# cluster_num: cluster label to tally.
# df:          data frame of samples (default: global samples_df_k).
# cluster_col: name of the cluster-assignment column (default "kmeans8").
# Returns the proportion of rows of df whose cluster_col equals cluster_num
# (NA assignments, if any, are not counted as matches).
area_fun<-function(cluster_num, df = samples_df_k, cluster_col = "kmeans8"){
  sum(df[[cluster_col]] == cluster_num, na.rm = TRUE) / nrow(df)
}
# Percent and absolute area per pyrome (k=8 solution), assuming the sample
# cells are an unbiased spatial sample of the conterminous US.
perc_area<-1:8
total_area<-1:8
for (i in 1:8){
perc_area[i]<-round(area_fun(i)*100, 2)
total_area[i]<-area_fun(i) * 7663941.7
# 7,663,941.7 km2 total US area
}
perc_area
# 2.24 26.67 28.35 6.55 0.67 10.96 0.06 24.49
total_area
# 171300.595 2043874.226 2172936.318 502168.868 51624.837 840076.892 4693.167 1877266.797
```
```{r characterize_pyromes, cache=TRUE, results="hide"}
# Characterize the 8 pyromes: per-cluster mean and SD of each of the 15
# fire characteristics. Source-specific variables are summarized only over
# cells where that source recorded at least one fire.
samples_df_k$FID<-1:nrow(samples_df_k)
samples_df_k_MODIS<-samples_df_k[samples_df_k$Number_fires_MODIS_mean>0,]
samples_df_k_MTBS<-samples_df_k[samples_df_k$Number_fires_MTBS_mean>0,]
samples_df_k_FOD<-samples_df_k[samples_df_k$Number_fires_FOD_mean>0,]
# One list element per characteristic (columns 1-15 of samples_df_k); which
# subset is aggregated depends on which source the column derives from.
mean_char8<-vector("list", 15)
sd_char8<-vector("list", 15)
# Columns 1-3: characteristics defined for all cells
for (i in c(1,2,3)){
mean_char8[i]<-round(aggregate(samples_df_k[,i], by=list(samples_df_k$kmeans8), FUN=mean)[2],1)
sd_char8[i]<-round(aggregate(samples_df_k[,i], by=list(samples_df_k$kmeans8), FUN=sd)[2],1)
}
# Columns 4, 5, 12: MODIS-derived
for (i in c(4, 5, 12)){
mean_char8[i]<-round(aggregate(samples_df_k_MODIS[,i], by=list(samples_df_k_MODIS$kmeans8), FUN=mean)[2],1)
sd_char8[i]<-round(aggregate(samples_df_k_MODIS[,i], by=list(samples_df_k_MODIS$kmeans8), FUN=sd)[2],1)
}
# Columns 6, 7, 10, 13: MTBS-derived
for (i in c(6, 7, 10, 13)){
mean_char8[i]<-round(aggregate(samples_df_k_MTBS[,i], by=list(samples_df_k_MTBS$kmeans8), FUN=mean)[2],1)
sd_char8[i]<-round(aggregate(samples_df_k_MTBS[,i], by=list(samples_df_k_MTBS$kmeans8), FUN=sd)[2],1)
}
# Columns 8, 9, 11, 14, 15: FOD-derived
for (i in c(8, 9, 11, 14, 15)){
mean_char8[i]<-round(aggregate(samples_df_k_FOD[,i], by=list(samples_df_k_FOD$kmeans8), FUN=mean)[2],1)
sd_char8[i]<-round(aggregate(samples_df_k_FOD[,i], by=list(samples_df_k_FOD$kmeans8), FUN=sd)[2],1)
}
# Reassemble into 15 x 8 data frames: rows = characteristics, cols = pyromes
mean_char8_df <- data.frame(matrix(unlist(mean_char8), nrow=15, byrow=T))
sd_char8_df <- data.frame(matrix(unlist(sd_char8), nrow=15, byrow=T))
# write.table(mean_char8_df, "Results/mean_char8_df.txt")
fire_char_pyrome8<-cbind(mean_char8_df,sd_char8_df)
names(fire_char_pyrome8)<-c(paste0(1:8), paste0("Pyrome ", 1:8, " Sd"))
# NOTE(review): the next assignment is immediately overwritten below and has
# no effect.
fire_characteristics_pyrome8<-fire_char_pyrome8
fire_characteristics_pyrome8<-data.frame(matrix(NA, nrow = 15, ncol = 8))
names(fire_characteristics_pyrome8)<-1:8
# Build "mean (+/-sd)" display strings per characteristic x pyrome
for(i in 1:8){
fire_characteristics_pyrome8[,i]<-paste0(mean_char8_df[,i], " (+/-", sd_char8_df[,i], ")")
}
# write.table(fire_characteristics_pyrome8, "Results/fire_characteristics_pyrome8.txt")
# fire_characteristics_pyrome8<-read.table("Results/fire_characteristics_pyrome8.txt")
# Fit analysis of variance test for each fire characteristic as a function of which cluster it belongs to (not great if not balanced) and Tukey's is post-hoc where the differences are. Check to make sure pyrome with highest value sig diff from next highest value
anov_results8<-vector("list", 15) # empty list
Tukey8<-vector("list", 15) # empty list
for (i in 1:15){
anov_results8[[i]]<-aov(formula=samples_df_k[,i]~as.factor(samples_df_k$kmeans8))
# Single-bracket assignment stores the first (and only) element of the
# TukeyHSD result - the pairwise-comparison matrix for the kmeans8 factor.
Tukey8[i]<-TukeyHSD(anov_results8[[i]])
}
names(Tukey8)<-c(names(samples_df_k[1:15]))
# Recode adjusted p-values into star codes; -9999 marks pairs that are NOT
# significantly different (p adj > 0.1).
Tukey_sig8<-vector("list", 15)
for (i in 1:15){
Tukey_sig8[[i]]<-ifelse(Tukey8[[i]][,"p adj"]<=0.01, "***",
ifelse(Tukey8[[i]][,"p adj"]>0.01 & Tukey8[[i]][,"p adj"]<=0.05, "**",
ifelse(Tukey8[[i]][,"p adj"]>0.05 & Tukey8[[i]][,"p adj"]<=0.1, "*", -9999)))
}
# Get pyrome number with highest mean value for each characteristic
high_char<-colnames(fire_char_pyrome8)[max.col(fire_char_pyrome8[,1:8])]
# get pyromes that have value that is not sig diff from the pyrome with the highest value (any Tukey_sig8 value -9999 not sig diff). Automate later if possible
# change color in table
# For example, which(Tukey_sig8[[1]]=="-9999") shows that Pyromes 1, 4, and 7 are not diff from pyrome 5, the pyrome with the highest values for characteristic 1 (add 1 to that value for the below to represent the correct column in the table, since the fire characteristics are col 1)
# Manual inspection of non-significant pairs per characteristic; the
# results are hand-transcribed into high_char_not_diff below.
# NOTE(review): being manual, these transcriptions must be redone whenever
# the clustering or input data change.
which(Tukey_sig8[[1]]=="-9999") #1, 4, 7
which(Tukey_sig8[[2]]=="-9999")
which(Tukey_sig8[[3]]=="-9999") #7
which(Tukey_sig8[[4]]=="-9999") #1,7
which(Tukey_sig8[[5]]=="-9999") #6,7
which(Tukey_sig8[[6]]=="-9999")
which(Tukey_sig8[[7]]=="-9999")
which(Tukey_sig8[[8]]=="-9999")
which(Tukey_sig8[[9]]=="-9999")
which(Tukey_sig8[[10]]=="-9999")
which(Tukey_sig8[[11]]=="-9999")
which(Tukey_sig8[[12]]=="-9999") #7
which(Tukey_sig8[[13]]=="-9999")
which(Tukey_sig8[[14]]=="-9999") #7
which(Tukey_sig8[[15]]=="-9999")
high_char_not_diff<-vector(mode="list", length=15)
high_char_not_diff[[1]]<-c(1, 4, 7)
high_char_not_diff[[3]]<-c(7)
high_char_not_diff[[4]]<-c(1, 7)
high_char_not_diff[[5]]<-c(6, 7)
high_char_not_diff[[12]]<-c(7)
high_char_not_diff[[14]]<-c(7)
# Annual trends
# grab all the annual data, kmeans8 value, and FID
# names(samples_df_k)
# NOTE(review): c() here mixes a logical vector with the numeric indices
# 435 and 441, which coerces the logicals to 0/1 - so this indexes column 1
# once per TRUE instead of selecting the Numfires columns. Additionally,
# substr(names, 6, 11) yields 6 characters and can never equal the 8-char
# "Numfires" (substr(., 6, 13) was probably intended for MTBS names).
# Downstream greps recover the Numfires variables from samples_long2-4, so
# the final results appear unaffected - confirm before relying on
# samples_long1, and consider mirroring the cbind() pattern used below.
samples_clust_num<-samples_df_k[,c((substr(names(samples_df_k), 7, 14)=="Numfires")|(substr(names(samples_df_k), 6, 11)=="Numfires")|(substr(names(samples_df_k), 5, 12)=="Numfires"), 435,441)]
# Per-source annual columns plus columns 435 and 441 (presumably kmeans8
# and FID - hard-coded positions; verify against names(samples_df_k)).
samples_clust_MODIS<-cbind((samples_df_k_MODIS[ , substr(names(samples_df_k_MODIS), 1, 5)=="MODIS"]), (samples_df_k_MODIS[,c(435, 441)]))
samples_clust_MTBS<-cbind((samples_df_k_MTBS[ , substr(names(samples_df_k_MTBS), 1, 4)=="MTBS"]), (samples_df_k_MTBS[,c(435, 441)]))
samples_clust_FOD<-cbind((samples_df_k_FOD[ , substr(names(samples_df_k_FOD), 1, 3)=="FOD"]), (samples_df_k_FOD[,c(435, 441)]))
# Wide -> long (tidyr::gather; superseded by pivot_longer but retained here)
samples_long1<-gather(samples_clust_num, key=variable, value=value, -FID, -kmeans8)
samples_long2<-gather(samples_clust_MODIS, key=variable, value=value, -FID, -kmeans8)
samples_long3<-gather(samples_clust_MTBS, key=variable, value=value, -FID, -kmeans8)
samples_long4<-gather(samples_clust_FOD, key=variable, value=value, -FID, -kmeans8)
samples_long<-rbind(samples_long1, samples_long2, samples_long3, samples_long4)
head(samples_long)
# add year column
# Return the trailing n characters of each string in x (used below to pull
# the 4-digit year suffix off annual variable names). Vectorized over x.
extractYear <- function(x, n) {
  len <- nchar(x)
  substr(x, len - n + 1, len)
}
# Parse the 4-digit year suffix from each variable name
samples_long$year<-extractYear(samples_long$variable, 4)
samples_long$year<-as.numeric(samples_long$year)
unique(samples_long$variable)
unique(samples_long$year)
# look at various fire variables over time, colored by cluster #
#MODIS_numfires
# MODIS_maxFRP
# MODIS_sdJD
# MTBS_maxarea
# MTBS_pac
# FOD_perchum
# For each annual variable: subset the long table by variable-name prefix
# and add time = years elapsed since that source's first observed year.
# (Repetitive by design; could be factored into a helper.)
MODIS_numfires<-samples_long[grep("MODIS_Numfires_", samples_long$variable),]
MODIS_numfires$time<-MODIS_numfires$year - min(MODIS_numfires$year)
MTBS_numfires<-samples_long[grep("MTBS_Numfires_", samples_long$variable),]
MTBS_numfires$time<-MTBS_numfires$year - min(MTBS_numfires$year)
FOD_numfires<-samples_long[grep("FOD_Numfires_", samples_long$variable),]
FOD_numfires$time<-FOD_numfires$year - min(FOD_numfires$year)
MODIS_maxFRP<-samples_long[grep("MODIS_maxFRP_", samples_long$variable),]
MODIS_maxFRP$time<-MODIS_maxFRP$year - min(MODIS_maxFRP$year)
MODIS_meanFRP<-samples_long[grep("MODIS_meanFRP_", samples_long$variable),]
MODIS_meanFRP$time<-MODIS_meanFRP$year - min(MODIS_meanFRP$year)
MTBS_maxArea<-samples_long[grep("MTBS_maxArea_", samples_long$variable),]
MTBS_maxArea$time<-MTBS_maxArea$year - min(MTBS_maxArea$year)
MTBS_meanArea<-samples_long[grep("MTBS_meanArea_", samples_long$variable),]
MTBS_meanArea$time<-MTBS_meanArea$year - min(MTBS_meanArea$year)
MTBS_sumArea<-samples_long[grep("MTBS_sumArea_", samples_long$variable),]
MTBS_sumArea$time<-MTBS_sumArea$year - min(MTBS_sumArea$year)
FOD_maxArea<-samples_long[grep("FOD_maxArea_", samples_long$variable),]
FOD_maxArea$time<-FOD_maxArea$year - min(FOD_maxArea$year)
FOD_meanArea<-samples_long[grep("FOD_meanArea_", samples_long$variable),]
FOD_meanArea$time<-FOD_meanArea$year - min(FOD_meanArea$year)
FOD_sumArea<-samples_long[grep("FOD_sumArea_", samples_long$variable),]
FOD_sumArea$time<-FOD_sumArea$year - min(FOD_sumArea$year)
MTBS_stdJD<-samples_long[grep("MTBS_stdJD_", samples_long$variable),]
MTBS_stdJD$time<-MTBS_stdJD$year - min(MTBS_stdJD$year)
MODIS_stdJD<-samples_long[grep("MODIS_stdJD_", samples_long$variable),]
MODIS_stdJD$time<-MODIS_stdJD$year - min(MODIS_stdJD$year)
FOD_stdJD<-samples_long[grep("FOD_stdJD_", samples_long$variable),]
FOD_stdJD$time<-FOD_stdJD$year - min(FOD_stdJD$year)
Perc_human<-samples_long[grep("FOD_Number_fires_human_", samples_long$variable),]
Perc_human$time<-Perc_human$year - min(Perc_human$year)
# Mean annual value of one fire characteristic per pyrome.
# title: long-format data frame with columns kmeans8 (pyrome id),
#        time (years since first year), and value (annual observation).
# Returns a dplyr summary with one row per kmeans8 x time combination,
# carrying count (number of observations) and value_by_group (mean of
# value with NAs removed).
mean_annual_value_by_group <- function(title) {
  title %>%
    group_by(kmeans8, time) %>%
    summarise(count = n(),
              value_by_group = mean(value, na.rm = TRUE))
}
# create a list of these characteristics to pass to function above
# Order here fixes the characteristic index i used throughout the rest of
# the chunk (1-15).
fire_chars<-list(MODIS_numfires, MTBS_numfires, FOD_numfires, MODIS_meanFRP, MODIS_maxFRP, MTBS_meanArea, MTBS_maxArea, FOD_meanArea, FOD_maxArea, MTBS_sumArea, FOD_sumArea, MTBS_stdJD, MODIS_stdJD, FOD_stdJD, Perc_human)
# pass this list to function above
# plyr masks dplyr's grouped summarise, breaking the helper below.
# NOTE(review): detach() errors if plyr is not currently attached -
# consider guarding with "package:plyr" %in% search().
detach("package:plyr", unload=TRUE)
grouped_list=vector("list", 15)
for (i in 1:15){
grouped_list[[i]]<-mean_annual_value_by_group(fire_chars[[i]])
}
# if get an error - detach("package:plyr", unload=TRUE)
# names_vector (characteristic labels) is defined earlier in the file
names(grouped_list)<-names_vector
# Recover calendar year from the time offset; the start year depends on the
# data source: MODIS 2003, MTBS 1984, FOD 1992.
for(i in c(1,4, 5, 12)){
grouped_list[[i]]$year<-grouped_list[[i]]$time+2003
}
for(i in c(2, 6, 7, 10, 13)){
grouped_list[[i]]$year<-grouped_list[[i]]$time+1984
}
for(i in c(3, 8, 9, 11, 14, 15)){
grouped_list[[i]]$year<-grouped_list[[i]]$time+1992
}
# rlist is only needed for the (disabled) list.save call below
library(rlist)
# list.save(grouped_list, file = "Results/grouped_list.rds")
# Fit linear model for each
library(nlme)
# Separate OLS fit per pyrome (nlme::lmList: value ~ time within kmeans8)
# for each of the 15 characteristics; slope = annual trend.
lm_summary_list<-vector("list", 15)
for (i in 1:15){
lm_summary_list[[i]]<-summary(lmList(value ~ time | kmeans8, data=fire_chars[[i]], na.action=na.omit))
}
# Slopes of groups 1-8
slopes<-data.frame(matrix(NA, nrow = 15, ncol = 8))
names(slopes)<-c("1", "2", "3", "4", "5", "6", "7", "8")
kmeans_num<-8
# The summary coefficient array is indexed linearly: with 8 groups and 4
# statistics per term (Estimate, Std. Error, t value, p value), elements
# (8*4)+n are the time-slope estimates for group n, (8*5)+n the slope SEs,
# and (8*7)+n the slope p-values.
# NOTE(review): this linear indexing silently breaks if the number of
# groups ever differs from 8 - confirm dims of $coefficients.
for (i in 1:15){
for (n in 1:kmeans_num){
slopes[i, n]<-round(lm_summary_list[[i]]$coefficients[((8*4)+n)], 2)
}
}
# get pyrome with steepest slope for each characteristic (row)
pyrome_steep_slope<-as.numeric(colnames(slopes)[max.col(slopes)])
steep_slope<-apply(slopes, 1, max)
# Overwrite the numeric slopes with display strings "est (+/-se)" plus a
# significance star code on the slope p-value (*** p<=0.01, ** p<=0.05,
# * p<=0.1).
for (i in 1:15){
for (n in 1:kmeans_num){
slopes[i, n]<-paste0(round(lm_summary_list[[i]]$coefficients[((8*4)+n)], 2), " (+/-", round(lm_summary_list[[i]]$coefficients[((8*5)+n)], 2), ")",
ifelse(lm_summary_list[[i]]$coefficients[((8*7)+n)]>0.1, " ",
ifelse(lm_summary_list[[i]]$coefficients[((8*7)+n)]>0.05 & lm_summary_list[[i]]$coefficients[((8*7)+n)]<=0.1, "*",
ifelse(lm_summary_list[[i]]$coefficients[((8*7)+n)]>0.01 & lm_summary_list[[i]]$coefficients[((8*7)+n)]<=0.05, "**",
ifelse(lm_summary_list[[i]]$coefficients[((8*7)+n)]<=0.01, "***", NA)))))
}
}
# write.table(slopes, "Results/slopes8.txt")
# upper ci of slopes (slopes_ci) of groups 1-8
slopes_ci<-data.frame(matrix(NA, nrow = 15, ncol = 8))
names(slopes_ci)<-c("1", "2", "3", "4", "5", "6", "7", "8")
# upper end of ci for slopes
# NOTE(review): this is estimate + 1*SE, not a conventional ~95% bound
# (estimate + 1.96*SE) - confirm which interval is intended.
for (i in 1:15){
for (n in 1:kmeans_num){
slopes_ci[i, n]<-round(lm_summary_list[[i]]$coefficients[((8*4)+n)], 4) + round(lm_summary_list[[i]]$coefficients[((8*5)+n)], 4)
}
}
# get pyromes with upper ci >= steepest slope for each characteristic (row), excluding one w steepest
# i.e. pyromes whose trend is statistically indistinguishable from the
# steepest trend for that characteristic.
steep_ci<-vector(mode="list", length=15)
for(i in 1:15){
steep_ci[[i]]<- colnames(slopes_ci)[apply(slopes_ci[i,], 2, function(x) any(x>=steep_slope[i]))]
steep_ci[[i]]<-steep_ci[[i]][steep_ci[[i]]!=pyrome_steep_slope[i]]
}
# steep_ci
# Table of values and slopes of characteristics
# fire_characteristics_pyrome8
# Prepend the characteristic labels to both the value table and the slope
# table, then interleave them row-wise (gdata::interleave) so each
# characteristic gets a "Value" row followed by a "Slope" row.
fire_characteristics_pyrome8<-cbind(names_vector, fire_characteristics_pyrome8)
# slopes
slopes<-cbind(names_vector, slopes)
library(gdata)
# setNames to identical column names so interleave() can stack the frames
char_slopes_merged<-do.call(interleave, lapply(list(fire_characteristics_pyrome8, slopes), setNames, paste0("V", 1:ncol(fire_characteristics_pyrome8))))
# Characteristic label on the Value row only, blank on the Slope row
table_char_slopes<-cbind(c(rbind(names_vector, (rep(c(" "), 15)))), (rep(c("Value", "Slope"),15)), char_slopes_merged[,2:9])
names(table_char_slopes)<-c("Fire characteristic", " ", paste0("Pyrome ", 1:8))
# write.table(table_char_slopes, "Results/table_char_slopes.txt")
# Pyrome number with highest mean value for each characteristic is in vector high_char
# Pyrome number with highest slope for each characteristic is in vector pyrome_steep_slope
# Syntax for making those values red and bold is like below for cell [1,6]:
# table_char_slopes_latex[1,6] <- cell_spec(table_char_slopes_latex[1,6], "latex", bold=TRUE, color = "red")
# loop through to set the pyromes with the highest values for each characteristic (added 2 to column bc first 2 rows characters)
table_char_slopes_latex<-table_char_slopes
# Interleave the "highest value" and "steepest slope" pyrome ids so row i of
# the 30-row table maps to the right highlighted column (offset +2 for the
# two label columns). cell_spec is presumably kableExtra::cell_spec - loaded
# elsewhere in the file.
high_char_slopes<-as.numeric(c(rbind(high_char, pyrome_steep_slope)))
for (i in 1:30){
table_char_slopes_latex[i, (high_char_slopes[i]+2)] <- cell_spec(table_char_slopes_latex[i, (high_char_slopes[i]+2)], "latex", bold=TRUE, color = "red")
}
# Pyrome numbers where mean value not significantly different from the highest for each characteristic is in list high_char_not_diff. Remeber blank row in between each, so change row num accordingly (*2-1). Add 2 to col num to account for char cols
high_char_not_diff_30rows<-vector(mode="list", length=30)