forked from uataq/co2usa_data_synthesis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathco2usa_load_netCDF.r
134 lines (108 loc) · 5.31 KB
/
co2usa_load_netCDF.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Load the CO2-USA Data Synthesis files from netCDF
#
# USAGE:
#
# The CO2-USA synthesis data is available to download from the ORNL DAAC:
# https://doi.org/10.3334/ORNLDAAC/1743
#
# To download the data, first sign into your account (or create one if you don't have one).
# Next, click on "Download Data" to download the entire data set in a zip file.
# Extract the netCDF files to a folder on your computer.
#
# The CO2-USA synthesis data files should be all saved in a single directory:
# /synthesis_output/[netCDF_file.nc]
#
# For example, for the CO2 data file for Boston it would be:
# /synthesis_output/boston_all_sites_co2_1_hour_R0_2019-07-09.nc
#
# In the code below, choose the cities you want to extract, the species, and if you want to create plots.
# After running the script, the CO2_USA greenhouse gas data will all be contained within the
# 'co2_usa' list variable.
#
# For more information, visit the CO2-USA GitHub repository:
# https://github.com/loganemitchell/co2usa_data_synthesis
#
# Written by Logan Mitchell ([email protected]) and Ben Fasoli
# University of Utah
# Last updated: 2019-10-30
# Ensure required packages are installed, then load them.
# Note: installed.packages() returns a matrix; the original code compared
# against the whole matrix (every column), which can false-positive if any
# metadata cell happens to equal a package name. Compare against the row
# names (the actual installed package names) instead, and install all
# missing packages in a single call over https.
required_pkgs <- c('tidyverse', 'ncdf4', 'ggplot2', 'RColorBrewer', 'plotly')
missing_pkgs <- required_pkgs[!required_pkgs %in% rownames(installed.packages())]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = 'https://cran.us.r-project.org')
}
library(tidyverse)
library(ncdf4)
library(ggplot2)
library(RColorBrewer)
library(plotly)
# Clear the workspace (intentional for this interactive analysis script).
rm(list = ls())

######## Set the following options: ########

# City options: 'boston', 'indianapolis', 'los_angeles', 'northeast_corridor', 'portland', 'salt_lake_city', 'san_francisco_baaqmd', 'san_francisco_beacon'
# Note: multiple cities may be selected.
cities <- c('boston', 'los_angeles')

# Greenhouse gas species options: 'co2', 'ch4', or 'co'
species <- 'co2'

# Produce figures for each city? The script runs faster if no figures are produced.
make_co2_usa_plots <- 'y'  # Options: 'y' or 'n'

# Choose the path to the location on your computer where the CO2-USA Synthesis
# data files have been saved, called the 'read_folder'.
read_folder <- file.path('C:/Users', 'u0932260', 'gcloud.utah.edu', 'data', 'co2-usa', 'synthesis_output_ornl')

##############################################

# Check that the read_folder exists. All file access below builds absolute
# paths from read_folder, so there is no need to change the working directory
# (the original setwd() call was a redundant global side effect and has been
# removed).
if (!dir.exists(read_folder)) {
  stop('Cannot find the specified read folder. Check the file path to make sure it is correct.',
       call. = FALSE)
}

# Create the data structures that will hold the results:
#   co2_usa[[city]]         - per-site data frames plus an 'all_sites' frame
#   co2_usa_figures[[city]] - ggplot objects (only if plots are requested)
co2_usa <- list()
co2_usa_figures <- list()
# Loop over the selected cities: locate each city's netCDF files, extract the
# time axis, every data variable, and all attributes into the co2_usa list,
# and optionally plot all sites per city.
for (ii in seq_along(cities)) {
  city <- cities[ii]
  # netCDF file names for this city/species,
  # e.g. boston_co2_<site>_1_hour_R0_<date>.nc
  fn <- list.files(path = read_folder,
                   pattern = paste0(city, '_', species, '_', '.*nc$'),
                   include.dirs = TRUE)
  if (length(fn) == 0) {
    warning(paste0('Looked for data files for ', city, ' here:\n',
                   file.path(read_folder),
                   '\nHowever none were found. Either they do not exist or the path name is incorrect.\n',
                   'Check the path and file names. Skipping ', city, ' data files for now.'))
    next
  }
  # Data frame that will accumulate all of the sites for this city
  co2_usa[[city]][['all_sites']] <- data.frame()
  for (jj in seq_along(fn)) {
    # Open the netCDF file for reading (closed below once fully read)
    info <- nc_open(file.path(read_folder, fn[jj]))
    # site/inlet name: the part of the file name between the species token
    # and the '_1_hour' suffix
    site <- substr(fn[jj],
                   regexpr(species, fn[jj]) + str_length(species) + 1,
                   regexpr('_1_hour', fn[jj]) - 1)
    # Global attributes for that site
    co2_usa[[city]][['Attributes']][[site]] <- list('global' = ncatt_get(info, varid = 0))
    # Time variable: seconds since the Unix epoch, stored as POSIXct in UTC
    co2_usa[[city]][[site]] <- data.frame(
      time = as.POSIXct(info$dim$time$vals, tz = 'UTC', origin = "1970-01-01 00:00.00"))
    # Extract each variable's name, data, and attributes
    var_names <- attributes(info$var)$names
    for (kk in seq_along(var_names)) {
      var_name <- var_names[kk]
      # Variable attributes
      co2_usa[[city]][['Attributes']][[site]][[var_name]] <- ncatt_get(info, var_name)
      # Variable data
      co2_usa[[city]][[site]][var_name] <- ncvar_get(info, var_name)
    }
    # Close the netCDF handle now that everything has been read. The original
    # script never closed it, leaking one file handle per site file.
    nc_close(info)
    # Add the site_name data column to the data frame (to make plotting easier):
    co2_usa[[city]][[site]]['site_name'] <- site
    # Bind all the sites together into a single data frame for easier plotting:
    co2_usa[[city]][['all_sites']] <- bind_rows(co2_usa[[city]][['all_sites']],
                                                co2_usa[[city]][[site]])
  }
  if (make_co2_usa_plots == 'y') {
    # Time-series plot of every site in the city, one color per site
    colourCount <- length(unique(co2_usa[[city]][['all_sites']]$site_name))
    getPalette <- colorRampPalette(brewer.pal(9, "Set1"))
    # aes_string() is deprecated in ggplot2; use the .data pronoun to map the
    # dynamically-named species column to the y axis instead.
    co2_usa_figures[[city]][['all_sites_fig']] <-
      ggplot(data = co2_usa[[city]][['all_sites']],
             aes(x = time, y = .data[[species]])) +
      geom_line(aes(color = site_name), alpha = 0.9) +
      scale_colour_manual(values = getPalette(colourCount)) +
      theme_classic() +
      labs(color = 'Site',
           title = city,
           x = '')
    print(ggplotly(co2_usa_figures[[city]][['all_sites_fig']]))
  }
}