This repository has been archived by the owner on Jul 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathlnlp_interface.R
252 lines (232 loc) · 9.5 KB
/
lnlp_interface.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#' @name simplex
#' @aliases s_map
#'
#' @title Perform univariate forecasting
#' @details \code{\link{simplex}} is typically applied, and the embedding dimension
#' varied, to find an optimal embedding dimension for the data. Thus, the
#' default parameters are set so that passing a time series as the only
#' argument will run over E = 1:10 (embedding dimension), using leave-one-out
#' cross-validation over the whole time series, and returning just the forecast
#' statistics.
#'
#' \code{\link{s_map}} is typically applied, with fixed embedding dimension, and theta
#' varied, to test for nonlinear dynamics in the data. Thus, the default
#' parameters are set so that passing a time series as the only argument will
#' run over a default list of thetas (0, 0.0001, 0.0003, 0.001, 0.003, 0.01,
#' 0.03, 0.1, 0.3, 0.5, 0.75, 1.0, 1.5, 2, 3, 4, 6, and 8), using E = 1,
#' leave-one-out cross-validation over the whole time series, and returning
#' just the forecast statistics.
#'
#' \code{norm = 2} (default) uses the "L2 norm", Euclidean distance:
#' \deqn{distance(a,b) := \sqrt{\sum_i{(a_i - b_i)^2}}
#' }{distance(a, b) := \sqrt(\sum(a_i - b_i)^2)}
#' \code{norm = 1} uses the "L1 norm", Manhattan distance:
#' \deqn{distance(a,b) := \sum_i{|a_i - b_i|}
#' }{distance(a, b) := \sum|a_i - b_i|}
#' Other values generalize the L1 and L2 norm to use the given argument as the
#' exponent, P, as:
#' \deqn{distance(a,b) := \sum_i{(a_i - b_i)^P}^{1/P}
#' }{distance(a, b) := (\sum(a_i - b_i)^P)^(1/P)}
#'
#' @inheritParams block_lnlp
#' @param time_series either a vector to be used as the time series, or a
#' data.frame or matrix with at least 2 columns (in which case the first
#' column will be used as the time index, and the second column as the time
#' series)
#' @param E the embedding dimensions to use for time delay embedding
#' @param tau the lag to use for time delay embedding
#' @param tp the prediction horizon (how far ahead to forecast)
#' @rdname simplex
#'
#' @description \code{\link{simplex}} uses time delay embedding on a single time
#' series to generate an attractor reconstruction, and then applies the
#' simplex projection algorithm to make forecasts.
#'
#' @return For \code{\link{simplex}}, a data.frame with components for the
#' parameters and forecast statistics:
#' \tabular{ll}{
#' \code{E} \tab embedding dimension\cr
#' \code{tau} \tab time lag\cr
#' \code{tp} \tab prediction horizon\cr
#' \code{nn} \tab number of neighbors\cr
#' \code{num_pred} \tab number of predictions\cr
#' \code{rho} \tab correlation coefficient between observations and
#' predictions\cr
#' \code{mae} \tab mean absolute error\cr
#' \code{rmse} \tab root mean square error\cr
#' \code{perc} \tab percent correct sign\cr
#' \code{p_val} \tab p-value that rho is significantly greater than 0 using
#' Fisher's z-transformation\cr
#' \code{const_pred_rho} \tab same as \code{rho}, but for the constant predictor\cr
#' \code{const_pred_mae} \tab same as \code{mae}, but for the constant predictor\cr
#' \code{const_pred_rmse} \tab same as \code{rmse}, but for the constant predictor\cr
#' \code{const_pred_perc} \tab same as \code{perc}, but for the constant predictor\cr
#' \code{const_p_val} \tab same as \code{p_val}, but for the constant predictor\cr
#' \code{model_output} \tab data.frame with columns for the time index,
#' observations, predictions, and estimated prediction variance
#' (if \code{stats_only == FALSE})\cr
#' }
#' @examples
#' data("two_species_model")
#' ts <- two_species_model$x[1:200]
#' simplex(ts, lib = c(1, 100), pred = c(101, 200))
#'
#' data("two_species_model")
#' ts <- two_species_model$x[1:200]
#' simplex(ts, stats_only = FALSE)
#'
simplex <- function(time_series, lib = c(1, NROW(time_series)), pred = lib,
norm = 2,
E = 1:10, tau = 1, tp = 1, num_neighbors = "e+1",
stats_only = TRUE, exclusion_radius = NULL, epsilon = NULL,
silent = FALSE)
{
# make new model object
model <- new(LNLP)
# setup data
dat <- setup_time_and_time_series(time_series)
time <- dat$time
time_series <- dat$time_series
model$set_time(time)
model$set_time_series(time_series)
# setup norm and pred types
model$set_norm(norm)
model$set_pred_type(2)
# setup lib and pred ranges
lib <- coerce_lib(lib, silent = silent)
pred <- coerce_lib(pred, silent = silent)
model$set_lib(lib)
model$set_pred(pred)
# handle remaining arguments and flags
setup_model_flags(model, exclusion_radius, epsilon, silent)
# setup other params in data.frame
params <- expand.grid(tp, num_neighbors, tau, E)
names(params) <- c("tp", "nn", "tau", "E")
params <- params[, c("E", "tau", "tp", "nn")]
e_plus_1_index <- match(num_neighbors, c("e+1", "E+1", "e + 1", "E + 1"))
if (any(e_plus_1_index, na.rm = TRUE))
params$nn <- params$E + 1
params$nn <- as.numeric(params$nn)
# check params
idx <- vapply(seq(NROW(params)), function(i) {
check_params_against_lib(params$E[i], params$tau[i], params$tp[i], lib,
silent = silent)}, FALSE)
if (!any(idx))
{
stop("No valid parameter combinations to run, stopping.")
}
params <- params[idx, ]
# apply model prediction function to params
output <- lapply(seq(NROW(params)), function(i) {
model$set_params(params$E[i], params$tau[i], params$tp[i], params$nn[i])
model$run()
if (silent)
{
suppressWarnings( df <- model$get_stats() )
} else {
df <- model$get_stats()
}
if (!stats_only)
{
df$model_output <- I(list(model$get_output()))
}
return(df)
})
return(cbind(params, do.call(rbind, output), row.names = NULL))
}
#' @rdname simplex
#'
#' @description \code{\link{s_map}} is similar to \code{\link{simplex}}, but uses the S-map
#' algorithm to make forecasts.
#' @return For \code{\link{s_map}}, the same as for \code{\link{simplex}}, but
#' with additional columns:
#' \tabular{ll}{
#' \code{theta} \tab the nonlinear tuning parameter\cr
#' \code{smap_coefficients} \tab data.frame with columns for the s-map
#' coefficients (if \code{save_smap_coefficients == TRUE})\cr
#' \code{smap_coefficient_covariances} \tab list of covariance matrices for
#' the s-map coefficients (if \code{save_smap_coefficients == TRUE})\cr
#' }
#' @examples
#' data("two_species_model")
#' ts <- two_species_model$x[1:200]
#' s_map(ts, E = 2)
#'
#' data("two_species_model")
#' ts <- two_species_model$x[1:200]
#' s_map(ts, E = 2, theta = 1, save_smap_coefficients = TRUE)
#'
s_map <- function(time_series, lib = c(1, NROW(time_series)), pred = lib,
norm = 2,
E = 1, tau = 1, tp = 1, num_neighbors = 0,
theta = c(0, 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1,
0.3, 0.5, 0.75, 1.0, 1.5, 2, 3, 4, 6, 8),
stats_only = TRUE, exclusion_radius = NULL, epsilon = NULL,
silent = FALSE, save_smap_coefficients = FALSE)
{
# check inputs?
# make new model object
model <- new(LNLP)
# setup data
dat <- setup_time_and_time_series(time_series)
time <- dat$time
time_series <- dat$time_series
model$set_time(time)
model$set_time_series(time_series)
# setup norm and pred types
model$set_norm(norm)
model$set_pred_type(1)
# setup lib and pred ranges
lib <- coerce_lib(lib, silent = silent)
pred <- coerce_lib(pred, silent = silent)
model$set_lib(lib)
model$set_pred(pred)
# handle remaining arguments and flags
setup_model_flags(model, exclusion_radius, epsilon, silent)
# handle smap coefficients flag
if (save_smap_coefficients)
{
stats_only <- FALSE
model$save_smap_coefficients()
}
# setup other params in data.frame
params <- expand.grid(theta, tp, num_neighbors, tau, E)
names(params) <- c("theta", "tp", "nn", "tau", "E")
params <- params[, c("E", "tau", "tp", "nn", "theta")]
e_plus_1_index <- match(num_neighbors, c("e+1", "E+1", "e + 1", "E + 1"))
if (any(e_plus_1_index, na.rm = TRUE))
params$nn <- params$E + 1
params$nn <- as.numeric(params$nn)
# check params
idx <- vapply(seq(NROW(params)), function(i) {
check_params_against_lib(params$E[i], params$tau[i], params$tp[i], lib,
silent = silent)}, FALSE)
if (!any(idx))
{
stop("No valid parameter combinations to run, stopping.")
}
params <- params[idx, ]
# apply model prediction function to params
output <- lapply(seq_len(NROW(params)), function(i) {
model$set_params(params$E[i], params$tau[i], params$tp[i], params$nn[i])
model$set_theta(params$theta[i])
model$run()
if (silent)
{
suppressWarnings( df <- model$get_stats() )
} else {
df <- model$get_stats()
}
if (!stats_only)
{
df$model_output <- I(list(model$get_output()))
if (save_smap_coefficients)
{
df$smap_coefficients <- I(list(model$get_smap_coefficients()))
df$smap_coefficient_covariances <- I(list(model$get_smap_coefficient_covariances()))
}
}
return(df)
})
return(cbind(params, do.call(rbind, output), row.names = NULL))
}