This repository has been archived by the owner on Jul 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathblock_lnlp_interface.R
261 lines (249 loc) · 11.3 KB
/
block_lnlp_interface.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#' Perform generalized forecasting using simplex projection or s-map
#'
#' \code{\link{block_lnlp}} uses multiple time series given as input to generate
#' an attractor reconstruction, and then applies the simplex projection or
#' s-map algorithm to make forecasts. This method generalizes the
#' \code{\link{simplex}} and \code{\link{s_map}} routines, and allows for
#' "mixed" embeddings, where multiple time series can be used as different
#' dimensions of an attractor reconstruction.
#'
#' The default parameters are set so that passing a vector as the only argument
#' will use that vector to predict itself one time step ahead. If a matrix or
#' data.frame is given as the only argument, the first column will be
#' predicted (one time step ahead), using the remaining columns as the
#' embedding. Rownames will be converted to numeric if possible to be used as
#' the time index, otherwise 1:NROW will be used instead. The default lib and
#' pred are for leave-one-out cross-validation over the whole time series,
#' and returning just the forecast statistics.
#'
#' \code{norm = 2} (default) uses the "L2 norm", Euclidean distance:
#' \deqn{distance(a,b) := \sqrt{\sum_i{(a_i - b_i)^2}}
#' }{distance(a, b) := \sqrt(\sum(a_i - b_i)^2)}
#' \code{norm = 1} uses the "L1 norm", Manhattan distance:
#' \deqn{distance(a,b) := \sum_i{|a_i - b_i|}
#' }{distance(a, b) := \sum|a_i - b_i|}
#' Other values generalize the L1 and L2 norm to use the given argument as the
#' exponent, P, as:
#' \deqn{distance(a,b) := \sum_i{(a_i - b_i)^P}^{1/P}
#' }{distance(a, b) := (\sum(a_i - b_i)^P)^(1/P)}
#'
#' method "simplex" (default) uses the simplex projection forecasting algorithm
#'
#' method "s-map" uses the s-map forecasting algorithm
#'
#' @param block either a vector to be used as the time series, or a
#' data.frame or matrix where each column is a time series
#' @param lib a 2-column matrix (or 2-element vector) where each row specifies
#' the first and last *rows* of the time series to use for attractor
#' reconstruction
#' @param pred (same format as lib), but specifying the sections of the time
#' series to forecast.
#' @param norm the distance measure to use. see 'Details'
#' @param method the prediction method to use. see 'Details'
#' @param tp the prediction horizon (how far ahead to forecast)
#' @param num_neighbors the number of nearest neighbors to use. Note that the
#' default value will change depending on the method selected. (any of "e+1",
#' "E+1", "e + 1", "E + 1" will peg this parameter to E+1 for each run, any
#' value < 1 will use all possible neighbors.)
#' @param columns either a vector with the columns to use (indices or names),
#' or a list of such columns
#' @param target_column the index (or name) of the column to forecast
#' @param stats_only specify whether to output just the forecast statistics or
#' the raw predictions for each run
#' @param first_column_time indicates whether the first column of the given
#' block is a time column (and therefore excluded when indexing)
#' @param exclusion_radius excludes vectors from the search space of nearest
#' neighbors if their *time index* is within exclusion_radius (NULL turns
#' this option off)
#' @param epsilon excludes vectors from the search space of nearest neighbors
#' if their *distance* is farther away than epsilon (NULL turns this option
#' off)
#' @param theta the nonlinear tuning parameter (theta is only relevant if
#' method == "s-map")
#' @param silent prevents warning messages from being printed to the R console
#' @param save_smap_coefficients specifies whether to include the s_map
#' coefficients with the output (and forces stats_only = FALSE, as well)
#' @return A data.frame with components for the parameters and forecast
#' statistics:
#' \tabular{ll}{
#' \code{cols} \tab embedding\cr
#' \code{tp} \tab prediction horizon\cr
#' \code{nn} \tab number of neighbors\cr
#' \code{num_pred} \tab number of predictions\cr
#' \code{rho} \tab correlation coefficient between observations and
#' predictions\cr
#' \code{mae} \tab mean absolute error\cr
#' \code{rmse} \tab root mean square error\cr
#' \code{perc} \tab percent correct sign\cr
#' \code{p_val} \tab p-value that rho is significantly greater than 0 using
#' Fisher's z-transformation\cr
#' \code{const_pred_rho} \tab same as \code{rho}, but for the constant predictor\cr
#' \code{const_pred_mae} \tab same as \code{mae}, but for the constant predictor\cr
#' \code{const_pred_rmse} \tab same as \code{rmse}, but for the constant predictor\cr
#' \code{const_pred_perc} \tab same as \code{perc}, but for the constant predictor\cr
#' \code{const_p_val} \tab same as \code{p_val}, but for the constant predictor\cr
#' \code{model_output} \tab data.frame with columns for the time index,
#' observations, predictions, and estimated prediction variance
#' (if \code{stats_only == FALSE})\cr
#' }
#' If "s-map" is the method, then the same, but with additional columns:
#' \tabular{ll}{
#' \code{theta} \tab the nonlinear tuning parameter\cr
#' \code{smap_coefficients} \tab data.frame with columns for the s-map
#' coefficients (if \code{save_smap_coefficients == TRUE})\cr
#' \code{smap_coefficient_covariances} \tab list of covariance matrices for
#' the s-map coefficients (if \code{save_smap_coefficients == TRUE})\cr
#' }
#' @examples
#' data("two_species_model")
#' block <- two_species_model[1:200,]
#' block_lnlp(block, columns = c("x", "y"), first_column_time = TRUE)
#'
block_lnlp <- function(block, lib = c(1, NROW(block)), pred = lib,
norm = 2,
method = c("simplex", "s-map"),
tp = 1,
num_neighbors = switch(match.arg(method),
"simplex" = "e+1", "s-map" = 0),
columns = NULL,
target_column = 1, stats_only = TRUE,
first_column_time = FALSE,
exclusion_radius = NULL, epsilon = NULL, theta = NULL,
silent = FALSE, save_smap_coefficients = FALSE)
{
# make new model object
model <- new(BlockLNLP)
# setup data
dat <- setup_time_and_block(block, first_column_time)
time <- dat$time
block <- dat$block
model$set_time(time)
model$set_block(block)
model$set_target_column(convert_to_column_indices(target_column, block,
silent = silent))
# setup norm and pred types
model$set_norm(as.numeric(norm))
model$set_pred_type(switch(match.arg(method), "simplex" = 2, "s-map" = 1))
if (match.arg(method) == "s-map")
{
if (is.null(theta))
theta <- 0
if (save_smap_coefficients)
{
stats_only <- FALSE
}
model$save_smap_coefficients()
}
# setup lib and pred ranges
lib <- coerce_lib(lib, silent = silent)
pred <- coerce_lib(pred, silent = silent)
model$set_lib(lib)
model$set_pred(pred)
# handle remaining arguments and flags
setup_model_flags(model, exclusion_radius, epsilon, silent)
# convert embeddings to column indices
if (is.null(names(block)))
{
names(block) <- paste("ts_", seq_len(NCOL(block)))
}
if (is.null(columns))
{
columns <- list(seq_len(NCOL(block)))
} else if (is.list(columns)) {
columns <- lapply(columns, function(embedding) {
convert_to_column_indices(embedding, block, silent = silent)
})
} else if (is.vector(columns)) {
columns <- list(convert_to_column_indices(columns, block, silent = silent))
} else if (is.matrix(columns)) {
columns <- lapply(seq_len(NROW(columns)), function(i) {
convert_to_column_indices(columns[i, ], block, silent = silent)})
}
embedding_index <- seq_along(columns)
# setup other params in data.frame
if (match.arg(method) == "s-map")
{
params <- expand.grid(tp, num_neighbors, theta, embedding_index)
names(params) <- c("tp", "nn", "theta", "embedding")
params <- params[, c("embedding", "tp", "nn", "theta")]
e_plus_1_index <- match(num_neighbors,
c("e+1", "E+1", "e + 1", "E + 1"))
if (any(e_plus_1_index, na.rm = TRUE))
params$nn <- 1 + vapply(columns, length, 0)
params$nn <- as.numeric(params$nn)
# check params
idx <- vapply(seq(NROW(params)), function(i) {
check_params_against_lib(1, 0, params$tp[i], lib, silent = silent)},
FALSE)
if (!any(idx))
{
stop("No valid parameter combinations to run, stopping.")
}
params <- params[idx, ]
# apply model prediction function to params
output <- lapply(seq_len(NROW(params)), function(i) {
model$set_embedding(columns[[params$embedding[i]]])
model$set_params(params$tp[i], params$nn[i])
model$set_theta(params$theta[i])
model$run()
if (silent)
{
suppressWarnings( df <- model$get_stats() )
} else {
df <- model$get_stats()
}
if (!stats_only)
{
df$model_output <- I(list(model$get_output()))
if (save_smap_coefficients)
{
df$smap_coefficients <-
I(list(model$get_smap_coefficients()))
df$smap_coefficient_covariances <-
I(list(model$get_smap_coefficient_covariances()))
}
}
return(df)
})
} else {
# simplex
params <- expand.grid(tp, num_neighbors, embedding_index)
names(params) <- c("tp", "nn", "embedding")
params <- params[, c("embedding", "tp", "nn")]
e_plus_1_index <- match(num_neighbors,
c("e+1", "E+1", "e + 1", "E + 1"))
if (any(e_plus_1_index, na.rm = TRUE))
params$nn <- 1 + vapply(columns, length, 0)
params$nn <- as.numeric(params$nn)
# check params
idx <- vapply(seq(NROW(params)), function(i) {
check_params_against_lib(1, 0, params$tp[i], lib, silent = silent)},
FALSE)
if (!any(idx))
{
stop("No valid parameter combinations to run, stopping.")
}
params <- params[idx, ]
# apply model prediction function to params
output <- lapply(seq_len(NROW(params)), function(i) {
model$set_embedding(columns[[params$embedding[i]]])
model$set_params(params$tp[i], params$nn[i])
model$run()
if (silent)
{
suppressWarnings( df <- model$get_stats() )
} else {
df <- model$get_stats()
}
if (!stats_only)
{
df$model_output <- I(list(model$get_output()))
}
return(df)
})
}
# create embedding column in params
params$embedding <- vapply(params$embedding, function(i) {
paste(columns[[i]], sep = "", collapse = ", ")}, "")
return(cbind(params, do.call(rbind, output), row.names = NULL))
}