From dfae6d56e03c9288930f8be076fa67629414cf23 Mon Sep 17 00:00:00 2001
From: Guilherme Bodin <32756941+guilhermebodin@users.noreply.github.com>
Date: Tue, 9 Feb 2021 23:36:56 -0300
Subject: [PATCH] Add Backtest (#125)

* Add backtest and time_limit_sec

* Add backtest
---
 Project.toml                      |  2 +-
 src/MLE.jl                        |  5 +--
 src/ScoreDrivenModels.jl          |  1 +
 src/backtest.jl                   | 56 +++++++++++++++++++++++++++++++
 src/opt_methods/IPNewton.jl       |  4 +--
 src/opt_methods/LBFGS.jl          |  4 +--
 src/opt_methods/NelderMead.jl     |  4 +--
 src/opt_methods/common_methods.jl | 10 +++---
 test/runtests.jl                  |  1 +
 test/test_backtest.jl             |  8 +++++
 10 files changed, 82 insertions(+), 13 deletions(-)
 create mode 100644 src/backtest.jl
 create mode 100644 test/test_backtest.jl

diff --git a/Project.toml b/Project.toml
index 427ffe1..53346cd 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "ScoreDrivenModels"
 uuid = "4a87933e-d659-11e9-0e65-7f40dedd4a3a"
 authors = ["guilhermebodin <guilherme.b.moraes@gmail.com>, raphaelsaavedra <raphael.saavedra93@gmail.com>"]
-version = "0.1.5"
+version = "0.1.6"
 
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
diff --git a/src/MLE.jl b/src/MLE.jl
index 5b21a27..404b958 100644
--- a/src/MLE.jl
+++ b/src/MLE.jl
@@ -95,7 +95,8 @@ function fit(gas::Model{D, T}, y::Vector{T};
              initial_params::Matrix{T} = DEFAULT_INITIAL_PARAM,
              opt_method::AbstractOptimizationMethod = NelderMead(gas, DEFAULT_NUM_SEEDS),
              verbose::Int = DEFAULT_VERBOSE,
-             throw_errors::Bool = false) where {D, T}
+             throw_errors::Bool = false,
+             time_limit_sec::Int = 10^8) where {D, T}
 
     verbose in [0, 1, 2, 3] || throw(ErrorException, "verbose argument must be in [0, 1, 2, 3]")
     # Number of initial_points and number of params to estimate
@@ -119,7 +120,7 @@ function fit(gas::Model{D, T}, y::Vector{T};
             func = TwiceDifferentiable(psi_tilde -> log_lik(psi_tilde, y, gas_fit, 
                                                         initial_params, unknowns, n), 
                                                         opt_method.initial_points[i])
-            opt_result = optimize(func, opt_method, verbose, i)
+            opt_result = optimize(func, opt_method, verbose, i, time_limit_sec)
             update_aux_estimation!(aux_est, func, opt_result)
             verbose >= 1 && println("Round $i of $n_initial_points - Log-likelihood: $(-opt_result.minimum * length(y))")
         catch err
diff --git a/src/ScoreDrivenModels.jl b/src/ScoreDrivenModels.jl
index b6c83f9..547af36 100644
--- a/src/ScoreDrivenModels.jl
+++ b/src/ScoreDrivenModels.jl
@@ -20,6 +20,7 @@ include("simulate.jl")
 include("diagnostics.jl")
 include("univariate_score_driven_recursion.jl")
 include("MLE.jl")
+include("backtest.jl")
 include("prints.jl")
 
 # Optimization methods
diff --git a/src/backtest.jl b/src/backtest.jl
new file mode 100644
index 0000000..9b8c228
--- /dev/null
+++ b/src/backtest.jl
@@ -0,0 +1,56 @@
+export backtest
+
+struct Backtest
+    abs_errors::Matrix{Float64}
+    crps_scores::Matrix{Float64}
+    function Backtest(n::Int, steps_ahead::Int)
+        abs_errors = Matrix{Float64}(undef, n, steps_ahead)
+        crps_scores = Matrix{Float64}(undef, n, steps_ahead)
+        return new(abs_errors, crps_scores)
+    end
+end
+
+discrete_crps_indicator_function(val::T, z::T) where {T} = val < z
+function crps(val::T, scenarios::Vector{T}) where {T}
+    sorted_scenarios = sort(scenarios)
+    m = length(scenarios)
+    crps_score = zero(T)
+    for i = 1:m
+        crps_score +=
+            (sorted_scenarios[i] - val) *
+            (m * discrete_crps_indicator_function(val, sorted_scenarios[i]) - i + 0.5)
+    end
+    return (2 / m^2) * crps_score
+end
+evaluate_abs_error(y::Vector{T}, forecast::Vector{T}) where T = abs.(y - forecast)
+function evaluate_crps(y::Vector{T}, scenarios::Matrix{T}) where {T}
+    crps_scores = Vector{T}(undef, length(y))
+    for k = 1:length(y)
+        crps_scores[k] = crps(y[k], scenarios[k, :])
+    end
+    return crps_scores
+end
+
+"""
+TODO
+"""
+function backtest(gas::Model{<:Distribution, T}, y::Vector{T}, steps_ahead::Int, start_idx::Int;
+                  S::Int = 1000,
+                  initial_params::Matrix{T} = stationary_initial_params(gas),
+                  opt_method = NelderMead(gas, DEFAULT_NUM_SEEDS)) where T
+    num_mle = length(y) - start_idx - steps_ahead
+    backtest = Backtest(num_mle, steps_ahead)
+    for i in 1:num_mle
+        println("Backtest: step $i of $num_mle")
+        gas_to_fit = deepcopy(gas)
+        y_to_fit = y[1:start_idx - 1 + i]
+        y_to_verify = y[start_idx + i:start_idx - 1 + i + steps_ahead]
+        ScoreDrivenModels.fit!(gas_to_fit, y_to_fit; initial_params=initial_params, opt_method=opt_method)
+        forec = forecast(y_to_fit, gas_to_fit, steps_ahead; S=S, initial_params=initial_params)
+        abs_errors = evaluate_abs_error(y_to_verify, forec.observation_forecast)
+        crps_scores = evaluate_crps(y_to_verify, forec.observation_scenarios)
+        backtest.abs_errors[i, :] = abs_errors
+        backtest.crps_scores[i, :] = crps_scores
+    end
+    return backtest
+end
\ No newline at end of file
diff --git a/src/opt_methods/IPNewton.jl b/src/opt_methods/IPNewton.jl
index b323f20..b490a95 100644
--- a/src/opt_methods/IPNewton.jl
+++ b/src/opt_methods/IPNewton.jl
@@ -39,7 +39,7 @@ function IPNewton(model::Model{D, T}, initial_points::Vector{Vector{T}}; f_tol::
     return IPNewton{T}(f_tol, g_tol, ub, lb, iterations, initial_points)
 end
 
-function optimize(func::Optim.TwiceDifferentiable, opt_method::IPNewton{T}, verbose::Int, i::Int) where T
+function optimize(func::Optim.TwiceDifferentiable, opt_method::IPNewton{T}, verbose::Int, i::Int, time_limit_sec::Int) where T
     cons = Optim.TwiceDifferentiableConstraints(opt_method.lb, opt_method.ub)
-    return optimize(func, opt_method, cons, Optim.IPNewton(), verbose, i)
+    return optimize(func, opt_method, cons, Optim.IPNewton(), verbose, i, time_limit_sec)
 end
diff --git a/src/opt_methods/LBFGS.jl b/src/opt_methods/LBFGS.jl
index ad9a85c..e4c2878 100644
--- a/src/opt_methods/LBFGS.jl
+++ b/src/opt_methods/LBFGS.jl
@@ -30,6 +30,6 @@ function LBFGS(model::Model{D, T}, initial_points::Vector{Vector{T}}; f_tol::T =
     return LBFGS{T}(f_tol, g_tol, iterations, initial_points)
 end
 
-function optimize(func::Optim.TwiceDifferentiable, opt_method::LBFGS{T}, verbose::Int, i::Int) where T
-    return optimize(func, opt_method, Optim.LBFGS(), verbose, i)
+function optimize(func::Optim.TwiceDifferentiable, opt_method::LBFGS{T}, verbose::Int, i::Int, time_limit_sec::Int) where T
+    return optimize(func, opt_method, Optim.LBFGS(), verbose, i, time_limit_sec)
 end
diff --git a/src/opt_methods/NelderMead.jl b/src/opt_methods/NelderMead.jl
index 4c4b5df..d2d921a 100644
--- a/src/opt_methods/NelderMead.jl
+++ b/src/opt_methods/NelderMead.jl
@@ -30,6 +30,6 @@ function NelderMead(model::Model{D, T}, initial_points::Vector{Vector{T}}; f_tol
     return NelderMead{T}(f_tol, g_tol, iterations, initial_points)
 end
 
-function optimize(func::Optim.TwiceDifferentiable, opt_method::NelderMead{T}, verbose::Int, i::Int) where T
-    return optimize(func, opt_method, Optim.NelderMead(), verbose, i)
+function optimize(func::Optim.TwiceDifferentiable, opt_method::NelderMead{T}, verbose::Int, i::Int, time_limit_sec::Int) where T
+    return optimize(func, opt_method, Optim.NelderMead(), verbose, i, time_limit_sec)
 end
diff --git a/src/opt_methods/common_methods.jl b/src/opt_methods/common_methods.jl
index dd18f96..d7a07e5 100644
--- a/src/opt_methods/common_methods.jl
+++ b/src/opt_methods/common_methods.jl
@@ -26,25 +26,27 @@ function create_initial_points(model::Model{D, T}, n_initial_points::Int, LB::T,
 end
 
 function optimize(func::Optim.TwiceDifferentiable, opt_method::AbstractOptimizationMethod{T},
-                  optimizer::Optim.AbstractOptimizer, verbose::Int, i::Int) where T
+                  optimizer::Optim.AbstractOptimizer, verbose::Int, i::Int, time_limit_sec::Int) where T
                   
     return Optim.optimize(func, opt_method.initial_points[i], optimizer,
                                                      Optim.Options(f_tol = opt_method.f_tol, 
                                                                    g_tol = opt_method.g_tol, 
                                                                    iterations = opt_method.iterations,
-                                                                   show_trace = show_trace(verbose) ))
+                                                                   show_trace = show_trace(verbose),
+                                                                   time_limit = time_limit_sec))
 end
 
 function optimize(func::Optim.TwiceDifferentiable, opt_method::AbstractOptimizationMethod{T},
                   cons::Optim.TwiceDifferentiableConstraints,
                   optimizer::Optim.AbstractOptimizer, 
-                  verbose::Int, i::Int) where T
+                  verbose::Int, i::Int, time_limit_sec::Int) where T
 
     return Optim.optimize(func, cons, opt_method.initial_points[i], optimizer,
                                                      Optim.Options(f_tol = opt_method.f_tol, 
                                                                    g_tol = opt_method.g_tol, 
                                                                    iterations = opt_method.iterations,
-                                                                   show_trace = show_trace(verbose) ))
+                                                                   show_trace = show_trace(verbose), 
+                                                                   time_limit = time_limit_sec))
 end
 
 show_trace(verbose::Int) =  (verbose == 3 ? true : false)
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 73305ff..7038bfa 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,3 +11,4 @@ include("test_initial_params.jl")
 include("test_diagnostics.jl")
 include("test_estimate.jl")
 include("test_simulate.jl")
+include("test_backtest.jl")
diff --git a/test/test_backtest.jl b/test/test_backtest.jl
new file mode 100644
index 0000000..2bf1a70
--- /dev/null
+++ b/test/test_backtest.jl
@@ -0,0 +1,8 @@
+@testset "Backtest" begin
+    ω = [0.1, 0.1]
+    A = [0.5 0; 0 0.5]
+    B = [0.5 0; 0 0.5]
+    simulation = simulate_GAS_1_1(Normal, 0.0, ω, A, B, 1)
+    gas = ScoreDrivenModels.Model(1, 1, Normal, 0.0)
+    bac = backtest(gas, simulation, 10, 4985)
+end
\ No newline at end of file