From dfae6d56e03c9288930f8be076fa67629414cf23 Mon Sep 17 00:00:00 2001 From: Guilherme Bodin <32756941+guilhermebodin@users.noreply.github.com> Date: Tue, 9 Feb 2021 23:36:56 -0300 Subject: [PATCH] Add Backtest (#125) * Add backtest and time_limit_sec * Add backtest --- Project.toml | 2 +- src/MLE.jl | 5 +-- src/ScoreDrivenModels.jl | 1 + src/backtest.jl | 56 +++++++++++++++++++++++++++++++ src/opt_methods/IPNewton.jl | 4 +-- src/opt_methods/LBFGS.jl | 4 +-- src/opt_methods/NelderMead.jl | 4 +-- src/opt_methods/common_methods.jl | 10 +++--- test/runtests.jl | 1 + test/test_backtest.jl | 8 +++++ 10 files changed, 82 insertions(+), 13 deletions(-) create mode 100644 src/backtest.jl create mode 100644 test/test_backtest.jl diff --git a/Project.toml b/Project.toml index 427ffe1..53346cd 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ScoreDrivenModels" uuid = "4a87933e-d659-11e9-0e65-7f40dedd4a3a" authors = ["guilhermebodin , raphaelsaavedra "] -version = "0.1.5" +version = "0.1.6" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/src/MLE.jl b/src/MLE.jl index 5b21a27..404b958 100644 --- a/src/MLE.jl +++ b/src/MLE.jl @@ -95,7 +95,8 @@ function fit(gas::Model{D, T}, y::Vector{T}; initial_params::Matrix{T} = DEFAULT_INITIAL_PARAM, opt_method::AbstractOptimizationMethod = NelderMead(gas, DEFAULT_NUM_SEEDS), verbose::Int = DEFAULT_VERBOSE, - throw_errors::Bool = false) where {D, T} + throw_errors::Bool = false, + time_limit_sec::Int = 10^8) where {D, T} verbose in [0, 1, 2, 3] || throw(ErrorException, "verbose argument must be in [0, 1, 2, 3]") # Number of initial_points and number of params to estimate @@ -119,7 +120,7 @@ function fit(gas::Model{D, T}, y::Vector{T}; func = TwiceDifferentiable(psi_tilde -> log_lik(psi_tilde, y, gas_fit, initial_params, unknowns, n), opt_method.initial_points[i]) - opt_result = optimize(func, opt_method, verbose, i) + opt_result = optimize(func, opt_method, verbose, i, time_limit_sec) update_aux_estimation!(aux_est, func, opt_result) verbose >= 1 && println("Round $i of $n_initial_points - Log-likelihood: $(-opt_result.minimum * length(y))") catch err diff --git a/src/ScoreDrivenModels.jl b/src/ScoreDrivenModels.jl index b6c83f9..547af36 100644 --- a/src/ScoreDrivenModels.jl +++ b/src/ScoreDrivenModels.jl @@ -20,6 +20,7 @@ include("simulate.jl") include("diagnostics.jl") include("univariate_score_driven_recursion.jl") include("MLE.jl") +include("backtest.jl") include("prints.jl") # Optimization methods diff --git a/src/backtest.jl b/src/backtest.jl new file mode 100644 index 0000000..9b8c228 --- /dev/null +++ b/src/backtest.jl @@ -0,0 +1,56 @@ +export backtest + +struct Backtest + abs_errors::Matrix{Float64} + crps_scores::Matrix{Float64} + function Backtest(n::Int, steps_ahead::Int) + abs_errors = Matrix{Float64}(undef, n, steps_ahead) + crps_scores = Matrix{Float64}(undef, n, steps_ahead) + return new(abs_errors, crps_scores) + end +end + +discrete_crps_indicator_function(val::T, z::T) where {T} = val < z +function crps(val::T, scenarios::Vector{T}) where {T} + sorted_scenarios = sort(scenarios) + m = length(scenarios) + crps_score = zero(T) + for i = 1:m + crps_score += + (sorted_scenarios[i] - val) * + (m * discrete_crps_indicator_function(val, sorted_scenarios[i]) - i + 0.5) + end + return (2 / m^2) * crps_score +end +evaluate_abs_error(y::Vector{T}, forecast::Vector{T}) where T = abs.(y - forecast) +function evaluate_crps(y::Vector{T}, scenarios::Matrix{T}) where {T} + crps_scores = Vector{T}(undef, length(y)) + for k = 1:length(y) + crps_scores[k] = crps(y[k], scenarios[k, :]) + end + return crps_scores +end + +""" +TODO +""" +function backtest(gas::Model{<:Distribution, T}, y::Vector{T}, steps_ahead::Int, start_idx::Int; + S::Int = 1000, + initial_params::Matrix{T} = stationary_initial_params(gas), + opt_method = NelderMead(gas, DEFAULT_NUM_SEEDS)) where T + num_mle = length(y) - start_idx - steps_ahead + backtest = Backtest(num_mle, steps_ahead) + for i in 1:num_mle + println("Backtest: step $i of $num_mle") + gas_to_fit = deepcopy(gas) + y_to_fit = y[1:start_idx - 1 + i] + y_to_verify = y[start_idx + i:start_idx - 1 + i + steps_ahead] + ScoreDrivenModels.fit!(gas_to_fit, y_to_fit; initial_params=initial_params, opt_method=opt_method) + forec = forecast(y_to_fit, gas_to_fit, steps_ahead; S=S, initial_params=initial_params) + abs_errors = evaluate_abs_error(y_to_verify, forec.observation_forecast) + crps_scores = evaluate_crps(y_to_verify, forec.observation_scenarios) + backtest.abs_errors[i, :] = abs_errors + backtest.crps_scores[i, :] = crps_scores + end + return backtest +end \ No newline at end of file diff --git a/src/opt_methods/IPNewton.jl b/src/opt_methods/IPNewton.jl index b323f20..b490a95 100644 --- a/src/opt_methods/IPNewton.jl +++ b/src/opt_methods/IPNewton.jl @@ -39,7 +39,7 @@ function IPNewton(model::Model{D, T}, initial_points::Vector{Vector{T}}; f_tol:: return IPNewton{T}(f_tol, g_tol, ub, lb, iterations, initial_points) end -function optimize(func::Optim.TwiceDifferentiable, opt_method::IPNewton{T}, verbose::Int, i::Int) where T +function optimize(func::Optim.TwiceDifferentiable, opt_method::IPNewton{T}, verbose::Int, i::Int, time_limit_sec::Int) where T cons = Optim.TwiceDifferentiableConstraints(opt_method.lb, opt_method.ub) - return optimize(func, opt_method, cons, Optim.IPNewton(), verbose, i) + return optimize(func, opt_method, cons, Optim.IPNewton(), verbose, i, time_limit_sec) end diff --git a/src/opt_methods/LBFGS.jl b/src/opt_methods/LBFGS.jl index ad9a85c..e4c2878 100644 --- a/src/opt_methods/LBFGS.jl +++ b/src/opt_methods/LBFGS.jl @@ -30,6 +30,6 @@ function LBFGS(model::Model{D, T}, initial_points::Vector{Vector{T}}; f_tol::T = return LBFGS{T}(f_tol, g_tol, iterations, initial_points) end -function optimize(func::Optim.TwiceDifferentiable, opt_method::LBFGS{T}, verbose::Int, i::Int) where T - return optimize(func, opt_method, Optim.LBFGS(), verbose, i) +function optimize(func::Optim.TwiceDifferentiable, opt_method::LBFGS{T}, verbose::Int, i::Int, time_limit_sec::Int) where T + return optimize(func, opt_method, Optim.LBFGS(), verbose, i, time_limit_sec) end diff --git a/src/opt_methods/NelderMead.jl b/src/opt_methods/NelderMead.jl index 4c4b5df..d2d921a 100644 --- a/src/opt_methods/NelderMead.jl +++ b/src/opt_methods/NelderMead.jl @@ -30,6 +30,6 @@ function NelderMead(model::Model{D, T}, initial_points::Vector{Vector{T}}; f_tol return NelderMead{T}(f_tol, g_tol, iterations, initial_points) end -function optimize(func::Optim.TwiceDifferentiable, opt_method::NelderMead{T}, verbose::Int, i::Int) where T - return optimize(func, opt_method, Optim.NelderMead(), verbose, i) +function optimize(func::Optim.TwiceDifferentiable, opt_method::NelderMead{T}, verbose::Int, i::Int, time_limit_sec::Int) where T + return optimize(func, opt_method, Optim.NelderMead(), verbose, i, time_limit_sec) end diff --git a/src/opt_methods/common_methods.jl b/src/opt_methods/common_methods.jl index dd18f96..d7a07e5 100644 --- a/src/opt_methods/common_methods.jl +++ b/src/opt_methods/common_methods.jl @@ -26,25 +26,27 @@ function create_initial_points(model::Model{D, T}, n_initial_points::Int, LB::T, end function optimize(func::Optim.TwiceDifferentiable, opt_method::AbstractOptimizationMethod{T}, - optimizer::Optim.AbstractOptimizer, verbose::Int, i::Int) where T + optimizer::Optim.AbstractOptimizer, verbose::Int, i::Int, time_limit_sec::Int) where T return Optim.optimize(func, opt_method.initial_points[i], optimizer, Optim.Options(f_tol = opt_method.f_tol, g_tol = opt_method.g_tol, iterations = opt_method.iterations, - show_trace = show_trace(verbose) )) + show_trace = show_trace(verbose), + time_limit = time_limit_sec)) end function optimize(func::Optim.TwiceDifferentiable, opt_method::AbstractOptimizationMethod{T}, cons::Optim.TwiceDifferentiableConstraints, optimizer::Optim.AbstractOptimizer, - verbose::Int, i::Int) where T + verbose::Int, i::Int, time_limit_sec::Int) where T return Optim.optimize(func, cons, opt_method.initial_points[i], optimizer, Optim.Options(f_tol = opt_method.f_tol, g_tol = opt_method.g_tol, iterations = opt_method.iterations, - show_trace = show_trace(verbose) )) + show_trace = show_trace(verbose), + time_limit = time_limit_sec)) end show_trace(verbose::Int) = (verbose == 3 ? true : false) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 73305ff..7038bfa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,3 +11,4 @@ include("test_initial_params.jl") include("test_diagnostics.jl") include("test_estimate.jl") include("test_simulate.jl") +include("test_backtest.jl") diff --git a/test/test_backtest.jl b/test/test_backtest.jl new file mode 100644 index 0000000..2bf1a70 --- /dev/null +++ b/test/test_backtest.jl @@ -0,0 +1,8 @@ +@testset "Backtest" begin + ω = [0.1, 0.1] + A = [0.5 0; 0 0.5] + B = [0.5 0; 0 0.5] + simulation = simulate_GAS_1_1(Normal, 0.0, ω, A, B, 1) + gas = ScoreDrivenModels.Model(1, 1, Normal, 0.0) + bac = backtest(gas, simulation, 10, 4985) +end \ No newline at end of file