-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path[4]預測表現之評估.py
104 lines (81 loc) · 2.54 KB
/
[4]預測表現之評估.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# -*- coding: utf-8 -*-
"""[4]預測表現之評估.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1ETyvXu0KImcbmuQpXSM4MoR-xiO_MYXX
# Diebold-Mariano 檢定
- 以比特幣報酬為例
- 比較AR(1)與AR(2)的模型預測能力
- 利用Diebold-Mariano 檢定
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ffn
import statsmodels.api as sm
#from statsmodels.regression.rolling import RollingOLS 回歸模型 ar 模型為回歸
#抓取彼特的價格
data = ffn.get('BTC-USD',start = '2018-01-01',end='2021-04-16')
data.index = pd.to_datetime(data.index)
#轉成時間格式
data.head()
data.btcusd.plot()
"""## 設定自變數
- 報酬率
- 落後一期報酬率
- 落後二期報酬率
"""
data['ret'] = np.log(data.btcusd).diff() #ret報酬率 自動建立ret
data['ret_1'] = data.ret.shift(1) #報酬率平移一期
data['ret_2'] = data.ret.shift(2) #往前推兩期
data.head()
#ret_1前一天的報酬率
data.info()
"""## 設定樣本範圍
- 資料總數為`1190`
- 樣本內資料設定為`R=200`
- 樣本外資料設定為`P=990`
"""
y=data.ret[0:200] #樣本200筆 y1 到 yr
y
y=data.ret[0:200]
x=sm.add_constant(data.ret_1[0:200])
result_ar1 = sm.OLS(y,x,missing='drop').fit()
result_ar1.summary()
R = 200
P = len(data) - R
print(R,P)
# out-of-sample MSE
R = 200
P = len(data) - R
print(R,P)
#
e1 = np.zeros(P) #模型一的誤差 990個0
e2 = np.zeros(P)
#進行989次 1到P-1 滾輪法
for i in range(1,P-1):
y = data.ret[i:R+i] #報酬率 抓到200
x = sm.add_constant(data.ret_1[i:R+i]) #估計出beta1
result_ar1 = sm.OLS(y,x,missing='drop').fit()
x = sm.add_constant(data[['ret_1','ret_2']][i:R+i])
result_ar2 = sm.OLS(y,x,missing='drop').fit()
fitted_ar1 = result_ar1.params[0] + result_ar1.params[1]*data.ret_1[R+i-1] #估計的y
fitted_ar2 = result_ar2.params[0] + result_ar2.params[1]*data.ret_1[R+i-1] + result_ar2.params[2]*data.ret_2[R+i-1]
e1[i] = data.ret[R+i] - fitted_ar1
e2[i] = data.ret[R+i] - fitted_ar2 #樣本外的殘差
mse_ar1_oos = np.mean(e1**2) #樣本外的mse
mse_ar2_oos = np.mean(e2**2)
print(mse_ar1_oos,mse_ar2_oos)
x = np.ones(len(y))
# 利用 Newey-West標準誤估計
rr = sm.OLS(y,x,missing='drop').fit(cov_type='HAC',cov_kwds={'maxlags':2})
dt=mse_ar1_oos-mse_ar2_oos
print(dt)
#檢查beta0是否有大於零 yt=beta+error
#兩個模型做比較 y=e1ˇ2-e2ˇ2 殘差有否顯著 dt公式
#答案負
"""### 判斷模型好壞
- MSE 直接比大小
- 檢定: pvalue
"""
rr.pvalues