From de54b074fd1cfa8d584ebd7cc285d14b677bbee7 Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Tue, 9 Mar 2010 16:22:55 -0500 Subject: [PATCH] sandbox/stattools some cleaning in tests --- .../statsmodels/sandbox/tools/stattools.py | 239 +++++++++++------- 1 file changed, 154 insertions(+), 85 deletions(-) diff --git a/scikits/statsmodels/sandbox/tools/stattools.py b/scikits/statsmodels/sandbox/tools/stattools.py index 045bff0def6..c5112daa19a 100644 --- a/scikits/statsmodels/sandbox/tools/stattools.py +++ b/scikits/statsmodels/sandbox/tools/stattools.py @@ -17,9 +17,10 @@ from scikits.statsmodels.sandbox.tools.tools_tsa import lagmat class ResultsStore(object): - pass + def __str__(self): + return self._str -def ljungbox(x, lags=None, boxpierce=False): +def acorr_ljungbox(x, lags=None, boxpierce=False): '''Ljung-Box test for no autocorrelation @@ -213,7 +214,7 @@ def unitroot_adf(x, maxlag=None, trendorder=0, autolag='AIC', store=False): return adfstat -def lm_acorr(x, maxlag=None, autolag='AIC', store=False): +def acorr_lm(x, maxlag=None, autolag='AIC', store=False): '''Lagrange Multiplier tests for autocorrelation not checked yet, copied from unitrood_adf with adjustments @@ -340,7 +341,42 @@ def het_white(y, x, retres=False): def het_goldfeldquandt(y, x, idx, split=None, retres=False): '''test whether variance is the same in 2 subsamples - todo add resultinstance + Parameters + ---------- + y : array_like + endogenous variable + x : array_like + exogenous variable, regressors + idx : integer + column index of variable according to which observations are + sorted for the split + split : None or integer or float in intervall (0,1) + index at which sample is split. + If 0>> crvd - {0.050000000000000003: -13.699999999999999, 0.97499999999999998: 0.46999999999999997, 0.025000000000000001: -16.300000000000001, 0.01: -11.0} - >>> sorted(crvd.values()) - [-16.300000000000001, -13.699999999999999, -11.0, 0.46999999999999997] - ''' + nrepl = 100 + nobs = 100 + mcres = np.zeros(nrepl) + for ii in range(nrepl-1): + x = (1e-4+np.random.randn(nobs)).cumsum() + mcres[ii] = unitroot_adf(x, 2,trendorder=0, autolag=None) + + print (mcres<-2.57).sum() + print np.histogram(mcres) + mcressort = np.sort(mcres) + for ratio in [0.01, 0.025, 0.05, 0.1]: + print ratio, mcressort[int(nrepl*ratio)] + + print 'critical values in Green table 20.5' + print 'sample size = 100' + print 'with constant' + print '0.01: -19.8, 0.025: -16.3, 0.05: -13.7, 0.01: -11.0, 0.975: 0.47' - #for trend = 0 - crit_5lags0p05 =-4.41519 + (-14.0406)/nobs + (-12.575)/nobs**2 - print crit_5lags0p05 + print '0.01: -3.50, 0.025: -3.17, 0.05: -2.90, 0.01: -2.58, 0.975: 0.26' + crvdg = dict([map(float,s.split(':')) for s in ('0.01: -19.8, 0.025: -16.3, 0.05: -13.7, 0.01: -11.0, 0.975: 0.47'.split(','))]) + crvd = dict([map(float,s.split(':')) for s in ('0.01: -3.50, 0.025: -3.17, 0.05: -2.90, 0.01: -2.58, 0.975: 0.26'.split(','))]) + ''' + >>> crvd + {0.050000000000000003: -13.699999999999999, 0.97499999999999998: 0.46999999999999997, 0.025000000000000001: -16.300000000000001, 0.01: -11.0} + >>> sorted(crvd.values()) + [-16.300000000000001, -13.699999999999999, -11.0, 0.46999999999999997] + ''' + #for trend = 0 + crit_5lags0p05 =-4.41519 + (-14.0406)/nobs + (-12.575)/nobs**2 + print crit_5lags0p05 - adfstat, resstore = unitroot_adf(x, 2,trendorder=0, autolag=None, store=1) - print (mcres>crit_5lags0p05).sum() + adfstat, resstore = unitroot_adf(x, 2,trendorder=0, autolag=None, store=1) - print resstore.resols.model.exog[-5:] - print x[-5:] + print (mcres>crit_5lags0p05).sum() - print np.histogram(mcres, bins=[-np.inf, -3.5, -3.17, -2.9 , -2.58, 0.26, np.inf]) + print resstore.resols.model.exog[-5:] + print x[-5:] - print mcressort[(nrepl*(np.array([0.01, 0.025, 0.05, 0.1, 0.975]))).astype(int)] + print np.histogram(mcres, bins=[-np.inf, -3.5, -3.17, -2.9 , -2.58, 0.26, np.inf]) + print mcressort[(nrepl*(np.array([0.01, 0.025, 0.05, 0.1, 0.975]))).astype(int)] - def randwalksim(nobs=100, drift=0.0): - return (drift+np.random.randn(nobs)).cumsum() - def normalnoisesim(nobs=500, loc=0.0): - return (loc+np.random.randn(nobs)) + def randwalksim(nobs=100, drift=0.0): + return (drift+np.random.randn(nobs)).cumsum() - def adf20(x): - return unitroot_adf(x, 2,trendorder=0, autolag=None) + def normalnoisesim(nobs=500, loc=0.0): + return (loc+np.random.randn(nobs)) - print '\nResults with MC class' - mc1 = StatTestMC(randwalksim, adf20) - mc1.run(1000) - print mc1.histogram(critval=[-3.5, -3.17, -2.9 , -2.58, 0.26]) - print mc1.quantiles() + def adf20(x): + return unitroot_adf(x, 2,trendorder=0, autolag=None) - print '\nLjung Box' + print '\nResults with MC class' + mc1 = StatTestMC(randwalksim, adf20) + mc1.run(1000) + print mc1.histogram(critval=[-3.5, -3.17, -2.9 , -2.58, 0.26]) + print mc1.quantiles() + + print '\nLjung Box' + + def lb4(x): + s,p = acorr_ljungbox(x, lags=4) + return s[-1], p[-1] + + def lb4(x): + s,p = acorr_ljungbox(x, lags=1) + return s[0], p[0] + + print 'Results with MC class' + mc1 = StatTestMC(normalnoisesim, lb4) + mc1.run(1000, statindices=[0,1]) + print mc1.histogram(1, critval=[0.01, 0.025, 0.05, 0.1, 0.975]) + print mc1.quantiles(1) + print mc1.quantiles(0) + print mc1.histogram(0) + + nobs = 100 + x = np.ones((20,2)) + x[:,1] = np.arange(20) + y = x.sum(1) + 1.01*(1+1.5*(x[:,1]>10))*np.random.rand(20) + print het_goldfeldquandt(y,x, 1) - def lb4(x): - s,p = ljungbox(x, lags=4) - return s[-1], p[-1] + y = x.sum(1) + 1.01*(1+0.5*(x[:,1]>10))*np.random.rand(20) + print het_goldfeldquandt(y,x, 1) - def lb4(x): - s,p = ljungbox(x, lags=1) - return s[0], p[0] + y = x.sum(1) + 1.01*(1-0.5*(x[:,1]>10))*np.random.rand(20) + print het_goldfeldquandt(y,x, 1) - print 'Results with MC class' - mc1 = StatTestMC(normalnoisesim, lb4) - mc1.run(1000, statindices=[0,1]) - print mc1.histogram(1, critval=[0.01, 0.025, 0.05, 0.1, 0.975]) - print mc1.quantiles(1) - print mc1.quantiles(0) - print mc1.histogram(0) + print het_breushpagan(y,x) + print het_white(y,x) -nobs = 100 -x = np.ones((20,2)) -x[:,1] = np.arange(20) -y = x.sum(1) + 1.01*(1+1.5*(x[:,1]>10))*np.random.rand(20) -print het_goldfeldquandt(y,x, 1) + f,p = het_goldfeldquandt(y,x, 1) + print f, p + resgq = het_goldfeldquandt(y,x, 1, retres=True) + print resgq -y = x.sum(1) + 1.01*(1+0.5*(x[:,1]>10))*np.random.rand(20) -print het_goldfeldquandt(y,x, 1) + #this is just a syntax check: + print neweywestcov(y, x) -y = x.sum(1) + 1.01*(1-0.5*(x[:,1]>10))*np.random.rand(20) -print het_goldfeldquandt(y,x, 1) + resols1 = sm.OLS(y, x).fit() + print neweywestcov(resols1.resid, x) + print resols1.cov_params() + print resols1.HC0_se + print resols1.cov_HC0 -print het_breushpagan(y,x) -print het_white(y,x) -f,p,d = het_goldfeldquandt(y,x, 1) -print d