Skip to content

Commit

Permalink
sandbox/stattools some cleaning in tests
Browse files Browse the repository at this point in the history
  • Loading branch information
josef-pkt committed Mar 9, 2010
1 parent 87e3a92 commit de54b07
Showing 1 changed file with 154 additions and 85 deletions.
239 changes: 154 additions & 85 deletions scikits/statsmodels/sandbox/tools/stattools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
from scikits.statsmodels.sandbox.tools.tools_tsa import lagmat

class ResultsStore(object):
pass
def __str__(self):
return self._str

def ljungbox(x, lags=None, boxpierce=False):
def acorr_ljungbox(x, lags=None, boxpierce=False):
'''Ljung-Box test for no autocorrelation
Expand Down Expand Up @@ -213,7 +214,7 @@ def unitroot_adf(x, maxlag=None, trendorder=0, autolag='AIC', store=False):
return adfstat


def lm_acorr(x, maxlag=None, autolag='AIC', store=False):
def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
'''Lagrange Multiplier tests for autocorrelation
not checked yet, copied from unitrood_adf with adjustments
Expand Down Expand Up @@ -340,7 +341,42 @@ def het_white(y, x, retres=False):
def het_goldfeldquandt(y, x, idx, split=None, retres=False):
'''test whether variance is the same in 2 subsamples
todo add resultinstance
Parameters
----------
y : array_like
endogenous variable
x : array_like
exogenous variable, regressors
idx : integer
column index of variable according to which observations are
sorted for the split
split : None or integer or float in intervall (0,1)
index at which sample is split.
If 0<split<0 then split is interpreted as fraction of the observations
in the first sample
retres : boolean
if true, then an instance of a result class is returned,
otherwise 2 numbers, fvalue and p-value, are returned
Returns
-------
(fval, pval) or res
fval : float
value of the F-statistic
pval : float
p-value of the hypothesis that the variance in one subsample is larger
than in the other subsample
res : instance of result class
The class instance is just a storage for the intermediate and final
results that are calculated
Notes
-----
TODO:
add resultinstance - DONE
maybe add drop-middle as option
maybe allow for several breaks
recommendation for users: use this function as pattern for more flexible
split in tests, e.g. drop middle.
Expand Down Expand Up @@ -381,7 +417,8 @@ def het_goldfeldquandt(y, x, idx, split=None, retres=False):
res.resols2 = resols2
res.ordering = ordering
res.split = split
res.__str__ = '''The Goldfeld-Quandt test for null hypothesis that the
#res.__str__
res._str = '''The Goldfeld-Quandt test for null hypothesis that the
variance in the second subsample is %s than in the first subsample:
F-statistic =%8.4f and p-value =%8.4f''' % (ordering, fval, fpval)

Expand Down Expand Up @@ -413,21 +450,31 @@ def neweywestcov(resid, x):
za = np.dot(hhat[:,lag:nobs] * hhat[:,:nobs-lag].T)
w = 1 - lag/(nobs + 1.)
xuux = xuux + np.dot(w, za+za.T)
xtxi = np.inv(np.dot(x.T, x)) #QR instead?
xtxi = np.linalg.inv(np.dot(x.T, x)) #QR instead?
covbNW = np.dot(xtxi, np.dot(xuux, xtxi))

return covbNW



class StatTestMC(object):
"""class to run Monte Carlo study on a statistical test'''
TODO
print summary, for quantiles and for histogram
draft in trying out script log
"""

def __init__(self, dgp, statistic):
self.dgp = dgp #staticmethod(dgp) #no self
self.statistic = statistic # staticmethod(statistic) #no self

def run(self, nrepl, statindices=None, dgpargs=[], statsargs=[]):
'''run the actual Monte Carlo and save results
'''
self.nrepl = nrepl
self.statindices = statindices
self.dgpargs = dgpargs
Expand Down Expand Up @@ -455,6 +502,10 @@ def run(self, nrepl, statindices=None, dgpargs=[], statsargs=[]):
self.mcres = mcres

def histogram(self, idx=None, critval=None):
'''calculate histogram values
does not do any plotting
'''
if self.mcres.ndim == 2:
if not idx is None:
mcres = self.mcres[:,idx]
Expand All @@ -479,6 +530,10 @@ def histogram(self, idx=None, critval=None):
return histo, self.cumhisto, self.cumhistoreversed

def quantiles(self, idx=None, frac=[0.01, 0.025, 0.05, 0.1, 0.975]):
'''calculate quantiles of Monte Carlo results
'''

if self.mcres.ndim == 2:
if not idx is None:
mcres = self.mcres[:,idx]
Expand All @@ -491,106 +546,120 @@ def quantiles(self, idx=None, frac=[0.01, 0.025, 0.05, 0.1, 0.975]):
self.mcressort = mcressort = np.sort(self.mcres)
return frac, mcressort[(self.nrepl*frac).astype(int)]

if __name__ == '__main__':

examples = []
if 'adf' in examples:
examples = []
if 'adf' in examples:

x = np.random.randn(20)
print ljungbox(x)
print unitroot_adf(x)
x = np.random.randn(20)
print acorr_ljungbox(x)
print unitroot_adf(x)

nrepl = 100
nobs = 100
mcres = np.zeros(nrepl)
for ii in range(nrepl-1):
x = (1e-4+np.random.randn(nobs)).cumsum()
mcres[ii] = unitroot_adf(x, 2,trendorder=0, autolag=None)

print (mcres<-2.57).sum()
print np.histogram(mcres)
mcressort = np.sort(mcres)
for ratio in [0.01, 0.025, 0.05, 0.1]:
print ratio, mcressort[int(nrepl*ratio)]

print 'critical values in Green table 20.5'
print 'sample size = 100'
print 'with constant'
print '0.01: -19.8, 0.025: -16.3, 0.05: -13.7, 0.01: -11.0, 0.975: 0.47'

print '0.01: -3.50, 0.025: -3.17, 0.05: -2.90, 0.01: -2.58, 0.975: 0.26'
crvdg = dict([map(float,s.split(':')) for s in ('0.01: -19.8, 0.025: -16.3, 0.05: -13.7, 0.01: -11.0, 0.975: 0.47'.split(','))])
crvd = dict([map(float,s.split(':')) for s in ('0.01: -3.50, 0.025: -3.17, 0.05: -2.90, 0.01: -2.58, 0.975: 0.26'.split(','))])
'''
>>> crvd
{0.050000000000000003: -13.699999999999999, 0.97499999999999998: 0.46999999999999997, 0.025000000000000001: -16.300000000000001, 0.01: -11.0}
>>> sorted(crvd.values())
[-16.300000000000001, -13.699999999999999, -11.0, 0.46999999999999997]
'''
nrepl = 100
nobs = 100
mcres = np.zeros(nrepl)
for ii in range(nrepl-1):
x = (1e-4+np.random.randn(nobs)).cumsum()
mcres[ii] = unitroot_adf(x, 2,trendorder=0, autolag=None)

print (mcres<-2.57).sum()
print np.histogram(mcres)
mcressort = np.sort(mcres)
for ratio in [0.01, 0.025, 0.05, 0.1]:
print ratio, mcressort[int(nrepl*ratio)]

print 'critical values in Green table 20.5'
print 'sample size = 100'
print 'with constant'
print '0.01: -19.8, 0.025: -16.3, 0.05: -13.7, 0.01: -11.0, 0.975: 0.47'

#for trend = 0
crit_5lags0p05 =-4.41519 + (-14.0406)/nobs + (-12.575)/nobs**2
print crit_5lags0p05
print '0.01: -3.50, 0.025: -3.17, 0.05: -2.90, 0.01: -2.58, 0.975: 0.26'
crvdg = dict([map(float,s.split(':')) for s in ('0.01: -19.8, 0.025: -16.3, 0.05: -13.7, 0.01: -11.0, 0.975: 0.47'.split(','))])
crvd = dict([map(float,s.split(':')) for s in ('0.01: -3.50, 0.025: -3.17, 0.05: -2.90, 0.01: -2.58, 0.975: 0.26'.split(','))])
'''
>>> crvd
{0.050000000000000003: -13.699999999999999, 0.97499999999999998: 0.46999999999999997, 0.025000000000000001: -16.300000000000001, 0.01: -11.0}
>>> sorted(crvd.values())
[-16.300000000000001, -13.699999999999999, -11.0, 0.46999999999999997]
'''

#for trend = 0
crit_5lags0p05 =-4.41519 + (-14.0406)/nobs + (-12.575)/nobs**2
print crit_5lags0p05

adfstat, resstore = unitroot_adf(x, 2,trendorder=0, autolag=None, store=1)

print (mcres>crit_5lags0p05).sum()
adfstat, resstore = unitroot_adf(x, 2,trendorder=0, autolag=None, store=1)

print resstore.resols.model.exog[-5:]
print x[-5:]
print (mcres>crit_5lags0p05).sum()

print np.histogram(mcres, bins=[-np.inf, -3.5, -3.17, -2.9 , -2.58, 0.26, np.inf])
print resstore.resols.model.exog[-5:]
print x[-5:]

print mcressort[(nrepl*(np.array([0.01, 0.025, 0.05, 0.1, 0.975]))).astype(int)]
print np.histogram(mcres, bins=[-np.inf, -3.5, -3.17, -2.9 , -2.58, 0.26, np.inf])

print mcressort[(nrepl*(np.array([0.01, 0.025, 0.05, 0.1, 0.975]))).astype(int)]

def randwalksim(nobs=100, drift=0.0):
return (drift+np.random.randn(nobs)).cumsum()

def normalnoisesim(nobs=500, loc=0.0):
return (loc+np.random.randn(nobs))
def randwalksim(nobs=100, drift=0.0):
return (drift+np.random.randn(nobs)).cumsum()

def adf20(x):
return unitroot_adf(x, 2,trendorder=0, autolag=None)
def normalnoisesim(nobs=500, loc=0.0):
return (loc+np.random.randn(nobs))

print '\nResults with MC class'
mc1 = StatTestMC(randwalksim, adf20)
mc1.run(1000)
print mc1.histogram(critval=[-3.5, -3.17, -2.9 , -2.58, 0.26])
print mc1.quantiles()
def adf20(x):
return unitroot_adf(x, 2,trendorder=0, autolag=None)

print '\nLjung Box'
print '\nResults with MC class'
mc1 = StatTestMC(randwalksim, adf20)
mc1.run(1000)
print mc1.histogram(critval=[-3.5, -3.17, -2.9 , -2.58, 0.26])
print mc1.quantiles()

print '\nLjung Box'

def lb4(x):
s,p = acorr_ljungbox(x, lags=4)
return s[-1], p[-1]

def lb4(x):
s,p = acorr_ljungbox(x, lags=1)
return s[0], p[0]

print 'Results with MC class'
mc1 = StatTestMC(normalnoisesim, lb4)
mc1.run(1000, statindices=[0,1])
print mc1.histogram(1, critval=[0.01, 0.025, 0.05, 0.1, 0.975])
print mc1.quantiles(1)
print mc1.quantiles(0)
print mc1.histogram(0)

nobs = 100
x = np.ones((20,2))
x[:,1] = np.arange(20)
y = x.sum(1) + 1.01*(1+1.5*(x[:,1]>10))*np.random.rand(20)
print het_goldfeldquandt(y,x, 1)

def lb4(x):
s,p = ljungbox(x, lags=4)
return s[-1], p[-1]
y = x.sum(1) + 1.01*(1+0.5*(x[:,1]>10))*np.random.rand(20)
print het_goldfeldquandt(y,x, 1)

def lb4(x):
s,p = ljungbox(x, lags=1)
return s[0], p[0]
y = x.sum(1) + 1.01*(1-0.5*(x[:,1]>10))*np.random.rand(20)
print het_goldfeldquandt(y,x, 1)

print 'Results with MC class'
mc1 = StatTestMC(normalnoisesim, lb4)
mc1.run(1000, statindices=[0,1])
print mc1.histogram(1, critval=[0.01, 0.025, 0.05, 0.1, 0.975])
print mc1.quantiles(1)
print mc1.quantiles(0)
print mc1.histogram(0)
print het_breushpagan(y,x)
print het_white(y,x)

nobs = 100
x = np.ones((20,2))
x[:,1] = np.arange(20)
y = x.sum(1) + 1.01*(1+1.5*(x[:,1]>10))*np.random.rand(20)
print het_goldfeldquandt(y,x, 1)
f,p = het_goldfeldquandt(y,x, 1)
print f, p
resgq = het_goldfeldquandt(y,x, 1, retres=True)
print resgq

y = x.sum(1) + 1.01*(1+0.5*(x[:,1]>10))*np.random.rand(20)
print het_goldfeldquandt(y,x, 1)
#this is just a syntax check:
print neweywestcov(y, x)

y = x.sum(1) + 1.01*(1-0.5*(x[:,1]>10))*np.random.rand(20)
print het_goldfeldquandt(y,x, 1)
resols1 = sm.OLS(y, x).fit()
print neweywestcov(resols1.resid, x)
print resols1.cov_params()
print resols1.HC0_se
print resols1.cov_HC0

print het_breushpagan(y,x)
print het_white(y,x)

f,p,d = het_goldfeldquandt(y,x, 1)
print d

0 comments on commit de54b07

Please sign in to comment.