*
* Replication file for Hansen and Seo (2002), "Testing for two-regime threshold
* cointegration in vector error-correction models", Journal of Econometrics,
* vol 110, pp 293-318.
*
*
* Attach the raw data file to the DATA unit and set up a monthly calendar
* (12 periods per year) whose first entry is 1951:1.
*
open data zeroyld.dat
cal 1951 1 12
*
* Create the data set which will match the faulty one used in the paper.
*
* The workspace is given 531 entries. The first DATA instruction fills entries
* from the start of the range through 1991:2. The file is then rewound and read
* again from its beginning, this time into entries 1991:3 through 531, so the
* tail of the sample is a repeat of the first records in the file. This is
* deliberate (see the note above): it reproduces the sample used in the paper.
* Values equal to -88.888 in the file are treated as missing.
*
* NOTE(review): the rNm series are presumably yields at an N-month maturity
* (naming suggests this) -- confirm against the documentation of zeroyld.dat.
*
all 531
data(format=free,org=columns,missing=-88.888) * 1991:2 year month notmiss regnobs stderr taxrate $
 r0m r1m r2m r3m r4m r5m r6m r7m r8m r9m r10m r11m r12m $
 r13m r14m r15m r16m r17m r18m r21m r24m r30m r36m r48m r60m r72m r84m r96m $
 r108m r120m r132m r144m r156m r168m r180m r192m r204m r216m r228m r240m r252m $
 r264m r276m r288m r300m r312m r324m r336m r348m r360m r372m r384m r396m r408m r420m r480m
*
* Second pass: reposition to the top of the file and read the same column
* layout into the remaining entries (1991:3 to 531).
*
rewind data
data(format=free,org=columns,missing=-88.888) 1991:3 531 year month notmiss regnobs stderr taxrate $
 r0m r1m r2m r3m r4m r5m r6m r7m r8m r9m r10m r11m r12m $
 r13m r14m r15m r16m r17m r18m r21m r24m r30m r36m r48m r60m r72m r84m r96m $
 r108m r120m r132m r144m r156m r168m r180m r192m r204m r216m r228m r240m r252m $
 r264m r276m r288m r300m r312m r324m r336m r348m r360m r372m r384m r396m r408m r420m r480m
*
* Spreads between pairs of rates: sA_B = r<B>m - r<A>m (second rate minus the
* first). These nine pairs are the same rate pairs that are tested further
* down in the file; the spread series themselves are not referenced again in
* the visible part of this file.
*
set s1_2    = r2m-r1m
set s1_3    = r3m-r1m
set s1_6    = r6m-r1m
set s3_6    = r6m-r3m
set s3_12   = r12m-r3m
set s3_120  = r120m-r3m
set s12_24  = r24m-r12m
set s12_120 = r120m-r12m
set s24_120 = r120m-r24m
************************************************************************************
*
* @HansenSeo  start  end   y1  y2
* Computes a threshold cointegrating bivariate VAR
*
* Parameters:
*
* start   end     range to estimate [maximum allowed]
* y1 y2           dependent variables
*
* Options:
*   LAGS=# of lags in the VAR
*   BETA =input value for cointegrating coefficient (y1-beta*y2 stationary) [not used]
*   GAMMA=threshold value for partitioning sample [not used]
*   BSIZE=size of beta grid search [300]
*   GSIZE=size of gamma grid search [300]
*   PI=minimum fraction sample in a partition [.05]
*
* Revision Schedule:
*   02/07 Written by Tom Doan, Estima
*
procedure hansenseo start end y1 y2
*
* Estimates a two-regime threshold error-correction model for (y1,y2).
*
* The error-correction term is w = y1-beta*y2, and an observation belongs to
* regime 1 when w{1}<=gamma. Beta and gamma are chosen by a grid search that
* minimizes log(det(.)) of the residual cross-product matrix of the
* two-regime system. Given the selected pair, the regime-specific
* coefficients and heteroscedasticity-robust standard errors are computed
* and shown in a REPORT table.
*
* Parameters:
*   start end  = estimation range (adjusted by INQUIRE to what the data allow)
*   y1 y2      = the two series in levels
*
* Options:
*   LAGS  = number of lags of the differences d1 and d2 in each equation [1]
*   BETA  = fixed cointegrating coefficient; if omitted, a grid is searched
*   GAMMA = fixed threshold; if omitted, a grid is searched
*   BSIZE = number of points in the beta grid [300]
*   GSIZE = number of points in the gamma grid [300]
*   PI    = minimum fraction of the sample allowed in either regime [.05]
*
type integer start end
type series y1 y2
*
option integer lags 1
option real    beta
option real    gamma
option integer bsize 300
option integer gsize 300
option real    pi    .05
*
local series d1 d2 u1 u2 w d
local vect[integer] reglist
local integer k n
local integer startl endl
local real beta0 betax gamma0 gammax value minval
local real countd
local vect betav gammav
local rect ss a1 a2 s1 s2
local symm xxi xxs uu uug rcmom v1 v2
local integer count1 count2
*
* Determine the usable range: the regressions need lags+1 lags of the levels
* (lags of the first differences).
*
inquire(reglist) startl<<start endl<<end
# y1{0 to lags+1} y2{0 to lags+1}
*
set d1 = y1-y1{1}
set d2 = y2-y2{1}
*
* Build up a regressor list consisting of 1, w{1} and k lags of d1 and d2.
* (w is filled in later; the list only records the series and lag.)
*
compute k=lags
compute reglist=%rladdlag(||constant||,w,1)
if k>0 {
   compute reglist=%rladdlaglist(reglist,d1,%seq(1,k))
   compute reglist=%rladdlaglist(reglist,d2,%seq(1,k))
}
*
* If beta is provided, use it. Otherwise, estimate beta from an
* Engle-Granger regression and make a grid for searching over beta
* which runs over +/- 20 OLS standard errors (bsize equally spaced points
* from beta0-20*se to beta0+20*se).
*
if .not.%defined(beta) {
   linreg(noprint) y1 startl endl
   # y2
   compute beta0=%beta(1)
   compute betav=%seqa(beta0-20*%stderrs(1),40*%stderrs(1)/(bsize-1),bsize)
}
else {
   compute beta0=beta
   compute betav=||beta0||
}
*
* If gamma is provided, use it. Otherwise, use an equally spaced grid of
* gsize points running from the minimum to the maximum of the lagged
* error-correction term computed at beta0.
*
if .not.%defined(gamma) {
   set w = y1{1}-beta0*y2{1}
   stats(fractiles,noprint) w startl endl
   compute gammav=%seqa(%minimum,(%maximum-%minimum)/(gsize-1),gsize)
   compute gamma0=%median
}
else {
   compute gamma0=gamma
   compute gammav=||gamma0||
}
*
* Search over test values of beta on the outside and test values of gamma on the
* inside. minval starts as NA so the first admissible pair is always accepted.
*
compute minval=%na
dofor betax = betav
   set w = y1-betax*y2
   *
   * Use cmom and %sweep to run the full sample VAR. Pull out (into uu)
   * T x the covariance of residuals and into xxi the X'X**-1 of the regressors
   * (This has to be done inside the beta loop because w{1} is one of the regressors)
   *
   cmom startl endl
   # d1 d2 reglist
   compute ss =%sweeplist(%cmom,%seq(3,%ncmom))
   compute xxi=%xsubmat(ss,3,%ncmom,3,%ncmom)
   compute uu =%xsubmat(ss,1,2,1,2)
   *
   * Compute the residuals
   *
   linreg(noprint) d1 startl endl u1
   # reglist
   linreg(noprint) d2 startl endl u2
   # reglist
   *
   dofor gammax = gammav
      *
      * d = dummy which partitions the sample based upon the test threshold value
      * gammax. Move on to the next value if either part of the partition is too small.
      *
      set d = w{1}<=gammax
      sstats(mean) startl endl d>>countd
      if countd<pi.or.countd>(1-pi)
         next
      *
      * Compute the log |sigma| for a partitioned VAR. If this is the smallest value
      * we've seen, keep track of the settings.
      *
      * With X the full-sample regressors and Xd the regressors on the d=1
      * subsample, adding Xd to a regression that already contains X lowers
      * the residual cross product by
      *    (Xd'U)' * inv(Xd'Xd - Xd'Xd*inv(X'X)*Xd'Xd) * (Xd'U)
      * (partitioned regression). xxs is Xd'Xd, xxi is inv(X'X), and the last
      * argument below is Xd'U. The middle matrix must be inverted.
      *
      cmom(smpl=d) startl endl
      # u1 u2 reglist
      compute xxs=%xsubmat(%cmom,3,%ncmom,3,%ncmom)
      compute uug=uu-%mqform(inv(xxs-%mqform(xxi,xxs)),%xsubmat(%cmom,3,%ncmom,1,2))
      compute value=log(%det(uug))
      if .not.%valid(minval).or.value<minval
         compute minval=value,beta0=betax,gamma0=gammax
   end dofor gammax
end dofor betax
*
* Estimate the likelihood maximizing regressions
*
* Regime 1 (d=1): the regressors come first in the cross-product matrix so
* that sweeping on the first n rows gives the coefficients for d1 and d2 in
* columns n+1 and n+2.
*
set w = y1-beta0*y2
set d = w{1}<=gamma0
cmom(smpl=d) startl endl
# reglist d1 d2
compute rcmom=%cmom
compute n=%ncmom-2
compute count1=%nobs
compute a1=%xsubmat(%sweeptop(rcmom,n),1,n,n+1,n+2)
*
* Residuals using the regime 1 coefficients. %eqnxvector(0,t) is the
* regressor vector at entry t for the most recent regression (on reglist).
*
set u1 = d1-%dot(%xcol(a1,1),%eqnxvector(0,t))
set u2 = d2-%dot(%xcol(a1,2),%eqnxvector(0,t))
*
* Compute heteroscedasticity-consistent robust covariance matrix of the stacked
* coefficient vector: (I x inv(X'X)) * mcov * (I x inv(X'X)). s1 holds the
* standard errors reshaped to n rows (one column per equation).
*
mcov(smpl=d) startl endl u1 u2
# reglist
compute v1=%mqform(%cmom,%kroneker(%identity(2),inv(%xsubmat(rcmom,1,n,1,n))))
compute s1=%vectorect(%sqrt(%xdiag(v1)),n)
*
* Regime 2 (d=0): same calculations on the complementary subsample.
*
cmom(smpl=.not.d) startl endl
# reglist d1 d2
compute rcmom=%cmom
compute count2=%nobs
compute a2=%xsubmat(%sweeptop(rcmom,n),1,n,n+1,n+2)
set u1 = d1-%dot(%xcol(a2,1),%eqnxvector(0,t))
set u2 = d2-%dot(%xcol(a2,2),%eqnxvector(0,t))
mcov(smpl=.not.d) startl endl u1 u2
# reglist
compute v2=%mqform(%cmom,%kroneker(%identity(2),inv(%xsubmat(rcmom,1,n,1,n))))
compute s2=%vectorect(%sqrt(%xdiag(v2)),n)
*
* Output table: selected beta and gamma, then for each regime the
* coefficient and standard error columns for the d1 and d2 equations.
*
report(action=define)
report(atrow=1,atcol=1,span) "Estimates of a Threshold Cointegration Model"
report(atrow=2,atcol=1) "Beta" beta0
report(atrow=3,atcol=1) "Gamma" gamma0
report(atrow=5,atcol=2,tocol=5,span,align=center) "Regime 1(N="+count1+")"
report(atrow=5,atcol=6,tocol=9,span,align=center) "Regime 2(N="+count2+")"
report(atrow=6,atcol=1,fillby=cols) %eqnreglabels(0)
report(atrow=6,atcol=2) %xcol(a1,1) %xcol(s1,1) %xcol(a1,2) %xcol(s1,2)
report(atrow=6,atcol=6) %xcol(a2,1) %xcol(s2,1) %xcol(a2,2) %xcol(s2,2)
report(action=format,atrow=2,atcol=1,width=9)
report(action=show)
end
*
* @HSSupLMStat( options )  start end u1 u2 teststat
*
* This isn't really designed for stand-alone use, as it doesn't have checks for
* valid syntax. It computes the sup LM statistic for a break in a bivariate
* regression based upon an input threshold series and set of test values.
*
* Parameters:
*   start end  = estimation range
*   u1  u2     = series of residuals
*   teststat   = (output) test statistic
*
* Options:
*   PI=fraction of values at either end to skip [.05]
*   W=series with values of threshold series
*   GAMMAS=VECTOR of test threshold values
*   REGLIST=Coded regressor list
*
* Revision Schedule
*  02/2007 Written by Tom Doan, Estima
*
procedure HSSupLMStat startl endl u1 u2 teststat
*
* Sup-LM statistic for a threshold break in the two-equation regression of
* (u1,u2) on REGLIST. For each candidate threshold in GAMMAS the sample is
* split by w{1}<=threshold, each half is fit separately, and a quadratic
* form in the difference between the two stacked coefficient vectors is
* computed using robust covariance matrices. The largest value over all
* admissible candidates is returned in teststat. If no candidate leaves at
* least a fraction PI of the sample on each side, teststat comes back as
* the starting value of -1.0.
*
type integer startl endl
type series u1 u2
type real *teststat
*
option real pi .05
option vector gammas
option vect[int] reglist
option series w
*
local series dlow
local real fraclow lmstat lmmax gammamax
local symm xpx covlow covhigh
local rect xxinv
local vect blow bhigh
local integer nreg
local real gtest
*
compute lmmax=-1.0
dofor gtest = gammas
   *
   *  dlow flags the entries whose lagged threshold variable is at or below
   *  the candidate value.
   *
   set dlow = w{1}<=gtest
   *
   *  Fraction of the range falling in the lower regime. Skip this candidate
   *  when either regime would be thinner than PI.
   *
   sstats(mean) startl endl dlow>>fraclow
   if fraclow<pi.or.fraclow>(1-pi)
      next
   *
   *  Lower regime: cross products with the regressors first, so sweeping the
   *  leading nreg rows leaves the coefficients for u1 and u2 in the last two
   *  columns. These are stacked into a single vector.
   *
   cmom(smpl=dlow) startl endl
   # reglist u1 u2
   compute xpx=%cmom,nreg=%ncmom-2
   compute blow=%vec(%xsubmat(%sweeptop(xpx,nreg),1,nreg,nreg+1,nreg+2))
   compute xxinv=inv(%xsubmat(xpx,1,nreg,1,nreg))
   *
   *  Robust covariance matrix of the stacked coefficients
   *
   mcov(smpl=dlow) startl endl u1 u2
   # reglist
   compute covlow=%mqform(%cmom,%kroneker(%identity(2),xxinv))
   *
   *  Upper regime: identical steps on the complementary subsample
   *
   cmom(smpl=.not.dlow) startl endl
   # reglist u1 u2
   compute xpx=%cmom
   compute bhigh=%vec(%xsubmat(%sweeptop(xpx,nreg),1,nreg,nreg+1,nreg+2))
   compute xxinv=inv(%xsubmat(xpx,1,nreg,1,nreg))
   mcov(smpl=.not.dlow) startl endl u1 u2
   # reglist
   compute covhigh=%mqform(%cmom,%kroneker(%identity(2),xxinv))
   *
   *  Statistic for equality of the two coefficient vectors
   *
   compute lmstat=%qform(inv(covlow+covhigh),blow-bhigh)
   *
   *  Retain the running maximum and the threshold that produced it
   *
   if lmstat>lmmax
      compute lmmax=lmstat,gammamax=gtest
end dofor gtest
compute teststat=lmmax
end
*
* @HSLMTest start  end   y1  y2
* Computes an LM test for threshold cointegration in a bivariate VAR
*
* Parameters:
*
* start   end     range to estimate [maximum allowed]
* y1 y2           dependent variables
*
* Options:
*   LAGS=# of lags in the VAR
*   BETA =input value for cointegrating coefficient (y1-beta*y2 stationary)
*   GSIZE=size of gamma grid search [300]
*   PI=minimum fraction sample in a partition [.05]
*   BOOT=# of bootstrap repetitions. Note that the code here corrects an error in
*    the program used for the paper.
*   [PRINT]/NOPRINT
*
procedure HSLMTest start end y1 y2
*
* Sup-LM test for threshold cointegration in the bivariate system (y1,y2)
* with a known cointegrating coefficient BETA (error-correction term
* w = y1-beta*y2).
*
* Parameters:
*   start end  = estimation range (adjusted by INQUIRE to what the data allow)
*   y1 y2      = the two series in levels
*
* Options:
*   LAGS  = number of lags of the differences in each equation [1]
*   BETA  = cointegrating coefficient (required)
*   GSIZE = number of points in the threshold grid [300]
*   PI    = minimum fraction of the sample allowed in either regime [.05]
*   BOOT  = number of bootstrap repetitions [0 = no bootstrap]
*   [PRINT]/NOPRINT
*
* Variables defined:
*   %CDSTAT = the sup-LM statistic
*   %SIGNIF = bootstrap p-value (set only when BOOT>0)
*
type integer start end
type series y1 y2
*
option integer lags 1
option real    beta
option integer gsize 300
option real    pi    .05
option integer boot  0
option switch  print 1
*
local series d1 d2 u1 u2 v1 v2 w e
local vect[integer] reglist
local integer k
local integer startl endl
local real beta0
local real teststat realstat pvalue
local vect gammav
local integer i
*
if .not.%defined(beta) {
   disp "@HSLMTEST requires option BETA"
   return
}
inquire(reglist) startl<<start endl<<end
# y1{0 to lags+1} y2{0 to lags+1}
*
set d1 = y1-y1{1}
set d2 = y2-y2{1}
*
* Build up a regressor list consisting of 1, w{1} and k lags of d1 and d2.
* (Note that w isn't defined yet).
*
compute reglist=%rladdlag(||constant||,w,1)
compute k=lags
if k>0 {
   compute reglist=%rladdlaglist(reglist,d1,%seq(1,k))
   compute reglist=%rladdlaglist(reglist,d2,%seq(1,k))
}
compute beta0=beta
*
* Make gamma as a grid on the range of the threshold value. w is
* temporarily set to the LAGGED error-correction term so the minimum and
* maximum are taken over the values that actually determine the regimes
* within the estimation range.
*
set w = y1{1}-beta0*y2{1}
stats(fractiles,noprint) w startl endl
compute gammav=%seqa(%minimum,(%maximum-%minimum)/(gsize-1),gsize)
*
* Now define w as the contemporaneous error-correction term (the regressor
* list and the threshold split both use w{1}) and get the residuals from the
* linear (no threshold) model. The CMOM is computed once and reused by both
* regressions.
*
set w = y1-beta0*y2
cmom startl endl
# reglist d1 d2
linreg(noprint,cmom) d1 startl endl u1
# reglist
linreg(noprint,cmom) d2 startl endl u2
# reglist
*
@HSSupLMStat(w=w,reglist=reglist,pi=pi,gammas=gammav) startl endl u1 u2 realstat
compute %cdstat=realstat
*
* Without bootstrapping there is no p-value to compute. Still show the
* statistic when PRINT is on, so a default call does not finish silently.
* %SIGNIF is left untouched in this case.
*
if boot==0 {
   if print {
      disp "SupLM Statistic" realstat
   }
   return
}
compute pvalue=0
*
* The Gauss code on Hansen's web site draws separate N(0,1) values for the
* residual bootstrap. That isn't the correct procedure, since it would break the
* correlation structure of the data. Drawing a single number and multiplying both
* by it (as is done here) is the correct procedure.
*
do i=1,boot
   set e startl endl = %ran(1.0)
   set v1 startl endl = e*u1
   set v2 startl endl = e*u2
   @HSSupLMStat(w=w,reglist=reglist,pi=pi,gammas=gammav) startl endl v1 v2 teststat
   compute pvalue=pvalue+(teststat>realstat)
end do i
*
* p-value = fraction of bootstrap statistics exceeding the sample statistic
*
compute pvalue=pvalue/boot
if print {
   disp "SupLM Statistic" realstat
   disp "Fixed regressor bootstrap p-value" pvalue
}
compute %signif=pvalue
end
**********************************************************
*
* Bivariate tests of threshold cointegration
*
*
* ratepair is a 9 x 2 integer array of series handles, one row per pair of
* rates to be tested. In each row the first rate is copied to SHORT and the
* second to LONG.
*
dec rect[int] ratepair
compute ratepair=||r1m,r2m|r1m,r3m|r1m,r6m|r3m,r6m|r3m,r12m|$
                   r3m,r120m|r12m,r24m|r12m,r120m|r24m,r120m||
*
* For each pair, copy the two rates into working series and run the test
* with beta fixed at 1.0, one lag and 250 bootstrap draws. LONG is passed as
* y1 and SHORT as y2, so the error-correction term is long-1.0*short.
* Because the p-values come from random draws, they will vary from run to
* run unless the random number seed is fixed.
*
do i=1,9
   set short = ratepair(i,1){0}
   set long  = ratepair(i,2){0}
   @hslmtest(noprint,beta=1.0,lags=1,boot=250) 1957:3 * long short
   disp "Test of" %l(ratepair(i,1)) "and" %l(ratepair(i,2)) @20 %signif
end do i
*
* Estimation of threshold cointegration model, searching over grids for both
* beta and gamma (neither option is supplied).
*
@hansenseo 1957:3 * r120m r12m
*
* The analysis actually done for the paper didn't use the grid procedure on gamma
* described in the paper. Instead of an equally spaced grid, it used empirical
* values for the w series. This caused it to be a bit sparse in places, and, in
* particular, it missed the parameter pair caught by the fuller search done
* above. The VAR in the paper can be replicated by feeding in the published
* values for beta and gamma.
*
@hansenseo(beta=.984,gamma=-.63) 1957:3 * r120m r12m

