1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
PRESERVE.
SET printback=off.
DEFINE !BCNON(             
               SAMPLES=!TOKENS(1) !DEFAULT(1000)
              /VAR    =!TOKENS(1)
              /ALPHA1 =!TOKENS(1) !DEFAULT(0.05)
              /ALPHA2 =!TOKENS(1) !DEFAULT(0.95)
              /DEBUG  = !CMDEND   !DEFAULT('N')   ).
****************************************************************************
* This macro produces a bootstrapped nonparametric confidence interval
* for the variance of a variable on the active file. 
* The method is described in "An Introduction to the Bootstrap" by B Efron
* and R J Tibshirani, Chapman & Hall, 1993, pages 184-188, describing the 
* BC method.
* I am grateful to SPSS Inc for the resampling code, (see
* http://www.spss.com/tech/answer/result.cfm?tech_tan_id=100000795 )
* and to Raynald Levesque for suggesting a method for getting the number
* of observations in a file into a loop counter.
*
* A typical use of this macro might be:- 
* !BCNON SAMPLES=1000 VAR=hhinc ALPHA1=0.05 ALPHA2=0.95.
* where
* SAMPLES=1000 is the number of bootstrap samples to be drawn
* VAR is the name of the variable for which the confidence interval is required
* ALPHA1 and ALPHA2 specify the lower and upper limits required
*
* VAR is the only essential parameter; if the others are omitted
* they will be replaced by defaults, so
* !BCNON VAR=hhinc.
* is the minimum specification.
*
* David Hitchin, University of Sussex, 7th March 2001
* ccfg4@sussex.ac.uk
* 
****************************************************************************.
PRESERVE.
!IF ( !DEBUG !EQ 'N') !THEN
SET printback=off mprint off.                                                   
!ELSE
SET printback on mprint on.
!IFEND .
SET SEED=random.
*---------------------------------------------------------------------------.
* Save original active file to give back after macro is done.
*---------------------------------------------------------------------------.
!IF (!DEBUG !EQ 'N') !THEN 
SET RESULTS ON.
DO IF $CASENUM=1.
PRINT / "NOTE: ALL OUTPUT INCLUDING ERROR MESSAGES HAVE BEEN TEMPORARILY" 
      / "SUPPRESSED. IF YOU EXPERIENCE UNUSUAL BEHAVIOR, RERUN THIS" 
      / "MACRO WITH AN ADDITIONAL ARGUMENT /DEBUG='Y'."  
      / "BEFORE DOING THIS YOU SHOULD RESTORE YOUR DATA FILE."
      / "THIS WILL FACILITATE FURTHER DIAGNOSIS OF ANY PROBLEMS.".
END IF.
!IFEND .
SAVE OUTFILE='C:\\Temp\\rr__tmp1.sav'.

SELECT IF (NOT MISSING(!VAR)).
COMPUTE sample=1.
COMPUTE id=$casenum.

* A bit of trickery here to get number of cases into the input program loop.
RANK VARIABLES=!VAR  (A) /N INTO N.
EXECUTE.
DO IF $casenum=1.
WRITE OUTFILE='C:\\Temp\\syntax.sps'  /'DEFINE !NBCASES()'N '!ENDDEFINE.'.
END IF.
EXECUTE.
INCLUDE FILE='C:\\Temp\\syntax.sps' .

* DESCRIPTIVES  VARIABLES=!VAR /STATISTICS=VARIANCE .
SAVE OUTFILE='C:\\Temp\\temp1.sav'/keep=!VAR,sample,id.
GET FILE='C:\\Temp\\temp1.sav'.
AGGREGATE  /OUTFILE='C:\\Temp\\sd.sav'/BREAK=sample/s = SD(!VAR).

* The bootstrapping begins here.
INPUT PROGRAM .
LOOP SAMPLE=1 to !SAMPLES.
  LOOP N = 1 to !NBCASES.
    COMPUTE ID=TRUNC(UNIFORM(!NBCASES)) + 1.
    END CASE.
    LEAVE SAMPLE.
    END LOOP.
END LOOP.
END FILE.
END INPUT PROGRAM .

FORMATS ID SAMPLE N (F6.0).
SORT CASES BY ID .
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\temp1.sav' / BY ID.
AGGREGATE  /OUTFILE=*/BREAK=sample/z = SD(!VAR).
COMPUTE z=z*z.
COMPUTE sample=1.
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\sd.sav' / BY sample.

COMPUTE theta=s*s.
COMPUTE thetals=(z < theta).
AGGREGATE  /OUTFILE='C:\\Temp\\theta.sav'/ BREAK=sample / stless=SUM(thetals) / thetabar=MEAN(z) / N_BREAK=N.
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\theta.sav' / BY sample.

COMPUTE z0=stless/n_break.
COMPUTE z0hat=IDF.NORMAL(z0,0,1).

COMPUTE zalph=IDF.NORMAL(!ALPHA2,0,1).
COMPUTE z1malph=IDF.NORMAL(!ALPHA1,0,1).

COMPUTE t2=(thetabar-theta)*(thetabar-theta).
COMPUTE t3=t2*(thetabar-theta).
AGGREGATE  /OUTFILE='C:\\Temp\\tt.sav'/ BREAK=sample / tt2=SUM(t2) / tt3=SUM(t3).
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\tt.sav' / BY sample.

COMPUTE alphahat=tt3/ (6*tt2)**1.5 .
COMPUTE a=z0hat+(z0hat+z1malph)/(1-alphahat*(z0hat+z1malph)).
COMPUTE a1=CDF.NORMAL(a,0,1).
COMPUTE a=z0hat+(z0hat+zalph)/(1-alphahat*(z0hat+zalph)).
COMPUTE a2=CDF.NORMAL(a,0,1).

COMPUTE pl=a1*100.
COMPUTE pu=a2*100.
COMPUTE cl=RND(n_break*a1).
COMPUTE cu=RND(n_break*a2).
PRINT FORMATS z to theta,thetabar,z0 to pu (f6.4)/
 thetals,stless,cl,cu,n_break (f6.0)/ sample (f2.0).

COMPUTE filter_$=($casenum=1).
VARIABLE LABEL filter_$ '$casenum=1 (FILTER)'.
VALUE LABELS filter_$  0 'Not Selected' 1 'Selected'.
FORMAT filter_$ (f1.0).
FILTER BY filter_$.
*DESCRIPTIVES VARIABLES=sample z s theta thetals stless thetabar n_break z0 z0hat zalph
  z1malph t2 t3 tt2 tt3 alphahat a a1 a2 pl pu cl cu
  /STATISTICS=MEAN STDDEV MIN MAX .
FILTER OFF.
USE ALL.

SORT CASES BY  z (A).
IF ($casenum=cl) lci=z.
IF ($casenum=cu) uci=z.
AGGREGATE  /OUTFILE=* /BREAK=sample /lci_1 = MIN(lci) /uci_1 = MAX(uci).
RENAME VARIABLES  (lci_1 =lci)(uci_1=uci).
* SUMMARIZE  /TABLES=lci uci  /FORMAT=VALIDLIST NOCASENUM TOTAL LIMIT=100
*  /TITLE='Case Summaries'  /MISSING=VARIABLE  /CELLS=COUNT .
MATRIX.
GET m /VARIABLES=lci,uci.
GET s /FILE='C:\\Temp\\sd.sav' /VARIABLES=s.
COMPUTE s=s*s.
PRINT s /FORMAT="F14.6"/TITLE="Variance"/CLABELS=" ".
PRINT m /FORMAT="F14.6"/TITLE="Confidence limits for variance"/CLABELS="Lower","Upper".
COMPUTE lower=!ALPHA1.
COMPUTE upper=!ALPHA2.
PRINT lower/FORMAT="F7.3"/TITLE="Alpha1"/CLABELS=" ".
PRINT upper/FORMAT="F7.3"/TITLE="Alpha2"/CLABELS=" ".
END MATRIX.
ERASE FILE='C:\\Temp\\temp1.sav'.
ERASE FILE='C:\\Temp\\sd.sav'.
ERASE FILE='C:\\Temp\\theta.sav'.
ERASE FILE='C:\\Temp\\tt.sav'.
ERASE FILE='C:\\Temp\\syntax.sps'.
GET FILE='C:\\Temp\\rr__tmp1.sav'.

RESTORE.
!ENDDEFINE.
RESTORE.

* Test data from Efron, page 180. Test runs will not exactly
* replicate the results in the book as different random samples
* will be drawn in different runs.
Data list free/zz.
Begin data.
48 36 20 29 42 42 20 42 22 41 45 14 6 
0 33 28 34 4 32 24 47 41 24 26 30 
end data.
!BCNON SAMPLES=2000 VAR=zz ALPHA1=0.05 ALPHA2=0.95.