1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
* CI for the difference between two medians by Hodges-Lehmann method:
  Hodges JR and Lehmann EL (1963) 'Estimates of location based on rank tests'
  Annals of Mathematical Statistics 34, 598-611.

* Improved sorting algorithm added 03/16/05 *.  

* Although the example provided has equal sample sizes,
  it will work with unequal sample sizes, too.
* Code by Marta Garcia-Granero.

* Example dataset *.
DATA LIST FREE /group(F4.0) wghtgain(F8.0).
BEGIN DATA
1 175 1 149 1 132 1 187 1 218 1 123 1 151
1 248 1 200 1 206 1 219 1 179 1 234 1 206
2 142 2 214 2 311 2 249 2 337 2 176 2 262
2 211 2 302 2 216 2 195 2 236 2 253 2 199
END DATA.
VAR LABEL group 'Treatment group'  /wghtgain 'Weight gain (pounds)'.
VALUE LABELS group 1 'Control' 2 'Vit. A'.

************************************************
*                     CI CODE                  *
************************************************
* If n1*n2>10000, change MXLOOPS accordingly.
* Data must be presorted by group and depvar.
* A temporary listwise deletion of missing data is done before running MATRIX *.

SET MXLOOPS=10000.
SORT CASES BY group (A) wghtgain (A) .
TEMPORARY.
SELECT IF ((NOT MISSING(group)) AND (NOT MISSING(wghtgain))).

MATRIX.
PRINT /TITLE 'HODGES-LEHMANN CONFIDENCE INTERVAL FOR THE DIFFERENCE BETWEEN TWO MEDIANS'.
PRINT /TITLE 'Underlying assumption: the distributions are similar in shape'.
* Get data: replace variable names by your own *.
GET group /VAR=group.
GET depvar /VAR=wghtgain.
* Get group values, count sample sizes & split data in two groups *.
COMPUTE totaln=NROW(group).
COMPUTE ngrp1=group(1).
COMPUTE ngrp2=group(totaln).
PRINT {ngrp1;ngrp2}
 /TITLE='Group values'
 /RLABELS='Group A=' 'Group B='
 /FORMAT='f5.0'.
COMPUTE n=CSUM(DESIGN(group)).
PRINT {T(n)}
 /FORMAT='f5.0'
 /TITLE='Sample sizes'
 /RLABELS='   N(a)=' '   N(b)='.
DO IF RMIN(n) GT 1. /* Don't compute CI if any ni=1.
COMPUTE group1=MAKE(1,n(1),0).
COMPUTE group2=MAKE(1,n(2),0).
LOOP i=1 TO n(1).
+ COMPUTE group1(i)=depvar(i).
END LOOP.
LOOP i=1+n(1) TO totaln.
+ COMPUTE group2(i-n(1))=depvar(i).
END LOOP.
RELEASE ngrp1,ngrp2,group,depvar,totaln.
* Five points summary for both samples *.
COMPUTE min1=RMIN(group1).
COMPUTE max1=RMAX(group1).
COMPUTE pair=(TRUNC(n(1)/2) EQ n(1)/2). /* Check if n(1) is odd (0) or even (1) *. 
DO IF pair EQ 1.                        /* Tukey's hinges for even samples *.
- COMPUTE lmedian=group1(n(1)/2).
- COMPUTE umedian=group1(1+n(1)/2).
- COMPUTE median1=(lmedian+umedian)/2.
- RELEASE lmedian,umedian.
- DO IF TRUNC(n(1)/4) NE (n(1)/4)).
-  COMPUTE q11=group1(1+(n(1)/2)/2).
-  COMPUTE q31=group1(1+(3*n(1)/2)/2).
- ELSE.
-  COMPUTE lq11=group1(n(1)/4).
-  COMPUTE uq11=group1(1+n(1)/4).
-  COMPUTE lq31=group1(3*n(1)/4).
-  COMPUTE uq31=group1(1+3*n(1)/4).
-  COMPUTE q11=(lq11+uq11)/2.
-  COMPUTE q31=(lq31+uq31)/2.
-  RELEASE lq11,uq11,lq31,uq31.
- END IF.
ELSE.                                      /* Tukey's hinges for odd samples *.
- COMPUTE median1=group1((n(1)+1)/2).
- DO IF TRUNC((1+n(1))/4) EQ (1+n(1))/4).
-  COMPUTE lq11=group1((n(1)+1)/4).
-  COMPUTE uq11=group1(1+(n(1)+1)/4).
-  COMPUTE lq31=group1(3*(n(1)+1)/4-1).
-  COMPUTE uq31=group1(3*(n(1)+1)/4).
-  COMPUTE q11=(lq11+uq11)/2.
-  COMPUTE q31=(lq31+uq31)/2.
-  RELEASE lq11,uq11,lq31,uq31.
- ELSE.
-  COMPUTE q11=group1(((1+(n(1)+1)/2))/2).
-  COMPUTE q31=group1(((3*(n(1)+1)/2)-1)/2). 
- END IF.
END IF.
COMPUTE min2=RMIN(group2).
COMPUTE max2=RMAX(group2).
COMPUTE pair=(TRUNC(n(2)/2) EQ n(2)/2). /* Check if n(2) is odd (0) or even (1) *. 
DO IF pair EQ 1.                        /* Tukey's hinges for even samples *.
- COMPUTE lmedian=group2(n(2)/2).
- COMPUTE umedian=group2(1+n(2)/2).
- COMPUTE median2=(lmedian+umedian)/2.
- RELEASE lmedian,umedian.
- DO IF TRUNC(n(2)/4) NE (n(2)/4)).
-  COMPUTE q12=group2(1+(n(2)/2)/2).
-  COMPUTE q32=group2(1+(3*n(2)/2)/2).
- ELSE.
-  COMPUTE lq12=group2(n(2)/4).
-  COMPUTE uq12=group2(1+n(2)/4).
-  COMPUTE lq32=group2(3*n(2)/4).
-  COMPUTE uq32=group2(1+3*n(2)/4).
-  COMPUTE q12=(lq12+uq12)/2.
-  COMPUTE q32=(lq32+uq32)/2.
-  RELEASE lq12,uq12,lq32,uq32.
- END IF.
ELSE.                                      /* Tukey's hinges for odd samples *.
- COMPUTE median2=group2((n(2)+1)/2).
- DO IF TRUNC((1+n(2))/4) EQ (1+n(2))/4).
-  COMPUTE lq12=group2((n(2)+1)/4).
-  COMPUTE uq12=group2(1+(n(2)+1)/4).
-  COMPUTE lq32=group2(3*(n(2)+1)/4-1).
-  COMPUTE uq32=group2(3*(n(2)+1)/4).
-  COMPUTE q12=(lq12+uq12)/2.
-  COMPUTE q32=(lq32+uq32)/2.
-  RELEASE lq12,uq12,lq32,uq32.
- ELSE.
-  COMPUTE q12=group2(((1+(n(2)+1)/2))/2).
-  COMPUTE q32=group2(((3*(n(2)+1)/2)-1)/2). 
- END IF.
END IF.
PRINT {median1,q11,q31,min1,max1,(q31-q11),(max1-min1);median2,q12,q32,min2,max2,(q32-q12),(max2-min2)}
 /FORMAT='f8.1'
 /TITLE='Five point summaries & measures of spread'
 /CLABELS='Median' 'Q1' 'Q3' 'Min' 'Max' 'IQR' 'Range'
 /RLABELS='Group A' 'Group B'.
RELEASE median1,q11,q31,min1,max1,median2,q12,q32,min2,max2.
* Compute vector of all differences *.
COMPUTE n1n2=n(1)&*n(2).
COMPUTE diff=MAKE(1,n1n2,0).
LOOP i=1 TO n(1).
- LOOP j=1 TO n(2).
-  COMPUTE diff(n(2)*(i-1)+j)=group1(i)-group2(j).
- END LOOP.
END LOOP.
* Sort differences (algorithm by R Ristow & J Peck) *.
COMPUTE sdiff = diff.
COMPUTE sdiff(grade(diff)) = diff.
RELEASE diff,group1,group2.
* Compute median of differences *.
COMPUTE pair=(TRUNC(n1n2/2) EQ n1n2/2). /* Check if n1n2 is odd (0) or even (1) *. 
DO IF pair EQ 0.                        /* Median formula for odd samples. 
- COMPUTE median=sdiff((n1n2+1)/2). 
ELSE.                                   /* Median formula for even samples *.
- COMPUTE median=(sdiff(n1n2/2)+sdiff(1+(n1n2/2)))/2. 
END IF.
RELEASE pair.
PRINT median
 /TITLE='Difference between population medians * (A - B) '
 /RLABELS='Point'
 /FORMAT='f8.1'.
PRINT /TITLE='(*) Remember it is NOT the same as the difference between sample medians'.
* Exact or asymptotic 95% & 99% CI *.
DO IF ((n(1) LE 20) AND (n(2) LE 20)). /* Exact Mann-Whitney's critical values *.
- COMPUTE d95={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0;
               0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,2,2,2,2;
               0,0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8;
               0,0,0,0,1,2,3,4,4,5,6,7,8,9,10,11,11,12,13,13;
               0,0,0,1,2,3,5,6,7,8,9,11,12,13,14,15,17,18,19,20;
               0,0,1,2,3,5,6,8,10,11,13,14,16,17,19,21,22,24,25,27;
               0,0,1,3,5,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34;
               0,0,2,4,6,8,10,13,15,17,19,22,24,26,29,31,34,36,38,41;
               0,0,2,4,7,10,12,15,17,20,23,26,28,31,34,37,39,42,45,48;
               0,0,3,5,8,11,14,17,20,23,26,29,33,36,39,42,45,48,52,55;
               0,0,3,6,9,13,16,19,23,26,30,33,37,40,44,47,51,55,58,62;
               0,1,4,7,11,14,18,22,26,29,33,37,41,45,49,53,57,61,65,69;
               0,1,4,8,12,16,20,24,28,33,37,41,45,50,54,59,63,67,72,76;
               0,1,5,9,13,17,22,26,31,36,40,45,50,55,59,64,69,74,78,83;
               0,1,5,10,14,19,24,29,34,39,44,49,54,59,64,70,75,80,85,90;
               0,1,6,11,15,21,26,31,37,42,47,53,59,64,70,75,81,86,92,98;
               0,2,6,11,17,22,28,34,39,45,51,57,63,69,75,81,87,93,99,105;
               0,2,7,12,18,24,30,36,42,48,55,61,67,74,80,86,93,99,106,112;
               0,2,7,13,19,25,32,38,45,52,58,65,72,78,85,92,99,106,113,119;
               0,2,8,13,20,27,34,41,48,55,62,69,76,83,90,98,105,112,119,127}.
- COMPUTE d99={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0;
               0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0;
               0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,2,2,2,3,3;
               0,0,0,0,0,0,0,1,1,2,2,3,3,4,5,5,6,6,7,8;
               0,0,0,0,0,1,1,2,3,4,5,6,7,7,8,9,10,11,12,13;
               0,0,0,0,1,2,3,4,5,6,7,9,10,11,12,13,15,16,17,18;
               0,0,0,0,1,3,4,6,7,9,10,12,13,15,16,18,19,21,22,24;
               0,0,0,1,2,4,6,7,9,11,13,15,17,18,20,22,24,26,28,30;
               0,0,0,1,3,5,7,9,11,13,16,18,20,22,24,27,29,31,33,36;
               0,0,0,2,4,6,9,11,13,16,18,21,24,26,29,31,34,37,39,42;
               0,0,0,2,5,7,10,13,16,18,21,24,27,30,33,36,39,42,45,48;
               0,0,1,3,6,9,12,15,18,21,24,27,31,34,37,41,44,47,51,54;
               0,0,1,3,7,10,13,17,20,24,27,31,34,38,42,45,49,53,57,60;
               0,0,1,4,7,11,15,18,22,26,30,34,38,42,46,50,54,58,63,67;
               0,0,2,5,8,12,16,20,24,29,33,37,42,46,51,55,60,64,69,73;
               0,0,2,5,9,13,18,22,27,31,36,41,45,50,55,60,65,70,74,79;
               0,0,2,6,10,15,19,24,29,34,39,44,49,54,60,65,70,75,81,86;
               0,0,2,6,11,16,21,26,31,37,42,47,53,58,64,70,75,81,87,92;
               0,0,3,7,12,17,22,28,33,39,45,51,57,63,69,74,81,87,93,99;
               0,0,3,8,13,18,24,30,36,42,48,54,60,67,73,79,86,92,99,105}.
- COMPUTE u95=d95(n(1),n(2)).
- COMPUTE u99=d99(n(1),n(2)).
- PRINT /TITLE='Exact confidence intervals calculated (N(a) and N(b) =< 20)'.
- DO IF u95 EQ 0.
-  PRINT /TITLE'Warning: sample size is too low for accurate 95%CI. Discard it.'.
-  COMPUTE u95=1. /* Replace 0 by 1 to avoid a computing error.
- END IF.
- DO IF u99 EQ 0.
-  PRINT /TITLE='Warning: sample size is too low for accurate 99%CI. Discard it.'.
-  COMPUTE u99=1. /* Replace 0 by 1 to avoid a computing error.
- END IF.
- RELEASE d95,d99.
ELSE. /* Asymptotic Mann-Whitney's critical values *.
- COMPUTE u95=TRUNC(n1n2/2-0.5-1.960*sqrt(n1n2*(n(1)+n(2)+1)/12)).
- COMPUTE u99=TRUNC(n1n2/2-0.5-2.576*sqrt(n1n2*(n(1)+n(2)+1)/12)).
- PRINT /TITLE'Asymptotic confidence intervals calculated (N(a) and/or N(b) > 20)'.
END IF.
COMPUTE low95=sdiff(u95).
COMPUTE high95=sdiff(n1n2-u95).
COMPUTE low99=sdiff(u99).
COMPUTE high99=sdiff(n1n2-u99).
PRINT {low95,high95;low99,high99}
 /FORMAT='f8.1'
 /TITLE='CI for difference between medians'
 /RLABELS='95%Level' '99%Level'
 /CLABELS='Lower' 'Upper'.
ELSE.
PRINT /TITLE='At least one sample size = 1. No CI can be calculated.'.
END IF.
END MATRIX.