Hodges-Lehmann Confidence Interval for Median difference
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 | * CI for the difference between two medians by Hodges-Lehmann method: Hodges JR and Lehmann EL (1963) 'Estimates of location based on rank tests' Annals of Mathematical Statistics 34, 598-611. * Improved sorting algorithm added 03/16/05 *. * Although the example provided has equal sample sizes, it will work with unequal sample sizes, too. * Code by Marta Garcia-Granero. * Example dataset *. DATA LIST FREE /group(F4.0) wghtgain(F8.0). BEGIN DATA 1 175 1 149 1 132 1 187 1 218 1 123 1 151 1 248 1 200 1 206 1 219 1 179 1 234 1 206 2 142 2 214 2 311 2 249 2 337 2 176 2 262 2 211 2 302 2 216 2 195 2 236 2 253 2 199 END DATA. VAR LABEL group 'Treatment group' /wghtgain 'Weight gain (pounds)'. VALUE LABELS group 1 'Control' 2 'Vit. A'. ************************************************ * CI CODE * ************************************************ * If n1*n2>10000, change MXLOOPS accordingly. * Data must be presorted by group and depvar. * A temporary listwise deletion of missing data is done before running MATRIX *. SET MXLOOPS=10000. SORT CASES BY group (A) wghtgain (A) . TEMPORARY. SELECT IF ((NOT MISSING(group)) AND (NOT MISSING(wghtgain))). MATRIX. PRINT /TITLE 'HODGES-LEHMANN CONFIDENCE INTERVAL FOR THE DIFFERENCE BETWEEN TWO MEDIANS'. PRINT /TITLE 'Underlying assumption: the distributions are similar in shape'. * Get data: replace variable names by your own *. GET group /VAR=group. GET depvar /VAR=wghtgain. * Get group values, count sample sizes & split data in two groups *. COMPUTE totaln=NROW(group). COMPUTE ngrp1=group(1). COMPUTE ngrp2=group(totaln). PRINT {ngrp1;ngrp2} /TITLE='Group values' /RLABELS='Group A=' 'Group B=' /FORMAT='f5.0'. COMPUTE n=CSUM(DESIGN(group)). PRINT {T(n)} /FORMAT='f5.0' /TITLE='Sample sizes' /RLABELS=' N(a)=' ' N(b)='. DO IF RMIN(n) GT 1. /* Don't compute CI if any ni=1. COMPUTE group1=MAKE(1,n(1),0). COMPUTE group2=MAKE(1,n(2),0). LOOP i=1 TO n(1). + COMPUTE group1(i)=depvar(i). END LOOP. LOOP i=1+n(1) TO totaln. + COMPUTE group2(i-n(1))=depvar(i). END LOOP. RELEASE ngrp1,ngrp2,group,depvar,totaln. * Five points summary for both samples *. COMPUTE min1=RMIN(group1). COMPUTE max1=RMAX(group1). COMPUTE pair=(TRUNC(n(1)/2) EQ n(1)/2). /* Check if n(1) is odd (0) or even (1) *. DO IF pair EQ 1. /* Tukey's hinges for even samples *. - COMPUTE lmedian=group1(n(1)/2). - COMPUTE umedian=group1(1+n(1)/2). - COMPUTE median1=(lmedian+umedian)/2. - RELEASE lmedian,umedian. - DO IF TRUNC(n(1)/4) NE (n(1)/4)). - COMPUTE q11=group1(1+(n(1)/2)/2). - COMPUTE q31=group1(1+(3*n(1)/2)/2). - ELSE. - COMPUTE lq11=group1(n(1)/4). - COMPUTE uq11=group1(1+n(1)/4). - COMPUTE lq31=group1(3*n(1)/4). - COMPUTE uq31=group1(1+3*n(1)/4). - COMPUTE q11=(lq11+uq11)/2. - COMPUTE q31=(lq31+uq31)/2. - RELEASE lq11,uq11,lq31,uq31. - END IF. ELSE. /* Tukey's hinges for odd samples *. - COMPUTE median1=group1((n(1)+1)/2). - DO IF TRUNC((1+n(1))/4) EQ (1+n(1))/4). - COMPUTE lq11=group1((n(1)+1)/4). - COMPUTE uq11=group1(1+(n(1)+1)/4). - COMPUTE lq31=group1(3*(n(1)+1)/4-1). - COMPUTE uq31=group1(3*(n(1)+1)/4). - COMPUTE q11=(lq11+uq11)/2. - COMPUTE q31=(lq31+uq31)/2. - RELEASE lq11,uq11,lq31,uq31. - ELSE. - COMPUTE q11=group1(((1+(n(1)+1)/2))/2). - COMPUTE q31=group1(((3*(n(1)+1)/2)-1)/2). - END IF. END IF. COMPUTE min2=RMIN(group2). COMPUTE max2=RMAX(group2). COMPUTE pair=(TRUNC(n(2)/2) EQ n(2)/2). /* Check if n(2) is odd (0) or even (1) *. DO IF pair EQ 1. /* Tukey's hinges for even samples *. - COMPUTE lmedian=group2(n(2)/2). - COMPUTE umedian=group2(1+n(2)/2). - COMPUTE median2=(lmedian+umedian)/2. - RELEASE lmedian,umedian. - DO IF TRUNC(n(2)/4) NE (n(2)/4)). - COMPUTE q12=group2(1+(n(2)/2)/2). - COMPUTE q32=group2(1+(3*n(2)/2)/2). - ELSE. - COMPUTE lq12=group2(n(2)/4). - COMPUTE uq12=group2(1+n(2)/4). - COMPUTE lq32=group2(3*n(2)/4). - COMPUTE uq32=group2(1+3*n(2)/4). - COMPUTE q12=(lq12+uq12)/2. - COMPUTE q32=(lq32+uq32)/2. - RELEASE lq12,uq12,lq32,uq32. - END IF. ELSE. /* Tukey's hinges for odd samples *. - COMPUTE median2=group2((n(2)+1)/2). - DO IF TRUNC((1+n(2))/4) EQ (1+n(2))/4). - COMPUTE lq12=group2((n(2)+1)/4). - COMPUTE uq12=group2(1+(n(2)+1)/4). - COMPUTE lq32=group2(3*(n(2)+1)/4-1). - COMPUTE uq32=group2(3*(n(2)+1)/4). - COMPUTE q12=(lq12+uq12)/2. - COMPUTE q32=(lq32+uq32)/2. - RELEASE lq12,uq12,lq32,uq32. - ELSE. - COMPUTE q12=group2(((1+(n(2)+1)/2))/2). - COMPUTE q32=group2(((3*(n(2)+1)/2)-1)/2). - END IF. END IF. PRINT {median1,q11,q31,min1,max1,(q31-q11),(max1-min1);median2,q12,q32,min2,max2,(q32-q12),(max2-min2)} /FORMAT='f8.1' /TITLE='Five point summaries & measures of spread' /CLABELS='Median' 'Q1' 'Q3' 'Min' 'Max' 'IQR' 'Range' /RLABELS='Group A' 'Group B'. RELEASE median1,q11,q31,min1,max1,median2,q12,q32,min2,max2. * Compute vector of all differences *. COMPUTE n1n2=n(1)&*n(2). COMPUTE diff=MAKE(1,n1n2,0). LOOP i=1 TO n(1). - LOOP j=1 TO n(2). - COMPUTE diff(n(2)*(i-1)+j)=group1(i)-group2(j). - END LOOP. END LOOP. * Sort differences (algorithm by R Ristow & J Peck) *. COMPUTE sdiff = diff. COMPUTE sdiff(grade(diff)) = diff. RELEASE diff,group1,group2. * Compute median of differences *. COMPUTE pair=(TRUNC(n1n2/2) EQ n1n2/2). /* Check if n1n2 is odd (0) or even (1) *. DO IF pair EQ 0. /* Median formula for odd samples. - COMPUTE median=sdiff((n1n2+1)/2). ELSE. /* Median formula for even samples *. - COMPUTE median=(sdiff(n1n2/2)+sdiff(1+(n1n2/2)))/2. END IF. RELEASE pair. PRINT median /TITLE='Difference between population medians * (A - B) ' /RLABELS='Point' /FORMAT='f8.1'. PRINT /TITLE='(*) Remember it is NOT the same as the difference between sample medians'. * Exact or asymptotic 95% & 99% CI *. DO IF ((n(1) LE 20) AND (n(2) LE 20)). /* Exact Mann-Whitney's critical values *. - COMPUTE d95={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0; 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,2,2,2,2; 0,0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8; 0,0,0,0,1,2,3,4,4,5,6,7,8,9,10,11,11,12,13,13; 0,0,0,1,2,3,5,6,7,8,9,11,12,13,14,15,17,18,19,20; 0,0,1,2,3,5,6,8,10,11,13,14,16,17,19,21,22,24,25,27; 0,0,1,3,5,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34; 0,0,2,4,6,8,10,13,15,17,19,22,24,26,29,31,34,36,38,41; 0,0,2,4,7,10,12,15,17,20,23,26,28,31,34,37,39,42,45,48; 0,0,3,5,8,11,14,17,20,23,26,29,33,36,39,42,45,48,52,55; 0,0,3,6,9,13,16,19,23,26,30,33,37,40,44,47,51,55,58,62; 0,1,4,7,11,14,18,22,26,29,33,37,41,45,49,53,57,61,65,69; 0,1,4,8,12,16,20,24,28,33,37,41,45,50,54,59,63,67,72,76; 0,1,5,9,13,17,22,26,31,36,40,45,50,55,59,64,69,74,78,83; 0,1,5,10,14,19,24,29,34,39,44,49,54,59,64,70,75,80,85,90; 0,1,6,11,15,21,26,31,37,42,47,53,59,64,70,75,81,86,92,98; 0,2,6,11,17,22,28,34,39,45,51,57,63,69,75,81,87,93,99,105; 0,2,7,12,18,24,30,36,42,48,55,61,67,74,80,86,93,99,106,112; 0,2,7,13,19,25,32,38,45,52,58,65,72,78,85,92,99,106,113,119; 0,2,8,13,20,27,34,41,48,55,62,69,76,83,90,98,105,112,119,127}. - COMPUTE d99={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0; 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0; 0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,2,2,2,3,3; 0,0,0,0,0,0,0,1,1,2,2,3,3,4,5,5,6,6,7,8; 0,0,0,0,0,1,1,2,3,4,5,6,7,7,8,9,10,11,12,13; 0,0,0,0,1,2,3,4,5,6,7,9,10,11,12,13,15,16,17,18; 0,0,0,0,1,3,4,6,7,9,10,12,13,15,16,18,19,21,22,24; 0,0,0,1,2,4,6,7,9,11,13,15,17,18,20,22,24,26,28,30; 0,0,0,1,3,5,7,9,11,13,16,18,20,22,24,27,29,31,33,36; 0,0,0,2,4,6,9,11,13,16,18,21,24,26,29,31,34,37,39,42; 0,0,0,2,5,7,10,13,16,18,21,24,27,30,33,36,39,42,45,48; 0,0,1,3,6,9,12,15,18,21,24,27,31,34,37,41,44,47,51,54; 0,0,1,3,7,10,13,17,20,24,27,31,34,38,42,45,49,53,57,60; 0,0,1,4,7,11,15,18,22,26,30,34,38,42,46,50,54,58,63,67; 0,0,2,5,8,12,16,20,24,29,33,37,42,46,51,55,60,64,69,73; 0,0,2,5,9,13,18,22,27,31,36,41,45,50,55,60,65,70,74,79; 0,0,2,6,10,15,19,24,29,34,39,44,49,54,60,65,70,75,81,86; 0,0,2,6,11,16,21,26,31,37,42,47,53,58,64,70,75,81,87,92; 0,0,3,7,12,17,22,28,33,39,45,51,57,63,69,74,81,87,93,99; 0,0,3,8,13,18,24,30,36,42,48,54,60,67,73,79,86,92,99,105}. - COMPUTE u95=d95(n(1),n(2)). - COMPUTE u99=d99(n(1),n(2)). - PRINT /TITLE='Exact confidence intervals calculated (N(a) and N(b) =< 20)'. - DO IF u95 EQ 0. - PRINT /TITLE'Warning: sample size is too low for accurate 95%CI. Discard it.'. - COMPUTE u95=1. /* Replace 0 by 1 to avoid a computing error. - END IF. - DO IF u99 EQ 0. - PRINT /TITLE='Warning: sample size is too low for accurate 99%CI. Discard it.'. - COMPUTE u99=1. /* Replace 0 by 1 to avoid a computing error. - END IF. - RELEASE d95,d99. ELSE. /* Asymptotic Mann-Whitney's critical values *. - COMPUTE u95=TRUNC(n1n2/2-0.5-1.960*sqrt(n1n2*(n(1)+n(2)+1)/12)). - COMPUTE u99=TRUNC(n1n2/2-0.5-2.576*sqrt(n1n2*(n(1)+n(2)+1)/12)). - PRINT /TITLE'Asymptotic confidence intervals calculated (N(a) and/or N(b) > 20)'. END IF. COMPUTE low95=sdiff(u95). COMPUTE high95=sdiff(n1n2-u95). COMPUTE low99=sdiff(u99). COMPUTE high99=sdiff(n1n2-u99). PRINT {low95,high95;low99,high99} /FORMAT='f8.1' /TITLE='CI for difference between medians' /RLABELS='95%Level' '99%Level' /CLABELS='Lower' 'Upper'. ELSE. PRINT /TITLE='At least one sample size = 1. No CI can be calculated.'. END IF. END MATRIX. |
Related pages
...