1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
*In 1997, David Nichols at SPSS wrote syntax for kappa, which included the standard error, z-value, and p(sig.) value.
*This syntax is based on his, first using his syntax for the original four statistics.  The original syntax was expanded
*after having reviewed a paper presented at the annual meeting of the Southwest Educational Research Association
*by Jason E. King at Baylor College of Medicine (Software Solutions for Obtaining a Kappa-Type Statistic for Use with
*Multiple Raters).  The syntax here produces four sections of information.  The first section produces the raw rater 
*agreement (Pa), the number of items, number of raters, and number of categories.  The second section calculates
*k, standard error, z, and p based on the original syntax and adds the upper and lower 95% confidence limits.  The
*third section presents Fliess' corrected standard error together with recalculated z, p, and confidence limits.  The last
*section produces k, standard error, z, p, and confidence intervals for the individual categories.  Raw data is set up
*with individual cases or items as rows and raters as columns.  Entries in individual cells represent the category
*assigned to a particular case or item by a specific rater.  At the end of the macro is the command line which identifies
*the macro. I requires that the raters be identified in the same manner as line 1.This macro has been tested with 20
*raters, 20 categories, and 2000 cases.  If more are used, there may be a need to adjust the mxloops command.
*The syntax is configured to produce -99999.0 as a missing data code in the report in cases where a coding category is
*not utilized.

* Brian G. Dates 2006/02/23.

data list list /rater1 rater2 rater3 .
begin data
1 1 1
2 1 2
2 2 2
2 1 1
2 1 2
2 1 2
2 2 1
2 1 2
2 1 2
2 1 1
2 1 3
4 4 3
4 6 4
5 5 5
4 4 4
6 6 6
4 4 3
2 1 6
2 2 3
1 3 1
end data .
preserve.
set printback=off mprint=off.
save outfile='k__tmp1.sav'.

define cohensk (vars=!charend('/')).
set mxloops=100000.
count ms__=!vars (missing).
select if ms__=0.

*This section sets up a matrix(x) based on the raw data file, a matrix(y) with rows equal to the number of items and
*columns equal to the number of categories, then determines for y the number of responses per category for each
*case or item. 

matrix.
get x /var=!vars.
compute c=mmax(x).
compute y=make(nrow(x),c,0).
loop i=1 to nrow(x).
loop j=1 to ncol(x).
loop k=1 to c.
do if x(i,j)=k.
compute y(i,k)=y(i,k)+1.
end if.
end loop.
end loop.
end loop.

*This section computes the basic information and kappa and its related statistics.

compute pe=msum((csum(y)/msum(y))&**2).
compute k=ncol(x).
compute n=nrow(y) .
compute r=ncol(y) .
compute pa=mssq(y)/(nrow(y)*k*(k-1))-(1/(k-1)).
compute ck=(pa-pe)/(1-pe).
compute num=2*(pe-(2*k-3)*(pe**2)+2*(k-2)*msum((csum(y)/msum(y))&**3)).
compute den=nrow(y)*k*(k-1)*((1-pe)**2).
compute ase=sqrt(num/den).
compute z=ck/ase.
compute sig=1-chicdf(z**2,1) .
compute ckul=ck+1.96*ase .
compute ckll=ck-1.96*ase .

*This section computes the alternate standard error and related statistics based on Fliess' corrected formula.

compute nm=sqrt(n*k*(k-1)) .
compute vectora=csum(y)/msum(csum(y)) .
compute vectorb=1-csum(y)/msum(csum(y)) .
compute vectorc=1-2*(csum(y)/msum(csum(y))) .
compute vectord=vectora&*vectorb.
compute vectore=vectora&*vectorb&*vectorc .
compute e=msum(vectord) .
compute f =msum(vectore) .
compute fse=(sqrt(2)/(e*nm))*(sqrt(e**2-f)) .
compute fsez=ck/fse .
compute fsesig=1-chicdf(fsez**2,1) .
compute fseu=ck+1.96*fse .
compute fsel=ck-1.96*fse .

* This section computes the kappas for the individual categories.  Each statistic, e.g., k or standard error, is computed
* as a vector.  The vectors are then assembled into a matrix of all six statistics.  As part of this process, -99999.0 is 
* imputed as the missing data value.

compute matz=k-y .
compute mata=y&*matz .
compute vectorf=csum(mata)+(.0001) .
compute vectorg=vectord*(n*k*(k-1))+(.0001) .
compute vectorh=1-(vectorf&/vectorg) .
compute vectori=(1+(2*(k-1)*(csum(y)/msum(csum(y)))))&**2 .
compute vectorj=(2*(k-1))*vectord .
compute vectork=(n*k*(k-1)**2)*vectord+(.0001) .
compute vectorse=sqrt((vectori+vectorj)&/vectork) .
compute vectorz=vectorh&/(vectorse+.0001).
compute vectorp=1-cdfnorm(vectorz) .
compute vectorll=vectorh-(1.96*vectorse) .
compute vectorul=vectorh+(1.96*vectorse) .
loop i=1 to ncol(vectorh) .
do if (vectorh(i)=0.00) .
compute vectorh(i)=-99999) .
end if .
end loop .
loop i=1 to ncol(vectorh) .
do if (vectorh(i)=-99999) .
compute vectorse(i)=-99999 .
end if .
end loop .
loop i=1 to ncol(vectorh) .
do if (vectorh(i)=-99999) .
compute vectorz(i)=-99999 .
end if .
end loop .
loop i=1 to ncol(vectorh) .
do if (vectorh(i)=-99999) .
compute vectorp(i)=-99999 .
end if .
end loop .
loop i=1 to ncol(vectorh) .
do if (vectorh(i)=-99999) .
compute vectorul(i)=-99999.
end if .
end loop .
loop i=1 to ncol(vectorh) .
do if (vectorh(i)=-99999) .
compute vectorll(i)=-99999 .
end if .
end loop .
compute ikstat={vectorh;vectorse;vectorz;vectorp;vectorll;vectorul} .
save ikstat /outfile='ikstat1.sav' .

*This section saves the data and prepares the reporting formats.

save {k,n,r,pa,ck,ase,z,sig,ckll,ckul,fse,fsez,fsesig,fseu,fsel} /outfile='k__tmp2.sav'
     /variables=k,n,r,pa,ck,ase,z,sig,ckll,ckul,fse,fsez,fsesig,fseu,fsel .
end matrix .
get file='k__tmp2.sav'.
formats k (f8.0) /n (f8.0) /r (f8.0) /pa (f8.4) /ck (f8.4) /ase (f8.4) /z (f8.4) /sig (f8.4) /ckul (f8.4) /ckll (f8.4)
 /fse(f8.4) /fsez (f8.4) /fsesig (f8.4) /fseu (f8.4) /fsel (f8.4) .
variable labels k 'Number of Raters' /n 'Number of Items' /r 'Number of Categories' /pa 'Percent of Rater Agreement'
 /ck 'Kappa' /ase 'Standard Error' /z 'z'/sig 'p' /ckul 'Upper 95% Confidence Limit' /ckll 'Lower 95% Confidence Limit'
 /fse 'Fleiss SE' /fsez 'z' /fsesig 'p' /fseu 'Upper 95% Confidence Limit' /fsel 'Lower 95% Confidence Limit' .
report format=list automatic align(center)
  /variables=k,n,r,pa
  /title "Basic Information" .
report format=list automatic align(center)
  /variables=ck ase z sig ckll ckul
  /title "Cohens Kappa".
report format=list automatic align(center)
  /variables=fse fsez fsesig fsel fseu
  /title "Cohens Kappa -- Fleiss Adjusted Standard Error" .
get file='ikstat1.sav' .
flip .
delete variable case_lbl .
compute n1=$casenum .
formats n1 (f8.0) /var001 (f8.4) /var002 (f8.4)  /var003 (f8.4)  /var004 (f8.4)  /var005 (f8.4)  /var006 (f8.4)  .
variable labels n1 'Coding Category' /var001 'k' /var002 'Standard Error' /var003 'z' /var004 'p'
  /var005 'Lower 95% Confidence Limit' /var006 'Upper 95% Confidence Limit' .
save outfile='ikstat.sav' .
report format=list automatic align(center)
  /variables=n1 var001 var002 var003 var004 var005 var006
  /Title "Individual Category Statistics" .
!enddefine.
restore.
COHENSK VARS = rater1 to rater3 .