1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
OBJECTIVE: Compute aggregate percentiles for one variable
           by one grouping variable.
               
ALGORITHM: Same as SPSS FREQUENCIES

AUTHOR:    Tom Dierickx (tom@data-for-all.com)

MODIFIED:  February 5, 2003.

Notes:
1) If you want them across the entire dataset, simply do a 
COMPUTE dumvar = 1. and save your sav file before you run this 
and use dumvar as your "grouping" variable.

2) If you want percentiles by multiple grouping variables, you could
concatenate them together into a single grouping variable first (again.,
saving your sav file before you run this); that is, for this example you
could find aggregate percentiles by accel, year, and origin combinations:

STRING cubeID (A9). 
COMPUTE cubeID = CONCAT(STRING(accel,F4.0),"-",STRING(year,F2.0),"-",STRING(origin,F1.0)). 


***************************************************************************.
*** JUST ALTER THESE VALUES AS NEEDED AND GO ******************************.
***************************************************************************.

DEFINE @Path()    "C:\\Program Files\\SPSS\\"   !ENDDEFINE.
DEFINE @SavFile() "Cars.sav"                 !ENDDEFINE.
DEFINE @VarX()    mpg                        !ENDDEFINE.
DEFINE @VarXQ()   "mpg"                      !ENDDEFINE.
DEFINE @GrpBy()   cylinder                   !ENDDEFINE.

***************************************************************************.



*** STEP 1: IMPORT SAV FILE OF INTEREST AND SORT CASES ***.

GET FILE= @Path + @SavFile.
SORT CASES @GrpBy(A) @VarX(A).



*** STEP 2: SAVE OUT DATA INTO A TEMPORARY FILE ****.
*** KEEP ONLY GROUPING ID AND NON-MISSING VALUES ***.

SELECT IF NOT(SYSMIS(@VarX)) AND NOT(MISSING(@VarX)).
SAVE OUTFILE = "C:\\Temp\\" + @VarXQ + ".sav" 
  /rename= (@GrpBy @VarX = grp_id value) 
  /keep= grp_id value.
GET FILE = "C:\\Temp\\" + @VarXQ + ".sav". 



*** STEP 3: NUMBER RECORDS WITHIN EACH GROUPING ***.

IF (
   ($CASENUM=1) OR 
   (SYSMIS(LAG(grp_id)) AND NOT(SYSMIS(grp_id))) OR 
   (grp_id <> LAG(grp_id))
) recno=1. 
IF (MISSING(recno)) recno=LAG(recno) + 1. 
EXECUTE.



*** STEP 4: SAVE OUT ORDERED DATA ***.

SAVE OUTFILE= "C:\\Temp\\" + @VarXQ + ".sav".
GET FILE= "C:\\Temp\\" + @VarXQ + ".sav".



*** STEP 5: FIND COUNTS FOR EACH GROUPING ***.

AGGREGATE
  /OUTFILE= *
  /BREAK= grp_id
  /N= N.



*** STEP 6: ATTACH COUNTS TO ORDERED DATA ***.  

MATCH FILES
  /FILE= "C:\\Temp\\" + @VarXQ + ".sav"
  /TABLE= *
  /BY= grp_id.
EXECUTE.



*** STEP 7: COMPUTE PERCENTILES USING "p(n+1) AND INTERPOLATE" METHOD *****.
*** SEE http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm ***.

DO REPEAT 
   p=      .10  .25  .50  .75  .90
  /OBS=   i_10 i_25 i_50 i_75 i_90
  /k=     k_10 k_25 k_50 k_75 k_90
  /r=     r_10 r_25 r_50 r_75 r_90
  /ptile= p_10 p_25 p_50 p_75 p_90. 
  COMPUTE OBS = p*(N+1).
  COMPUTE k = TRUNC(OBS).
  COMPUTE r = OBS - k.
  
  DO IF ((OBS <= 1) AND (recno = 1)).
    COMPUTE ptile = value.
  ELSE IF ((OBS >= N) AND (recno = N)).
    COMPUTE ptile = value.
  ELSE IF (lag(recno) = k).
    COMPUTE ptile = (1-r)*lag(value) + r*(value).
  END IF.
  
END REPEAT . 
EXECUTE.



*** STEP 7: "SQUEEZE DATA", RETAINING ONLY SUMMARIZED CALCULATIONS ***. 

AGGREGATE
  /OUTFILE= *
  /BREAK= grp_id
  /n    = N
  /mean = MEAN(value)
  /min  = MIN(value)
  /p_10 = MAX(p_10)
  /p_25 = MAX(p_25)
  /p_50 = MAX(p_50)
  /p_75 = MAX(p_75)
  /p_90 = MAX(p_90)
  /max  = MAX(value).



*** STEP 9: APPLY SOME GENERIC LABELS ***.

VARIABLE LABELS n     "Count: "           + @varXQ.
VARIABLE LABELS mean  "Mean: "            + @varXQ.
VARIABLE LABELS min   "Minimum: "         + @varXQ.
VARIABLE LABELS p_10  "10th percentile: " + @varXQ.
VARIABLE LABELS p_25  "25th percentile: " + @varXQ.
VARIABLE LABELS p_50  "Median: "          + @varXQ.
VARIABLE LABELS p_75  "75th percentile: " + @varXQ.
VARIABLE LABELS p_90  "90th percentile: " + @varXQ.
VARIABLE LABELS max   "Maximum: "         + @varXQ.
  


*** STEP 10: WE'RE DONE! ***

SAVE OUTFILE= "C:\\Temp\\" + @VarXQ + ".sav".