'SPSS to STATA conversion and data documentation utility version 0.0. Alasdair Crockett, UK Data Archive, University of Essex, 2004-5 ' For a description of what this script does read the accompanying documentation ' which is available at #URL here# 'The software has not been developed or tested to a commercial standard and therefore is made available 'strictly On the basis that you accept it as is, and that you are solely responsible for any use made of it. 'The University of Essex does give any warranties, including without limitation, as to the accuracy of the 'software and disclaims any liability to you or any third party anywhere in the world for any injury, damage, 'direct or indirect loss, consequential or economic loss or any other loss suffered as a result of the use of 'or reliance upon the software to the maximum extent permitted by law. Sub Main Dim strPath As String, statapath As String, strFileMask As String, strDataOutputDir As String, posn As Integer, fName As String, FileName As String, strFname As String, warningdir As String 'get user input variables and make directories strPath = InputBox$("Enter path to directory containing SPSS .sav files, e.g. C:\myarea\data") stataexepath = InputBox$("Enter path to wstata.exe, e.g. C:\Program files\stata, or if network version, e.g. Z:") strPath=strPath & "\" strFileMask ="*.sav" strDataOutputDir=strPath & "tab\" On Error GoTo direrror MkDir strDataOutputDir statapath= strPath & "stata\" MkDir statapath warningdir=strPath & "data_documentation\" MkDir warningdir 'write introductory info to SPSS_to_Stata conversion warning file Open warningdir & "SPSS_to_STATA_conversion.rtf" For Output As #3 Print #3, "{\rtf1\ansi\deff0\deftab1700{\fonttbl{\f0\fswiss MS Sans Serif;}{\f1\froman\fcharset2 Symbol;}{\f2\fswiss Arial;}{\f3\fswiss\fprq2 Arial;}{\f4\fmodern\fprq5 Courier New;}{\f5\fswiss Arial;}}" Print #3, "{\colortbl;\red0\green0\blue0;\red255\green0\blue0;\red0\green0\blue255;\red0\green0\blue255;\red10\green10\blue160;}" Print #3, "\deflang2057\pard\plain\f2\fs16\cf1" Print #3, "\par{\fs28\b\ul UK Data Archive SPSS to STATA Conversion Information File" & "}{\fs24\b" & Chr(13) & Chr(10) & Chr(13) & Chr(10) Print #3, "\par" & Chr(13) & Chr(10) Print #3, "\par" & Chr(13) & Chr(10) Print #3, "\par }{\f2\cf2\fs20 This is a log of actual or potential sources of data and metadata (label) loss upon conversion from SPSS to STATA. Though rare, such losses are unavoidable given the differential data handling limits of the two packages." & Chr(13) & Chr(10) Print #3, "\par }{" & Chr(13) & Chr(10) Print #3, "\par }{\f2\cf1\b\ul\fs20 The losses/potential losses reported are:" & Chr(13) & Chr(10) Print #3, "\par }{" Print #3, "\par \f2\cf1\fs20\ul Truncation and potential truncation of data and/or labels Print #3, "\par }{" & Chr(13) & Chr(10) Print #3, "\par \f2\cf1\fs20 1. String variables defined with a width of > 80 characters (the intercooled STATA limit) or 244 characters (the special edition limit) in the SPSS file" & Chr(13) & Chr(10) Print #3, "\par 2. Variable labels that are > 80 characters (the STATA limit) in the SPSS file" & Chr(13) & Chr(10) Print #3, "\par 3. Value labels that are > 32 characters (the STATA limit) in the SPSS file" & Chr(13) & Chr(10) Print #3, "\par" & Chr(13) & Chr(10) Print #3, "\par \f2\cf1\fs20\ul Outright loss of value labels" & Chr(13) & Chr(10) Print #3, "\par }{" & Chr(13) & Chr(10) Print #3, "\par \f2\cf1\fs20 4. String variables that have value labels in the SPSS file" & Chr(13) & Chr(10) Print #3, "\par 5. Non-integer values that have value labels in the SPSS file" & Chr(13) & Chr(10) Print #3, "\par " & Chr(13) & Chr(10) Print #3, "\par" & Chr(13) & Chr(10) Print #3, "\par If any warnings are logged for any of the data files below please refer to the SPSS data dictionary, supplied as _UKDA_Data_Dictionary.rtf, which shows the metadata as it was in SPSS." & Chr(13) & Chr(10) Print #3, "\par " & Chr(13) & Chr(10) Print #3, "\par Note: all SPSS discrete user missing values have been translated to STATA missing values (.a,.b and .c). The originating SPSS value has been appended to the start of the value label in the STATA data file." & Chr(13) & Chr(10) Print #3, "\par " & Chr(13) & Chr(10) Print #3, "\par }{\f2\cf1\b\ul\fs20 Start of conversion warning log:" & Chr(13) & Chr(10) 'now call main subroutine to do everything else Call conversion (strPath, strFileMask, strDataOutputDir, posn, fName, FileName, strFname, statapath, stataexepath, warningdir) 'now save rtf SPSS_to_Stata conversion log file Print #3, "\par }}" & Chr(13) & Chr(10) Close #3 'Produce popup box to indicate script has finished. Note dialog routine doesn't work if using SPSS ver 11.0 '3 beeps To indicate script has finished - will not sound if you don't have admin rights to machine Shell "c:\winnt\system32\command.com /c echo " Wait 1 Shell "c:\winnt\system32\command.com /c echo " Wait 1 Shell "c:\winnt\system32\command.com /c echo " 'dialog box to also indicate script has finished Begin Dialog UserDialog 200,120 Text 10,10,180,15,"Script has finished running" TextBox 10,25,180,20,.Text$ OKButton 80,90,40,20 End Dialog Dim dlg As UserDialog dlg.Text$ = "about time too!" Dialog dlg DirError: 'error trapping for if a directory already exists Debug.Print "directory already exists so skipping creation" Resume Next End Sub Sub conversion (strPath As String, strFileMask As String, strDataOutputDir As String, posn As Integer, fName As String, FileName As String, strFname As String, statapath As String, stataexepath As String, warningdir As String) 'export data to tab-delimited file and remove spaces to create system missing values in Stata Dim size As Variant, textqual As String, DateVar As Integer, Varformat As Long, VarType As Variant, VarWidth As Variant, VarFracts As Variant, objDataDoc As ISpssDataDoc, objDocuments As ISpssDocuments, count As Integer, MissingValues As Variant, MissingCounts As Variant, Missing As Long, flag As Integer, count2 As Integer, test As Variant, numvars As Variant, numvars2 As Variant, NumCases As Variant, VarName As Variant, VarName2 As Variant, VarLabel As Variant, ValName As Variant strFname = Dir$(strPath & strFileMask) While strFname <> "" posn = 0 fName = Right(strFname, Len(strFname) - posn) 'get filename without extension posn = InStr(fName, ".") If posn <> 0 Then fName = Left(fName, posn - 1) End If FileName = fName strCmd = "get file='" & strPath & strFname & "'." & vbCr objSpssApp.ExecuteCommands strCmd , True size=FileLen(strPath & strFname) ' size used to determine amount of memory to give stata size=100+Int(1.2*(size/1024)) 'create tab-delimited format data file from .sav file strCmd = "save translate outfile='" & strDataOutputDir & FileName & ".tab'" & "/type=tab /map /fieldnames /replace." & vbCr objSpssApp.ExecuteCommands strCmd , True 'get conversion log ready for data file Print #3, "\par }{\f2\cf5\fs20" & Chr(13) & Chr(10) Print #3, "\par Warnings for " & "\b " & FileName & Chr(13) & Chr(10) Print #3, "\par }{" & Chr(13) & Chr(10) Print #3, "\par }{\f2\cf1 i) Truncation and potential truncation of data and/or labels" & Chr(13) & Chr(10) Print #3, "\par }{" & Chr(13) & Chr(10) 'Create the .do file textqual=""" Open statapath & FileName & ".do" For Output As #2 quote=Chr(34) Print #2, "insheet using " & quote & strDataOutputDir & FileName & ".tab" & quote & ", clear double" Print #2, "destring, replace" Print #2, "compress" 'define additional variables needed Set objDocuments = objSpssApp.Documents Set objDataDoc = objDocuments.GetDataDoc (0) Set objSPSSInfo = objSpssApp.SpssInfo numvars=objSPSSInfo.NumVariables-1 flag=0 Missing = objDataDoc.GetVariableMissingValues(MissingCounts, MissingValues) 'get data dictionary file ready NumCases = objDataDoc.GetNumberOfCases numvars2=objSPSSInfo.NumVariables Open warningdir & FileName & "_Data_Dictionary.rtf" For Output As #1 Print #1, "{\rtf1\ansi\deff0\deftab1200{\fonttbl{\f0\fswiss MS Sans Serif;}{\f1\froman\fcharset2 Symbol;}{\f2\fswiss Arial;}{\f3\fswiss\fprq2 Arial;}{\f4\fmodern\fprq5 Courier New;}{\f5\fswiss Arial;}}{\colortbl;\red0\green0\blue0;\red255\green0\blue0;\red100\green100\blue100;\red0\green0\blue255;\red10\green10\blue160;}\deflang2057\pard\plain\f2\fs20\cf1" Print #1, "\par {\fs28\b\ul UK Data Archive Data Dictionary" Print #1, "\par" Print #1, "\par }{\b\f2\fs20\cf1\ File-level information:" Print #1, "\par" Print #1, "\par }{\f2\fs20\cf1 File Name = " & Chr (9) & Chr (9) & "\f2\fs20\cf5" & FileName Print #1, "\par }{\f2\fs20\cf1 Number of variables = " & Chr (9) & "\f2\fs20\cf5 " & numvars2 Print #1, "\par }{\f2\fs20\cf1 Number of cases = " & Chr (9) & "\f2\fs20\cf5 " & NumCases Print #1, "\par" Print #1, "\par" Print #1, "\par }{\f2\fs20\cf1\b Variable-level information:" 'variable level loop to add variable labels and recreate date formats For I=0 To numvars count=0 VarTy= objSPSSInfo.VarType(I) VarLength=objSPSSInfo.VarLength(I) VarName= objSPSSInfo.VariableAt(I) VarName2=LCase(VarName) VarLabel= objSPSSInfo.VariableLabelAt(I) VarLabel=Replace (VarLabel,textqual,"'") Varformat = objDataDoc.GetVariableFormats(VarType, VarWidth, VarFract) Print #2, "label variable " & VarName2 & " " & textqual & VarLabel & textqual 'redefine date/time variables DateVar=0 If VarType (I) = 20 Or VarType (I) = 22 Or VarType (I) = 23 Or VarType (I) = 24 Or VarType (I) = 28 Or VarType (I) = 38 Or VarType (I) = 39 Then DateVar=1 If DateVar=1 Then Print #2, "generate tempvar = date(" & VarName2 & ", " & textqual & "mdy" & textqual & ")" If DateVar=1 Then Print #2, "destring " & VarName2 & ", replace force" If DateVar=1 Then Print #2, "replace " & VarName2 & "=tempvar" If DateVar=1 Then Print #2, "format " & VarName2 & " %d" If DateVar=1 Then Print #2, "drop tempvar" 'redefine discrete user missing values in STATA If MissingCounts(I)>0 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,0) & "=.a)" If MissingCounts(I)>1 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,1) & "=.b)" If MissingCounts(I)>2 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,2) & "=.c)" If MissingCounts(I)=-3 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,2) & "=.a)" ' write information to data dictionary file Dim info As Long, info1 As Variant, info2 As Variant, info3 As Variant, info4 As Variant, info5 As Variant, NV As Integer info = objDataDoc.GetVariableInfo(info1, info2, info3, info4, info5) NV= I+1 Print #1, "\par }{\cf1\b" If Len(VarLabel)>1 Then Print #1, "\par Pos. = " & "}{\f2\fs20\cf4 " & NV & Chr (9) & "}{\b\cf1 Variable = " & "}{\f2\fs20\cf4 " & VarName & Chr (9) & "}{\b\cf1 Variable label = " & "}{\cf4 " & VarLabel If Len(VarLabel)<2 Then Print #1, "\par Pos. = " & "}{\f2\fs20\cf4 " & NV & Chr (9) & "}{\b\cf1 Variable = " & "}{\f2\fs20\cf4 " & VarName & Chr (9) & "}{\cf5 This variable has no label in SPSS" If VarTy=0 And info4(I)=1 Then Print #1, "\par }{\cf3 This variable is }{\cf5\i numeric}{\cf3, the SPSS measurement level is }{\cf5\i nominal." If VarTy=0 And info4(I)=2 Then Print #1, "\par }{\cf3 This variable is }{\cf5\i numeric}{\cf3, the SPSS measurement level is }{\cf5\i ordinal." If VarTy=0 And info4(I)=3 Then Print #1, "\par }{\cf3 This variable is }{\cf5\i numeric}{\cf3, the SPSS measurement level is }{\cf5\i scale." If VarTy=1 Then Print #1, "\par }{\cf3 This variable is }{\cf5\i string}{\cf3\, the SPSS measurement level is }{\cf5\i nominal." If VarTy=2 Then Print #1, "\par }{\cf3 This variable is }{\cf5\i 'other' }{\cf3 (not numeric or string)." If MissingCounts(I)=3 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16 (.a in STATA) \cf1 and \cf4\fs20 " & MissingValues(I,1) & " \cf4\fs16 (.b in STATA)" & " \cf1\fs16 and \cf4\fs20 " & MissingValues(I,2) & " \fs16 (.c in STATA)" If MissingCounts(I)=-3 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16\cf1 thru \cf4\fs20 " & MissingValues(I,1) & " \cf1\fs16 and \cf4\fs20 " & MissingValues(I,2) & " \fs16 (.a in STATA)" If MissingCounts(I)=2 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16 (.a in STATA) \cf1 and \cf4\fs20 " & MissingValues(I,1) & " \fs16 (.b in STATA)" If MissingCounts(I)=-2 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16\cf1 thru \cf4\fs20 " & MissingValues(I,1) If MissingCounts(I)=1 Then Print #1, "\par }{\cf1 SPSS user missing value = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16 (.a in STATA)" NumVals= objSPSSInfo.NumberOfValueLabels(I)-1 'log potential truncation to SPSS_To_Stata rtf conversion file If VarTy =1 And VarLength>80 Then Print #3, "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "String variable of width up to " & VarLength & " chars in SPSS. STATA max. is 80 chars (244 in SE)." & Chr(13) & Chr(10) varlablength= Len (VarLabel) If varlablength>80 Then Print #3, "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ "& Chr(9) & "Variable label in SPSS is " & varlablength & " chars, this will be truncated to 80 chars in the STATA file." & Chr(13) & Chr(10) count2=0 'value level loop to add commands to add value labels in STATA where appropriate, adding SPSS missing value codes to start of labels for user missing values For K=0 To NumVals ValLabel= objSPSSInfo.ValueLabelAt(I,K) ValLabel=Replace (ValLabel,textqual,"'") ValName=objSPSSInfo.ValueAt(I,K) If VarTy <> 1 Then test=CDbl(ValName) If VarTy <> 1 Then testint=CLng (test) If VarTy=1 Then test2=ValName Debug.Print VarName Debug.Print test Debug.Print MissingValues(I,0) If VarTy <>1 And (MissingValues(I,0)=test Or MissingValues(I,1)=test Or MissingValues(I,2)=test) Then flag=1 Else flag=0 If Varty <>1 And test=testint Or (MissingValues(I,0)=test And MissingCounts(I)>0) Or (MissingValues(I,1)=test And MissingCounts(I)>0) Or (MissingValues(I,2)=test) Then count=count+1 If count=1 Then Print #2, "label define " & VarName2 & " "; If Varty <>1 And test=testint And flag=0 Then Print #2, testint & " " & textqual & ValLabel & textqual & " "; If Varty <>1 And MissingValues(I,0)=test And MissingCounts(I)>0 Then Print #2, ".a " & textqual & "(" & test & ") " & ValLabel & textqual & " "; If Varty <>1 And MissingValues(I,0)=test And MissingCounts(I)<0 Then Print #2, test & " " & textqual & ValLabel & textqual & " "; If Varty <>1 And MissingValues(I,1)=test And MissingCounts(I)>1 Then Print #2, ".b " & textqual & "(" & test & ") " & ValLabel & textqual & " "; If Varty <>1 And MissingValues(I,1)=test And MissingCounts(I)<0 Then Print #2, test & " " & textqual & ValLabel & textqual & " "; If Varty <>1 And MissingValues(I,2)=test And MissingCounts(I)>2 Then Print #2, ".c " & textqual & "(" & test & ") " & ValLabel & textqual & " "; If Varty <>1 And MissingValues(I,2)=test And MissingCounts(I)=-3 Then Print #2, ".a " & textqual & "(" & test & ") " & ValLabel & textqual & " "; 'log any value label truncation to SPSS_to_Stata rtf conversion file If Len (ValLabel)>32 And Varty<>1 And count2=0 Then Print #3, "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "One or more long value labels in SPSS, which will be truncated at 32 chars in the STATA file." & Chr(13) & Chr(10) If Len (ValLabel)>32 Then count2=count2+1 'write value-level info to data dictionary If K=0 Then Print #1, "\par " & Chr (9) & "}{\cf3\ul\fs16 Value label information for " & VarName If Varty<>1 Then Print #1, "\par }{\cf1\fs16" & Chr (9) & "Value = " & "}{\cf4\fs16 " & test & Chr (9) & "}{\cf1\fs16 Label = " & "}{\cf4\fs16 " & ValLabel If Varty=1 Then Print #1, "\par }{\cf1\fs16" & Chr (9) & "Value = " & "}{\cf4\fs16 " & test2 & Chr (9) & "}{\cf1\fs16 Label = " & "}{\cf4\fs16 " & ValLabel Next k If count>0 Then Print #2, vbCr & "label values " & VarName2 & " " & VarName2 & vbCr If count=0 Then Print #2, vbCr Next I Print #1, "\par }}" Close #1 ' now repeat loop to log outright loss of value labels in SPSS_to_Stata rtf conversion file Print #3, "\par }{" & Chr(13) & Chr(10) Print #3, "\par }{\f2\cf1\ ii) Outright loss of value labels" & Chr(13) & Chr(10) Print #3, "\par }{" & Chr(13) & Chr(10) test=1 testint=1 For L=0 To numvars count3=0 VarName= objSPSSInfo.VariableAt(L) VarName2=LCase(VarName) VarTy= objSPSSInfo.VarType(L) nvaluelabs=objSPSSInfo.NumberOfValueLabels(L) NumVals= objSPSSInfo.NumberOfValueLabels(L)-1 ' return STATA variable name case to that of SPSS If VarName <> VarName2 Then Print #2, "rename " & VarName2 & " " & VarName If VarTy = 1 And nvaluelabs>0 Then Print #3, "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "There are value labels for string variables in SPSS. These will not exist in the STATA file." & Chr(13) & Chr(10) For M=0 To NumVals ValLabel= objSPSSInfo.ValueLabelAt(L,M) ValName=objSPSSInfo.ValueAt(L,M) If VarTy <> 1 Then test=CDbl(ValName) If VarTy <> 1 Then testint=CLng (test) If VarTy <> 1 And testint <> test And Len (ValLabel)>0 And count3=0 Then Print #3, "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "There are value labels for non-integer values in SPSS. These will not exist in the STATA file." & Chr(13) & Chr(10) If VarTy <> 1 And testint <> test And Len (ValLabel)>0 Then count3=1 Next M Next L Set objSPSSInfo = Nothing 'lastly, save .do file and execute using wstata.exe to create .dta file Print #2, "save " & quote & statapath & FileName & ".dta" & quote & ", replace" & vbCr Close #2 ChDir statapath Set objOutputDoc = objSpssApp.GetDesignatedOutputDoc objOutputDoc.SelectAll objOutputDoc.Delete size=CStr(size) Shell stataexepath & "\wstata.exe /e /k" & size & " do " & quote & statapath & FileName & quote & ".do nolog" strFname = Dir$() Wend End Sub