' To calculate Levenshtein distance between 2 string. ' (For info see http://www.merriampark.com/ld.htm#ALGORITHM) ' Source of the 2 functions is the URL given above. ' Main Sub written by Raynald Levesque 2004/02/04. ' SPSS data editor must contain string variables s1 and s2 (they must be next one to the other). ' For each case, the script calculates the distances between the 2 strings and saves ' the result in a text file. ' Look at "distance between 2 strings.SPS" for an example which uses the script. Sub Main Dim objDocuments As ISpssDocuments Dim objDataDoc As ISpssDataDoc Dim strCmd As String Dim lngNbCases As Long Set objDocuments=objSpssApp.Documents Set objDataDoc = objDocuments.GetDataDoc(0) 'This file will contain the distances Open "C:\\temp\\distances.txt" For Output As #1 ' Gets the data for s1 and s2 Dim SpssTextData As Variant lngNbCases = objDataDoc.GetNumberOfCases SpssTextData = objDataDoc.GetTextData ("s1", "s2", 1, lngNbCases) Dim d As Integer Dim intCnt As Long For intCnt = 0 To lngNbCases - 1 d=LD(SpssTextData(0,intCnt),SpssTextData(1,intCnt)) 'MsgBox("distance is " & d ) Print #1, d Next Close #1 Set objDataDoc = Nothing Set objDocuments = Nothing End Sub '******************************** '*** Compute Levenshtein Distance '(source: http://www.merriampark.com/ld.htm#ALGORITHM) '******************************** Public Function LD(ByVal s As String, ByVal t As String) As Integer Dim d() As Integer ' matrix Dim m As Integer ' length of t Dim n As Integer ' length of s Dim i As Integer ' iterates through s Dim j As Integer ' iterates through t Dim s_i As String ' ith character of s Dim t_j As String ' jth character of t Dim cost As Integer ' cost ' Step 1 n = Len(s) m = Len(t) If n = 0 Then LD = m Exit Function End If If m = 0 Then LD = n Exit Function End If ReDim d(0 To n, 0 To m) As Integer ' Step 2 For i = 0 To n d(i, 0) = i Next i For j = 0 To m d(0, j) = j Next j ' Step 3 For i = 1 To n s_i = Mid$(s, i, 1) ' Step 4 For j = 1 To m t_j = Mid$(t, j, 1) ' Step 5 If s_i = t_j Then cost = 0 Else cost = 1 End If ' Step 6 d(i, j) = Minimum(d(i - 1, j) + 1, d(i, j - 1) + 1, d(i - 1, j - 1) + cost) Next j Next i ' Step 7 LD = d(n, m) Erase d End Function '******************************* '*** Get minimum of three values '(source: http://www.merriampark.com/ld.htm#ALGORITHM) '******************************* Private Function Minimum(ByVal a As Integer, _ ByVal b As Integer, _ ByVal c As Integer) As Integer Dim mi As Integer mi = a If b < mi Then mi = b End If If c < mi Then mi = c End If Minimum = mi End Function