Add files via upload

This commit is contained in:
youronlydimwit
2023-11-01 13:03:28 +07:00
committed by GitHub
parent dace505148
commit 2cc97d254b
5 changed files with 207 additions and 0 deletions

50
VBA/CosineSimilarity.bas Normal file
View File

@@ -0,0 +1,50 @@
Attribute VB_Name = "Module4"
Function SimilarityCosine(s1 As String, s2 As String) As Double
Dim words1 As Object
Set words1 = CreateObject("Scripting.Dictionary")
Dim words2 As Object
Set words2 = CreateObject("Scripting.Dictionary")
Dim i As Integer
Dim j As Integer
Dim dotProduct As Double
Dim magnitude1 As Double
Dim magnitude2 As Double
' Split the strings into words and create vectors
Dim words As Variant
words = Split(s1 & " " & s2, " ")
For i = LBound(words) To UBound(words)
If Trim(words(i)) <> "" Then
words1(Trim(words(i))) = 0
words2(Trim(words(i))) = 0
End If
Next i
' Populate the vectors
For i = LBound(words) To UBound(words)
If Trim(words(i)) <> "" Then
If i <= UBound(Split(s1, " ")) Then
words1(Trim(words(i))) = words1(Trim(words(i))) + 1
Else
words2(Trim(words(i))) = words2(Trim(words(i))) + 1
End If
End If
Next i
' Calculate dot product and magnitudes
For Each Key In words1.Keys
dotProduct = dotProduct + (words1(Key) * words2(Key))
magnitude1 = magnitude1 + (words1(Key) * words1(Key))
magnitude2 = magnitude2 + (words2(Key) * words2(Key))
Next Key
' Calculate cosine similarity
If magnitude1 * magnitude2 = 0 Then
SimilarityCosine = 0
Else
SimilarityCosine = dotProduct / (Sqr(magnitude1) * Sqr(magnitude2))
End If
End Function

View File

@@ -0,0 +1,47 @@
Attribute VB_Name = "Module2"
Function SimilarityJaroWinkler(s1 As String, s2 As String) As Double
Dim m As Integer ' Number of matching characters
Dim t As Integer ' Number of transpositions
Dim l1 As Integer ' Length of string 1
Dim l2 As Integer ' Length of string 2
Dim maxLen As Integer
Dim prefix As Integer
Dim jw As Double ' Jaro-Winkler similarity
l1 = Len(s1)
l2 = Len(s2)
maxLen = IIf(l1 > l2, l1, l2)
' Calculate the number of matching characters
m = 0
t = 0
For i = 1 To l1
For j = IIf(i - 2 > 0, i - 2, 1) To IIf(i + 2 <= l2, i + 2, l2)
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
m = m + 1
If i <> j Then t = t + 1
Exit For
End If
Next j
Next i
' Calculate the prefix length
prefix = 0
For i = 1 To IIf(l1 > 4, 4, l1)
If Mid(s1, i, 1) = Mid(s2, i, 1) Then
prefix = prefix + 1
Else
Exit For
End If
Next i
' Calculate the Jaro similarity
If m = 0 Then
jw = 0
Else
jw = (m / l1 + m / l2 + (m - t) / m) / 3
jw = jw + (prefix * 0.1 * (1 - jw))
End If
SimilarityJaroWinkler = jw
End Function

View File

@@ -0,0 +1,25 @@
Attribute VB_Name = "Module1"
Function SimilarityLevenshtein(target As String, reference As String) As Double
Dim maxLen As Long
Dim minLen As Long
Dim distance As Double
Dim similarity As Double
' Remove spaces and convert to lowercase
target = Replace(LCase(target), " ", "")
reference = Replace(LCase(reference), " ", "")
maxLen = IIf(Len(target) > Len(reference), Len(target), Len(reference))
minLen = IIf(Len(target) < Len(reference), Len(target), Len(reference))
distance = maxLen - minLen
For i = 1 To minLen
If Mid(target, i, 1) <> Mid(reference, i, 1) Then
distance = distance + 1
End If
Next i
similarity = (1 - distance / maxLen)
SimilarityLevenshtein = similarity
End Function

View File

@@ -0,0 +1,45 @@
Attribute VB_Name = "Module5"
Function SimilarityNGramJaccard(s1 As String, s2 As String, n As Integer) As Double
Dim set1 As Object
Set set1 = CreateObject("Scripting.Dictionary")
Dim set2 As Object
Set set2 = CreateObject("Scripting.Dictionary")
Dim i As Integer
Dim ngram1 As String
Dim ngram2 As String
' Create n-grams from string 1
For i = 1 To Len(s1) - n + 1
ngram1 = Mid(s1, i, n)
set1(ngram1) = 1
Next i
' Create n-grams from string 2
For i = 1 To Len(s2) - n + 1
ngram2 = Mid(s2, i, n)
set2(ngram2) = 1
Next i
' Calculate the size of the intersection of the n-gram sets
Dim intersectionSize As Integer
intersectionSize = 0
For Each Key In set1.Keys
If set2.Exists(Key) Then
intersectionSize = intersectionSize + 1
End If
Next Key
' Calculate the n-gram similarity
Dim similarity As Double
If set1.Count + set2.Count = 0 Then
similarity = 1
Else
similarity = intersectionSize / (set1.Count + set2.Count - intersectionSize)
End If
SimilarityNGramJaccard = similarity
End Function

View File

@@ -0,0 +1,40 @@
Attribute VB_Name = "Module3"
Function SimilaritySorensenDiceCoef(s1 As String, s2 As String) As Double
Dim set1 As Object
Set set1 = CreateObject("Scripting.Dictionary")
Dim set2 As Object
Set set2 = CreateObject("Scripting.Dictionary")
Dim i As Integer
' Create sets of characters from both strings
For i = 1 To Len(s1)
set1(Mid(s1, i, 1)) = 1
Next i
For i = 1 To Len(s2)
set2(Mid(s2, i, 1)) = 1
Next i
' Calculate the size of the intersection of the sets
Dim intersectionSize As Integer
intersectionSize = 0
For Each Key In set1.Keys
If set2.Exists(Key) Then
intersectionSize = intersectionSize + 1
End If
Next Key
' Calculate the S<>rensen-Dice coefficient
Dim coefficient As Double
If set1.Count + set2.Count = 0 Then
coefficient = 1
Else
coefficient = 2 * intersectionSize / (set1.Count + set2.Count)
End If
SimilaritySorensenDiceCoef = coefficient
End Function