JaroWinklerDistance

Run Settings
LanguageC#
Language Version
Run Command
namespace StringMatching { using System; using System.Collections.Generic; class MainClass { static void Main() { var acceptedMatchLevel = 0.8; var users = new Dictionary<string, string> { {"Chris Mumford 5 lubnaig drive", "Christopher David Mumford 5 lubnaig dr"}, {"Ken A.Z 2354 Alexandria Road", "K Aue Zen Alexandria Rd"}, {"J D 234 Bryant Avenue, pa8 e4w Scotland uk", "Jonathon Davis 234 Bryant av"}, {"J D 234 Bryant Avenue pa8 e4w Scotland uk", "Jonathon Davis 234 Bryant Avenue"}, {"Jerry Springer 232 Bryant Avenue pa8 e4w Scotland uk", "Jonathon Davis 234 Bryant Avenue"} }; foreach (var user in users) { var matchResult = JaroWinklerDistance.proximity(Cleanser.Cleanse(user.Key), Cleanser.Cleanse(user.Value)); Console.WriteLine("==========================="); Console.WriteLine($"{user.Key} ------- {user.Value}"); Console.WriteLine(matchResult >= acceptedMatchLevel ? "Matched" : "Not matched"); Console.WriteLine(matchResult); } } } }
namespace StringMatching { using System; public static class JaroWinklerDistance { /* The Winkler modification will not be applied unless the * percent match was at or above the mWeightThreshold percent * without the modification. * Winkler's paper used a default value of 0.7 */ private static readonly double mWeightThreshold = 0.7; /* Size of the prefix to be concidered by the Winkler modification. * Winkler's paper used a default value of 4 */ private static readonly int mNumChars = 4; /// <summary> /// Returns the Jaro-Winkler distance between the specified /// strings. The distance is symmetric and will fall in the /// range 0 (perfect match) to 1 (no match). /// </summary> /// <param name="aString1">First String</param> /// <param name="aString2">Second String</param> /// <returns></returns> public static double distance(string aString1, string aString2) { return 1.0 - proximity(aString1,aString2); } /// <summary> /// Returns the Jaro-Winkler distance between the specified /// strings. The distance is symmetric and will fall in the /// range 0 (no match) to 1 (perfect match). /// </summary> /// <param name="aString1">First String</param> /// <param name="aString2">Second String</param> /// <returns></returns> public static double proximity(string aString1, string aString2) { int lLen1 = aString1.Length; int lLen2 = aString2.Length; if (lLen1 == 0) return lLen2 == 0 ? 1.0 : 0.0; int lSearchRange = Math.Max(0,Math.Max(lLen1,lLen2)/2 - 1); // default initialized to false bool[] lMatched1 = new bool[lLen1]; bool[] lMatched2 = new bool[lLen2]; int lNumCommon = 0; for (int i = 0; i < lLen1; ++i) { int lStart = Math.Max(0,i-lSearchRange); int lEnd = Math.Min(i+lSearchRange+1,lLen2); for (int j = lStart; j < lEnd; ++j) { if (lMatched2[j]) continue; if (aString1[i] != aString2[j]) continue; lMatched1[i] = true; lMatched2[j] = true; ++lNumCommon; break; } } if (lNumCommon == 0) return 0.0; int lNumHalfTransposed = 0; int k = 0; for (int i = 0; i < lLen1; ++i) { if (!lMatched1[i]) continue; while (!lMatched2[k]) ++k; if (aString1[i] != aString2[k]) ++lNumHalfTransposed; ++k; } // System.Diagnostics.Debug.WriteLine("numHalfTransposed=" + numHalfTransposed); int lNumTransposed = lNumHalfTransposed/2; // System.Diagnostics.Debug.WriteLine("numCommon=" + numCommon + " numTransposed=" + numTransposed); double lNumCommonD = lNumCommon; double lWeight = (lNumCommonD/lLen1 + lNumCommonD/lLen2 + (lNumCommon - lNumTransposed)/lNumCommonD)/3.0; if (lWeight <= mWeightThreshold) return lWeight; int lMax = Math.Min(mNumChars,Math.Min(aString1.Length,aString2.Length)); int lPos = 0; while (lPos < lMax && aString1[lPos] == aString2[lPos]) ++lPos; if (lPos == 0) return lWeight; return lWeight + 0.1 * lPos * (1.0 - lWeight); } } }
namespace StringMatching { public static class Cleanser { public static string Cleanse(string item) { item = item.ToLower(); item = item.Replace("av", "avenue"); return item; } } }
Editor Settings
Theme
Key bindings
Full width
Lines