namespace StringMatching {
using System;
using System.Collections.Generic;
class MainClass {
static void Main() {
var acceptedMatchLevel = 0.33;
var users = new Dictionary<string, string> {
{"Chris Mumford 5 lubnaig drive", "Christopher Mumford 5 lubnaig dr"},
{"Ken A.Z 2354 Alexandria Road", "K Aue Zen Alexandria Rd"},
{"J D 234 Bryant Avenue, pa8 e4w Scotland uk", "Jonathon Davis 234 Bryant av"},
{"J D 234 Bryant Avenue pa8 e4w Scotland uk", "Jonathon Davis 234 Bryant Avenue"},
{"Jerry Springer 232 Bryant Avenue pa8 e4w Scotland uk", "Brian Evans 234 Bryant Avenue"}
};
foreach (var user in users) {
var matchResult = DiceCoefficientExtensions.DiceCoefficient(Cleanser.Cleanse(user.Key), Cleanser.Cleanse(user.Value));
Console.WriteLine("===========================");
Console.WriteLine($"{user.Key} ------- {user.Value}");
Console.WriteLine(matchResult >= acceptedMatchLevel ? "Matched" : "Not matched");
Console.WriteLine(matchResult);
}
}
}
}
namespace StringMatching {
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
public static class DiceCoefficientExtensions
{
/// <summary>
/// Dice Coefficient based on bigrams. <br />
/// A good value would be 0.33 or above, a value under 0.2 is not a good match, from 0.2 to 0.33 is iffy.
/// </summary>
/// <param name="input"></param>
/// <param name="comparedTo"></param>
/// <returns></returns>
public static double DiceCoefficient(this string input, string comparedTo)
{
var ngrams = input.ToBiGrams();
var compareToNgrams = comparedTo.ToBiGrams();
return ngrams.DiceCoefficient(compareToNgrams);
}
/// <summary>
/// Dice Coefficient used to compare nGrams arrays produced in advance.
/// </summary>
/// <param name="nGrams"></param>
/// <param name="compareToNGrams"></param>
/// <returns></returns>
public static double DiceCoefficient(this string[] nGrams, string[] compareToNGrams)
{
int matches = 0;
foreach (var nGram in nGrams)
{
if (compareToNGrams.Any(x => x == nGram)) matches++;
}
if (matches == 0) return 0.0d;
double totalBigrams = nGrams.Length + compareToNGrams.Length;
return (2 * matches) / totalBigrams;
}
public static string[] ToBiGrams(this string input)
{
// nLength == 2
// from Jackson, return %j ja ac ck ks so on n#
// from Main, return #m ma ai in n#
input = SinglePercent + input + SinglePound;
return ToNGrams(input, 2);
}
public static string[] ToTriGrams(this string input)
{
// nLength == 3
// from Jackson, return %%j %ja jac ack cks kso son on# n##
// from Main, return ##m #ma mai ain in# n##
input = DoublePercent + input + DoublePount;
return ToNGrams(input, 3);
}
private static string[] ToNGrams(string input, int nLength)
{
int itemsCount = input.Length - 1;
string[] ngrams = new string[input.Length - 1];
for (int i = 0; i < itemsCount; i++) ngrams[i] = input.Substring(i, nLength);
return ngrams;
}
private const string SinglePercent = "%";
private const string SinglePound = "#";
private const string DoublePercent = "&&";
private const string DoublePount = "##";
}
}
namespace StringMatching {
public static class Cleanser {
public static string Cleanse(string item) {
return item.ToLower();
}
}
}