From 68aac350347a4a76be9426429f44508d079790fd Mon Sep 17 00:00:00 2001
From: Kyle Parrigan
Date: Sat, 6 Sep 2014 22:10:49 -0400
Subject: [PATCH] Jaccard Index

---
Review notes (this section is not part of the commit message):
 * The union now counts every position where at least one vector is
   non-zero (was: both non-zero), so one-sided mismatches are no longer
   ignored.
 * Removed the unreachable null re-check after the argument guards.
 * The float overload now divides in double precision, like the double one.
 * Tests name the expected exception types and cover a one-sided zero.
 * The post-image blob ids on the "index" lines below are carried over from
   the original submission and no longer match the patched content.

 src/Numerics/Distance.cs       | 102 ++++++++++++++++++++++++++++++++++
 src/UnitTests/DistanceTests.cs |  77 ++++++++++++++++++++++++++
 2 files changed, 179 insertions(+)

diff --git a/src/Numerics/Distance.cs b/src/Numerics/Distance.cs
index 2456e988..105c3faa 100644
--- a/src/Numerics/Distance.cs
+++ b/src/Numerics/Distance.cs
@@ -388,5 +388,107 @@ namespace MathNet.Numerics
         {
             return 1.0 - Correlation.Pearson(a, b);
         }
+
+        /// <summary>
+        /// Jaccard distance, i.e. 1 - the Jaccard index.
+        /// </summary>
+        /// <remarks>
+        /// Positions where at least one vector is non-zero form the union; of those, positions holding equal values form the intersection.
+        /// Empty vectors give 0; vectors containing only zeros give NaN.
+        /// </remarks>
+        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
+        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
+        /// <returns>Jaccard distance.</returns>
+        public static double Jaccard(double[] a, double[] b)
+        {
+            if (a == null)
+            {
+                throw new ArgumentNullException("a");
+            }
+
+            if (b == null)
+            {
+                throw new ArgumentNullException("b");
+            }
+
+            if (a.Length != b.Length)
+            {
+                throw new ArgumentException(Resources.ArgumentVectorsSameLength);
+            }
+
+            if (a.Length == 0)
+            {
+                return 0;
+            }
+
+            int intersection = 0, union = 0;
+            for (int i = 0; i < a.Length; i++)
+            {
+                // A position belongs to the union as soon as either vector is non-zero there.
+                if (a[i] != 0 || b[i] != 0)
+                {
+                    if (a[i] == b[i])
+                    {
+                        intersection++;
+                    }
+
+                    union++;
+                }
+            }
+
+            // An empty union (all-zero vectors) deliberately evaluates 0/0, i.e. NaN.
+            return 1.0 - ((double)intersection / union);
+        }
+
+        /// <summary>
+        /// Jaccard distance, i.e. 1 - the Jaccard index.
+        /// </summary>
+        /// <remarks>
+        /// Positions where at least one vector is non-zero form the union; of those, positions holding equal values form the intersection.
+        /// Empty vectors give 0; vectors containing only zeros give NaN.
+        /// </remarks>
+        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
+        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
+        /// <returns>Jaccard distance.</returns>
+        public static double Jaccard(float[] a, float[] b)
+        {
+            if (a == null)
+            {
+                throw new ArgumentNullException("a");
+            }
+
+            if (b == null)
+            {
+                throw new ArgumentNullException("b");
+            }
+
+            if (a.Length != b.Length)
+            {
+                throw new ArgumentException(Resources.ArgumentVectorsSameLength);
+            }
+
+            if (a.Length == 0)
+            {
+                return 0;
+            }
+
+            int intersection = 0, union = 0;
+            for (int i = 0; i < a.Length; i++)
+            {
+                // A position belongs to the union as soon as either vector is non-zero there.
+                if (a[i] != 0 || b[i] != 0)
+                {
+                    if (a[i] == b[i])
+                    {
+                        intersection++;
+                    }
+
+                    union++;
+                }
+            }
+
+            // Divide in double precision; an empty union (all-zero vectors) deliberately yields NaN.
+            return 1.0 - ((double)intersection / union);
+        }
     }
 }
diff --git a/src/UnitTests/DistanceTests.cs b/src/UnitTests/DistanceTests.cs
index b18e80a6..7e02f91a 100644
--- a/src/UnitTests/DistanceTests.cs
+++ b/src/UnitTests/DistanceTests.cs
@@ -1,4 +1,5 @@
 using NUnit.Framework;
+using System;

 namespace MathNet.Numerics.UnitTests
 {
@@ -15,5 +16,81 @@ namespace MathNet.Numerics.UnitTests
             Assert.That(Distance.Hamming(new[] { 0.0, 0.0 }, new[] { 1.0, 1.0 }), Is.EqualTo(2.0));
             Assert.That(Distance.Hamming(new[] { 1.0, 0.0 }, new[] { 0.0, 1.0 }), Is.EqualTo(2.0));
         }
+
+        [Test]
+        public void Jaccard_Double()
+        {
+            double[] p0 = new double[] { 1, 0.5 };
+            double[] q0 = new double[] { 0.5, 1 };
+
+            double[] p1 = new double[] { 4.5, 1 };
+            double[] q1 = new double[] { 4, 2 };
+
+            double[] p2 = new double[] { 0, 0, 0 };
+            double[] q2 = new double[] { 0, 0, 0 };
+
+            double[] p3 = new double[] { 1, 1, 1 };
+            double[] q3 = new double[] { 1, 1, 1 };
+
+            double[] p4 = new double[] { 2.5, 3.5, 3.0, 3.5, 2.5, 3.0 };
+            double[] q4 = new double[] { 3.0, 3.5, 1.5, 5.0, 3.5, 3.0 };
+
+            double[] p5 = new double[] { 1, 3, 5, 6, 8, 9, 6, 4, 3, 2 };
+            double[] q5 = new double[] { 2, 5, 6, 6, 7, 7, 5, 3, 1, 1 };
+
+            // One-sided zero: the second position must still count towards the union.
+            double[] p6 = new double[] { 1, 0 };
+            double[] q6 = new double[] { 1, 5 };
+
+            Assert.Throws<ArgumentException>(() => Distance.Jaccard(p0, q4));
+            Assert.Throws<ArgumentNullException>(() => Distance.Jaccard(null, q4));
+            Assert.Throws<ArgumentNullException>(() => Distance.Jaccard(p0, null));
+
+            Assert.That(Distance.Jaccard(p0, q0), Is.EqualTo(1));
+            Assert.That(Distance.Jaccard(p1, q1), Is.EqualTo(1));
+            Assert.That(Distance.Jaccard(p2, q2), Is.EqualTo(Double.NaN));
+            Assert.That(Distance.Jaccard(p3, q3), Is.EqualTo(0));
+            Assert.That(Distance.Jaccard(p4, q4), Is.EqualTo(0.66666).Within(0.00001));
+            Assert.That(Distance.Jaccard(p5, q5), Is.EqualTo(0.9).Within(0.1));
+            Assert.That(Distance.Jaccard(p6, q6), Is.EqualTo(0.5));
+        }
+
+        [Test]
+        public void Jaccard_Float()
+        {
+            float[] p0 = new float[] { 1, 0.5f };
+            float[] q0 = new float[] { 0.5f, 1 };
+
+            float[] p1 = new float[] { 4.5f, 1 };
+            float[] q1 = new float[] { 4, 2 };
+
+            float[] p2 = new float[] { 0, 0, 0 };
+            float[] q2 = new float[] { 0, 0, 0 };
+
+            float[] p3 = new float[] { 1, 1, 1 };
+            float[] q3 = new float[] { 1, 1, 1 };
+
+            float[] p4 = new float[] { 2.5f, 3.5f, 3.0f, 3.5f, 2.5f, 3.0f };
+            float[] q4 = new float[] { 3.0f, 3.5f, 1.5f, 5.0f, 3.5f, 3.0f };
+
+            float[] p5 = new float[] { 1, 3, 5, 6, 8, 9, 6, 4, 3, 2 };
+            float[] q5 = new float[] { 2, 5, 6, 6, 7, 7, 5, 3, 1, 1 };
+
+            // One-sided zero: the second position must still count towards the union.
+            float[] p6 = new float[] { 1, 0 };
+            float[] q6 = new float[] { 1, 5 };
+
+            Assert.Throws<ArgumentException>(() => Distance.Jaccard(p0, q4));
+            Assert.Throws<ArgumentNullException>(() => Distance.Jaccard(null, q4));
+            Assert.Throws<ArgumentNullException>(() => Distance.Jaccard(p0, null));
+
+            Assert.That(Distance.Jaccard(p0, q0), Is.EqualTo(1));
+            Assert.That(Distance.Jaccard(p1, q1), Is.EqualTo(1));
+            Assert.That(Distance.Jaccard(p2, q2), Is.EqualTo(float.NaN));
+            Assert.That(Distance.Jaccard(p3, q3), Is.EqualTo(0));
+            Assert.That(Distance.Jaccard(p4, q4), Is.EqualTo(0.66666).Within(0.00001));
+            Assert.That(Distance.Jaccard(p5, q5), Is.EqualTo(0.9).Within(0.1));
+            Assert.That(Distance.Jaccard(p6, q6), Is.EqualTo(0.5));
+        }
     }
 }