Skip to content

Commit bced3df

Browse files
authored
Merge pull request #180 from cicirello/weighted-tau
Added implementation of weighted Kendall tau distance
2 parents 391d1fb + a1f6cb8 commit bced3df

File tree

6 files changed

+275
-9
lines changed

6 files changed

+275
-9
lines changed

CHANGELOG.md

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99
### Added
1010

1111
### Changed
12-
* Bumped dependency rho-mu to 1.2.0
13-
* Bumped dependency org.cicirello.core to 1.1.0
14-
* Migrated test cases to JUnit 5 (specifically JUnit Jupiter 5.8.2).
1512

1613
### Deprecated
1714

1815
### Removed
1916

2017
### Fixed
2118

19+
### CI/CD
20+
21+
### Other
22+
23+
24+
## [3.1.0] - 2022-02-15
25+
26+
### Added
27+
* WeightedKendallTauDistance: an implementation of a weighted version of Kendall tau distance
28+
29+
### Changed
30+
* Bumped dependency rho-mu to 1.2.0
31+
* Bumped dependency org.cicirello.core to 1.1.0
32+
* Migrated test cases to JUnit 5 (specifically JUnit Jupiter 5.8.2).
33+
2234
### CI/CD
2335
* Automated commenting of test coverage percentages on pull requests.
2436
* Revised documentation workflow to deploy API doc updates to website only

src/main/java/org/cicirello/permutations/distance/KendallTauDistance.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2014, 2015, 2017-2021 Vincent A. Cicirello, <https://www.cicirello.org/>.
2+
* Copyright 2014, 2015, 2017-2022 Vincent A. Cicirello, <https://www.cicirello.org/>.
33
*
44
* This file is part of JavaPermutationTools (https://jpt.cicirello.org/).
55
*
@@ -24,8 +24,6 @@
2424
import java.util.Arrays;
2525

2626
/**
27-
* Kendall Tau Distance:
28-
*
2927
* <p>Kendall Tau distance is sometimes also known as bubble sort distance, as it is
3028
* the number of adjacent swaps necessary to transform one permutation into the other.</p>
3129
*
@@ -47,9 +45,8 @@
4745
* <p>Kendall Tau distance originally described in:<br>
4846
* M. G. Kendall, "A new measure of rank correlation," Biometrika, vol. 30, no. 1/2, pp. 81–93, June 1938.</p>
4947
*
50-
* @author <a href=https://www.cicirello.org/ target=_top>Vincent A. Cicirello</a>, <a href=https://www.cicirello.org/ target=_top>https://www.cicirello.org/</a>
51-
* @version 5.13.2021
52-
*
48+
* @author <a href=https://www.cicirello.org/ target=_top>Vincent A. Cicirello</a>,
49+
* <a href=https://www.cicirello.org/ target=_top>https://www.cicirello.org/</a>
5350
*/
5451
public final class KendallTauDistance implements NormalizedPermutationDistanceMeasurer {
5552

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
/*
2+
* JavaPermutationTools: A Java library for computation on permutations and sequences.
3+
* Copyright (C) 2018-2022 Vincent A. Cicirello, <https://www.cicirello.org/>.
4+
*
5+
* This file is part of JavaPermutationTools (https://jpt.cicirello.org/).
6+
*
7+
* JavaPermutationTools is free software: you can
8+
* redistribute it and/or modify it under the terms of the GNU
9+
* General Public License as published by the Free Software
10+
* Foundation, either version 3 of the License, or (at your
11+
* option) any later version.
12+
*
13+
* JavaPermutationTools is distributed in the hope
14+
* that it will be useful, but WITHOUT ANY WARRANTY; without even
15+
* the implied warranty of MERCHANTABILITY or FITNESS FOR A
16+
* PARTICULAR PURPOSE. See the GNU General Public License for more
17+
* details.
18+
*
19+
* You should have received a copy of the GNU General Public License
20+
* along with JavaPermutationTools. If not, see <http://www.gnu.org/licenses/>.
21+
*/
22+
package org.cicirello.permutations.distance;
23+
24+
import org.cicirello.permutations.Permutation;
25+
import java.util.Arrays;
26+
27+
/**
28+
* <p>This class implements the weighted Kendall tau distance. In the original
29+
* Kendall tau distance, each inverted pair of elements (i.e., such that element
30+
* x appears someplace before y in Permutation p1, but someplace after y in Permutation p2)
31+
* contributes 1 to the distance. Thus, since there are n(n-1)/2 pairs of elements,
32+
* the maximum of Kendall tau distance is n(n-1)/2 where n is the permutation length.
33+
* In this weighted Kendall tau distance, each element x of the permutation has an
34+
* associated weight w(x), and each inverted pair x, y (where x appears before sometime
35+
* prior to y in p1, but sometime after y in p2) contributes w(x) * w(y) to the weighted
36+
* Kendall tau distance.</p>
37+
*
38+
* <p>The weighted Kendall tau distance was first described in:<br>
39+
* "Failure proximity: a fault localization-based approach" (Liu and Han, SIGSOFT 2006, pages 46-56).</p>
40+
*
41+
* <p>The runtime of JPT's implementation is O(n lg n), where n is the permutation length.
42+
* This runtime is achieved using a modified version of mergesort to sum the weighted inversions.</p>
43+
*
44+
* @author <a href=https://www.cicirello.org/ target=_top>Vincent A. Cicirello</a>,
45+
* <a href=https://www.cicirello.org/ target=_top>https://www.cicirello.org/</a>
46+
*/
47+
public final class WeightedKendallTauDistance implements NormalizedPermutationDistanceMeasurerDouble {
48+
49+
private final double[] weights;
50+
private final double maxDistance;
51+
52+
/**
53+
* Constructs an instance of the WeightedKendallTauDistance.
54+
* @param weights An array of weights, such that weights[e] is the weight of
55+
* element e.
56+
*/
57+
public WeightedKendallTauDistance(double[] weights) {
58+
this.weights = weights.clone();
59+
double max = 0;
60+
for (int i = 0; i < weights.length - 1; i++) {
61+
double runningSum = 0;
62+
for (int j = i+1; j < weights.length; j++) {
63+
runningSum += weights[j];
64+
}
65+
max += weights[i] * runningSum;
66+
}
67+
maxDistance = max;
68+
}
69+
70+
/**
71+
* Gets the length of permutations supported by this instance of
72+
* WeightedKendallTauDistance, which is equal to the length of the
73+
* array of weights passed to the constructor.
74+
*
75+
* @return The length of supported Permutations.
76+
*/
77+
public int supportedLength() {
78+
return weights.length;
79+
}
80+
81+
/**
82+
* {@inheritDoc}
83+
*
84+
* @throws IllegalArgumentException if p1.length() is not equal to supportedLength(),
85+
* or if p2.length() is not equal to supportedLength().
86+
*/
87+
@Override
88+
public double distancef(Permutation p1, Permutation p2) {
89+
if (p1.length() != weights.length || p2.length() != weights.length) {
90+
throw new IllegalArgumentException("p1 and/or p2 not of supported length of this instance");
91+
}
92+
// use inverse of p1 as a relabeling
93+
int[] invP1 = p1.getInverse();
94+
95+
// relabel array copy of p2 and likewise map weights to weights of relabeled copy
96+
int[] arrayP2 = new int[invP1.length];
97+
double[] w = new double[weights.length];
98+
for (int i = 0; i < arrayP2.length; i++) {
99+
arrayP2[i] = invP1[p2.get(i)];
100+
w[arrayP2[i]] = weights[p2.get(i)];
101+
}
102+
103+
return countWeightedInversions(arrayP2, w);
104+
}
105+
106+
/**
107+
* {@inheritDoc}
108+
*
109+
* <p><b>This implementation ignores the length parameter since this
110+
* distance is configured for one specific length based upon the weights
111+
* passed during construction.</b></p>
112+
*/
113+
@Override
114+
public double maxf(int length) {
115+
return maxDistance;
116+
}
117+
118+
private double countWeightedInversions(int[] array, double[] w) {
119+
if (array.length <= 1) return 0;
120+
int m = array.length >> 1;
121+
int[] left = Arrays.copyOfRange(array, 0, m);
122+
int[] right = Arrays.copyOfRange(array, m, array.length);
123+
double weightedCount = countWeightedInversions(left, w) + countWeightedInversions(right, w);
124+
int i = 0;
125+
int j = 0;
126+
int k = 0;
127+
while (i < left.length && j < right.length) {
128+
if (left[i] < right[j]) {
129+
array[k] = left[i];
130+
i++;
131+
k++;
132+
} else {
133+
// inversions
134+
double leftWeights = 0;
135+
for (int x = i; x < left.length; x++) {
136+
leftWeights += w[left[x]];
137+
}
138+
weightedCount += w[right[j]] * leftWeights;
139+
array[k] = right[j];
140+
j++;
141+
k++;
142+
}
143+
}
144+
while (i < left.length) {
145+
array[k] = left[i];
146+
i++;
147+
k++;
148+
}
149+
while (j < right.length) {
150+
array[k] = right[j];
151+
j++;
152+
k++;
153+
}
154+
return weightedCount;
155+
}
156+
}

src/test/java/org/cicirello/permutations/distance/PermutationDistanceMaxTests.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,30 @@ public void testKendallTauDistance() {
187187
}
188188
}
189189

190+
@Test
191+
public void testWeightedKendallTauDistance() {
192+
for (int n = 0; n <= 7; n++) {
193+
double[] weights = new double[n];
194+
for (int i = 0; i < n; i++) {
195+
weights[i] = 1;
196+
}
197+
WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights);
198+
double expected = n*(n-1)/2;
199+
assertEquals(expected, d.maxf(n), EPSILON, "Failed on length: " + n);
200+
201+
for (int i = 0; i < n; i++) {
202+
weights[i] = 2;
203+
}
204+
d = new WeightedKendallTauDistance(weights);
205+
expected *= 4;
206+
assertEquals(expected, d.maxf(n), EPSILON, "Failed on length: " + n);
207+
}
208+
double[] weights = { 5, 10, 2, 0, 8, 3 };
209+
WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights);
210+
double expected = 25*3 + 17*8 + 15*2 + 50;
211+
assertEquals(expected, d.maxf(weights.length), EPSILON);
212+
}
213+
190214

191215
@Test
192216
public void testReinsertionDistance() {

src/test/java/org/cicirello/permutations/distance/PermutationDistanceNormTests.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.junit.jupiter.api.*;
2525
import static org.junit.jupiter.api.Assertions.*;
2626
import org.cicirello.permutations.*;
27+
import java.util.SplittableRandom;
2728

2829
/**
2930
* JUnit tests for the normalizedDistance method of various classes that implement permutation distance metrics.
@@ -136,6 +137,19 @@ public void testKendallTauDistance() {
136137
}
137138
}
138139

140+
@Test
141+
public void testWeightedKendallTauDistance() {
142+
SplittableRandom gen = new SplittableRandom(42);
143+
for (int n = 0; n <= 6; n++) {
144+
double[] weights = new double[n];
145+
for (int i = 0; i < n; i++) {
146+
weights[i] = 5 + 15*gen.nextDouble();
147+
}
148+
WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights);
149+
assertEquals(n<=1 ? 0.0 : 1.0, bruteForceComputeMaxD(d,n), EPSILON, "Failed on length: " + n);
150+
}
151+
}
152+
139153

140154
@Test
141155
public void testReinsertionDistance() {

src/test/java/org/cicirello/permutations/distance/PermutationDistanceTests.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,69 @@ public void testInterchangeDistance() {
603603
);
604604
}
605605

606+
@Test
607+
public void testWeightedKendallTauDistance_WeightsAllOneCase() {
608+
for (int n = 2; n <= 10; n++) {
609+
double[] weights = new double[n];
610+
for (int i = 0; i < n; i++) {
611+
weights[i] = 1;
612+
}
613+
WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights);
614+
assertEquals(n, d.supportedLength());
615+
Permutation p = new Permutation(n);
616+
Permutation copy = new Permutation(p);
617+
assertEquals(0.0, d.distancef(p, copy), 1E-10);
618+
//maximal distance is permutation reversed
619+
copy.reverse();
620+
double expected = n*(n-1)/2;
621+
assertEquals(expected, d.distancef(p,copy));
622+
copy.reverse();
623+
copy.swap(0,n-1);
624+
expected = 2*n-3;
625+
assertEquals(expected, d.distancef(p,copy), 1E-10);
626+
}
627+
final WeightedKendallTauDistance d = new WeightedKendallTauDistance(new double[] {1, 1, 1, 1, 1, 1});
628+
Permutation p = new Permutation(6);
629+
for (Permutation q : p) {
630+
assertEquals(naiveKendalTau(p,q), d.distancef(p,q), 1E-10);
631+
}
632+
633+
IllegalArgumentException thrown = assertThrows(
634+
IllegalArgumentException.class,
635+
() -> d.distancef(new Permutation(5), new Permutation(6))
636+
);
637+
assertThrows(
638+
IllegalArgumentException.class,
639+
() -> d.distancef(new Permutation(6), new Permutation(5))
640+
);
641+
}
642+
643+
@Test
644+
public void testWeightedKendallTauDistance() {
645+
double[] weights = {8, 2, 10, 20, 5, 1};
646+
int[] p1 = { 5, 2, 0, 3, 1, 4};
647+
WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights);
648+
assertEquals(0.0, d.distancef(new Permutation(p1), new Permutation(p1)), 1E-10);
649+
int[] p2 = { 4, 2, 0, 3, 1, 5 };
650+
double expected = 41*5 + 40;
651+
assertEquals(expected, d.distancef(new Permutation(p1), new Permutation(p2)), 1E-10);
652+
int[] p3 = { 5, 2, 0, 1, 3, 4};
653+
expected = 40;
654+
assertEquals(expected, d.distancef(new Permutation(p1), new Permutation(p3)), 1E-10);
655+
}
656+
657+
@Test
658+
public void testWeightedKendallTauDistanceReversed() {
659+
double[] weights = {8, 2, 10, 20, 5, 1};
660+
WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights);
661+
int[] perm = { 5, 2, 0, 3, 1, 4};
662+
Permutation p1 = new Permutation(perm);
663+
Permutation p2 = new Permutation(p1);
664+
p2.reverse();
665+
double expected = 45.0 + 40.0*5 + 20*20 + 10*10 + 8*2;
666+
assertEquals(expected, d.distancef(new Permutation(p1), new Permutation(p2)), 1E-10);
667+
}
668+
606669
@Test
607670
public void testKendallTauDistance() {
608671
KendallTauDistance d = new KendallTauDistance();

0 commit comments

Comments
 (0)