+++ /dev/null
-package org.cacert.gigi.util;
-
-public class EditDistance {
-
- public static String getBestMatchingStringByEditDistance(String needle, Iterable<String> possibleStrings) {
- String best = "";
- int bestDistance = Integer.MAX_VALUE;
- for (String possibleString : possibleStrings) {
- int newDistance = calculateLevenshteinDistance(possibleString, needle);
- if (newDistance < bestDistance) {
- bestDistance = newDistance;
- best = possibleString;
- }
- }
- return best;
- }
-
- public static String getBestMatchingStringByEditDistance(String needle, String[] possibleStrings) {
- if (possibleStrings.length == 0) {
- return "";
- }
- String best = possibleStrings[0];
- int bestDistance = Integer.MAX_VALUE;
- for (String possibleString : possibleStrings) {
- int newDistance = calculateLevenshteinDistance(possibleString, needle);
- if (newDistance < bestDistance) {
- bestDistance = newDistance;
- best = possibleString;
- }
- }
- return best;
- }
-
- /**
- * Calculates the levenshtein edit distance between the passed strings.
- * Adapted from https://en.wikipedia.org/wiki/Levenshtein_distance
- */
- public static int calculateLevenshteinDistance(String s, String t) {
- // degenerate cases
- if (s == t || s.equals(t)) {
- return 0;
- }
- if (s.length() == 0) {
- return t.length();
- }
- if (t.length() == 0) {
- return s.length();
- }
-
- // create two work arrays of integer distances
- int[] previousRow = new int[t.length() + 1];
- int[] currentRow = new int[t.length() + 1];
-
- // initialize previousRow
- // this row is A[0][i]: edit distance for an empty s
- // the distance is just the number of characters to delete from t
- for (int i = 0; i < previousRow.length; i++) {
- previousRow[i] = i;
- }
-
- for (int i = 0; i < s.length(); i++) {
- // calculate current row from the previous row
-
- // first element of currentRow is A[i+1][0]
- // edit distance is delete (i+1) chars from s to match empty t
- currentRow[0] = i + 1;
-
- // use formula to fill in the rest of the row
- for (int j = 0; j < t.length(); j++) {
- int cost = s.charAt(i) == t.charAt(j) ? 0 : 1;
- currentRow[j + 1] = Math.min(Math.min(currentRow[j] + 1, previousRow[j + 1] + 1), previousRow[j] + cost);
- }
-
- System.arraycopy(currentRow, 0, previousRow, 0, currentRow.length);
- }
-
- return currentRow[t.length()];
- }
-}