]> WPIA git - gigi.git/blobdiff - src/org/cacert/gigi/util/EditDistance.java
add: improvement of template parsing
[gigi.git] / src / org / cacert / gigi / util / EditDistance.java
diff --git a/src/org/cacert/gigi/util/EditDistance.java b/src/org/cacert/gigi/util/EditDistance.java
new file mode 100644 (file)
index 0000000..0cadd21
--- /dev/null
@@ -0,0 +1,79 @@
+package org.cacert.gigi.util;
+
+public class EditDistance {
+
+    public static String getBestMatchingStringByEditDistance(String needle, Iterable<String> possibleStrings) {
+        String best = "";
+        int bestDistance = Integer.MAX_VALUE;
+        for (String possibleString : possibleStrings) {
+            int newDistance = calculateLevenshteinDistance(possibleString, needle);
+            if (newDistance < bestDistance) {
+                bestDistance = newDistance;
+                best = possibleString;
+            }
+        }
+        return best;
+    }
+
+    public static String getBestMatchingStringByEditDistance(String needle, String[] possibleStrings) {
+        if (possibleStrings.length == 0) {
+            return "";
+        }
+        String best = possibleStrings[0];
+        int bestDistance = Integer.MAX_VALUE;
+        for (String possibleString : possibleStrings) {
+            int newDistance = calculateLevenshteinDistance(possibleString, needle);
+            if (newDistance < bestDistance) {
+                bestDistance = newDistance;
+                best = possibleString;
+            }
+        }
+        return best;
+    }
+
+    /**
+     * Calculates the levenshtein edit distance between the passed strings.
+     * Adapted from https://en.wikipedia.org/wiki/Levenshtein_distance
+     */
+    public static int calculateLevenshteinDistance(String s, String t) {
+        // degenerate cases
+        if (s == t || s.equals(t)) {
+            return 0;
+        }
+        if (s.length() == 0) {
+            return t.length();
+        }
+        if (t.length() == 0) {
+            return s.length();
+        }
+
+        // create two work arrays of integer distances
+        int[] previousRow = new int[t.length() + 1];
+        int[] currentRow = new int[t.length() + 1];
+
+        // initialize previousRow
+        // this row is A[0][i]: edit distance for an empty s
+        // the distance is just the number of characters to delete from t
+        for (int i = 0; i < previousRow.length; i++) {
+            previousRow[i] = i;
+        }
+
+        for (int i = 0; i < s.length(); i++) {
+            // calculate current row from the previous row
+
+            // first element of currentRow is A[i+1][0]
+            // edit distance is delete (i+1) chars from s to match empty t
+            currentRow[0] = i + 1;
+
+            // use formula to fill in the rest of the row
+            for (int j = 0; j < t.length(); j++) {
+                int cost = s.charAt(i) == t.charAt(j) ? 0 : 1;
+                currentRow[j + 1] = Math.min(Math.min(currentRow[j] + 1, previousRow[j + 1] + 1), previousRow[j] + cost);
+            }
+
+            System.arraycopy(currentRow, 0, previousRow, 0, currentRow.length);
+        }
+
+        return currentRow[t.length()];
+    }
+}