From 6d41f0b3cd6f0cda6cd99d0757734b36d8dc2a95 Mon Sep 17 00:00:00 2001 From: Johannes Bechberger Date: Mon, 5 Sep 2016 20:01:39 +0200 Subject: [PATCH] add: improvement of template parsing Change-Id: I9e50cc2d8d30b7b795dedb9dee02ade4d090d891 --- .../cacert/gigi/output/template/Template.java | 141 ++++++++++++++++-- .../template/TemplateParseException.java | 89 +++++++++++ src/org/cacert/gigi/util/EditDistance.java | 79 ++++++++++ .../cacert/gigi/template/TestTemplate.java | 4 +- .../org/cacert/gigi/localisation/conf.txt | 2 +- 5 files changed, 300 insertions(+), 15 deletions(-) create mode 100644 src/org/cacert/gigi/output/template/TemplateParseException.java create mode 100644 src/org/cacert/gigi/util/EditDistance.java diff --git a/src/org/cacert/gigi/output/template/Template.java b/src/org/cacert/gigi/output/template/Template.java index 8c633c42..21c2b04e 100644 --- a/src/org/cacert/gigi/output/template/Template.java +++ b/src/org/cacert/gigi/output/template/Template.java @@ -1,6 +1,5 @@ package org.cacert.gigi.output.template; -import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -10,6 +9,7 @@ import java.io.Reader; import java.net.URISyntaxException; import java.net.URL; import java.text.SimpleDateFormat; +import java.util.Arrays; import java.util.Collection; import java.util.Date; import java.util.LinkedList; @@ -20,6 +20,7 @@ import java.util.regex.Pattern; import org.cacert.gigi.localisation.Language; import org.cacert.gigi.output.DateSelector; import org.cacert.gigi.util.DayDate; +import org.cacert.gigi.util.EditDistance; import org.cacert.gigi.util.HTMLEncoder; /** @@ -66,6 +67,12 @@ public class Template implements Outputable { private static final Pattern ELSE_PATTERN = Pattern.compile(" ?\\} ?else ?\\{ ?"); + private static final String[] POSSIBLE_CONTROL_PATTERNS = new String[] { + "if", "else", "foreach" + }; + + private static final String UNKOWN_CONTROL_STRUCTURE_MSG = "Unknown control structure \"%s\", did you mean \"%s\"?"; + /** * Creates a new template by parsing the contents from the given URL. This * constructor will fail on syntax error. When the URL points to a file, @@ -111,14 +118,30 @@ public class Template implements Outputable { } protected ParseResult parseContent(Reader r) throws IOException { + ParseContext context = new ParseContext(r); + ParseResult result = parseContent(context); + if (context.parseException.isEmpty()) { + return result; + } + while (context.curChar != -1) { + parseContent(context); + } + throw context.parseException; + } + + protected ParseResult parseContent(ParseContext context) throws IOException { LinkedList splitted = new LinkedList(); LinkedList commands = new LinkedList(); StringBuffer buf = new StringBuffer(); String blockType = null; + ParseContext tContext = null; outer: while (true) { + if (tContext != null) { + context.merge(tContext); + } while ( !endsWith(buf, "")) { - int ch = r.read(); + int ch = context.read(); if (ch == -1) { - throw new EOFException(); + context.addError("Expected \"?>\""); + return null; } buf.append((char) ch); } @@ -144,18 +169,23 @@ public class Template implements Outputable { if (m.matches()) { String type = m.group(1); String variable = m.group(2); - ParseResult body = parseContent(r); + ParseContext bodyContext = tContext.copy(); + ParseResult body = parseContent(bodyContext); if (type.equals("if")) { if ("else".equals(body.getEndType())) { - commands.add(new IfStatement(variable, body.getBlock("else"), parseContent(r).getBlock("}"))); + ParseContext bodyContext2 = bodyContext.copy(); + commands.add(new IfStatement(variable, body.getBlock("else"), parseContent(bodyContext).getBlock("}"))); + bodyContext.merge(bodyContext2); } else { commands.add(new IfStatement(variable, body.getBlock("}"))); } } else if (type.equals("foreach")) { commands.add(new ForeachStatement(variable, body.getBlock("}"))); } else { - throw new IOException("Syntax error: unknown control structure: " + type); + String bestMatching = EditDistance.getBestMatchingStringByEditDistance(type, POSSIBLE_CONTROL_PATTERNS); + tContext.addError(String.format(UNKOWN_CONTROL_STRUCTURE_MSG, type, bestMatching)); } + tContext.merge(bodyContext); continue; } else if ((m = ELSE_PATTERN.matcher(com)).matches()) { blockType = "else"; @@ -164,18 +194,22 @@ public class Template implements Outputable { blockType = "}"; break; } else { - commands.add(parseCommand(com)); + commands.add(parseCommand(com, tContext)); } } + if (tContext != null) { + context.merge(tContext); + } splitted.add(buf.toString()); - return new ParseResult(new TemplateBlock(splitted.toArray(new String[splitted.size()]), commands.toArray(new Translatable[commands.size()])), blockType); + ParseResult result = new ParseResult(new TemplateBlock(splitted.toArray(new String[splitted.size()]), commands.toArray(new Translatable[commands.size()])), blockType); + return result; } private boolean endsWith(StringBuffer buf, String string) { return buf.length() >= string.length() && buf.substring(buf.length() - string.length(), buf.length()).equals(string); } - private Translatable parseCommand(String s2) { + private Translatable parseCommand(String s2, ParseContext context) { if (s2.startsWith("=_")) { final String raw = s2.substring(2); if ( !s2.contains("$") && !s2.contains("!'")) { @@ -187,7 +221,10 @@ public class Template implements Outputable { final String raw = s2.substring(2); return new OutputVariableCommand(raw); } else { - throw new Error("Unknown processing instruction: " + s2); + context.addError("Unknown processing instruction \"" + s2 + "\"," + " did you mean \"" + EditDistance.getBestMatchingStringByEditDistance(s2, new String[] { + "=_", "=$" + }) + "\"?"); + return null; } } @@ -218,7 +255,7 @@ public class Template implements Outputable { Object s = vars.get(varname); if (s == null) { - System.out.println("Empty variable: " + varname); + System.err.println("Empty variable: " + varname); } if (s instanceof Outputable) { ((Outputable) s).output(out, l, vars); @@ -240,4 +277,84 @@ public class Template implements Outputable { public void addTranslations(Collection s) { data.addTranslations(s); } + + private class ParseContext { + + public static final int CONTEXT_LENGTH = 20; + + private Reader reader; + + public final TemplateParseException parseException = new TemplateParseException(source); + + int line = 1; + + int column = 0; + + private int curChar = -1; + + private int[] charContext = new int[CONTEXT_LENGTH]; + + protected int contextPosition = 0; + + public ParseContext(Reader reader) { + this.reader = reader; + } + + public void addError(String message) { + addError(line, column, message); + } + + public void addError(int line, int column, String message) { + StringBuffer charContextBuffer = new StringBuffer(); + int j = contextPosition; + for (int i = 0; i < CONTEXT_LENGTH; i++) { + if (charContext[j] != 0) { + if (charContext[j] == '\n') { + charContextBuffer.append("\\n"); + } else { + charContextBuffer.appendCodePoint(charContext[j]); + } + } + j = (j + 1) % CONTEXT_LENGTH; + } + parseException.addError(line, column, message, charContextBuffer.toString()); + } + + public void merge(ParseContext other) { + line = other.line; + column = other.column; + parseException.append(other.parseException); + } + + public void append(ParseContext other) { + parseException.append(other.parseException); + } + + public int read() throws IOException { + int ch; + while ((ch = reader.read()) == '\r') { + } + curChar = ch; + if (ch == '\n') { + line++; + column = 0; + } else { + column++; + } + if (ch != -1) { + charContext[contextPosition] = ch; + contextPosition = (contextPosition + 1) % CONTEXT_LENGTH; + } + return ch; + } + + public ParseContext copy() { + ParseContext newParseContext = new ParseContext(reader); + newParseContext.line = line; + newParseContext.column = column; + newParseContext.charContext = Arrays.copyOf(charContext, charContext.length); + newParseContext.contextPosition = contextPosition; + return newParseContext; + } + } } diff --git a/src/org/cacert/gigi/output/template/TemplateParseException.java b/src/org/cacert/gigi/output/template/TemplateParseException.java new file mode 100644 index 00000000..e52f1146 --- /dev/null +++ b/src/org/cacert/gigi/output/template/TemplateParseException.java @@ -0,0 +1,89 @@ +package org.cacert.gigi.output.template; + +import java.io.IOException; +import java.util.Set; +import java.util.TreeSet; + +/** + * A exception that is thrown when a template contains syntactic errors. It + * allows the combining of several error messages to catch more than one error + * in a template. + */ +public class TemplateParseException extends IOException { + + private static final long serialVersionUID = 1L; + + private Object templateSource; + + private Set errors = new TreeSet<>(); + + public TemplateParseException(Object templateSource) { + this.templateSource = templateSource; + } + + public void addError(ErrorMessage error) { + errors.add(error); + } + + public void addError(int line, int column, String message, String erroneousLine) { + addError(new ErrorMessage(line, column, message, erroneousLine)); + } + + public void append(TemplateParseException other) { + errors.addAll(other.errors); + } + + @Override + public String toString() { + StringBuilder strb = new StringBuilder("Error in template \""); + strb.append(templateSource); + strb.append("\":"); + for (ErrorMessage errorMessage : errors) { + strb.append("\n\t"); + strb.append(errorMessage.toString()); + } + return strb.toString(); + } + + @Override + public String getMessage() { + return toString(); + } + + public boolean isEmpty() { + return errors.isEmpty(); + } + + public static class ErrorMessage implements Comparable { + + private final int line; + + private final int column; + + private final String message; + + private final String erroneousLine; + + public ErrorMessage(int line, int column, String message, String erroneousLine) { + this.line = line; + this.column = column; + this.message = message; + this.erroneousLine = erroneousLine; + } + + @Override + public String toString() { + return String.format("Around %d:%d (after …%s…) %s", line, column, erroneousLine, message); + } + + @Override + public int compareTo(ErrorMessage o) { + int l = Integer.compare(line, o.line); + if (l != 0) { + return l; + } + return Integer.compare(column, o.column); + } + } + +} diff --git a/src/org/cacert/gigi/util/EditDistance.java b/src/org/cacert/gigi/util/EditDistance.java new file mode 100644 index 00000000..0cadd213 --- /dev/null +++ b/src/org/cacert/gigi/util/EditDistance.java @@ -0,0 +1,79 @@ +package org.cacert.gigi.util; + +public class EditDistance { + + public static String getBestMatchingStringByEditDistance(String needle, Iterable possibleStrings) { + String best = ""; + int bestDistance = Integer.MAX_VALUE; + for (String possibleString : possibleStrings) { + int newDistance = calculateLevenshteinDistance(possibleString, needle); + if (newDistance < bestDistance) { + bestDistance = newDistance; + best = possibleString; + } + } + return best; + } + + public static String getBestMatchingStringByEditDistance(String needle, String[] possibleStrings) { + if (possibleStrings.length == 0) { + return ""; + } + String best = possibleStrings[0]; + int bestDistance = Integer.MAX_VALUE; + for (String possibleString : possibleStrings) { + int newDistance = calculateLevenshteinDistance(possibleString, needle); + if (newDistance < bestDistance) { + bestDistance = newDistance; + best = possibleString; + } + } + return best; + } + + /** + * Calculates the levenshtein edit distance between the passed strings. + * Adapted from https://en.wikipedia.org/wiki/Levenshtein_distance + */ + public static int calculateLevenshteinDistance(String s, String t) { + // degenerate cases + if (s == t || s.equals(t)) { + return 0; + } + if (s.length() == 0) { + return t.length(); + } + if (t.length() == 0) { + return s.length(); + } + + // create two work arrays of integer distances + int[] previousRow = new int[t.length() + 1]; + int[] currentRow = new int[t.length() + 1]; + + // initialize previousRow + // this row is A[0][i]: edit distance for an empty s + // the distance is just the number of characters to delete from t + for (int i = 0; i < previousRow.length; i++) { + previousRow[i] = i; + } + + for (int i = 0; i < s.length(); i++) { + // calculate current row from the previous row + + // first element of currentRow is A[i+1][0] + // edit distance is delete (i+1) chars from s to match empty t + currentRow[0] = i + 1; + + // use formula to fill in the rest of the row + for (int j = 0; j < t.length(); j++) { + int cost = s.charAt(i) == t.charAt(j) ? 0 : 1; + currentRow[j + 1] = Math.min(Math.min(currentRow[j] + 1, previousRow[j + 1] + 1), previousRow[j] + cost); + } + + System.arraycopy(currentRow, 0, previousRow, 0, currentRow.length); + } + + return currentRow[t.length()]; + } +} diff --git a/tests/org/cacert/gigi/template/TestTemplate.java b/tests/org/cacert/gigi/template/TestTemplate.java index fe7e25b8..82aad931 100644 --- a/tests/org/cacert/gigi/template/TestTemplate.java +++ b/tests/org/cacert/gigi/template/TestTemplate.java @@ -142,8 +142,8 @@ public class TestTemplate { public void testIgnoredNewline() { assertEquals("\\ab\\\\n\n\\c", testExecute(Language.getInstance(Locale.ENGLISH), vars, "\\a\\\nb\\\\n\n\\\\\nc")); assertEquals("a\\b\\c", testExecute(Language.getInstance(Locale.ENGLISH), vars, "a\\b\\\n\\c")); - // \r's are currently not valid. - assertEquals("a\\\r\nb", testExecute(Language.getInstance(Locale.ENGLISH), vars, "a\\\r\nb")); + // \r's are now valid. + assertEquals("ab", testExecute(Language.getInstance(Locale.ENGLISH), vars, "a\\\r\nb")); } } diff --git a/util-testing/org/cacert/gigi/localisation/conf.txt b/util-testing/org/cacert/gigi/localisation/conf.txt index f6a1c9db..21b24525 100644 --- a/util-testing/org/cacert/gigi/localisation/conf.txt +++ b/util-testing/org/cacert/gigi/localisation/conf.txt @@ -12,7 +12,7 @@ org.cacert.gigi.pages Page.Page(String),0 org.cacert.gigi.pages OneFormPage.OneFormPage(String, Class),0 org.cacert.gigi.pages StaticPage.StaticPage(String, InputStream),0 org.cacert.gigi.output.template SprintfCommand.SprintfCommand(String, List),0=>org.cacert.gigi.output.template SprintfCommand.output(PrintWriter, Language, Map),0 -org.cacert.gigi.output.template SprintfCom---invalid---mand.SprintfCommand(String),0=>org.cacert.gigi.output.template Template.parseCommand(String),0 +org.cacert.gigi.output.template SprintfCom---invalid---mand.SprintfCommand(String),0=>org.cacert.gigi.output.template Template.parseCommand(String, ParseContext),0 org.cacert.gigi.output.template TranslateCommand.TranslateCommand(String),0=>org.cacert.gigi.output.template TranslateCommand.output(PrintWriter, Language, Map),0 org.cacert.gigi.pages.account.domain DomainOverview.DomainOverview(String),0 org.cacert.gigi.dbObjects Group.Group(String, String, boolean, boolean, boolean),1 -- 2.39.2