WPIA git - gigi.git/commitdiff
Implement use of Public Suffix List.
author Felix Dörre <felix@dogcraft.de>
Sat, 8 Nov 2014 22:21:44 +0000 (23:21 +0100)
committer Janis Streib <janis@dogcraft.de>
Wed, 31 Dec 2014 01:36:05 +0000 (02:36 +0100)
src/org/cacert/gigi/util/PublicSuffixes.java [new file with mode: 0644]
tests/org/cacert/gigi/util/TestPublicSuffixes.java [new file with mode: 0644]

diff --git a/src/org/cacert/gigi/util/PublicSuffixes.java b/src/org/cacert/gigi/util/PublicSuffixes.java
new file mode 100644 (file)
index 0000000..bb0d027
--- /dev/null
@@ -0,0 +1,130 @@
+package org.cacert.gigi.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.IDN;
+import java.net.URL;
+import java.util.HashSet;
+
+/**
+ * In-memory view of the Public Suffix List, used to find the registrable part
+ * of a domain name.
+ * <p>
+ * The list is parsed into three sets of names converted with
+ * {@link IDN#toASCII(String)}: plain rules, wildcard rules ({@code *.foo},
+ * stored as {@code foo}) and exception rules ({@code !bar.foo}, stored as
+ * {@code bar.foo}). Lookups compare the given name exactly as passed in; no
+ * case folding or IDN conversion is applied to the argument.
+ */
+public class PublicSuffixes {
+
+    /** Plain rules, e.g. {@code com} or {@code uk.com}. */
+    HashSet<String> suffixes = new HashSet<>();
+
+    /** Wildcard rules with the leading {@code *.} removed. */
+    HashSet<String> wildcards = new HashSet<>();
+
+    /** Exception rules with the leading {@code !} removed. */
+    HashSet<String> exceptions = new HashSet<>();
+
+    private static final String url = "https://publicsuffix.org/list/effective_tld_names.dat";
+
+    private static PublicSuffixes instance;
+
+    /**
+     * Downloads the list from {@link #url}, parses it and stores the result in
+     * {@link #instance}.
+     *
+     * @throws IOException
+     *             if the list cannot be fetched or read.
+     */
+    private static void generateDefault() throws IOException {
+        URL u = new URL(url);
+        HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+        // Close the reader (and the underlying HTTP stream) as soon as parsing
+        // has finished, also when parsing fails.
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"))) {
+            instance = new PublicSuffixes(br);
+        }
+    }
+
+    /**
+     * Returns the shared instance, downloading and parsing the list on the
+     * first call.
+     * <p>
+     * NOTE(review): the lazy initialisation is not synchronised; concurrent
+     * first calls may each download the list.
+     *
+     * @return the shared instance, never {@code null}.
+     * @throws Error
+     *             wrapping the {@link IOException} if the list cannot be
+     *             loaded.
+     */
+    public static PublicSuffixes getInstance() {
+        if (instance == null) {
+            try {
+                generateDefault();
+            } catch (IOException e) {
+                throw new Error(e);
+            }
+        }
+        return instance;
+    }
+
+    /**
+     * Parses a list in the Public Suffix List text format.
+     * <p>
+     * Empty lines and lines starting with {@code //} are skipped. Of every
+     * other line only the text before the first whitespace character is used;
+     * if that text is empty (whitespace-only or indented line) the line is
+     * skipped as well. Rules whose name part is empty or contains a further
+     * {@code *} or {@code !} are reported on standard output and ignored.
+     *
+     * @param br
+     *            source of the list; it is read to the end but not closed.
+     * @throws IOException
+     *             if reading from {@code br} fails.
+     */
+    private PublicSuffixes(BufferedReader br) throws IOException {
+        String line;
+        while ((line = br.readLine()) != null) {
+            if (line.isEmpty() || line.startsWith("//")) {
+                continue;
+            }
+            String rule = line.split("\\s", 2)[0];
+            if (rule.isEmpty()) {
+                // Nothing before the first whitespace: not a rule. Registering
+                // "" as a suffix would make names like "a." look registrable.
+                continue;
+            }
+            boolean wildcard = rule.startsWith("*.");
+            boolean exception = !wildcard && rule.startsWith("!");
+            String name = wildcard ? rule.substring(2) : (exception ? rule.substring(1) : rule);
+            if (!isPlainName(name)) {
+                System.out.println("Error! unparsable public suffix line: " + rule);
+                continue;
+            }
+            String ascii = IDN.toASCII(name);
+            if (wildcard) {
+                addWildcard(ascii);
+            } else if (exception) {
+                addException(ascii);
+            } else {
+                addSuffix(ascii);
+            }
+        }
+    }
+
+    /**
+     * Checks that the name part of a rule is non-empty and free of the rule
+     * markers {@code *} and {@code !}.
+     */
+    private static boolean isPlainName(String name) {
+        return !name.isEmpty() && name.indexOf('*') == -1 && name.indexOf('!') == -1;
+    }
+
+    private void addWildcard(String data) {
+        wildcards.add(data);
+    }
+
+    private void addException(String data) {
+        exceptions.add(data);
+    }
+
+    private void addSuffix(String line) {
+        suffixes.add(line);
+    }
+
+    /**
+     * Determines the registrable part of a domain: the shortest trailing part
+     * of {@code domain} that is either an exception rule or one label in front
+     * of a public suffix.
+     *
+     * @param domain
+     *            the name to inspect, in the same form as the stored rules;
+     *            may be {@code null}.
+     * @return the registrable part, or {@code null} if {@code domain} is
+     *         {@code null}, starts with a dot, is itself a public suffix, or
+     *         does not end in any known public suffix.
+     */
+    public String getRegistrablePart(String domain) {
+        if (domain == null) {
+            return null;
+        }
+        if (domain.startsWith(".")) {
+            return null;
+        }
+        if (isSuffix(domain) && !exceptions.contains(domain)) {
+            return null;
+        }
+        return getPublicSuffix0(domain);
+    }
+
+    /**
+     * Strips leading labels from {@code domain} until the remainder is an
+     * exception rule or its parent is a public suffix, and returns that
+     * remainder; returns {@code null} once no dot is left.
+     */
+    private String getPublicSuffix0(String domain) {
+        String candidate = domain;
+        int dot;
+        while ((dot = candidate.indexOf('.')) != -1) {
+            if (exceptions.contains(candidate)) {
+                return candidate;
+            }
+            String parent = candidate.substring(dot + 1);
+            if (isSuffix(parent)) {
+                return candidate;
+            }
+            candidate = parent;
+        }
+        return null;
+    }
+
+    /**
+     * Tells whether {@code domain} is a public suffix: it is listed as a plain
+     * rule, or it is not an exception and its parent is listed as a wildcard
+     * rule.
+     */
+    private boolean isSuffix(String domain) {
+        if (suffixes.contains(domain)) {
+            return true;
+        }
+        if (exceptions.contains(domain)) {
+            return false;
+        }
+        int idx = domain.indexOf('.');
+        if (idx != -1 && wildcards.contains(domain.substring(idx + 1))) {
+            return true;
+        }
+        return false;
+    }
+}
diff --git a/tests/org/cacert/gigi/util/TestPublicSuffixes.java b/tests/org/cacert/gigi/util/TestPublicSuffixes.java
new file mode 100644 (file)
index 0000000..9f89395
--- /dev/null
@@ -0,0 +1,136 @@
+package org.cacert.gigi.util;
+
+import static org.junit.Assert.*;
+
+import java.net.IDN;
+
+import org.junit.Test;
+
+/**
+ * Checks {@link PublicSuffixes#getRegistrablePart(String)} against test
+ * vectors taken from Mozilla's Public Suffix List test data.
+ * <p>
+ * All checks go through {@link PublicSuffixes#getInstance()}.
+ */
+public class TestPublicSuffixes {
+
+    /**
+     * Taken from
+     * http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit
+     * /data/test_psl.txt?raw=1
+     */
+    @Test
+    public void testMozilla() {
+        // Any copyright is dedicated to the Public Domain.
+        // http://creativecommons.org/publicdomain/zero/1.0/
+
+        // null input.
+        checkPublicSuffix(null, null);
+        // Mixed case.
+        checkPublicSuffix("COM", null);
+        checkPublicSuffix("example.COM", "example.com");
+        checkPublicSuffix("WwW.example.COM", "example.com");
+        // Leading dot.
+        checkPublicSuffix(".com", null);
+        checkPublicSuffix(".example", null);
+        checkPublicSuffix(".example.com", null);
+        checkPublicSuffix(".example.example", null);
+        // Unlisted TLD.
+        // Disabled; presumably because getRegistrablePart yields null when no
+        // rule matches the TLD, instead of applying an implicit "*" rule --
+        // TODO confirm.
+        /*
+         * checkPublicSuffix("example", null);
+         * checkPublicSuffix("example.example", "example.example");
+         * checkPublicSuffix("b.example.example", "example.example");
+         * checkPublicSuffix("a.b.example.example", "example.example");
+         */
+        // Listed, but non-Internet, TLD.
+        // checkPublicSuffix("local", null);
+        // checkPublicSuffix("example.local", null);
+        // checkPublicSuffix("b.example.local", null);
+        // checkPublicSuffix("a.b.example.local", null);
+        // TLD with only 1 rule.
+        checkPublicSuffix("biz", null);
+        checkPublicSuffix("domain.biz", "domain.biz");
+        checkPublicSuffix("b.domain.biz", "domain.biz");
+        checkPublicSuffix("a.b.domain.biz", "domain.biz");
+        // TLD with some 2-level rules.
+        checkPublicSuffix("com", null);
+        checkPublicSuffix("example.com", "example.com");
+        checkPublicSuffix("b.example.com", "example.com");
+        checkPublicSuffix("a.b.example.com", "example.com");
+        checkPublicSuffix("uk.com", null);
+        checkPublicSuffix("example.uk.com", "example.uk.com");
+        checkPublicSuffix("b.example.uk.com", "example.uk.com");
+        checkPublicSuffix("a.b.example.uk.com", "example.uk.com");
+        checkPublicSuffix("test.ac", "test.ac");
+        // TLD with only 1 (wildcard) rule.
+        checkPublicSuffix("cy", null);
+        checkPublicSuffix("c.cy", null);
+        checkPublicSuffix("b.c.cy", "b.c.cy");
+        checkPublicSuffix("a.b.c.cy", "b.c.cy");
+        // More complex TLD.
+        checkPublicSuffix("jp", null);
+        checkPublicSuffix("test.jp", "test.jp");
+        checkPublicSuffix("www.test.jp", "test.jp");
+        checkPublicSuffix("ac.jp", null);
+        checkPublicSuffix("test.ac.jp", "test.ac.jp");
+        checkPublicSuffix("www.test.ac.jp", "test.ac.jp");
+        checkPublicSuffix("kyoto.jp", null);
+        checkPublicSuffix("test.kyoto.jp", "test.kyoto.jp");
+        checkPublicSuffix("ide.kyoto.jp", null);
+        checkPublicSuffix("b.ide.kyoto.jp", "b.ide.kyoto.jp");
+        checkPublicSuffix("a.b.ide.kyoto.jp", "b.ide.kyoto.jp");
+        checkPublicSuffix("c.kobe.jp", null);
+        checkPublicSuffix("b.c.kobe.jp", "b.c.kobe.jp");
+        checkPublicSuffix("a.b.c.kobe.jp", "b.c.kobe.jp");
+        checkPublicSuffix("city.kobe.jp", "city.kobe.jp");
+        checkPublicSuffix("www.city.kobe.jp", "city.kobe.jp");
+        // TLD with a wildcard rule and exceptions.
+        checkPublicSuffix("ck", null);
+        checkPublicSuffix("test.ck", null);
+        checkPublicSuffix("b.test.ck", "b.test.ck");
+        checkPublicSuffix("a.b.test.ck", "b.test.ck");
+        checkPublicSuffix("www.ck", "www.ck");
+        checkPublicSuffix("www.www.ck", "www.ck");
+        // US K12.
+        checkPublicSuffix("us", null);
+        checkPublicSuffix("test.us", "test.us");
+        checkPublicSuffix("www.test.us", "test.us");
+        checkPublicSuffix("ak.us", null);
+        checkPublicSuffix("test.ak.us", "test.ak.us");
+        checkPublicSuffix("www.test.ak.us", "test.ak.us");
+        checkPublicSuffix("k12.ak.us", null);
+        checkPublicSuffix("test.k12.ak.us", "test.k12.ak.us");
+        checkPublicSuffix("www.test.k12.ak.us", "test.k12.ak.us");
+    }
+
+    /**
+     * Vectors with internationalised labels given in Unicode form; the helper
+     * converts them with {@link IDN#toASCII(String)} before the lookup.
+     */
+    @Test
+    public void testMozillaIDN() {
+        // IDN labels.
+        checkPublicSuffix("食狮.com.cn", "食狮.com.cn");
+        checkPublicSuffix("食狮.公司.cn", "食狮.公司.cn");
+        checkPublicSuffix("www.食狮.公司.cn", "食狮.公司.cn");
+        checkPublicSuffix("shishi.公司.cn", "shishi.公司.cn");
+        checkPublicSuffix("公司.cn", null);
+        checkPublicSuffix("食狮.中国", "食狮.中国");
+        checkPublicSuffix("www.食狮.中国", "食狮.中国");
+        checkPublicSuffix("shishi.中国", "shishi.中国");
+        checkPublicSuffix("中国", null);
+    }
+
+    /**
+     * The internationalised vectors again, this time written directly in
+     * their punycode ({@code xn--}) form.
+     */
+    @Test
+    public void testMozillaIDNPuny() {
+        // Same as above, but punycoded.
+        checkPublicSuffix("xn--85x722f.com.cn", "xn--85x722f.com.cn");
+        checkPublicSuffix("xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
+        checkPublicSuffix("www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
+        checkPublicSuffix("shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn");
+        checkPublicSuffix("xn--55qx5d.cn", null);
+        checkPublicSuffix("xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
+        checkPublicSuffix("www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
+        checkPublicSuffix("shishi.xn--fiqs8s", "shishi.xn--fiqs8s");
+        checkPublicSuffix("xn--fiqs8s", null);
+
+    }
+
+    /**
+     * Asserts that the registrable part of {@code domain} equals
+     * {@code suffix}.
+     * <p>
+     * The domain is lower-cased and both values are converted with
+     * {@link IDN#toASCII(String)} before comparing; {@code null} is passed
+     * through unchanged on either side. Despite the method name, the value
+     * under test is the result of
+     * {@link PublicSuffixes#getRegistrablePart(String)}.
+     *
+     * @param domain
+     *            the name to look up, or {@code null}.
+     * @param suffix
+     *            the expected registrable part, or {@code null} if none is
+     *            expected.
+     */
+    private void checkPublicSuffix(String domain, String suffix) {
+        if (domain != null) {
+            // NOTE(review): toLowerCase() without a Locale uses the default
+            // locale -- confirm that is intended for the mixed-case vectors.
+            domain = domain.toLowerCase();
+        }
+        String publicSuffix = PublicSuffixes.getInstance().getRegistrablePart(domain == null ? null : IDN.toASCII(domain));
+        assertEquals(suffix == null ? null : IDN.toASCII(suffix), publicSuffix);
+    }
+}