/*
 * Provenance (from the patch header this listing was extracted from):
 *   commit 38482484489732331e9e0860d80c2c0f55f072ef
 *   From: =?utf8?q?Felix=20D=C3=B6rre?=
 *   Date: Sat, 8 Nov 2014 23:21:44 +0100
 *   Subject: [PATCH] Implement use of Public Suffix List.
 * Path in that commit: src/org/cacert/gigi/util/PublicSuffixes.java
 * (declared there in package org.cacert.gigi.util).
 */
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.IDN;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;

/**
 * In-memory view of the Public Suffix List that answers "what is the
 * registrable part of this domain name?".
 *
 * <p>
 * The list is parsed into three sets: plain suffix rules, wildcard rules
 * (stored without the leading {@code "*."}) and exception rules (stored
 * without the leading {@code "!"}). Every rule is stored in the form returned
 * by {@link IDN#toASCII(String)}.
 *
 * <p>
 * Lookups are exact string matches against those sets. No case folding and no
 * IDN conversion is applied to the names passed to
 * {@link #getRegistrablePart(String)}; callers have to pass names in the same
 * form the rules are stored in.
 */
public class PublicSuffixes {

    /** Plain rules, e.g. {@code "com"} or {@code "uk.com"}. */
    final HashSet<String> suffixes = new HashSet<>();

    /** Wildcard rules without the leading {@code "*."}. */
    final HashSet<String> wildcards = new HashSet<>();

    /** Exception rules without the leading {@code "!"}. */
    final HashSet<String> exceptions = new HashSet<>();

    /** Location the list is downloaded from by {@link #getInstance()}. */
    private static final String url = "https://publicsuffix.org/list/effective_tld_names.dat";

    /** Lazily created shared instance; only touched by {@link #getInstance()}. */
    private static PublicSuffixes instance;

    /**
     * Downloads the list from {@link #url} and stores the parsed result in
     * {@link #instance}.
     *
     * @throws IOException
     *             if the list cannot be fetched or read
     */
    private static void generateDefault() throws IOException {
        HttpURLConnection huc = (HttpURLConnection) new URL(url).openConnection();
        // Closing the reader also closes the underlying response stream.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(huc.getInputStream(), StandardCharsets.UTF_8))) {
            instance = new PublicSuffixes(br);
        } finally {
            huc.disconnect();
        }
    }

    /**
     * Returns the shared instance, downloading and parsing the list on the
     * first call. The method is synchronized so that concurrent first calls
     * result in a single download and a fully built instance.
     *
     * @return the shared instance, never {@code null}
     * @throws Error
     *             wrapping the {@link IOException} if the list cannot be loaded
     */
    public static synchronized PublicSuffixes getInstance() {
        if (instance == null) {
            try {
                generateDefault();
            } catch (IOException e) {
                throw new Error("Could not load public suffix list from " + url, e);
            }
        }
        return instance;
    }

    /**
     * Parses a list in the Public Suffix List text format.
     *
     * <p>
     * Empty lines and lines starting with {@code "//"} are skipped. Of every
     * other line only the text up to the first whitespace character is used; a
     * line that starts with whitespace therefore carries no rule and is
     * skipped as well.
     *
     * @param br
     *            source of the list; it is read to the end but not closed here
     * @throws IOException
     *             if reading from {@code br} fails
     */
    private PublicSuffixes(BufferedReader br) throws IOException {
        String line;
        while ((line = br.readLine()) != null) {
            if (line.isEmpty() || line.startsWith("//")) {
                continue;
            }
            String rule = line.split("\\s", 2)[0];
            if (rule.isEmpty()) {
                // Without this check the empty string would be registered as
                // a suffix and names ending in "." would match it.
                continue;
            }
            addRule(rule);
        }
    }

    /**
     * Classifies one rule by its prefix and stores it in the matching set. A
     * rule that still contains {@code '*'} or {@code '!'} after its prefix has
     * been removed is reported on standard output and ignored.
     *
     * @param rule
     *            non-empty rule text without trailing whitespace or comment
     */
    private void addRule(String rule) {
        final String name;
        final HashSet<String> target;
        if (rule.startsWith("*.")) {
            name = rule.substring(2);
            target = wildcards;
        } else if (rule.startsWith("!")) {
            name = rule.substring(1);
            target = exceptions;
        } else {
            name = rule;
            target = suffixes;
        }
        if (name.contains("*") || name.contains("!")) {
            System.out.println("Error! unparsable public suffix line: " + rule);
            return;
        }
        target.add(IDN.toASCII(name));
    }

    /**
     * Determines the registrable part of a domain name: the shortest trailing
     * part of {@code domain} that is not itself a public suffix.
     *
     * @param domain
     *            the name to inspect, in the form the rules are stored in; may
     *            be {@code null}
     * @return the registrable part, or {@code null} if {@code domain} is
     *         {@code null}, starts with a dot, is itself a public suffix, or
     *         does not end in any rule of the list
     */
    public String getRegistrablePart(String domain) {
        if (domain == null || domain.startsWith(".")) {
            return null;
        }
        if (isSuffix(domain) && !exceptions.contains(domain)) {
            return null;
        }
        return getPublicSuffix0(domain);
    }

    /**
     * Strips leading labels from {@code domain} until the remainder is either
     * an exception rule or sits directly below a public suffix.
     *
     * @param domain
     *            the name to inspect, not {@code null}
     * @return the first remainder that qualifies, or {@code null} if the
     *         labels run out first
     */
    private String getPublicSuffix0(String domain) {
        String candidate = domain;
        int dot;
        // A loop instead of recursion: stack depth stays constant no matter
        // how many labels the name has.
        while ((dot = candidate.indexOf('.')) != -1) {
            if (exceptions.contains(candidate)) {
                return candidate;
            }
            String parent = candidate.substring(dot + 1);
            if (isSuffix(parent)) {
                return candidate;
            }
            candidate = parent;
        }
        return null;
    }

    /**
     * Tells whether {@code domain} is a public suffix: it is listed as a plain
     * rule, or it is not an exception and its parent is listed as a wildcard
     * rule.
     *
     * @param domain
     *            the name to test, not {@code null}
     * @return {@code true} if {@code domain} is a public suffix
     */
    private boolean isSuffix(String domain) {
        if (suffixes.contains(domain)) {
            return true;
        }
        if (exceptions.contains(domain)) {
            return false;
        }
        int idx = domain.indexOf('.');
        return idx != -1 && wildcards.contains(domain.substring(idx + 1));
    }
}
b/tests/org/cacert/gigi/util/TestPublicSuffixes.java @@ -0,0 +1,136 @@ +package org.cacert.gigi.util; + +import static org.junit.Assert.*; + +import java.net.IDN; + +import org.junit.Test; + +public class TestPublicSuffixes { + + /** + * Taken from + * http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit + * /data/test_psl.txt?raw=1 + */ + @Test + public void testMozilla() { + // Any copyright is dedicated to the Public Domain. + // http://creativecommons.org/publicdomain/zero/1.0/ + + // null input. + checkPublicSuffix(null, null); + // Mixed case. + checkPublicSuffix("COM", null); + checkPublicSuffix("example.COM", "example.com"); + checkPublicSuffix("WwW.example.COM", "example.com"); + // Leading dot. + checkPublicSuffix(".com", null); + checkPublicSuffix(".example", null); + checkPublicSuffix(".example.com", null); + checkPublicSuffix(".example.example", null); + // Unlisted TLD. + /* + * checkPublicSuffix("example", null); + * checkPublicSuffix("example.example", "example.example"); + * checkPublicSuffix("b.example.example", "example.example"); + * checkPublicSuffix("a.b.example.example", "example.example"); + */ + // Listed, but non-Internet, TLD. + // checkPublicSuffix("local", null); + // checkPublicSuffix("example.local", null); + // checkPublicSuffix("b.example.local", null); + // checkPublicSuffix("a.b.example.local", null); + // TLD with only 1 rule. + checkPublicSuffix("biz", null); + checkPublicSuffix("domain.biz", "domain.biz"); + checkPublicSuffix("b.domain.biz", "domain.biz"); + checkPublicSuffix("a.b.domain.biz", "domain.biz"); + // TLD with some 2-level rules. 
+ checkPublicSuffix("com", null); + checkPublicSuffix("example.com", "example.com"); + checkPublicSuffix("b.example.com", "example.com"); + checkPublicSuffix("a.b.example.com", "example.com"); + checkPublicSuffix("uk.com", null); + checkPublicSuffix("example.uk.com", "example.uk.com"); + checkPublicSuffix("b.example.uk.com", "example.uk.com"); + checkPublicSuffix("a.b.example.uk.com", "example.uk.com"); + checkPublicSuffix("test.ac", "test.ac"); + // TLD with only 1 (wildcard) rule. + checkPublicSuffix("cy", null); + checkPublicSuffix("c.cy", null); + checkPublicSuffix("b.c.cy", "b.c.cy"); + checkPublicSuffix("a.b.c.cy", "b.c.cy"); + // More complex TLD. + checkPublicSuffix("jp", null); + checkPublicSuffix("test.jp", "test.jp"); + checkPublicSuffix("www.test.jp", "test.jp"); + checkPublicSuffix("ac.jp", null); + checkPublicSuffix("test.ac.jp", "test.ac.jp"); + checkPublicSuffix("www.test.ac.jp", "test.ac.jp"); + checkPublicSuffix("kyoto.jp", null); + checkPublicSuffix("test.kyoto.jp", "test.kyoto.jp"); + checkPublicSuffix("ide.kyoto.jp", null); + checkPublicSuffix("b.ide.kyoto.jp", "b.ide.kyoto.jp"); + checkPublicSuffix("a.b.ide.kyoto.jp", "b.ide.kyoto.jp"); + checkPublicSuffix("c.kobe.jp", null); + checkPublicSuffix("b.c.kobe.jp", "b.c.kobe.jp"); + checkPublicSuffix("a.b.c.kobe.jp", "b.c.kobe.jp"); + checkPublicSuffix("city.kobe.jp", "city.kobe.jp"); + checkPublicSuffix("www.city.kobe.jp", "city.kobe.jp"); + // TLD with a wildcard rule and exceptions. + checkPublicSuffix("ck", null); + checkPublicSuffix("test.ck", null); + checkPublicSuffix("b.test.ck", "b.test.ck"); + checkPublicSuffix("a.b.test.ck", "b.test.ck"); + checkPublicSuffix("www.ck", "www.ck"); + checkPublicSuffix("www.www.ck", "www.ck"); + // US K12. 
+ checkPublicSuffix("us", null); + checkPublicSuffix("test.us", "test.us"); + checkPublicSuffix("www.test.us", "test.us"); + checkPublicSuffix("ak.us", null); + checkPublicSuffix("test.ak.us", "test.ak.us"); + checkPublicSuffix("www.test.ak.us", "test.ak.us"); + checkPublicSuffix("k12.ak.us", null); + checkPublicSuffix("test.k12.ak.us", "test.k12.ak.us"); + checkPublicSuffix("www.test.k12.ak.us", "test.k12.ak.us"); + } + + @Test + public void testMozillaIDN() { + // IDN labels. + checkPublicSuffix("食狮.com.cn", "食狮.com.cn"); + checkPublicSuffix("食狮.公司.cn", "食狮.公司.cn"); + checkPublicSuffix("www.食狮.公司.cn", "食狮.公司.cn"); + checkPublicSuffix("shishi.公司.cn", "shishi.公司.cn"); + checkPublicSuffix("公司.cn", null); + checkPublicSuffix("食狮.中国", "食狮.中国"); + checkPublicSuffix("www.食狮.中国", "食狮.中国"); + checkPublicSuffix("shishi.中国", "shishi.中国"); + checkPublicSuffix("中国", null); + } + + @Test + public void testMozillaIDNPuny() { + // Same as above, but punycoded. + checkPublicSuffix("xn--85x722f.com.cn", "xn--85x722f.com.cn"); + checkPublicSuffix("xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"); + checkPublicSuffix("www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"); + checkPublicSuffix("shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"); + checkPublicSuffix("xn--55qx5d.cn", null); + checkPublicSuffix("xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"); + checkPublicSuffix("www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"); + checkPublicSuffix("shishi.xn--fiqs8s", "shishi.xn--fiqs8s"); + checkPublicSuffix("xn--fiqs8s", null); + + } + + private void checkPublicSuffix(String domain, String suffix) { + if (domain != null) { + domain = domain.toLowerCase(); + } + String publicSuffix = PublicSuffixes.getInstance().getRegistrablePart(domain == null ? null : IDN.toASCII(domain)); + assertEquals(suffix == null ? null : IDN.toASCII(suffix), publicSuffix); + } +} -- 2.39.2