--- /dev/null
+package org.cacert.gigi.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.IDN;
+import java.net.URL;
+import java.util.HashSet;
+
+/**
+ * In-memory copy of the public suffix list, used to work out the registrable
+ * part of a domain name.
+ * <p>
+ * The list is fetched on the first call to {@link #getInstance()} and split
+ * into three rule sets: plain suffixes, wildcard rules (<code>*.name</code>)
+ * and exception rules (<code>!name</code>). Every rule is stored in its
+ * {@link IDN#toASCII(String)} form and looked up by exact, case-sensitive
+ * string comparison, so domains passed to
+ * {@link #getRegistrablePart(String)} must already be in that same form.
+ */
+public class PublicSuffixes {
+
+    /** Plain rules: each entry is itself a public suffix. */
+    HashSet<String> suffixes = new HashSet<>();
+
+    /** Wildcard rules, stored without their leading <code>*.</code>. */
+    HashSet<String> wildcards = new HashSet<>();
+
+    /** Exception rules, stored without their leading <code>!</code>. */
+    HashSet<String> exceptions = new HashSet<>();
+
+    /** Location the default list is downloaded from. */
+    private static final String LIST_URL = "https://publicsuffix.org/list/effective_tld_names.dat";
+
+    /** Lazily created shared instance; stays null until the first successful load. */
+    private static PublicSuffixes instance;
+
+    /**
+     * Downloads the list from {@link #LIST_URL}, parses it and stores the
+     * result in {@link #instance}.
+     *
+     * @throws IOException
+     *             if the list cannot be fetched or read
+     */
+    private static void generateDefault() throws IOException {
+        HttpURLConnection huc = (HttpURLConnection) new URL(LIST_URL).openConnection();
+        // try-with-resources: the reader (and the HTTP stream below it) is
+        // closed even when parsing fails half-way through.
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"))) {
+            instance = new PublicSuffixes(br);
+        }
+    }
+
+    /**
+     * Returns the shared instance, loading the list on first use.
+     * <p>
+     * This method is not synchronized; if several threads make the first call
+     * at the same time, each of them may download the list.
+     *
+     * @return the shared instance, never null
+     * @throws Error
+     *             if the list cannot be downloaded or read; the
+     *             {@link IOException} is attached as the cause
+     */
+    public static PublicSuffixes getInstance() {
+        if (instance == null) {
+            try {
+                generateDefault();
+            } catch (IOException e) {
+                throw new Error("Could not load the public suffix list from " + LIST_URL, e);
+            }
+        }
+        return instance;
+    }
+
+    /**
+     * Parses a public suffix list.
+     * <p>
+     * Empty lines and lines starting with <code>//</code> are ignored. Of
+     * every other line only the text before the first whitespace character is
+     * used; if that text is empty (the line started with whitespace) the line
+     * is ignored as well. The reader is not closed here.
+     *
+     * @param br
+     *            source of the list, one rule per line
+     * @throws IOException
+     *             if reading from <code>br</code> fails
+     */
+    private PublicSuffixes(BufferedReader br) throws IOException {
+        String line;
+        while ((line = br.readLine()) != null) {
+            if (line.isEmpty() || line.startsWith("//")) {
+                continue;
+            }
+            String rule = line.split("\\s", 2)[0];
+            if (rule.isEmpty()) {
+                // Whitespace-only or whitespace-led line: there is no rule
+                // text, and "" must not end up in the suffix set.
+                continue;
+            }
+            if (rule.startsWith("*.")) {
+                String data = rule.substring(2);
+                if (isPlainName(rule, data)) {
+                    wildcards.add(IDN.toASCII(data));
+                }
+            } else if (rule.startsWith("!")) {
+                String data = rule.substring(1);
+                if (isPlainName(rule, data)) {
+                    exceptions.add(IDN.toASCII(data));
+                }
+            } else if (isPlainName(rule, rule)) {
+                suffixes.add(IDN.toASCII(rule));
+            }
+        }
+    }
+
+    /**
+     * Checks that the name part of a rule is non-empty and free of further
+     * <code>*</code> or <code>!</code> markers, reporting the rule on standard
+     * output when it is not.
+     *
+     * @param rule
+     *            the complete rule, used for the report only
+     * @param data
+     *            the rule with its leading marker (if any) removed
+     * @return true if <code>data</code> can be stored as a rule name
+     */
+    private static boolean isPlainName(String rule, String data) {
+        if (data.isEmpty() || data.contains("*") || data.contains("!")) {
+            System.out.println("Error! unparsable public suffix line: " + rule);
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Determines the registrable part of a domain: the trailing part of
+     * <code>domain</code> that consists of a public suffix plus exactly one
+     * more label, or that matches an exception rule.
+     *
+     * @param domain
+     *            the domain to inspect, in the same form as the stored rules;
+     *            may be null
+     * @return the registrable part, or null if <code>domain</code> is null,
+     *         starts with a dot, is itself a public suffix, or no rule of the
+     *         list matches any of its parent domains
+     */
+    public String getRegistrablePart(String domain) {
+        if (domain == null || domain.startsWith(".")) {
+            return null;
+        }
+        if (isSuffix(domain) && !exceptions.contains(domain)) {
+            return null;
+        }
+        return getPublicSuffix0(domain);
+    }
+
+    /**
+     * Strips leading labels from <code>domain</code> until what is left is
+     * either an exception rule or sits directly below a public suffix.
+     *
+     * @param domain
+     *            the domain to shorten, not null
+     * @return the first candidate that qualifies, or null once a candidate
+     *         without any dot is reached
+     */
+    private String getPublicSuffix0(String domain) {
+        String candidate = domain;
+        while (true) {
+            int dot = candidate.indexOf('.');
+            if (dot == -1) {
+                return null;
+            }
+            if (exceptions.contains(candidate)) {
+                return candidate;
+            }
+            String parent = candidate.substring(dot + 1);
+            if (isSuffix(parent)) {
+                return candidate;
+            }
+            candidate = parent;
+        }
+    }
+
+    /**
+     * Tells whether <code>domain</code> is a public suffix: listed as a plain
+     * rule, or covered by a wildcard rule on its parent without being listed
+     * as an exception.
+     *
+     * @param domain
+     *            the name to look up, not null
+     * @return true if <code>domain</code> is a public suffix
+     */
+    private boolean isSuffix(String domain) {
+        if (suffixes.contains(domain)) {
+            return true;
+        }
+        if (exceptions.contains(domain)) {
+            return false;
+        }
+        int idx = domain.indexOf('.');
+        return idx != -1 && wildcards.contains(domain.substring(idx + 1));
+    }
+}
--- /dev/null
+package org.cacert.gigi.util;
+
+import static org.junit.Assert.*;
+
+import java.net.IDN;
+
+import org.junit.Test;
+
+/**
+ * Checks {@link PublicSuffixes#getRegistrablePart(String)} against the public
+ * suffix list test vectors published by Mozilla. The instance under test is
+ * the one returned by {@link PublicSuffixes#getInstance()}.
+ */
+public class TestPublicSuffixes {
+
+    /**
+     * ASCII test vectors, taken from
+     * http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt?raw=1
+     */
+    @Test
+    public void testMozilla() {
+        // Any copyright is dedicated to the Public Domain.
+        // http://creativecommons.org/publicdomain/zero/1.0/
+
+        // null input.
+        checkPublicSuffix(null, null);
+        // Mixed case.
+        checkPublicSuffix("COM", null);
+        checkPublicSuffix("example.COM", "example.com");
+        checkPublicSuffix("WwW.example.COM", "example.com");
+        // Leading dot.
+        checkPublicSuffix(".com", null);
+        checkPublicSuffix(".example", null);
+        checkPublicSuffix(".example.com", null);
+        checkPublicSuffix(".example.example", null);
+        // Unlisted TLD.
+        /*
+         * checkPublicSuffix("example", null);
+         * checkPublicSuffix("example.example", "example.example");
+         * checkPublicSuffix("b.example.example", "example.example");
+         * checkPublicSuffix("a.b.example.example", "example.example");
+         */
+        // Listed, but non-Internet, TLD.
+        // checkPublicSuffix("local", null);
+        // checkPublicSuffix("example.local", null);
+        // checkPublicSuffix("b.example.local", null);
+        // checkPublicSuffix("a.b.example.local", null);
+        // TLD with only 1 rule.
+        checkPublicSuffix("biz", null);
+        checkPublicSuffix("domain.biz", "domain.biz");
+        checkPublicSuffix("b.domain.biz", "domain.biz");
+        checkPublicSuffix("a.b.domain.biz", "domain.biz");
+        // TLD with some 2-level rules.
+        checkPublicSuffix("com", null);
+        checkPublicSuffix("example.com", "example.com");
+        checkPublicSuffix("b.example.com", "example.com");
+        checkPublicSuffix("a.b.example.com", "example.com");
+        checkPublicSuffix("uk.com", null);
+        checkPublicSuffix("example.uk.com", "example.uk.com");
+        checkPublicSuffix("b.example.uk.com", "example.uk.com");
+        checkPublicSuffix("a.b.example.uk.com", "example.uk.com");
+        checkPublicSuffix("test.ac", "test.ac");
+        // TLD with only 1 (wildcard) rule.
+        checkPublicSuffix("cy", null);
+        checkPublicSuffix("c.cy", null);
+        checkPublicSuffix("b.c.cy", "b.c.cy");
+        checkPublicSuffix("a.b.c.cy", "b.c.cy");
+        // More complex TLD.
+        checkPublicSuffix("jp", null);
+        checkPublicSuffix("test.jp", "test.jp");
+        checkPublicSuffix("www.test.jp", "test.jp");
+        checkPublicSuffix("ac.jp", null);
+        checkPublicSuffix("test.ac.jp", "test.ac.jp");
+        checkPublicSuffix("www.test.ac.jp", "test.ac.jp");
+        checkPublicSuffix("kyoto.jp", null);
+        checkPublicSuffix("test.kyoto.jp", "test.kyoto.jp");
+        checkPublicSuffix("ide.kyoto.jp", null);
+        checkPublicSuffix("b.ide.kyoto.jp", "b.ide.kyoto.jp");
+        checkPublicSuffix("a.b.ide.kyoto.jp", "b.ide.kyoto.jp");
+        checkPublicSuffix("c.kobe.jp", null);
+        checkPublicSuffix("b.c.kobe.jp", "b.c.kobe.jp");
+        checkPublicSuffix("a.b.c.kobe.jp", "b.c.kobe.jp");
+        checkPublicSuffix("city.kobe.jp", "city.kobe.jp");
+        checkPublicSuffix("www.city.kobe.jp", "city.kobe.jp");
+        // TLD with a wildcard rule and exceptions.
+        checkPublicSuffix("ck", null);
+        checkPublicSuffix("test.ck", null);
+        checkPublicSuffix("b.test.ck", "b.test.ck");
+        checkPublicSuffix("a.b.test.ck", "b.test.ck");
+        checkPublicSuffix("www.ck", "www.ck");
+        checkPublicSuffix("www.www.ck", "www.ck");
+        // US K12.
+        checkPublicSuffix("us", null);
+        checkPublicSuffix("test.us", "test.us");
+        checkPublicSuffix("www.test.us", "test.us");
+        checkPublicSuffix("ak.us", null);
+        checkPublicSuffix("test.ak.us", "test.ak.us");
+        checkPublicSuffix("www.test.ak.us", "test.ak.us");
+        checkPublicSuffix("k12.ak.us", null);
+        checkPublicSuffix("test.k12.ak.us", "test.k12.ak.us");
+        checkPublicSuffix("www.test.k12.ak.us", "test.k12.ak.us");
+    }
+
+    /** Test vectors with internationalized labels, written in Unicode. */
+    @Test
+    public void testMozillaIDN() {
+        // IDN labels.
+        checkPublicSuffix("食狮.com.cn", "食狮.com.cn");
+        checkPublicSuffix("食狮.公司.cn", "食狮.公司.cn");
+        checkPublicSuffix("www.食狮.公司.cn", "食狮.公司.cn");
+        checkPublicSuffix("shishi.公司.cn", "shishi.公司.cn");
+        checkPublicSuffix("公司.cn", null);
+        checkPublicSuffix("食狮.中国", "食狮.中国");
+        checkPublicSuffix("www.食狮.中国", "食狮.中国");
+        checkPublicSuffix("shishi.中国", "shishi.中国");
+        checkPublicSuffix("中国", null);
+    }
+
+    /** The internationalized test vectors again, this time punycoded. */
+    @Test
+    public void testMozillaIDNPuny() {
+        // Same as above, but punycoded.
+        checkPublicSuffix("xn--85x722f.com.cn", "xn--85x722f.com.cn");
+        checkPublicSuffix("xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
+        checkPublicSuffix("www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
+        checkPublicSuffix("shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn");
+        checkPublicSuffix("xn--55qx5d.cn", null);
+        checkPublicSuffix("xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
+        checkPublicSuffix("www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
+        checkPublicSuffix("shishi.xn--fiqs8s", "shishi.xn--fiqs8s");
+        checkPublicSuffix("xn--fiqs8s", null);
+
+    }
+
+    /**
+     * Asserts that the registrable part computed for <code>domain</code>
+     * equals <code>suffix</code>.
+     * <p>
+     * The domain is lower-cased and converted with
+     * {@link IDN#toASCII(String)} before the lookup; the expected value is
+     * converted with {@link IDN#toASCII(String)} only.
+     *
+     * @param domain
+     *            the domain to look up, or null
+     * @param suffix
+     *            the expected registrable part, or null if none is expected
+     */
+    private void checkPublicSuffix(String domain, String suffix) {
+        // Locale.ROOT keeps the case mapping independent of the default
+        // locale of the machine running the tests.
+        String query = domain == null ? null : IDN.toASCII(domain.toLowerCase(java.util.Locale.ROOT));
+        String expected = suffix == null ? null : IDN.toASCII(suffix);
+        String actual = PublicSuffixes.getInstance().getRegistrablePart(query);
+        // The message identifies the failing vector among the many checks.
+        assertEquals("registrable part of " + domain, expected, actual);
+    }
+}