/*
 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.publicsuffixlist;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.attribute.FileTime;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

/**
 * This tool takes the original Mozilla public suffix rule list as input
 * and slices it into a set of files, one for each top-level domain.
 * Each file contains only the rules for that domain. Lines containing comments
 * or only whitespace are not copied. Each of these files is then combined
 * into the target zipfile.
 *
 * Usage: java GeneratePublicSuffixList mozilla_file destination_zipfile
 */
public final class GeneratePublicSuffixList {
    // patterns
    private static final String COMMENT = "//";
    private static final String BEGIN_PRIVATE = "// ===BEGIN PRIVATE DOMAINS===";
    // One or more whitespace characters; used to cut a rule at its first whitespace.
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    // Section markers written in front of every rule in the output files.
    private static final byte ICANN = 0x00;
    private static final byte PRIVATE = 0x01;

    /** A single rule together with the section (ICANN or PRIVATE) it was read from. */
    private static final class Domain {
        final String name;
        final byte type;

        Domain(String name, byte type) {
            this.name = name;
            this.type = type;
        }
    }

    /**
     * Reads the rule list named by {@code args[0]} and writes the per-TLD
     * zip file named by {@code args[1]}.
     *
     * The input is read and parsed completely before the output file is
     * opened, so a failure while reading does not leave a partially
     * written zip file behind.
     *
     * @param args exactly two elements: input file and output zip file
     * @throws IllegalArgumentException if {@code args} does not have two elements
     * @throws Exception if reading the input or writing the output fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new IllegalArgumentException("2 args required: input_file output_file");
        }

        List<Domain> domains;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(args[0]), StandardCharsets.UTF_8))) {
            domains = readDomains(br);
        }
        // have a list of rules now

        // Map of TLD names to rules with the same TLD
        Map<String, List<Domain>> rules = addDomains(domains);

        // now output each map entry to its own file,
        // whose filename is the TLD
        try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(args[1]));
             BufferedWriter bw = new BufferedWriter(
                     new OutputStreamWriter(zos, StandardCharsets.UTF_8))) {
            writeRules(zos, bw, rules);
        }
    }

    /**
     * Parses the rule list line by line.
     *
     * Blank lines and lines starting with {@code //} are skipped. Every rule
     * read before the {@code ===BEGIN PRIVATE DOMAINS===} marker comment is
     * tagged ICANN; every rule after it is tagged PRIVATE. Following the
     * Public Suffix List format, surrounding whitespace is ignored and a
     * rule is read only up to the first whitespace on its line.
     *
     * @param br reader positioned at the start of the rule list
     * @return the rules in the order they appear in the input
     * @throws IOException if reading fails
     */
    private static List<Domain> readDomains(BufferedReader br) throws IOException {
        List<Domain> domains = new LinkedList<>();
        byte type = ICANN;
        String line;
        while ((line = br.readLine()) != null) {
            String text = line.trim();
            if (text.isEmpty()) {
                continue;
            }
            if (text.startsWith(COMMENT)) {
                if (text.startsWith(BEGIN_PRIVATE)) {
                    type = PRIVATE;
                }
                continue;
            }
            // text is trimmed and non-empty, so element 0 is the leading
            // non-whitespace token, i.e. the rule itself.
            String rule = WHITESPACE.split(text, 2)[0];
            domains.add(new Domain(rule, type));
        }
        return domains;
    }

    /**
     * Groups rules by their top-level domain.
     *
     * @param domains rules to group
     * @return map from TLD to the rules ending in that TLD, each list in
     *         input order
     */
    private static Map<String, List<Domain>> addDomains(List<Domain> domains) {
        Map<String, List<Domain>> rules = new HashMap<>();
        for (Domain domain : domains) {
            rules.computeIfAbsent(getTLD(domain.name), k -> new LinkedList<>())
                 .add(domain);
        }
        return rules;
    }

    /**
     * Writes one zip entry per TLD, named after the TLD. Each rule becomes
     * one line: the type marker character followed by the rule text.
     *
     * @param zos   zip stream that receives the entries
     * @param bw    writer layered on top of {@code zos}; flushed after every
     *              entry so buffered text lands in the entry it belongs to
     * @param rules map from TLD to its rules
     * @throws IOException if writing fails
     */
    private static void writeRules(ZipOutputStream zos, BufferedWriter bw,
                                   Map<String, List<Domain>> rules)
            throws IOException {
        // Sort keys for deterministic output
        List<String> tlds = rules.keySet().stream().sorted().collect(Collectors.toList());
        for (String tld : tlds) {
            ZipEntry ze = new ZipEntry(tld);
            // Fixed timestamp so the archive does not depend on build time.
            ze.setLastModifiedTime(FileTime.fromMillis(0));
            zos.putNextEntry(ze);
            for (Domain entry : rules.get(tld)) {
                // write(int) emits a single character: code 0 (ICANN) or 1 (PRIVATE).
                bw.write(entry.type);
                bw.write(entry.name);
                bw.newLine();
            }
            // Push buffered characters into the current entry before closing it.
            bw.flush();
            zos.closeEntry();
        }
    }

    /**
     * Returns the text after the last {@code '.'} of a rule, or the whole
     * rule if it contains no dot.
     *
     * @param line a rule
     * @return the top-level domain label of the rule
     */
    private static String getTLD(String line) {
        int dotIndex = line.lastIndexOf('.');
        return (dotIndex == -1) ? line : line.substring(dotIndex + 1);
    }
}