/*
 * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.generatelsrequivmaps;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

/**
 * This tool reads the IANA Language Subtag Registry data file downloaded from
 * http://www.iana.org/assignments/language-subtag-registry, which is specified
 * in the command line and generates a .java source file as specified in
 * command line. The generated .java source file contains equivalent language
 * maps. These equivalent language maps are used by LocaleMatcher.java
 * for the locale matching mechanism specified in RFC 4647 "Matching of Language
 * Tags".
 */
public class EquivMapsGenerator {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the registry file to read,
     *             {@code args[1]} is the path of the .java file to write
     * @throws Exception if the registry file cannot be read or is malformed
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: java EquivMapsGenerator"
                    + " language-subtag-registry.txt LocaleEquivalentMaps.java");
            System.exit(1);
        }
        // All intermediate data lives in static fields; start from a clean
        // slate so that a second run in the same JVM is not polluted.
        resetState();
        readLSRfile(args[0]);
        generateEquivalentMap();
        generateSourceCode(args[1]);
    }

    /** Value of the registry's File-Date field (lower-cased like all input). */
    private static String lsrRevisionDate;

    /** preferred value -> [preferred value, deprecated tag, deprecated tag, ...] */
    private static final Map<String, List<String>> initialLanguageMap =
            new TreeMap<>();

    /** "-" + preferred value -> "-" + deprecated region/variant subtag */
    private static final Map<String, String> initialRegionVariantMap =
            new TreeMap<>();

    /** Language tags that have exactly one equivalent. */
    private static final Map<String, String> sortedLanguageMap1 = new TreeMap<>();

    /** Language tags that have two or more equivalents. */
    private static final Map<String, String[]> sortedLanguageMap2 = new TreeMap<>();

    /** Region/variant subtags (with leading "-") and their single equivalent. */
    private static final Map<String, String> sortedRegionVariantMap =
            new TreeMap<>();

    /* Forgets everything collected by a previous run. */
    private static void resetState() {
        lsrRevisionDate = null;
        initialLanguageMap.clear();
        initialRegionVariantMap.clear();
        sortedLanguageMap1.clear();
        sortedLanguageMap2.clear();
        sortedRegionVariantMap.clear();
    }

    /**
     * Scans the registry file line by line and records every
     * Preferred-Value mapping, except those of "extlang" records.
     * Lines are lower-cased before they are examined.
     *
     * @param filename path of the registry file (read as UTF-8)
     * @throws IllegalStateException if a Preferred-Value field appears before
     *         the Type and Tag/Subtag fields of its record, or a value is empty
     */
    private static void readLSRfile(String filename) throws Exception {
        String type = null;
        String tag = null;

        for (String rawLine : Files.readAllLines(Paths.get(filename),
                                                 Charset.forName("UTF-8"))) {
            String line = rawLine.toLowerCase(Locale.ROOT);
            if (line.startsWith("file-date:")) {
                lsrRevisionDate = fieldValue(line);
            } else if (line.startsWith("type:")) {
                type = fieldValue(line);
            } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
                tag = fieldValue(line);
            } else if (line.startsWith("preferred-value:")) {
                String preferred = fieldValue(line);
                if (type == null) {
                    throw new IllegalStateException("Malformed registry record:"
                            + " Preferred-Value \"" + preferred
                            + "\" appears before the Type field.");
                }
                if (type.equals("extlang")) {
                    continue; // extlang records are intentionally skipped
                }
                if (tag == null || tag.isEmpty() || preferred.isEmpty()) {
                    throw new IllegalStateException("Malformed registry record:"
                            + " Preferred-Value \"" + preferred
                            + "\" has no usable Tag/Subtag (type \"" + type + "\").");
                }
                processDeprecatedData(type, tag, preferred);
            } else if (line.equals("%%")) {
                // record separator: nothing carries over to the next record
                type = null;
                tag = null;
            }
        }
    }

    /* Returns the text that follows the first ':' of a field line, trimmed. */
    private static String fieldValue(String line) {
        return line.substring(line.indexOf(':') + 1).trim();
    }

    /**
     * Records one "tag is replaced by preferred" relation.
     *
     * @param type      record type, e.g. "language", "region", "variant"
     * @param tag       the tag or subtag that has a preferred value
     * @param preferred the preferred value
     * @throws RuntimeException if a region/variant preferred value is seen
     *         for a second subtag; that case is not implemented
     */
    private static void processDeprecatedData(String type,
                                              String tag,
                                              String preferred) {
        if (type.equals("region") || type.equals("variant")) {
            // The lookup must use the same "-"-prefixed key that is stored,
            // otherwise a duplicate is never detected.
            String key = "-" + preferred;
            if (initialRegionVariantMap.containsKey(key)) {
                throw new RuntimeException("New case, need implementation."
                        + " A region/variant subtag \"" + preferred
                        + "\" is registered for more than one subtags.");
            }
            initialRegionVariantMap.put(key, "-" + tag);
        } else { // language, grandfathered, and redundant
            List<String> group = initialLanguageMap.get(preferred);
            if (group == null) {
                group = new ArrayList<>();
                group.add(preferred); // the preferred value leads its group
                initialLanguageMap.put(preferred, group);
            }
            group.add(tag);
        }
    }

    /**
     * Turns the collected groups into the three lookup tables that are
     * written out: every member of a group maps to all other members.
     */
    private static void generateEquivalentMap() {
        for (List<String> group : initialLanguageMap.values()) {
            // A group always holds the preferred value plus at least one tag.
            String[] subtags = group.toArray(new String[0]);
            if (subtags.length == 2) {
                sortedLanguageMap1.put(subtags[0], subtags[1]);
                sortedLanguageMap1.put(subtags[1], subtags[0]);
            } else {
                for (int i = 0; i < subtags.length; i++) {
                    sortedLanguageMap2.put(subtags[i], createLangArray(i, subtags));
                }
            }
        }

        for (Map.Entry<String, String> pair : initialRegionVariantMap.entrySet()) {
            sortedRegionVariantMap.put(pair.getKey(), pair.getValue());
            sortedRegionVariantMap.put(pair.getValue(), pair.getKey());
        }
    }

    /* create the array of subtags excluding the subtag at index location */
    private static String[] createLangArray(int index, String[] subtags) {
        String[] others = new String[subtags.length - 1];
        System.arraycopy(subtags, 0, others, 0, index);
        System.arraycopy(subtags, index + 1, others, index,
                         subtags.length - index - 1);
        return others;
    }

    /* Renders the values as a comma separated list of Java string literals. */
    private static String generateValuesString(String[] values) {
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                out.append(", ");
            }
            out.append('"').append(values[i]).append('"');
        }
        return out.toString();
    }

    private static final String COPYRIGHT = "/*\n"
        + " * Copyright (c) 2012, %d, Oracle and/or its affiliates. All rights reserved.\n"
        + " * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.\n"
        + " *\n"
        + " * This code is free software; you can redistribute it and/or modify it\n"
        + " * under the terms of the GNU General Public License version 2 only, as\n"
        + " * published by the Free Software Foundation. Oracle designates this\n"
        + " * particular file as subject to the \"Classpath\" exception as provided\n"
        + " * by Oracle in the LICENSE file that accompanied this code.\n"
        + " *\n"
        + " * This code is distributed in the hope that it will be useful, but WITHOUT\n"
        + " * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
        + " * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License\n"
        + " * version 2 for more details (a copy is included in the LICENSE file that\n"
        + " * accompanied this code).\n"
        + " *\n"
        + " * You should have received a copy of the GNU General Public License version\n"
        + " * 2 along with this work; if not, write to the Free Software Foundation,\n"
        + " * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.\n"
        + " *\n"
        + " * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA\n"
        + " * or visit www.oracle.com if you need additional information or have any\n"
        + " * questions.\n"
        + "*/\n\n";

    private static final String headerText =
        "package sun.util.locale;\n\n"
        + "import java.util.HashMap;\n"
        + "import java.util.Map;\n\n"
        + "final class LocaleEquivalentMaps {\n\n"
        + "    static final Map<String, String> singleEquivMap;\n"
        + "    static final Map<String, String[]> multiEquivsMap;\n"
        + "    static final Map<String, String> regionVariantEquivMap;\n\n"
        + "    static {\n"
        + "        singleEquivMap = new HashMap<>();\n"
        + "        multiEquivsMap = new HashMap<>();\n"
        + "        regionVariantEquivMap = new HashMap<>();\n\n"
        + "        // This is an auto-generated file and should not be manually edited.\n";

    private static final String footerText =
        "    }\n\n"
        + "}";

    /* Returns the copyright header with the current year filled in. */
    private static String getOpenJDKCopyright() {
        int year = ZonedDateTime.now(ZoneId
                .of("America/Los_Angeles")).getYear();
        return String.format(Locale.US, COPYRIGHT, year);
    }

    /**
     * Writes the generated source file. On an I/O failure the stack trace is
     * printed and the JVM exits with status 1.
     *
     * The input lsr data file is in UTF-8, so theoretically for the characters
     * beyond US-ASCII, the generated Java String literals need to be Unicode
     * escaped (\\uXXXX) while writing to a file. But as of now, this is not
     * the case since we don't use "description", "comment" or alike.
     *
     * @param fileName path of the .java file to create
     */
    private static void generateSourceCode(String fileName) {

        try (BufferedWriter writer = Files.newBufferedWriter(
                Paths.get(fileName))) {
            writer.write(getOpenJDKCopyright());
            writer.write(headerText
                + "        // LSR Revision: " + lsrRevisionDate);
            writer.newLine();

            for (Map.Entry<String, String> single : sortedLanguageMap1.entrySet()) {
                writer.write("        singleEquivMap.put(\""
                    + single.getKey() + "\", \"" + single.getValue() + "\");");
                writer.newLine();
            }

            writer.newLine();
            for (Map.Entry<String, String[]> multi : sortedLanguageMap2.entrySet()) {
                writer.write("        multiEquivsMap.put(\""
                    + multi.getKey() + "\", new String[] {"
                    + generateValuesString(multi.getValue()) + "});");
                writer.newLine();
            }

            writer.newLine();
            for (Map.Entry<String, String> rv : sortedRegionVariantMap.entrySet()) {
                writer.write("        regionVariantEquivMap.put(\""
                    + rv.getKey() + "\", \"" + rv.getValue() + "\");");
                writer.newLine();
            }

            writer.write(footerText);
        } catch (IOException ex) {
            ex.printStackTrace(System.err);
            System.exit(1);
        }

    }

}