1 /*
   2  * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.generatelsrequivmaps;
  27 
  28 import java.io.BufferedWriter;
  29 import java.io.IOException;
  30 import java.nio.charset.Charset;
  31 import java.nio.file.Files;
  32 import java.nio.file.Paths;
  33 import java.time.ZoneId;
  34 import java.time.ZonedDateTime;
  35 import java.util.ArrayList;
  36 import java.util.List;
  37 import java.util.Locale;
  38 import java.util.Map;
  39 import java.util.TreeMap;
  40 
  41 /**
  42  * This tool reads the IANA Language Subtag Registry data file downloaded from
  43  * http://www.iana.org/assignments/language-subtag-registry, which is specified
  44  * in the command line and generates a .java source file as specified in
  45  * command line. The generated .java source file contains equivalent language
  46  * maps. These equivalent language maps are used by LocaleMatcher.java
  47  * for the locale matching mechanism specified in RFC 4647 "Matching of Language
  48  * Tags".
  49  */
  50 public class EquivMapsGenerator {
  51 
  52     public static void main(String[] args) throws Exception {
  53         if (args.length != 2) {
  54             System.err.println("Usage: java EquivMapsGenerator"
  55                     + " language-subtag-registry.txt LocaleEquivalentMaps.java");
  56             System.exit(1);
  57         }
  58         readLSRfile(args[0]);
  59         generateEquivalentMap();
  60         generateSourceCode(args[1]);
  61     }
  62 
  63     private static String LSRrevisionDate;
  64     private static Map<String, StringBuilder> initialLanguageMap =
  65         new TreeMap<>();
  66     private static Map<String, StringBuilder> initialRegionVariantMap =
  67         new TreeMap<>();
  68 
  69     private static Map<String, String> sortedLanguageMap1 = new TreeMap<>();
  70     private static Map<String, String[]> sortedLanguageMap2 = new TreeMap<>();
  71     private static Map<String, String> sortedRegionVariantMap =
  72         new TreeMap<>();
  73 
  74     private static void readLSRfile(String filename) throws Exception {
  75         String type = null;
  76         String tag = null;
  77         String preferred = null;
  78 
  79         for (String line : Files.readAllLines(Paths.get(filename),
  80                                               Charset.forName("UTF-8"))) {
  81             line = line.toLowerCase(Locale.ROOT);
  82             int index = line.indexOf(' ')+1;
  83             if (line.startsWith("file-date:")) {
  84                 LSRrevisionDate = line.substring(index);
  85             } else if (line.startsWith("type:")) {
  86                 type = line.substring(index);
  87             } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
  88                 tag = line.substring(index);
  89             } else if (line.startsWith("preferred-value:")
  90                        && !type.equals("extlang")) {
  91                 preferred = line.substring(index);
  92                 processDeprecatedData(type, tag, preferred);
  93             } else if (line.equals("%%")) {
  94                 type = null;
  95                 tag = null;
  96             }
  97         }
  98     }
  99 
 100     private static void processDeprecatedData(String type,
 101                                               String tag,
 102                                               String preferred) {
 103         StringBuilder sb;
 104         if (type.equals("region") || type.equals("variant")) {
 105             if (!initialRegionVariantMap.containsKey(preferred)) {
 106                 sb = new StringBuilder("-");
 107                 sb.append(preferred);
 108                 sb.append(",-");
 109                 sb.append(tag);
 110                 initialRegionVariantMap.put("-"+preferred, sb);
 111             } else {
 112                 throw new RuntimeException("New case, need implementation."
 113                     + " A region/variant subtag \"" + preferred
 114                     + "\" is registered for more than one subtags.");
 115             }
 116         } else { // language, grandfahered, and redundant
 117             if (!initialLanguageMap.containsKey(preferred)) {
 118                 sb = new StringBuilder(preferred);
 119                 sb.append(',');
 120                 sb.append(tag);
 121                 initialLanguageMap.put(preferred, sb);
 122             } else {
 123                 sb = initialLanguageMap.get(preferred);
 124                 sb.append(',');
 125                 sb.append(tag);
 126                 initialLanguageMap.put(preferred, sb);
 127             }
 128         }
 129     }
 130 
 131     private static void generateEquivalentMap() {
 132         String[] subtags;
 133         for (String preferred : initialLanguageMap.keySet()) {
 134             subtags = initialLanguageMap.get(preferred).toString().split(",");
 135 
 136             if (subtags.length == 2) {
 137                 sortedLanguageMap1.put(subtags[0], subtags[1]);
 138                 sortedLanguageMap1.put(subtags[1], subtags[0]);
 139             } else if (subtags.length > 2) {
 140                 for (int i = 0; i < subtags.length; i++) {
 141                     sortedLanguageMap2.put(subtags[i], createLangArray(i, subtags));
 142                 }
 143             } else {
 144                     throw new RuntimeException("New case, need implementation."
 145                         + " A language subtag \"" + preferred
 146                         + "\" is registered for more than two subtags. ");
 147             }
 148         }
 149 
 150         for (String preferred : initialRegionVariantMap.keySet()) {
 151             subtags =
 152                 initialRegionVariantMap.get(preferred).toString().split(",");
 153 
 154             sortedRegionVariantMap.put(subtags[0], subtags[1]);
 155             sortedRegionVariantMap.put(subtags[1], subtags[0]);
 156         }
 157 
 158     }
 159 
 160     /* create the array of subtags excluding the subtag at index location */
 161     private static String[] createLangArray(int index, String[] subtags) {
 162         List<String> list = new ArrayList<>();
 163         for (int i = 0; i < subtags.length; i++) {
 164             if (i != index) {
 165                 list.add(subtags[i]);
 166             }
 167         }
 168         return list.toArray(new String[list.size()]);
 169     }
 170 
 171     private static String generateValuesString(String[] values) {
 172         String outputStr = "";
 173         for (int i = 0; i < values.length; i++) {
 174             if (i != values.length - 1) {
 175                 outputStr = outputStr + "\"" + values[i] + "\", ";
 176             } else {
 177                 outputStr = outputStr + "\"" + values[i] + "\"";
 178             }
 179 
 180         }
 181         return outputStr;
 182     }
 183 
 184     private static final String COPYRIGHT = "/*\n"
 185         + " * Copyright (c) 2012, %d, Oracle and/or its affiliates. All rights reserved.\n"
 186         + " * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.\n"
 187         + " *\n"
 188         + " * This code is free software; you can redistribute it and/or modify it\n"
 189         + " * under the terms of the GNU General Public License version 2 only, as\n"
 190         + " * published by the Free Software Foundation.  Oracle designates this\n"
 191         + " * particular file as subject to the \"Classpath\" exception as provided\n"
 192         + " * by Oracle in the LICENSE file that accompanied this code.\n"
 193         + " *\n"
 194         + " * This code is distributed in the hope that it will be useful, but WITHOUT\n"
 195         + " * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
 196         + " * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
 197         + " * version 2 for more details (a copy is included in the LICENSE file that\n"
 198         + " * accompanied this code).\n"
 199         + " *\n"
 200         + " * You should have received a copy of the GNU General Public License version\n"
 201         + " * 2 along with this work; if not, write to the Free Software Foundation,\n"
 202         + " * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.\n"
 203         + " *\n"
 204         + " * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA\n"
 205         + " * or visit www.oracle.com if you need additional information or have any\n"
 206         + " * questions.\n"
 207         + "*/\n\n";
 208 
 209     private static final String headerText =
 210         "package sun.util.locale;\n\n"
 211         + "import java.util.HashMap;\n"
 212         + "import java.util.Map;\n\n"
 213         + "final class LocaleEquivalentMaps {\n\n"
 214         + "    static final Map<String, String> singleEquivMap;\n"
 215         + "    static final Map<String, String[]> multiEquivsMap;\n"
 216         + "    static final Map<String, String> regionVariantEquivMap;\n\n"
 217         + "    static {\n"
 218         + "        singleEquivMap = new HashMap<>(";
 219 
 220     private static final String footerText =
 221         "    }\n\n"
 222         + "}";
 223 
 224     private static String getOpenJDKCopyright() {
 225         int year = ZonedDateTime.now(ZoneId
 226                 .of("America/Los_Angeles")).getYear();
 227         return String.format(Locale.US, COPYRIGHT, year);
 228     }
 229 
 230     /**
 231      * The input lsr data file is in UTF-8, so theoretically for the characters
 232      * beyond US-ASCII, the generated Java String literals need to be Unicode
 233      * escaped (\\uXXXX) while writing to a file. But as of now, there is not
 234      * the case since we don't use "description", "comment" or alike.
 235      */
 236     private static void generateSourceCode(String fileName) {
 237 
 238         try (BufferedWriter writer = Files.newBufferedWriter(
 239                 Paths.get(fileName))) {
 240             writer.write(getOpenJDKCopyright());
 241             writer.write(headerText
 242                 + (int)(sortedLanguageMap1.size() / 0.75f + 1) + ");\n"
 243                 + "        multiEquivsMap = new HashMap<>("
 244                 + (int)(sortedLanguageMap2.size() / 0.75f + 1) + ");\n"
 245                 + "        regionVariantEquivMap = new HashMap<>("
 246                 + (int)(sortedRegionVariantMap.size() / 0.75f + 1) + ");\n\n"
 247                 + "        // This is an auto-generated file and should not be manually edited.\n"
 248                 + "        //   LSR Revision: " + LSRrevisionDate);
 249             writer.newLine();
 250 
 251             for (String key : sortedLanguageMap1.keySet()) {
 252                 String value = sortedLanguageMap1.get(key);
 253                 writer.write("        singleEquivMap.put(\""
 254                     + key + "\", \"" + value + "\");");
 255                 writer.newLine();
 256             }
 257 
 258             writer.newLine();
 259             for (String key : sortedLanguageMap2.keySet()) {
 260                 String[] values = sortedLanguageMap2.get(key);
 261 
 262                 if (values.length >= 2) {
 263                     writer.write("        multiEquivsMap.put(\""
 264                         + key + "\", new String[] {"
 265                         + generateValuesString(values) + "});");
 266                     writer.newLine();
 267                 }
 268             }
 269 
 270             writer.newLine();
 271             for (String key : sortedRegionVariantMap.keySet()) {
 272                 String value = sortedRegionVariantMap.get(key);
 273                 writer.write("        regionVariantEquivMap.put(\""
 274                     + key + "\", \"" + value + "\");");
 275                 writer.newLine();
 276             }
 277 
 278             writer.write(footerText);
 279         } catch (IOException ex) {
 280             ex.printStackTrace(System.err);
 281             System.exit(1);
 282         }
 283 
 284     }
 285 
 286 }