1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 8204938
  27  * @summary Checks the IANA language subtag registry data update
  28  *          with Locale.LanguageRange parse method.
  29  * @run main LSRDataTest
  30  */
  31 import java.io.IOException;
  32 import java.nio.charset.Charset;
  33 import java.nio.file.Files;
  34 import java.nio.file.Paths;
  35 import java.nio.file.Path;
  36 import java.util.ArrayList;
  37 import java.util.HashMap;
  38 import java.util.List;
  39 import java.util.Map;
  40 import java.util.Locale;
  41 import java.util.Locale.LanguageRange;
  42 import java.util.stream.Collectors;
  43 import java.util.stream.Stream;
  44 
  45 import static java.util.Locale.LanguageRange.MAX_WEIGHT;
  46 import static java.util.Locale.LanguageRange.MIN_WEIGHT;
  47 
  48 public class LSRDataTest {
  49 
  50     private static final char HYPHEN = '-';
  51     private static final Map<String, String> singleLangEquivMap = new HashMap<>();
  52     private static final Map<String, List<String>> multiLangEquivsMap = new HashMap<>();
  53     private static final Map<String, String> regionVariantEquivMap = new HashMap<>();
  54 
  55     public static void main(String[] args) throws IOException {
  56         // accessing lsr file from the make folder, this test relies on the
  57         // relative path to the file in the make folder, considering
  58         // test and make will always exist in the same jdk layout
  59         String srcFileName = System.getProperty("test.src", ".")
  60                 + "/../../../../../make/data/lsrdata/language-subtag-registry.txt";
  61         loadLSRData(Paths.get(srcFileName).toRealPath());
  62 
  63         // checking the tags with weight
  64         String ranges = "Accept-Language: aam, adp, aue, bcg, cqu, ema,"
  65                 + " en-gb-oed, gti, koj, kwq, kxe, lii, lmm, mtm, ngv,"
  66                 + " oyb, phr, pub, suj, taj;q=0.9, yug;q=0.5, gfx;q=0.4";
  67         List<LanguageRange> expected = parse(ranges);
  68         List<LanguageRange> actual = LanguageRange.parse(ranges);
  69         checkEquality(actual, expected);
  70 
  71         // checking all language ranges
  72         ranges = generateLangRanges();
  73         expected = parse(ranges);
  74         actual = LanguageRange.parse(ranges);
  75         checkEquality(actual, expected);
  76 
  77         // checking all region/variant ranges
  78         ranges = generateRegionRanges();
  79         expected = parse(ranges);
  80         actual = LanguageRange.parse(ranges);
  81         checkEquality(actual, expected);
  82 
  83     }
  84 
  85     // generate range string containing all equiv language tags
  86     private static String generateLangRanges() {
  87         return Stream.concat(singleLangEquivMap.keySet().stream(), multiLangEquivsMap
  88                 .keySet().stream()).collect(Collectors.joining(","));
  89     }
  90 
  91     // generate range string containing all equiv region tags
  92     private static String generateRegionRanges() {
  93         return regionVariantEquivMap.keySet().stream()
  94                 .map(r -> "en".concat(r)).collect(Collectors.joining(", "));
  95     }
  96 
  97     // load LSR data from the file
  98     private static void loadLSRData(Path path) throws IOException {
  99         String type = null;
 100         String tag = null;
 101         String preferred;
 102 
 103         for (String line : Files.readAllLines(path, Charset.forName("UTF-8"))) {
 104             line = line.toLowerCase(Locale.ROOT);
 105             int index = line.indexOf(' ') + 1;
 106             if (line.startsWith("type:")) {
 107                 type = line.substring(index);
 108             } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
 109                 tag = line.substring(index);
 110             } else if (line.startsWith("preferred-value:") && !type.equals("extlang")) {
 111                 preferred = line.substring(index);
 112                 processDataAndGenerateMaps(type, tag, preferred);
 113             } else if (line.equals("%%")) {
 114                 type = null;
 115                 tag = null;
 116             }
 117         }
 118     }
 119 
 120     private static void processDataAndGenerateMaps(String type,
 121             String tag,
 122             String preferred) {
 123         StringBuilder sb;
 124         if (type.equals("region") || type.equals("variant")) {
 125             if (!regionVariantEquivMap.containsKey(preferred)) {
 126                 String tPref = HYPHEN + preferred;
 127                 String tTag = HYPHEN + tag;
 128                 regionVariantEquivMap.put(tPref, tTag);
 129                 regionVariantEquivMap.put(tTag, tPref);
 130             } else {
 131                 throw new RuntimeException("New case, need implementation."
 132                         + " A region/variant subtag \"" + preferred
 133                         + "\" is registered for more than one subtags.");
 134             }
 135         } else { // language, grandfathered, and redundant
 136             if (!singleLangEquivMap.containsKey(preferred)
 137                     && !multiLangEquivsMap.containsKey(preferred)) {
 138                 // new entry add it into single equiv map
 139                 singleLangEquivMap.put(preferred, tag);
 140                 singleLangEquivMap.put(tag, preferred);
 141             } else if (singleLangEquivMap.containsKey(preferred)
 142                     && !multiLangEquivsMap.containsKey(preferred)) {
 143                 String value = singleLangEquivMap.get(preferred);
 144                 List<String> subtags = List.of(preferred, value, tag);
 145                 // remove from single eqiv map before adding to multi equiv
 146                 singleLangEquivMap.keySet().removeAll(subtags);
 147                 addEntriesToMultiEquivsMap(subtags);
 148             } else if (multiLangEquivsMap.containsKey(preferred)
 149                     && !singleLangEquivMap.containsKey(preferred)) {
 150                 List<String> subtags = multiLangEquivsMap.get(preferred);
 151                 // should use the order preferred, subtags, tag to keep the
 152                 // expected order same as the JDK API in multi equivalent maps
 153                 subtags.add(0, preferred);
 154                 subtags.add(tag);
 155                 addEntriesToMultiEquivsMap(subtags);
 156             }
 157         }
 158     }
 159 
 160     // Add entries into the multi equivalent map from the given subtags
 161     private static void addEntriesToMultiEquivsMap(List<String> subtags) {
 162         // for each subtag within the given subtags, add an entry in multi
 163         // equivalent language map with subtag as the key and the value
 164         // as the list of all subtags excluding the one which is getting
 165         // traversed
 166         subtags.forEach(subtag -> multiLangEquivsMap.put(subtag, subtags.stream()
 167                 .filter(t -> !t.equals(subtag))
 168                 .collect(Collectors.toList())));
 169     }
 170 
 171     private static List<LanguageRange> parse(String ranges) {
 172         ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
 173         if (ranges.startsWith("accept-language:")) {
 174             ranges = ranges.substring(16);
 175         }
 176         String[] langRanges = ranges.split(",");
 177         List<LanguageRange> priorityList = new ArrayList<>(langRanges.length);
 178         int numOfRanges = 0;
 179         for (String range : langRanges) {
 180             int wIndex = range.indexOf(";q=");
 181             String tag;
 182             double weight = 0.0;
 183             if (wIndex == -1) {
 184                 tag = range;
 185                 weight = MAX_WEIGHT;
 186             } else {
 187                 tag = range.substring(0, wIndex);
 188                 try {
 189                     weight = Double.parseDouble(range.substring(wIndex + 3));
 190                 } catch (RuntimeException ex) {
 191                     throw new IllegalArgumentException("weight= " + weight + " for"
 192                             + " language range \"" + tag + "\", should be"
 193                             + " represented as a double");
 194                 }
 195 
 196                 if (weight < MIN_WEIGHT || weight > MAX_WEIGHT) {
 197                     throw new IllegalArgumentException("weight=" + weight
 198                             + " for language range \"" + tag
 199                             + "\", must be between " + MIN_WEIGHT
 200                             + " and " + MAX_WEIGHT + ".");
 201                 }
 202             }
 203 
 204             LanguageRange entry = new LanguageRange(tag, weight);
 205             if (!priorityList.contains(entry)) {
 206 
 207                 int index = numOfRanges;
 208                 // find the index in the list to add the current range at the
 209                 // correct index sorted by the descending order of weight
 210                 for (int i = 0; i < priorityList.size(); i++) {
 211                     if (priorityList.get(i).getWeight() < weight) {
 212                         index = i;
 213                         break;
 214                     }
 215                 }
 216                 priorityList.add(index, entry);
 217                 numOfRanges++;
 218 
 219                 String equivalent = getEquivalentForRegionAndVariant(tag);
 220                 if (equivalent != null) {
 221                     LanguageRange equivRange = new LanguageRange(equivalent, weight);
 222                     if (!priorityList.contains(equivRange)) {
 223                         priorityList.add(index + 1, equivRange);
 224                         numOfRanges++;
 225                     }
 226                 }
 227 
 228                 List<String> equivalents = getEquivalentsForLanguage(tag);
 229                 if (equivalents != null) {
 230                     for (String equiv : equivalents) {
 231                         LanguageRange equivRange = new LanguageRange(equiv, weight);
 232                         if (!priorityList.contains(equivRange)) {
 233                             priorityList.add(index + 1, equivRange);
 234                             numOfRanges++;
 235                         }
 236 
 237                         equivalent = getEquivalentForRegionAndVariant(equiv);
 238                         if (equivalent != null) {
 239                             equivRange = new LanguageRange(equivalent, weight);
 240                             if (!priorityList.contains(equivRange)) {
 241                                 priorityList.add(index + 1, equivRange);
 242                                 numOfRanges++;
 243                             }
 244                         }
 245                     }
 246                 }
 247             }
 248         }
 249         return priorityList;
 250     }
 251 
 252     /**
 253      * A faster alternative approach to String.replaceFirst(), if the given
 254      * string is a literal String, not a regex.
 255      */
 256     private static String replaceFirstSubStringMatch(String range,
 257             String substr, String replacement) {
 258         int pos = range.indexOf(substr);
 259         if (pos == -1) {
 260             return range;
 261         } else {
 262             return range.substring(0, pos) + replacement
 263                     + range.substring(pos + substr.length());
 264         }
 265     }
 266 
 267     private static List<String> getEquivalentsForLanguage(String range) {
 268         String r = range;
 269 
 270         while (r.length() > 0) {
 271             if (singleLangEquivMap.containsKey(r)) {
 272                 String equiv = singleLangEquivMap.get(r);
 273                 // Return immediately for performance if the first matching
 274                 // subtag is found.
 275                 return List.of(replaceFirstSubStringMatch(range, r, equiv));
 276             } else if (multiLangEquivsMap.containsKey(r)) {
 277                 List<String> equivs = multiLangEquivsMap.get(r);
 278                 List<String> result = new ArrayList(equivs.size());
 279                 for (int i = 0; i < equivs.size(); i++) {
 280                     result.add(i, replaceFirstSubStringMatch(range,
 281                             r, equivs.get(i)));
 282                 }
 283                 return result;
 284             }
 285 
 286             // Truncate the last subtag simply.
 287             int index = r.lastIndexOf(HYPHEN);
 288             if (index == -1) {
 289                 break;
 290             }
 291             r = r.substring(0, index);
 292         }
 293 
 294         return null;
 295     }
 296 
 297     private static String getEquivalentForRegionAndVariant(String range) {
 298         int extensionKeyIndex = getExtentionKeyIndex(range);
 299 
 300         for (String subtag : regionVariantEquivMap.keySet()) {
 301             int index;
 302             if ((index = range.indexOf(subtag)) != -1) {
 303                 // Check if the matching text is a valid region or variant.
 304                 if (extensionKeyIndex != Integer.MIN_VALUE
 305                         && index > extensionKeyIndex) {
 306                     continue;
 307                 }
 308 
 309                 int len = index + subtag.length();
 310                 if (range.length() == len || range.charAt(len) == HYPHEN) {
 311                     return replaceFirstSubStringMatch(range, subtag,
 312                             regionVariantEquivMap.get(subtag));
 313                 }
 314             }
 315         }
 316 
 317         return null;
 318     }
 319 
 320     private static int getExtentionKeyIndex(String s) {
 321         char[] c = s.toCharArray();
 322         int index = Integer.MIN_VALUE;
 323         for (int i = 1; i < c.length; i++) {
 324             if (c[i] == HYPHEN) {
 325                 if (i - index == 2) {
 326                     return index;
 327                 } else {
 328                     index = i;
 329                 }
 330             }
 331         }
 332         return Integer.MIN_VALUE;
 333     }
 334 
 335     private static void checkEquality(List<LanguageRange> expected,
 336             List<LanguageRange> actual) {
 337 
 338         int expectedSize = expected.size();
 339         int actualSize = actual.size();
 340 
 341         if (expectedSize != actualSize) {
 342             throw new RuntimeException("[FAILED: Size of the priority list"
 343                     + " does not match, Expected size=" + expectedSize + "]");
 344         } else {
 345             for (int i = 0; i < expectedSize; i++) {
 346                 LanguageRange lr1 = expected.get(i);
 347                 LanguageRange lr2 = actual.get(i);
 348 
 349                 if (!lr1.getRange().equals(lr2.getRange())
 350                         || lr1.getWeight() != lr2.getWeight()) {
 351                     throw new RuntimeException("[FAILED: Ranges at index "
 352                             + i + " do not match Expected: range=" + lr1.getRange()
 353                             + ", weight=" + lr1.getWeight() + ", Actual: range="
 354                             + lr2.getRange() + ", weight=" + lr2.getWeight() + "]");
 355                 }
 356             }
 357         }
 358     }
 359 }