1 /*
   2  * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 8204938 8242010
  27  * @summary Checks the IANA language subtag registry data update
  28  *          with Locale.LanguageRange parse method.
  29  * @run main LSRDataTest
  30  */
  31 import java.io.IOException;
  32 import java.nio.charset.Charset;
  33 import java.nio.file.Files;
  34 import java.nio.file.Paths;
  35 import java.nio.file.Path;
  36 import java.util.ArrayList;
  37 import java.util.HashMap;
  38 import java.util.List;
  39 import java.util.Map;
  40 import java.util.Locale;
  41 import java.util.Locale.LanguageRange;
  42 import java.util.stream.Collectors;
  43 import java.util.stream.Stream;
  44 
  45 import static java.util.Locale.LanguageRange.MAX_WEIGHT;
  46 import static java.util.Locale.LanguageRange.MIN_WEIGHT;
  47 
  48 public class LSRDataTest {
  49 
  50     private static final char HYPHEN = '-';
  51     private static final Map<String, String> singleLangEquivMap = new HashMap<>();
  52     private static final Map<String, List<String>> multiLangEquivsMap = new HashMap<>();
  53     private static final Map<String, String> regionVariantEquivMap = new HashMap<>();
  54 
  55     // path to the lsr file from the make folder, this test relies on the
  56     // relative path to the file in the make folder, considering
  57     // test and make will always exist in the same jdk layout
  58     private static final String LSR_FILE_PATH = System.getProperty("test.src", ".")
  59                 + "/../../../../../make/data/lsrdata/language-subtag-registry.txt";
  60 
  61     public static void main(String[] args) throws IOException {
  62 
  63         loadLSRData(Paths.get(LSR_FILE_PATH).toRealPath());
  64 
  65         // checking the tags with weight
  66         String ranges = "Accept-Language: aam, adp, aue, bcg, cqu, ema,"
  67                 + " en-gb-oed, gti, koj, kwq, kxe, lii, lmm, mtm, ngv,"
  68                 + " oyb, phr, pub, suj, taj;q=0.9, yug;q=0.5, gfx;q=0.4";
  69         List<LanguageRange> expected = parse(ranges);
  70         List<LanguageRange> actual = LanguageRange.parse(ranges);
  71         checkEquality(actual, expected);
  72 
  73         // checking all language ranges
  74         ranges = generateLangRanges();
  75         expected = parse(ranges);
  76         actual = LanguageRange.parse(ranges);
  77         checkEquality(actual, expected);
  78 
  79         // checking all region/variant ranges
  80         ranges = generateRegionRanges();
  81         expected = parse(ranges);
  82         actual = LanguageRange.parse(ranges);
  83         checkEquality(actual, expected);
  84 
  85     }
  86 
  87     // generate range string containing all equiv language tags
  88     private static String generateLangRanges() {
  89         return Stream.concat(singleLangEquivMap.keySet().stream(), multiLangEquivsMap
  90                 .keySet().stream()).collect(Collectors.joining(","));
  91     }
  92 
  93     // generate range string containing all equiv region tags
  94     private static String generateRegionRanges() {
  95         return regionVariantEquivMap.keySet().stream()
  96                 .map(r -> "en".concat(r)).collect(Collectors.joining(", "));
  97     }
  98 
  99     // load LSR data from the file
 100     private static void loadLSRData(Path path) throws IOException {
 101         String type = null;
 102         String tag = null;
 103         String preferred = null;
 104         String prefix = null;
 105 
 106         for (String line : Files.readAllLines(path, Charset.forName("UTF-8"))) {
 107             line = line.toLowerCase(Locale.ROOT);
 108             int index = line.indexOf(' ') + 1;
 109             if (line.startsWith("type:")) {
 110                 type = line.substring(index);
 111             } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
 112                 tag = line.substring(index);
 113             } else if (line.startsWith("preferred-value:")) {
 114                 preferred = line.substring(index);
 115             } else if (line.startsWith("prefix:")) {
 116                 prefix = line.substring(index);
 117             } else if (line.equals("%%")) {
 118                 processDataAndGenerateMaps(type, tag, preferred, prefix);
 119                 type = null;
 120                 tag = null;
 121                 preferred = null;
 122                 prefix = null;
 123             }
 124         }
 125 
 126         // Last entry
 127         processDataAndGenerateMaps(type, tag, preferred, prefix);
 128     }
 129 
 130     private static void processDataAndGenerateMaps(String type,
 131             String tag,
 132             String preferred,
 133             String prefix) {
 134 
 135         if (type == null || tag == null || preferred == null) {
 136             return;
 137         }
 138 
 139         if (type.equals("extlang") && prefix != null) {
 140             tag = prefix + "-" + tag;
 141         }
 142 
 143         if (type.equals("region") || type.equals("variant")) {
 144             if (!regionVariantEquivMap.containsKey(preferred)) {
 145                 String tPref = HYPHEN + preferred;
 146                 String tTag = HYPHEN + tag;
 147                 regionVariantEquivMap.put(tPref, tTag);
 148                 regionVariantEquivMap.put(tTag, tPref);
 149             } else {
 150                 throw new RuntimeException("New case, need implementation."
 151                         + " A region/variant subtag \"" + preferred
 152                         + "\" is registered for more than one subtags.");
 153             }
 154         } else { // language, extlang, grandfathered, and redundant
 155             if (!singleLangEquivMap.containsKey(preferred)
 156                     && !multiLangEquivsMap.containsKey(preferred)) {
 157                 // new entry add it into single equiv map
 158                 singleLangEquivMap.put(preferred, tag);
 159                 singleLangEquivMap.put(tag, preferred);
 160             } else if (singleLangEquivMap.containsKey(preferred)
 161                     && !multiLangEquivsMap.containsKey(preferred)) {
 162                 String value = singleLangEquivMap.get(preferred);
 163                 List<String> subtags = List.of(preferred, value, tag);
 164                 // remove from single eqiv map before adding to multi equiv
 165                 singleLangEquivMap.keySet().removeAll(subtags);
 166                 addEntriesToMultiEquivsMap(subtags);
 167             } else if (multiLangEquivsMap.containsKey(preferred)
 168                     && !singleLangEquivMap.containsKey(preferred)) {
 169                 List<String> subtags = multiLangEquivsMap.get(preferred);
 170                 // should use the order preferred, subtags, tag to keep the
 171                 // expected order same as the JDK API in multi equivalent maps
 172                 subtags.add(0, preferred);
 173                 subtags.add(tag);
 174                 addEntriesToMultiEquivsMap(subtags);
 175             }
 176         }
 177     }
 178 
 179     // Add entries into the multi equivalent map from the given subtags
 180     private static void addEntriesToMultiEquivsMap(List<String> subtags) {
 181         // for each subtag within the given subtags, add an entry in multi
 182         // equivalent language map with subtag as the key and the value
 183         // as the list of all subtags excluding the one which is getting
 184         // traversed
 185         subtags.forEach(subtag -> multiLangEquivsMap.put(subtag, subtags.stream()
 186                 .filter(t -> !t.equals(subtag))
 187                 .collect(Collectors.toList())));
 188     }
 189 
 190     private static List<LanguageRange> parse(String ranges) {
 191         ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
 192         if (ranges.startsWith("accept-language:")) {
 193             ranges = ranges.substring(16);
 194         }
 195         String[] langRanges = ranges.split(",");
 196         List<LanguageRange> priorityList = new ArrayList<>(langRanges.length);
 197         int numOfRanges = 0;
 198         for (String range : langRanges) {
 199             int wIndex = range.indexOf(";q=");
 200             String tag;
 201             double weight = 0.0;
 202             if (wIndex == -1) {
 203                 tag = range;
 204                 weight = MAX_WEIGHT;
 205             } else {
 206                 tag = range.substring(0, wIndex);
 207                 try {
 208                     weight = Double.parseDouble(range.substring(wIndex + 3));
 209                 } catch (RuntimeException ex) {
 210                     throw new IllegalArgumentException("weight= " + weight + " for"
 211                             + " language range \"" + tag + "\", should be"
 212                             + " represented as a double");
 213                 }
 214 
 215                 if (weight < MIN_WEIGHT || weight > MAX_WEIGHT) {
 216                     throw new IllegalArgumentException("weight=" + weight
 217                             + " for language range \"" + tag
 218                             + "\", must be between " + MIN_WEIGHT
 219                             + " and " + MAX_WEIGHT + ".");
 220                 }
 221             }
 222 
 223             LanguageRange entry = new LanguageRange(tag, weight);
 224             if (!priorityList.contains(entry)) {
 225 
 226                 int index = numOfRanges;
 227                 // find the index in the list to add the current range at the
 228                 // correct index sorted by the descending order of weight
 229                 for (int i = 0; i < priorityList.size(); i++) {
 230                     if (priorityList.get(i).getWeight() < weight) {
 231                         index = i;
 232                         break;
 233                     }
 234                 }
 235                 priorityList.add(index, entry);
 236                 numOfRanges++;
 237 
 238                 String equivalent = getEquivalentForRegionAndVariant(tag);
 239                 if (equivalent != null) {
 240                     LanguageRange equivRange = new LanguageRange(equivalent, weight);
 241                     if (!priorityList.contains(equivRange)) {
 242                         priorityList.add(index + 1, equivRange);
 243                         numOfRanges++;
 244                     }
 245                 }
 246 
 247                 List<String> equivalents = getEquivalentsForLanguage(tag);
 248                 if (equivalents != null) {
 249                     for (String equiv : equivalents) {
 250                         LanguageRange equivRange = new LanguageRange(equiv, weight);
 251                         if (!priorityList.contains(equivRange)) {
 252                             priorityList.add(index + 1, equivRange);
 253                             numOfRanges++;
 254                         }
 255 
 256                         equivalent = getEquivalentForRegionAndVariant(equiv);
 257                         if (equivalent != null) {
 258                             equivRange = new LanguageRange(equivalent, weight);
 259                             if (!priorityList.contains(equivRange)) {
 260                                 priorityList.add(index + 1, equivRange);
 261                                 numOfRanges++;
 262                             }
 263                         }
 264                     }
 265                 }
 266             }
 267         }
 268         return priorityList;
 269     }
 270 
 271     /**
 272      * A faster alternative approach to String.replaceFirst(), if the given
 273      * string is a literal String, not a regex.
 274      */
 275     private static String replaceFirstSubStringMatch(String range,
 276             String substr, String replacement) {
 277         int pos = range.indexOf(substr);
 278         if (pos == -1) {
 279             return range;
 280         } else {
 281             return range.substring(0, pos) + replacement
 282                     + range.substring(pos + substr.length());
 283         }
 284     }
 285 
 286     private static List<String> getEquivalentsForLanguage(String range) {
 287         String r = range;
 288 
 289         while (r.length() > 0) {
 290             if (singleLangEquivMap.containsKey(r)) {
 291                 String equiv = singleLangEquivMap.get(r);
 292                 // Return immediately for performance if the first matching
 293                 // subtag is found.
 294                 return List.of(replaceFirstSubStringMatch(range, r, equiv));
 295             } else if (multiLangEquivsMap.containsKey(r)) {
 296                 List<String> equivs = multiLangEquivsMap.get(r);
 297                 List<String> result = new ArrayList(equivs.size());
 298                 for (int i = 0; i < equivs.size(); i++) {
 299                     result.add(i, replaceFirstSubStringMatch(range,
 300                             r, equivs.get(i)));
 301                 }
 302                 return result;
 303             }
 304 
 305             // Truncate the last subtag simply.
 306             int index = r.lastIndexOf(HYPHEN);
 307             if (index == -1) {
 308                 break;
 309             }
 310             r = r.substring(0, index);
 311         }
 312 
 313         return null;
 314     }
 315 
 316     private static String getEquivalentForRegionAndVariant(String range) {
 317         int extensionKeyIndex = getExtentionKeyIndex(range);
 318 
 319         for (String subtag : regionVariantEquivMap.keySet()) {
 320             int index;
 321             if ((index = range.indexOf(subtag)) != -1) {
 322                 // Check if the matching text is a valid region or variant.
 323                 if (extensionKeyIndex != Integer.MIN_VALUE
 324                         && index > extensionKeyIndex) {
 325                     continue;
 326                 }
 327 
 328                 int len = index + subtag.length();
 329                 if (range.length() == len || range.charAt(len) == HYPHEN) {
 330                     return replaceFirstSubStringMatch(range, subtag,
 331                             regionVariantEquivMap.get(subtag));
 332                 }
 333             }
 334         }
 335 
 336         return null;
 337     }
 338 
 339     private static int getExtentionKeyIndex(String s) {
 340         char[] c = s.toCharArray();
 341         int index = Integer.MIN_VALUE;
 342         for (int i = 1; i < c.length; i++) {
 343             if (c[i] == HYPHEN) {
 344                 if (i - index == 2) {
 345                     return index;
 346                 } else {
 347                     index = i;
 348                 }
 349             }
 350         }
 351         return Integer.MIN_VALUE;
 352     }
 353 
 354     private static void checkEquality(List<LanguageRange> expected,
 355             List<LanguageRange> actual) {
 356 
 357         int expectedSize = expected.size();
 358         int actualSize = actual.size();
 359 
 360         if (expectedSize != actualSize) {
 361             throw new RuntimeException("[FAILED: Size of the priority list"
 362                     + " does not match, Expected size=" + expectedSize + "]");
 363         } else {
 364             for (int i = 0; i < expectedSize; i++) {
 365                 LanguageRange lr1 = expected.get(i);
 366                 LanguageRange lr2 = actual.get(i);
 367 
 368                 if (!lr1.getRange().equals(lr2.getRange())
 369                         || lr1.getWeight() != lr2.getWeight()) {
 370                     throw new RuntimeException("[FAILED: Ranges at index "
 371                             + i + " do not match Expected: range=" + lr1.getRange()
 372                             + ", weight=" + lr1.getWeight() + ", Actual: range="
 373                             + lr2.getRange() + ", weight=" + lr2.getWeight() + "]");
 374                 }
 375             }
 376         }
 377     }
 378 }