/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 8204938
 * @summary Checks the IANA language subtag registry data update
 *          with Locale.LanguageRange parse method.
 * @run main LSRDataTest
 */
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Locale;
import java.util.Locale.LanguageRange;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Locale.LanguageRange.MAX_WEIGHT;
import static java.util.Locale.LanguageRange.MIN_WEIGHT;

/**
 * Compares the priority list returned by {@link LanguageRange#parse(String)}
 * with a list built by this test directly from the language subtag registry
 * file, for a fixed weighted range string and for range strings generated
 * from every equivalence found in the registry.
 */
public class LSRDataTest {

    private static final char HYPHEN = '-';

    // language tag <-> its one equivalent tag (both directions are stored)
    private static final Map<String, String> singleLangEquivMap = new HashMap<>();

    // language tag -> all of its equivalents, for tags having more than one
    private static final Map<String, List<String>> multiLangEquivsMap = new HashMap<>();

    // "-subtag" <-> "-equivalent" for region and variant subtags; keys and
    // values both carry the leading hyphen
    private static final Map<String, String> regionVariantEquivMap = new HashMap<>();

    public static void main(String[] args) throws IOException {
        // accessing lsr file from the make folder, this test relies on the
        // relative path to the file in the make folder, considering
        // test and make will always exist in the same jdk layout
        String srcFileName = System.getProperty("test.src", ".")
                + "/../../../../../make/data/lsrdata/language-subtag-registry.txt";
        loadLSRData(Paths.get(srcFileName).toRealPath());

        // checking the tags with weight
        checkRanges("Accept-Language: aam, adp, aue, bcg, cqu, ema,"
                + " en-gb-oed, gti, koj, kwq, kxe, lii, lmm, mtm, ngv,"
                + " oyb, phr, pub, suj, taj;q=0.9, yug;q=0.5, gfx;q=0.4");

        // checking all language ranges
        checkRanges(generateLangRanges());

        // checking all region/variant ranges
        checkRanges(generateRegionRanges());
    }

    // Parses the given ranges with both this test's parser and the JDK API
    // and verifies that the two priority lists are identical.
    private static void checkRanges(String ranges) {
        List<LanguageRange> expected = parse(ranges);
        List<LanguageRange> actual = LanguageRange.parse(ranges);
        // arguments follow the (expected, actual) order of the signature so
        // that failure messages label the two sides correctly
        checkEquality(expected, actual);
    }

    // generate range string containing all equiv language tags
    private static String generateLangRanges() {
        return Stream.concat(singleLangEquivMap.keySet().stream(),
                             multiLangEquivsMap.keySet().stream())
                .collect(Collectors.joining(","));
    }

    // generate range string containing all equiv region tags; every key
    // already starts with a hyphen, so prefixing "en" yields "en-<subtag>"
    private static String generateRegionRanges() {
        return regionVariantEquivMap.keySet().stream()
                .map(r -> "en".concat(r))
                .collect(Collectors.joining(", "));
    }

    // load LSR data from the file
    private static void loadLSRData(Path path) throws IOException {
        String type = null;
        String tag = null;
        String preferred;

        for (String line : Files.readAllLines(path, Charset.forName("UTF-8"))) {
            line = line.toLowerCase(Locale.ROOT);
            // the field value starts right after the first space
            int index = line.indexOf(' ') + 1;
            if (line.startsWith("type:")) {
                type = line.substring(index);
            } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
                tag = line.substring(index);
            } else if (line.startsWith("preferred-value:") && !type.equals("extlang")) {
                preferred = line.substring(index);
                processDataAndGenerateMaps(type, tag, preferred);
            } else if (line.equals("%%")) {
                // record separator, forget the fields of the previous record
                type = null;
                tag = null;
            }
        }
    }

    /**
     * Records the equivalence between {@code tag} and its preferred value in
     * the map that corresponds to the record type.
     *
     * @param type      value of the record's "type" field
     * @param tag       value of the record's "tag" or "subtag" field
     * @param preferred value of the record's "preferred-value" field
     * @throws RuntimeException if a region/variant preferred value is
     *         already registered
     */
    private static void processDataAndGenerateMaps(String type,
                                                   String tag,
                                                   String preferred) {
        if (type.equals("region") || type.equals("variant")) {
            String tPref = HYPHEN + preferred;
            String tTag = HYPHEN + tag;
            // keys are stored with the leading hyphen, so the lookup has to
            // use the prefixed form to ever detect a duplicate
            if (!regionVariantEquivMap.containsKey(tPref)) {
                regionVariantEquivMap.put(tPref, tTag);
                regionVariantEquivMap.put(tTag, tPref);
            } else {
                throw new RuntimeException("New case, need implementation."
                        + " A region/variant subtag \"" + preferred
                        + "\" is registered for more than one subtags.");
            }
        } else { // language, grandfathered, and redundant
            boolean inSingle = singleLangEquivMap.containsKey(preferred);
            boolean inMulti = multiLangEquivsMap.containsKey(preferred);
            if (!inSingle && !inMulti) {
                // new entry add it into single equiv map
                singleLangEquivMap.put(preferred, tag);
                singleLangEquivMap.put(tag, preferred);
            } else if (inSingle && !inMulti) {
                String value = singleLangEquivMap.get(preferred);
                List<String> subtags = List.of(preferred, value, tag);
                // remove from single equiv map before adding to multi equiv
                singleLangEquivMap.keySet().removeAll(subtags);
                addEntriesToMultiEquivsMap(subtags);
            } else if (inMulti && !inSingle) {
                // work on a copy so that the list still held by the map is
                // not modified while the replacement entries are built
                List<String> subtags =
                        new ArrayList<>(multiLangEquivsMap.get(preferred));
                // should use the order preferred, subtags, tag to keep the
                // expected order same as the JDK API in multi equivalent maps
                subtags.add(0, preferred);
                subtags.add(tag);
                addEntriesToMultiEquivsMap(subtags);
            }
        }
    }

    // Add entries into the multi equivalent map from the given subtags
    private static void addEntriesToMultiEquivsMap(List<String> subtags) {
        // for each subtag within the given subtags, add an entry in multi
        // equivalent language map with subtag as the key and the value
        // as the list of all subtags excluding the one which is getting
        // traversed
        subtags.forEach(subtag -> multiLangEquivsMap.put(subtag, subtags.stream()
                .filter(t -> !t.equals(subtag))
                .collect(Collectors.toList())));
    }

    /**
     * Builds the expected priority list for the given ranges from the maps
     * loaded from the registry file.
     *
     * Spaces are removed, the text is lower-cased and a leading
     * "accept-language:" is dropped. Each comma separated range may carry a
     * ";q=" weight and defaults to MAX_WEIGHT. A range is inserted before the
     * first entry having a strictly lower weight, or appended when there is
     * none. Its equivalents are inserted at the position right after it.
     *
     * @param ranges the language ranges to parse
     * @return the priority list in descending order of weight
     * @throws IllegalArgumentException if a weight is not a double or is
     *         outside of MIN_WEIGHT..MAX_WEIGHT
     */
    private static List<LanguageRange> parse(String ranges) {
        ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
        if (ranges.startsWith("accept-language:")) {
            ranges = ranges.substring(16); // length of "accept-language:"
        }
        String[] langRanges = ranges.split(",");
        List<LanguageRange> priorityList = new ArrayList<>(langRanges.length);
        for (String range : langRanges) {
            int wIndex = range.indexOf(";q=");
            String tag;
            double weight;
            if (wIndex == -1) {
                tag = range;
                weight = MAX_WEIGHT;
            } else {
                tag = range.substring(0, wIndex);
                String rawWeight = range.substring(wIndex + 3);
                try {
                    weight = Double.parseDouble(rawWeight);
                } catch (NumberFormatException ex) {
                    // report the text that failed to parse and keep the cause
                    throw new IllegalArgumentException("weight= " + rawWeight + " for"
                            + " language range \"" + tag + "\", should be"
                            + " represented as a double", ex);
                }

                if (weight < MIN_WEIGHT || weight > MAX_WEIGHT) {
                    throw new IllegalArgumentException("weight=" + weight
                            + " for language range \"" + tag
                            + "\", must be between " + MIN_WEIGHT
                            + " and " + MAX_WEIGHT + ".");
                }
            }

            LanguageRange entry = new LanguageRange(tag, weight);
            if (priorityList.contains(entry)) {
                continue;
            }

            // find the index in the list to add the current range at the
            // correct index sorted by the descending order of weight
            int index = priorityList.size();
            for (int i = 0; i < priorityList.size(); i++) {
                if (priorityList.get(i).getWeight() < weight) {
                    index = i;
                    break;
                }
            }
            priorityList.add(index, entry);

            String equivalent = getEquivalentForRegionAndVariant(tag);
            if (equivalent != null) {
                addIfAbsent(priorityList, index + 1, equivalent, weight);
            }

            List<String> equivalents = getEquivalentsForLanguage(tag);
            if (equivalents != null) {
                for (String equiv : equivalents) {
                    addIfAbsent(priorityList, index + 1, equiv, weight);

                    equivalent = getEquivalentForRegionAndVariant(equiv);
                    if (equivalent != null) {
                        addIfAbsent(priorityList, index + 1, equivalent, weight);
                    }
                }
            }
        }
        return priorityList;
    }

    // Inserts a range for the given tag and weight at the given position
    // unless an equal range is already present in the list.
    private static void addIfAbsent(List<LanguageRange> priorityList, int position,
                                    String tag, double weight) {
        LanguageRange equivRange = new LanguageRange(tag, weight);
        if (!priorityList.contains(equivRange)) {
            priorityList.add(position, equivRange);
        }
    }

    /**
     * A faster alternative approach to String.replaceFirst(), if the given
     * string is a literal String, not a regex.
     */
    private static String replaceFirstSubStringMatch(String range,
            String substr, String replacement) {
        int pos = range.indexOf(substr);
        if (pos == -1) {
            return range;
        } else {
            return range.substring(0, pos) + replacement
                    + range.substring(pos + substr.length());
        }
    }

    /**
     * Looks up the equivalents of the longest prefix of the given range that
     * is registered in one of the language equivalent maps, dropping one
     * trailing subtag at a time.
     *
     * @param range the language range
     * @return the range with the matched prefix replaced by each of its
     *         equivalents, or {@code null} if no prefix is registered
     */
    private static List<String> getEquivalentsForLanguage(String range) {
        String r = range;

        while (r.length() > 0) {
            if (singleLangEquivMap.containsKey(r)) {
                String equiv = singleLangEquivMap.get(r);
                // Return immediately for performance if the first matching
                // subtag is found.
                return List.of(replaceFirstSubStringMatch(range, r, equiv));
            } else if (multiLangEquivsMap.containsKey(r)) {
                List<String> equivs = multiLangEquivsMap.get(r);
                List<String> result = new ArrayList<>(equivs.size());
                for (String equiv : equivs) {
                    result.add(replaceFirstSubStringMatch(range, r, equiv));
                }
                return result;
            }

            // Truncate the last subtag simply.
            int index = r.lastIndexOf(HYPHEN);
            if (index == -1) {
                break;
            }
            r = r.substring(0, index);
        }

        return null;
    }

    /**
     * Replaces the first registered region or variant subtag found in the
     * given range with its equivalent.
     *
     * @param range the language range
     * @return the range with the subtag replaced, or {@code null} if the
     *         range contains no registered region or variant subtag
     */
    private static String getEquivalentForRegionAndVariant(String range) {
        int extensionKeyIndex = getExtentionKeyIndex(range);

        for (String subtag : regionVariantEquivMap.keySet()) {
            int index;
            if ((index = range.indexOf(subtag)) != -1) {
                // Check if the matching text is a valid region or variant.
                // A match located after the single character subtag is not.
                if (extensionKeyIndex != Integer.MIN_VALUE
                        && index > extensionKeyIndex) {
                    continue;
                }

                // the match has to end at a subtag boundary
                int len = index + subtag.length();
                if (range.length() == len || range.charAt(len) == HYPHEN) {
                    return replaceFirstSubStringMatch(range, subtag,
                            regionVariantEquivMap.get(subtag));
                }
            }
        }

        return null;
    }

    /**
     * Finds the first single character subtag that is enclosed by hyphens.
     *
     * @param s the language range
     * @return the index of the hyphen preceding that subtag, or
     *         Integer.MIN_VALUE if there is none
     */
    private static int getExtentionKeyIndex(String s) {
        char[] c = s.toCharArray();
        int index = Integer.MIN_VALUE;
        for (int i = 1; i < c.length; i++) {
            if (c[i] == HYPHEN) {
                // while no hyphen has been seen, i - index wraps around to a
                // negative value and therefore never equals 2
                if (i - index == 2) {
                    return index;
                } else {
                    index = i;
                }
            }
        }
        return Integer.MIN_VALUE;
    }

    /**
     * Verifies that both lists have the same size and hold the same range
     * and weight at every index.
     *
     * @param expected the list built by this test
     * @param actual   the list returned by the JDK API
     * @throws RuntimeException on the first difference found
     */
    private static void checkEquality(List<LanguageRange> expected,
                                      List<LanguageRange> actual) {

        int expectedSize = expected.size();
        int actualSize = actual.size();

        if (expectedSize != actualSize) {
            throw new RuntimeException("[FAILED: Size of the priority list"
                    + " does not match, Expected size=" + expectedSize
                    + ", Actual size=" + actualSize + "]");
        }

        for (int i = 0; i < expectedSize; i++) {
            LanguageRange lr1 = expected.get(i);
            LanguageRange lr2 = actual.get(i);

            if (!lr1.getRange().equals(lr2.getRange())
                    || lr1.getWeight() != lr2.getWeight()) {
                throw new RuntimeException("[FAILED: Ranges at index "
                        + i + " do not match Expected: range=" + lr1.getRange()
                        + ", weight=" + lr1.getWeight() + ", Actual: range="
                        + lr2.getRange() + ", weight=" + lr2.getWeight() + "]");
            }
        }
    }
}