/*
 * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.util.locale;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Locale.*;
import static java.util.Locale.FilteringMode.*;
import static java.util.Locale.LanguageRange.*;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Implementation for BCP47 Locale matching
 *
 * <p>All methods are static. Tags are lower-cased with {@link Locale#ROOT}
 * before they are compared with a range, and matching tags are generally
 * reported in that lower-cased form.
 */
public final class LocaleMatcher {

    /**
     * Filters {@code locales} against {@code priorityList}.
     *
     * <p>Every locale is converted to its language tag, the tags are filtered
     * by {@link #filterTags}, and the surviving tags are converted back with
     * {@link Locale#forLanguageTag}.
     *
     * @param priorityList the language ranges to match against
     * @param locales      the candidate locales
     * @param mode         the filtering mode, forwarded to {@link #filterTags}
     * @return a new mutable list of matching locales; empty if either input
     *         is empty
     */
    public static List<Locale> filter(List<LanguageRange> priorityList,
                                      Collection<Locale> locales,
                                      FilteringMode mode) {
        if (priorityList.isEmpty() || locales.isEmpty()) {
            return new ArrayList<>(); // need to return an empty mutable List
        }

        // Create a list of language tags to be matched.
        List<String> tags = new ArrayList<>(locales.size());
        for (Locale locale : locales) {
            tags.add(locale.toLanguageTag());
        }

        // Filter language tags.
        List<String> filteredTags = filterTags(priorityList, tags, mode);

        // Create a list of matching locales.
        List<Locale> filteredLocales = new ArrayList<>(filteredTags.size());
        for (String tag : filteredTags) {
            filteredLocales.add(Locale.forLanguageTag(tag));
        }

        return filteredLocales;
    }

    /**
     * Filters {@code tags} against {@code priorityList} using {@code mode}.
     *
     * <p>{@code EXTENDED_FILTERING} always uses extended filtering. In every
     * other mode each range is inspected; a range that starts with
     * {@code "*-"} or contains {@code "-*"} is treated as an extended range:
     * <ul>
     * <li>{@code AUTOSELECT_FILTERING}: the whole original priority list is
     *     handed to extended filtering;</li>
     * <li>{@code MAP_EXTENDED_RANGES}: the range is mapped to a basic range
     *     ({@code "*"} if it starts with {@code '*'}, otherwise every
     *     {@code "-*"} is removed) keeping its weight;</li>
     * <li>{@code REJECT_EXTENDED_RANGES}: an exception is thrown;</li>
     * <li>any other mode: the extended range is dropped.</li>
     * </ul>
     * The resulting basic ranges are then used for basic filtering.
     *
     * @param priorityList the language ranges to match against
     * @param tags         the candidate language tags
     * @param mode         the filtering mode
     * @return a new mutable list of matching tags; empty if either input is
     *         empty
     * @throws IllegalArgumentException if an extended range is found in
     *         {@code REJECT_EXTENDED_RANGES} mode
     */
    public static List<String> filterTags(List<LanguageRange> priorityList,
                                          Collection<String> tags,
                                          FilteringMode mode) {
        if (priorityList.isEmpty() || tags.isEmpty()) {
            return new ArrayList<>(); // need to return an empty mutable List
        }

        if (mode == EXTENDED_FILTERING) {
            return filterExtended(priorityList, tags);
        }

        List<LanguageRange> list = new ArrayList<>();
        for (LanguageRange lr : priorityList) {
            String range = lr.getRange();
            if (range.startsWith("*-")
                || range.indexOf("-*") != -1) { // Extended range
                if (mode == AUTOSELECT_FILTERING) {
                    return filterExtended(priorityList, tags);
                } else if (mode == MAP_EXTENDED_RANGES) {
                    if (range.charAt(0) == '*') {
                        range = "*";
                    } else {
                        // Literal replacement; no regex is needed here.
                        range = range.replace("-*", "");
                    }
                    list.add(new LanguageRange(range, lr.getWeight()));
                } else if (mode == REJECT_EXTENDED_RANGES) {
                    throw new IllegalArgumentException("An extended range \""
                                  + range
                                  + "\" found in REJECT_EXTENDED_RANGES mode.");
                }
            } else { // Basic range
                list.add(lr);
            }
        }

        return filterBasic(list, tags);
    }

    /**
     * Basic filtering: a lower-cased tag matches a range when it equals the
     * range or starts with the range followed by {@code '-'}.
     *
     * <p>A non-zero {@code "*"} range ends the scan immediately and returns
     * every tag that is not excluded by a q=0 range. Otherwise matches are
     * collected in encounter order without duplicates, skipping any tag that
     * also matches a q=0 range.
     */
    private static List<String> filterBasic(List<LanguageRange> priorityList,
                                            Collection<String> tags) {
        int splitIndex = splitRanges(priorityList);
        List<LanguageRange> nonZeroRanges = acceptedRanges(priorityList, splitIndex);
        List<LanguageRange> zeroRanges = excludedRanges(priorityList, splitIndex);

        // Insertion-ordered set: same result order as a list with a
        // contains() guard, without the linear scan per candidate.
        Set<String> matched = new LinkedHashSet<>();
        for (LanguageRange lr : nonZeroRanges) {
            String range = lr.getRange();
            if (range.equals("*")) {
                tags = removeTagsMatchingBasicZeroRange(zeroRanges, tags);
                return new ArrayList<String>(tags);
            }

            for (String tag : tags) {
                tag = tag.toLowerCase(Locale.ROOT);
                if (matchesBasicRange(range, tag)
                    && !matched.contains(tag)
                    && !shouldIgnoreFilterBasicMatch(zeroRanges, tag)) {
                    matched.add(tag);
                }
            }
        }

        return new ArrayList<>(matched);
    }

    /**
     * Returns true if {@code tag} equals {@code range} or starts with
     * {@code range} immediately followed by {@code '-'}.
     */
    private static boolean matchesBasicRange(String range, String tag) {
        if (!tag.startsWith(range)) {
            return false;
        }
        int len = range.length();
        return tag.length() == len || tag.charAt(len) == '-';
    }

    /**
     * Removes the tag(s) which are falling in the basic exclusion range(s) i.e
     * range(s) with q=0 and returns the updated collection. If the basic
     * language ranges contains '*' as one of its non zero range then instead of
     * returning all the tags, remove those which are matching the range with
     * quality weight q=0.
     *
     * <p>When there is no q=0 range the given collection is returned as is;
     * otherwise the surviving tags are returned lower-cased in a new list.
     */
    private static Collection<String> removeTagsMatchingBasicZeroRange(
            List<LanguageRange> zeroRange, Collection<String> tags) {
        if (zeroRange.isEmpty()) {
            return tags;
        }

        List<String> matchingTags = new ArrayList<>();
        for (String tag : tags) {
            tag = tag.toLowerCase(Locale.ROOT);
            if (!shouldIgnoreFilterBasicMatch(zeroRange, tag)) {
                matchingTags.add(tag);
            }
        }

        return matchingTags;
    }

    /**
     * The tag which is falling in the basic exclusion range(s) should not
     * be considered as the matching tag. Ignores the tag matching with the
     * non-zero ranges, if the tag also matches with one of the basic exclusion
     * ranges i.e. range(s) having quality weight q=0
     *
     * @param zeroRange the q=0 ranges; a {@code "*"} entry excludes every tag
     * @param tag       the lower-cased tag to test
     * @return true if the tag matches any of the q=0 ranges
     */
    private static boolean shouldIgnoreFilterBasicMatch(
            List<LanguageRange> zeroRange, String tag) {
        for (LanguageRange lr : zeroRange) {
            String range = lr.getRange();
            if (range.equals("*") || matchesBasicRange(range, tag)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Extended filtering: ranges and tags are split on {@code '-'} and
     * compared subtag by subtag (see {@link #matchesExtendedRange}).
     *
     * <p>A non-zero {@code "*"} range ends the scan immediately and returns
     * every tag that is not excluded by a q=0 range. Otherwise matches are
     * collected in encounter order without duplicates, skipping any tag that
     * also matches a q=0 range.
     */
    private static List<String> filterExtended(List<LanguageRange> priorityList,
                                               Collection<String> tags) {
        int splitIndex = splitRanges(priorityList);
        List<LanguageRange> nonZeroRanges = acceptedRanges(priorityList, splitIndex);
        List<LanguageRange> zeroRanges = excludedRanges(priorityList, splitIndex);

        // Insertion-ordered set: same result order as a list with a
        // contains() guard, without the linear scan per candidate.
        Set<String> matched = new LinkedHashSet<>();
        for (LanguageRange lr : nonZeroRanges) {
            String range = lr.getRange();
            if (range.equals("*")) {
                tags = removeTagsMatchingExtendedZeroRange(zeroRanges, tags);
                return new ArrayList<String>(tags);
            }

            String[] rangeSubtags = range.split("-");
            for (String tag : tags) {
                tag = tag.toLowerCase(Locale.ROOT);
                String[] tagSubtags = tag.split("-");
                if (matchesExtendedRange(rangeSubtags, tagSubtags)
                    && !matched.contains(tag)
                    && !shouldIgnoreFilterExtendedMatch(zeroRanges, tag)) {
                    matched.add(tag);
                }
            }
        }

        return new ArrayList<>(matched);
    }

    /**
     * Returns true if the first range subtag is {@code "*"} or equals the
     * first tag subtag, and all remaining range subtags are consumed by
     * {@link #matchFilterExtendedSubtags}.
     */
    private static boolean matchesExtendedRange(String[] rangeSubtags,
                                                String[] tagSubtags) {
        if (!rangeSubtags[0].equals(tagSubtags[0])
            && !rangeSubtags[0].equals("*")) {
            return false;
        }

        return matchFilterExtendedSubtags(rangeSubtags, tagSubtags)
                   == rangeSubtags.length;
    }

    /**
     * Removes the tag(s) which are falling in the extended exclusion range(s)
     * i.e range(s) with q=0 and returns the updated collection. If the extended
     * language ranges contains '*' as one of its non zero range then instead of
     * returning all the tags, remove those which are matching the range with
     * quality weight q=0.
     *
     * <p>When there is no q=0 range the given collection is returned as is;
     * otherwise the surviving tags are returned lower-cased in a new list.
     */
    private static Collection<String> removeTagsMatchingExtendedZeroRange(
            List<LanguageRange> zeroRange, Collection<String> tags) {
        if (zeroRange.isEmpty()) {
            return tags;
        }

        List<String> matchingTags = new ArrayList<>();
        for (String tag : tags) {
            tag = tag.toLowerCase(Locale.ROOT);
            if (!shouldIgnoreFilterExtendedMatch(zeroRange, tag)) {
                matchingTags.add(tag);
            }
        }

        return matchingTags;
    }

    /**
     * The tag which is falling in the extended exclusion range(s) should
     * not be considered as the matching tag. Ignores the tag matching with the
     * non zero range(s), if the tag also matches with one of the extended
     * exclusion range(s) i.e. range(s) having quality weight q=0
     *
     * @param zeroRange the q=0 ranges; a {@code "*"} entry excludes every tag
     * @param tag       the lower-cased tag to test
     * @return true if the tag matches any of the q=0 ranges
     */
    private static boolean shouldIgnoreFilterExtendedMatch(
            List<LanguageRange> zeroRange, String tag) {
        if (zeroRange.isEmpty()) {
            return false;
        }

        String[] tagSubtags = tag.split("-");
        for (LanguageRange lr : zeroRange) {
            String range = lr.getRange();
            if (range.equals("*")) {
                return true;
            }

            if (matchesExtendedRange(range.split("-"), tagSubtags)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Walks the range and tag subtags side by side, starting at index 1 in
     * both arrays (index 0 is compared by the caller).
     *
     * <ul>
     * <li>a {@code "*"} range subtag is consumed without consuming a tag
     *     subtag;</li>
     * <li>equal subtags are consumed together;</li>
     * <li>a differing single-character tag subtag stops the walk;</li>
     * <li>any other differing tag subtag is skipped.</li>
     * </ul>
     *
     * @return the index of the first range subtag that was not consumed; the
     *         callers treat a value equal to {@code rangeSubtags.length} as a
     *         match
     */
    private static int matchFilterExtendedSubtags(String[] rangeSubtags,
                                                  String[] tagSubtags) {
        int rangeIndex = 1;
        int tagIndex = 1;

        while (rangeIndex < rangeSubtags.length
               && tagIndex < tagSubtags.length) {
            if (rangeSubtags[rangeIndex].equals("*")) {
                rangeIndex++;
            } else if (rangeSubtags[rangeIndex]
                           .equals(tagSubtags[tagIndex])) {
                rangeIndex++;
                tagIndex++;
            } else if (tagSubtags[tagIndex].length() == 1
                       && !tagSubtags[tagIndex].equals("*")) {
                break;
            } else {
                tagIndex++;
            }
        }
        return rangeIndex;
    }

    /**
     * Looks up the best matching locale for {@code priorityList}.
     *
     * <p>Every locale is converted to its language tag and the lookup is
     * delegated to {@link #lookupTag}.
     *
     * @param priorityList the language ranges to match against
     * @param locales      the candidate locales
     * @return the locale for the matching tag, or {@code null} if either
     *         input is empty or nothing matches
     */
    public static Locale lookup(List<LanguageRange> priorityList,
                                Collection<Locale> locales) {
        if (priorityList.isEmpty() || locales.isEmpty()) {
            return null;
        }

        // Create a list of language tags to be matched.
        List<String> tags = new ArrayList<>(locales.size());
        for (Locale locale : locales) {
            tags.add(locale.toLanguageTag());
        }

        // Look up a language tag.
        String lookedUpTag = lookupTag(priorityList, tags);

        if (lookedUpTag == null) {
            return null;
        } else {
            return Locale.forLanguageTag(lookedUpTag);
        }
    }

    /**
     * Looks up the best matching tag for {@code priorityList}.
     *
     * <p>For every non-zero range, in order, the range is turned into a
     * regular expression ({@code "*"} becomes {@code "\p{Alnum}*"}) and
     * matched against each lower-cased tag. If nothing matches, the range is
     * truncated from the end (see {@link #truncateRange}) and tried again
     * until it is empty. A tag that also matches a q=0 range is skipped. The
     * bare {@code "*"} range is ignored.
     *
     * @param priorityList the language ranges to match against
     * @param tags         the candidate language tags
     * @return the first matching tag in lower case, or {@code null} if either
     *         input is empty or nothing matches
     */
    public static String lookupTag(List<LanguageRange> priorityList,
                                   Collection<String> tags) {
        if (priorityList.isEmpty() || tags.isEmpty()) {
            return null;
        }

        int splitIndex = splitRanges(priorityList);
        List<LanguageRange> nonZeroRanges = acceptedRanges(priorityList, splitIndex);
        List<LanguageRange> zeroRanges = excludedRanges(priorityList, splitIndex);

        for (LanguageRange lr : nonZeroRanges) {
            String range = lr.getRange();

            // Special language range ("*") is ignored in lookup.
            if (range.equals("*")) {
                continue;
            }

            String rangeForRegex = range.replace("*", "\\p{Alnum}*");
            while (rangeForRegex.length() > 0) {
                // Compile once per truncation step rather than once per tag
                // (String.matches recompiles the expression on every call).
                Pattern pattern = Pattern.compile(rangeForRegex);
                for (String tag : tags) {
                    tag = tag.toLowerCase(Locale.ROOT);
                    if (pattern.matcher(tag).matches()
                        && !shouldIgnoreLookupMatch(zeroRanges, tag)) {
                        return tag;
                    }
                }

                // Truncate from the end....
                rangeForRegex = truncateRange(rangeForRegex);
            }
        }

        return null;
    }

    /**
     * The tag which is falling in the exclusion range(s) should not be
     * considered as the matching tag. Ignores the tag matching with the
     * non zero range(s), if the tag also matches with one of the exclusion
     * range(s) i.e. range(s) having quality weight q=0.
     *
     * <p>Each q=0 range is tried in full and then in progressively truncated
     * form, mirroring {@link #lookupTag}.
     */
    private static boolean shouldIgnoreLookupMatch(List<LanguageRange> zeroRange,
                                                   String tag) {
        for (LanguageRange lr : zeroRange) {
            String range = lr.getRange();

            // Special language range ("*") is ignored in lookup.
            if (range.equals("*")) {
                continue;
            }

            String rangeForRegex = range.replace("*", "\\p{Alnum}*");
            while (rangeForRegex.length() > 0) {
                if (tag.matches(rangeForRegex)) {
                    return true;
                }
                // Truncate from the end....
                rangeForRegex = truncateRange(rangeForRegex);
            }
        }

        return false;
    }

    /**
     * Truncate the range from end during the lookup match.
     *
     * <p>Drops everything from the last {@code '-'}; if what remains ends in
     * a single-character subtag (an extension key), that subtag and its
     * hyphen are dropped as well. A range without any {@code '-'} becomes the
     * empty string.
     */
    private static String truncateRange(String rangeForRegex) {
        int index = rangeForRegex.lastIndexOf('-');
        if (index >= 0) {
            rangeForRegex = rangeForRegex.substring(0, index);

            // if range ends with an extension key, truncate it.
            index = rangeForRegex.lastIndexOf('-');
            if (index >= 0 && index == rangeForRegex.length() - 2) {
                rangeForRegex
                    = rangeForRegex.substring(0, rangeForRegex.length() - 2);
            }
        } else {
            rangeForRegex = "";
        }

        return rangeForRegex;
    }

    /**
     * Returns the split index of the priority list, if it contains
     * language range(s) with quality weight as 0 i.e. q=0, else -1
     */
    private static int splitRanges(List<LanguageRange> priorityList) {
        int size = priorityList.size();
        for (int index = 0; index < size; index++) {
            LanguageRange range = priorityList.get(index);
            if (range.getWeight() == 0) {
                return index;
            }
        }

        return -1; // no q=0 range exists
    }

    /**
     * Returns the ranges before {@code splitIndex}, or the whole list when
     * {@code splitIndex} is -1 (no q=0 range).
     */
    private static List<LanguageRange> acceptedRanges(
            List<LanguageRange> priorityList, int splitIndex) {
        if (splitIndex == -1) {
            return priorityList;
        }
        return priorityList.subList(0, splitIndex);
    }

    /**
     * Returns the ranges from {@code splitIndex} to the end, or an empty list
     * when {@code splitIndex} is -1 (no q=0 range).
     */
    private static List<LanguageRange> excludedRanges(
            List<LanguageRange> priorityList, int splitIndex) {
        if (splitIndex == -1) {
            return List.<LanguageRange>of();
        }
        return priorityList.subList(splitIndex, priorityList.size());
    }

    /**
     * Parses a comma-separated list of language ranges, each optionally
     * followed by {@code ";q=<weight>"}.
     *
     * <p>Spaces are removed, the input is lower-cased and a leading
     * {@code "accept-language:"} is stripped. A range without a weight gets
     * {@code MAX_WEIGHT}. Ranges are inserted in front of the first entry
     * with a smaller weight; a range that was already seen is ignored. For
     * each accepted range, equivalents obtained from
     * {@code LocaleEquivalentMaps} are inserted right after it with the same
     * weight.
     *
     * @param ranges the text to parse
     * @return a new mutable list of language ranges
     * @throws IllegalArgumentException if a weight cannot be parsed or is
     *         outside {@code MIN_WEIGHT}..{@code MAX_WEIGHT}; the
     *         {@code LanguageRange} constructor may also reject a range
     */
    public static List<LanguageRange> parse(String ranges) {
        ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
        if (ranges.startsWith("accept-language:")) {
            ranges = ranges.substring(16); // delete unnecessary prefix
        }

        String[] langRanges = ranges.split(",");
        List<LanguageRange> list = new ArrayList<>(langRanges.length);
        // Only used for membership checks, so a hash set is sufficient.
        Set<String> seenRanges = new HashSet<>();
        int numOfRanges = 0;

        for (String range : langRanges) {
            int index;
            String r;
            double w;

            if ((index = range.indexOf(";q=")) == -1) {
                r = range;
                w = MAX_WEIGHT;
            } else {
                r = range.substring(0, index);
                index += 3;
                try {
                    w = Double.parseDouble(range.substring(index));
                } catch (NumberFormatException e) {
                    // Keep the cause so the original parse failure is not lost.
                    throw new IllegalArgumentException("weight=\""
                                  + range.substring(index)
                                  + "\" for language range \"" + r + "\"", e);
                }

                if (w < MIN_WEIGHT || w > MAX_WEIGHT) {
                    throw new IllegalArgumentException("weight=" + w
                                  + " for language range \"" + r
                                  + "\". It must be between " + MIN_WEIGHT
                                  + " and " + MAX_WEIGHT + ".");
                }
            }

            if (!seenRanges.contains(r)) {
                LanguageRange lr = new LanguageRange(r, w);

                // Insert in front of the first entry with a smaller weight,
                // or at the end if there is none.
                index = numOfRanges;
                for (int j = 0; j < numOfRanges; j++) {
                    if (list.get(j).getWeight() < w) {
                        index = j;
                        break;
                    }
                }
                list.add(index, lr);
                numOfRanges++;
                seenRanges.add(r);

                // Check if the range has an equivalent using IANA LSR data.
                // If yes, add it to the User's Language Priority List as well.

                // aa-XX -> aa-YY
                String equivalent;
                if ((equivalent = getEquivalentForRegionAndVariant(r)) != null
                    && !seenRanges.contains(equivalent)) {
                    list.add(index + 1, new LanguageRange(equivalent, w));
                    numOfRanges++;
                    seenRanges.add(equivalent);
                }

                String[] equivalents;
                if ((equivalents = getEquivalentsForLanguage(r)) != null) {
                    for (String equiv : equivalents) {
                        // aa-XX -> bb-XX(, cc-XX)
                        if (!seenRanges.contains(equiv)) {
                            list.add(index + 1, new LanguageRange(equiv, w));
                            numOfRanges++;
                            seenRanges.add(equiv);
                        }

                        // bb-XX -> bb-YY(, cc-YY)
                        equivalent = getEquivalentForRegionAndVariant(equiv);
                        if (equivalent != null
                            && !seenRanges.contains(equivalent)) {
                            list.add(index + 1, new LanguageRange(equivalent, w));
                            numOfRanges++;
                            seenRanges.add(equivalent);
                        }
                    }
                }
            }
        }

        return list;
    }

    /**
     * A faster alternative approach to String.replaceFirst(), if the given
     * string is a literal String, not a regex.
     *
     * @return {@code range} with the first occurrence of {@code substr}
     *         replaced by {@code replacement}, or {@code range} itself if
     *         {@code substr} does not occur
     */
    private static String replaceFirstSubStringMatch(String range,
            String substr, String replacement) {
        int pos = range.indexOf(substr);
        if (pos == -1) {
            return range;
        } else {
            return range.substring(0, pos) + replacement
                    + range.substring(pos + substr.length());
        }
    }

    /**
     * Looks for the longest leading portion of {@code range} (cut at
     * {@code '-'} boundaries) that is a key of
     * {@code LocaleEquivalentMaps.singleEquivMap} or
     * {@code LocaleEquivalentMaps.multiEquivsMap}, and returns {@code range}
     * with that portion replaced by each of its equivalents.
     *
     * @return the equivalent ranges, or {@code null} if no portion has an
     *         equivalent
     */
    private static String[] getEquivalentsForLanguage(String range) {
        String r = range;

        while (r.length() > 0) {
            if (LocaleEquivalentMaps.singleEquivMap.containsKey(r)) {
                String equiv = LocaleEquivalentMaps.singleEquivMap.get(r);
                // Return immediately for performance if the first matching
                // subtag is found.
                return new String[]{replaceFirstSubStringMatch(range,
                    r, equiv)};
            } else if (LocaleEquivalentMaps.multiEquivsMap.containsKey(r)) {
                String[] equivs = LocaleEquivalentMaps.multiEquivsMap.get(r);
                String[] result = new String[equivs.length];
                for (int i = 0; i < equivs.length; i++) {
                    result[i] = replaceFirstSubStringMatch(range,
                        r, equivs[i]);
                }
                return result;
            }

            // Truncate the last subtag simply.
            int index = r.lastIndexOf('-');
            if (index == -1) {
                break;
            }
            r = r.substring(0, index);
        }

        return null;
    }

    /**
     * Searches {@code range} for a key of
     * {@code LocaleEquivalentMaps.regionVariantEquivMap} that ends at a
     * subtag boundary (end of string or {@code '-'}) and does not start after
     * the extension key position, and returns {@code range} with the first
     * occurrence of that key replaced by its mapped value.
     *
     * @return the equivalent range, or {@code null} if no key qualifies
     */
    private static String getEquivalentForRegionAndVariant(String range) {
        int extensionKeyIndex = getExtentionKeyIndex(range);

        for (String subtag : LocaleEquivalentMaps.regionVariantEquivMap.keySet()) {
            int index;
            if ((index = range.indexOf(subtag)) != -1) {
                // Check if the matching text is a valid region or variant.
                if (extensionKeyIndex != Integer.MIN_VALUE
                    && index > extensionKeyIndex) {
                    continue;
                }

                int len = index + subtag.length();
                if (range.length() == len || range.charAt(len) == '-') {
                    return replaceFirstSubStringMatch(range, subtag,
                                                      LocaleEquivalentMaps.regionVariantEquivMap
                                                          .get(subtag));
                }
            }
        }

        return null;
    }

    /**
     * Returns the index of the {@code '-'} that precedes the first
     * single-character subtag which is itself followed by another
     * {@code '-'}, or {@code Integer.MIN_VALUE} if there is none. The scan
     * starts at index 1.
     */
    private static int getExtentionKeyIndex(String s) {
        char[] c = s.toCharArray();
        int index = Integer.MIN_VALUE;
        for (int i = 1; i < c.length; i++) {
            if (c[i] == '-') {
                // Two hyphens exactly two positions apart enclose a
                // single-character subtag.
                if (i - index == 2) {
                    return index;
                } else {
                    index = i;
                }
            }
        }
        return Integer.MIN_VALUE;
    }

    /**
     * Expands {@code priorityList} using a caller-supplied equivalence map.
     *
     * <p>Map keys are compared in lower case. For each range the longest
     * leading portion (cut at {@code '-'} boundaries) that is a key is
     * replaced by each of its lower-cased equivalents, keeping the rest of
     * the range and its weight. A range with no matching key is copied
     * unchanged; a range whose key maps to {@code null} contributes nothing.
     *
     * @param priorityList the language ranges to expand
     * @param map          the equivalence map; may be {@code null} or empty
     * @return a new mutable list
     */
    public static List<LanguageRange> mapEquivalents(
                                          List<LanguageRange> priorityList,
                                          Map<String, List<String>> map) {
        if (priorityList.isEmpty()) {
            return new ArrayList<>(); // need to return an empty mutable List
        }
        if (map == null || map.isEmpty()) {
            return new ArrayList<LanguageRange>(priorityList);
        }

        // Create a map, key=originalKey.toLowerCase(), value=originalKey
        Map<String, String> keyMap = new HashMap<>();
        for (String key : map.keySet()) {
            keyMap.put(key.toLowerCase(Locale.ROOT), key);
        }

        List<LanguageRange> list = new ArrayList<>();
        for (LanguageRange lr : priorityList) {
            String range = lr.getRange();
            String r = range;
            boolean hasEquivalent = false;

            while (r.length() > 0) {
                if (keyMap.containsKey(r)) {
                    hasEquivalent = true;
                    List<String> equivalents = map.get(keyMap.get(r));
                    if (equivalents != null) {
                        int len = r.length();
                        for (String equivalent : equivalents) {
                            list.add(new LanguageRange(equivalent.toLowerCase(Locale.ROOT)
                                     + range.substring(len),
                                     lr.getWeight()));
                        }
                    }
                    // Return immediately if the first matching subtag is found.
                    break;
                }

                // Truncate the last subtag simply.
                int index = r.lastIndexOf('-');
                if (index == -1) {
                    break;
                }
                r = r.substring(0, index);
            }

            if (!hasEquivalent) {
                list.add(lr);
            }
        }

        return list;
    }

    private LocaleMatcher() {}

}