/*
 * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.util.locale;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Locale.*;
import static java.util.Locale.FilteringMode.*;
import static java.util.Locale.LanguageRange.*;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Implementation for BCP47 Locale matching.
 *
 * <p>All methods are static. Every priority list handled here is split at the
 * first range whose weight is 0 (see {@link #splitRanges}): ranges before that
 * index are used for matching, ranges from that index onward are used as
 * exclusion ranges.
 */
public final class LocaleMatcher {

    /**
     * Filters {@code locales} against {@code priorityList}.
     *
     * <p>Each locale is converted to its language tag, the tags are filtered
     * with {@link #filterTags}, and the surviving tags are converted back
     * with {@link Locale#forLanguageTag}.
     *
     * @param priorityList the language priority list
     * @param locales the candidate locales
     * @param mode the filtering mode, passed through to {@code filterTags}
     * @return a new mutable list of matching locales; empty if either
     *         argument is empty
     * @throws IllegalArgumentException if {@code mode} is
     *         {@code REJECT_EXTENDED_RANGES} and an extended range is found
     */
    public static List<Locale> filter(List<LanguageRange> priorityList,
                                      Collection<Locale> locales,
                                      FilteringMode mode) {
        if (priorityList.isEmpty() || locales.isEmpty()) {
            return new ArrayList<>(); // need to return an empty mutable List
        }

        // Create a list of language tags to be matched.
        List<String> tags = new ArrayList<>(locales.size());
        for (Locale locale : locales) {
            tags.add(locale.toLanguageTag());
        }

        // Filter language tags.
        List<String> filteredTags = filterTags(priorityList, tags, mode);

        // Create a list of matching locales.
        List<Locale> filteredLocales = new ArrayList<>(filteredTags.size());
        for (String tag : filteredTags) {
            filteredLocales.add(Locale.forLanguageTag(tag));
        }

        return filteredLocales;
    }

    /**
     * Filters {@code tags} against {@code priorityList}.
     *
     * <p>{@code EXTENDED_FILTERING} always uses extended filtering. For the
     * other modes a range is treated as extended when it starts with
     * {@code "*-"} or contains {@code "-*"}; such a range is handled as
     * follows:
     * <ul>
     * <li>{@code AUTOSELECT_FILTERING}: the whole original priority list is
     *     handed to extended filtering;</li>
     * <li>{@code MAP_EXTENDED_RANGES}: the range is mapped to a basic range
     *     ({@code "*"} if it starts with {@code '*'}, otherwise every
     *     {@code "-*"} is removed);</li>
     * <li>{@code REJECT_EXTENDED_RANGES}: an exception is thrown;</li>
     * <li>any other mode: the range is not added to the basic list.</li>
     * </ul>
     *
     * @param priorityList the language priority list
     * @param tags the candidate language tags
     * @param mode the filtering mode
     * @return a new mutable list of matching tags, in their original case;
     *         empty if either argument is empty
     * @throws IllegalArgumentException if {@code mode} is
     *         {@code REJECT_EXTENDED_RANGES} and an extended range is found
     */
    public static List<String> filterTags(List<LanguageRange> priorityList,
                                          Collection<String> tags,
                                          FilteringMode mode) {
        if (priorityList.isEmpty() || tags.isEmpty()) {
            return new ArrayList<>(); // need to return an empty mutable List
        }

        if (mode == EXTENDED_FILTERING) {
            return filterExtended(priorityList, tags);
        }

        List<LanguageRange> list = new ArrayList<>();
        for (LanguageRange lr : priorityList) {
            String range = lr.getRange();
            if (range.startsWith("*-") || range.contains("-*")) { // Extended range
                if (mode == AUTOSELECT_FILTERING) {
                    return filterExtended(priorityList, tags);
                } else if (mode == MAP_EXTENDED_RANGES) {
                    if (range.charAt(0) == '*') {
                        range = "*";
                    } else {
                        // literal replacement; no regex needed
                        range = range.replace("-*", "");
                    }
                    list.add(new LanguageRange(range, lr.getWeight()));
                } else if (mode == REJECT_EXTENDED_RANGES) {
                    throw new IllegalArgumentException("An extended range \""
                                  + range
                                  + "\" found in REJECT_EXTENDED_RANGES mode.");
                }
            } else { // Basic range
                list.add(lr);
            }
        }

        return filterBasic(list, tags);
    }

    /**
     * Basic filtering. A tag matches a range when, compared in lower case,
     * it equals the range or starts with the range followed by {@code '-'}.
     * Matches are collected range by range, skipping tags already collected
     * (ignoring case) and tags that fall in a q=0 range. A non-zero
     * {@code "*"} range returns immediately with all tags except those
     * excluded by the q=0 ranges, discarding anything collected so far.
     */
    private static List<String> filterBasic(List<LanguageRange> priorityList,
                                            Collection<String> tags) {
        int splitIndex = splitRanges(priorityList);
        List<LanguageRange> nonZeroRanges;
        List<LanguageRange> zeroRanges;
        if (splitIndex != -1) {
            nonZeroRanges = priorityList.subList(0, splitIndex);
            zeroRanges = priorityList.subList(splitIndex, priorityList.size());
        } else {
            nonZeroRanges = priorityList;
            zeroRanges = List.of();
        }

        List<String> list = new ArrayList<>();
        for (LanguageRange lr : nonZeroRanges) {
            String range = lr.getRange();
            if (range.equals("*")) {
                tags = removeTagsMatchingBasicZeroRange(zeroRanges, tags);
                return new ArrayList<String>(tags);
            } else {
                for (String tag : tags) {
                    // change to lowercase for case-insensitive matching
                    String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
                    if (lowerCaseTag.startsWith(range)) {
                        int len = range.length();
                        if ((lowerCaseTag.length() == len
                                || lowerCaseTag.charAt(len) == '-')
                                && !caseInsensitiveMatch(list, lowerCaseTag)
                                && !shouldIgnoreFilterBasicMatch(zeroRanges,
                                        lowerCaseTag)) {
                            // preserving the case of the input tag
                            list.add(tag);
                        }
                    }
                }
            }
        }

        return list;
    }

    /**
     * Removes the tag(s) which are falling in the basic exclusion range(s) i.e
     * range(s) with q=0 and returns the updated collection. If the basic
     * language ranges contains '*' as one of its non zero range then instead of
     * returning all the tags, remove those which are matching the range with
     * quality weight q=0.
     */
    private static Collection<String> removeTagsMatchingBasicZeroRange(
            List<LanguageRange> zeroRange, Collection<String> tags) {
        if (zeroRange.isEmpty()) {
            tags = removeDuplicates(tags);
            return tags;
        }

        List<String> matchingTags = new ArrayList<>();
        for (String tag : tags) {
            // change to lowercase for case-insensitive matching
            String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
            if (!shouldIgnoreFilterBasicMatch(zeroRange, lowerCaseTag)
                    && !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {
                matchingTags.add(tag); // preserving the case of the input tag
            }
        }

        return matchingTags;
    }

    /**
     * Remove duplicate tags from the given {@code tags} by
     * ignoring case considerations. The first occurrence of each tag is
     * kept, in encounter order.
     */
    private static Collection<String> removeDuplicates(
            Collection<String> tags) {
        Set<String> distinctTags = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
        // Set.add returns false for a tag already seen (ignoring case)
        return tags.stream().filter(x -> distinctTags.add(x))
                .collect(Collectors.toList());
    }

    /**
     * Returns true if the given {@code list} contains an element which matches
     * with the given {@code tag} ignoring case considerations.
     */
    private static boolean caseInsensitiveMatch(List<String> list, String tag) {
        return list.stream().anyMatch((element)
                -> (element.equalsIgnoreCase(tag)));
    }

    /**
     * The tag which is falling in the basic exclusion range(s) should not
     * be considered as the matching tag. Ignores the tag matching with the
     * non-zero ranges, if the tag also matches with one of the basic exclusion
     * ranges i.e. range(s) having quality weight q=0
     *
     * @param zeroRange the q=0 ranges
     * @param tag the candidate tag, already converted to lower case
     */
    private static boolean shouldIgnoreFilterBasicMatch(
            List<LanguageRange> zeroRange, String tag) {
        if (zeroRange.isEmpty()) {
            return false;
        }

        for (LanguageRange lr : zeroRange) {
            String range = lr.getRange();
            if (range.equals("*")) {
                return true;
            }
            if (tag.startsWith(range)) {
                int len = range.length();
                if ((tag.length() == len || tag.charAt(len) == '-')) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Extended filtering. Range and tag are split on {@code '-'}; the first
     * subtags must be equal unless the range's first subtag is {@code "*"},
     * and the remaining subtags are compared by
     * {@link #matchFilterExtendedSubtags}. Matches are collected range by
     * range, skipping tags already collected (ignoring case) and tags that
     * fall in a q=0 range. A non-zero {@code "*"} range returns immediately
     * with all tags except those excluded by the q=0 ranges.
     */
    private static List<String> filterExtended(List<LanguageRange> priorityList,
                                               Collection<String> tags) {
        int splitIndex = splitRanges(priorityList);
        List<LanguageRange> nonZeroRanges;
        List<LanguageRange> zeroRanges;
        if (splitIndex != -1) {
            nonZeroRanges = priorityList.subList(0, splitIndex);
            zeroRanges = priorityList.subList(splitIndex, priorityList.size());
        } else {
            nonZeroRanges = priorityList;
            zeroRanges = List.of();
        }

        List<String> list = new ArrayList<>();
        for (LanguageRange lr : nonZeroRanges) {
            String range = lr.getRange();
            if (range.equals("*")) {
                tags = removeTagsMatchingExtendedZeroRange(zeroRanges, tags);
                return new ArrayList<String>(tags);
            }
            String[] rangeSubtags = range.split("-");
            for (String tag : tags) {
                // change to lowercase for case-insensitive matching
                String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
                String[] tagSubtags = lowerCaseTag.split("-");
                if (!rangeSubtags[0].equals(tagSubtags[0])
                    && !rangeSubtags[0].equals("*")) {
                    continue;
                }

                int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
                                                            tagSubtags);
                if (rangeSubtags.length == rangeIndex
                        && !caseInsensitiveMatch(list, lowerCaseTag)
                        && !shouldIgnoreFilterExtendedMatch(zeroRanges,
                                lowerCaseTag)) {
                    list.add(tag); // preserve the case of the input tag
                }
            }
        }

        return list;
    }

    /**
     * Removes the tag(s) which are falling in the extended exclusion range(s)
     * i.e range(s) with q=0 and returns the updated collection. If the extended
     * language ranges contains '*' as one of its non zero range then instead of
     * returning all the tags, remove those which are matching the range with
     * quality weight q=0.
     */
    private static Collection<String> removeTagsMatchingExtendedZeroRange(
            List<LanguageRange> zeroRange, Collection<String> tags) {
        if (zeroRange.isEmpty()) {
            tags = removeDuplicates(tags);
            return tags;
        }

        List<String> matchingTags = new ArrayList<>();
        for (String tag : tags) {
            // change to lowercase for case-insensitive matching
            String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
            if (!shouldIgnoreFilterExtendedMatch(zeroRange, lowerCaseTag)
                    && !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {
                matchingTags.add(tag); // preserve the case of the input tag
            }
        }

        return matchingTags;
    }

    /**
     * The tag which is falling in the extended exclusion range(s) should
     * not be considered as the matching tag. Ignores the tag matching with the
     * non zero range(s), if the tag also matches with one of the extended
     * exclusion range(s) i.e. range(s) having quality weight q=0
     *
     * @param zeroRange the q=0 ranges
     * @param tag the candidate tag, already converted to lower case
     */
    private static boolean shouldIgnoreFilterExtendedMatch(
            List<LanguageRange> zeroRange, String tag) {
        if (zeroRange.isEmpty()) {
            return false;
        }

        String[] tagSubtags = tag.split("-");
        for (LanguageRange lr : zeroRange) {
            String range = lr.getRange();
            if (range.equals("*")) {
                return true;
            }

            String[] rangeSubtags = range.split("-");

            if (!rangeSubtags[0].equals(tagSubtags[0])
                    && !rangeSubtags[0].equals("*")) {
                continue;
            }

            int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
                    tagSubtags);
            if (rangeSubtags.length == rangeIndex) {
                return true;
            }
        }

        return false;
    }

    /**
     * Compares the subtags of a range and a tag starting from index 1 (the
     * first subtags are compared by the caller).
     *
     * <p>A {@code "*"} range subtag is skipped; equal subtags advance both
     * sides; a single-character tag subtag that does not match stops the
     * comparison; any other non-matching tag subtag is skipped.
     *
     * @return the index of the first range subtag that was not matched;
     *         equal to {@code rangeSubtags.length} when every range subtag
     *         was matched
     */
    private static int matchFilterExtendedSubtags(String[] rangeSubtags,
                                                  String[] tagSubtags) {
        int rangeIndex = 1;
        int tagIndex = 1;

        while (rangeIndex < rangeSubtags.length
                && tagIndex < tagSubtags.length) {
            if (rangeSubtags[rangeIndex].equals("*")) {
                rangeIndex++;
            } else if (rangeSubtags[rangeIndex]
                    .equals(tagSubtags[tagIndex])) {
                rangeIndex++;
                tagIndex++;
            } else if (tagSubtags[tagIndex].length() == 1
                    && !tagSubtags[tagIndex].equals("*")) {
                break;
            } else {
                tagIndex++;
            }
        }
        return rangeIndex;
    }

    /**
     * Looks up the best matching locale for {@code priorityList}.
     *
     * <p>Each locale is converted to its language tag and the lookup is
     * delegated to {@link #lookupTag}.
     *
     * @param priorityList the language priority list
     * @param locales the candidate locales
     * @return the locale built from the matching tag with
     *         {@link Locale#forLanguageTag}, or {@code null} if either
     *         argument is empty or nothing matches
     */
    public static Locale lookup(List<LanguageRange> priorityList,
                                Collection<Locale> locales) {
        if (priorityList.isEmpty() || locales.isEmpty()) {
            return null;
        }

        // Create a list of language tags to be matched.
        List<String> tags = new ArrayList<>(locales.size());
        for (Locale locale : locales) {
            tags.add(locale.toLanguageTag());
        }

        // Look up a language tag.
        String lookedUpTag = lookupTag(priorityList, tags);

        if (lookedUpTag == null) {
            return null;
        } else {
            return Locale.forLanguageTag(lookedUpTag);
        }
    }

    /**
     * Looks up the best matching tag for {@code priorityList}.
     *
     * <p>For each non-zero range, in list order, the range is turned into a
     * regular expression ({@code "*"} becomes {@code "\p{Alnum}*"}) and tried
     * against every tag in lower case. If no tag matches, the range is
     * truncated from the end (see {@link #truncateRange}) and tried again
     * until it becomes empty. A tag that also falls in a q=0 range is
     * skipped. The range {@code "*"} on its own is ignored.
     *
     * @param priorityList the language priority list
     * @param tags the candidate language tags
     * @return the first matching tag in its original case, or {@code null}
     *         if either argument is empty or nothing matches
     */
    public static String lookupTag(List<LanguageRange> priorityList,
                                   Collection<String> tags) {
        if (priorityList.isEmpty() || tags.isEmpty()) {
            return null;
        }

        int splitIndex = splitRanges(priorityList);
        List<LanguageRange> nonZeroRanges;
        List<LanguageRange> zeroRanges;
        if (splitIndex != -1) {
            nonZeroRanges = priorityList.subList(0, splitIndex);
            zeroRanges = priorityList.subList(splitIndex, priorityList.size());
        } else {
            nonZeroRanges = priorityList;
            zeroRanges = List.of();
        }

        for (LanguageRange lr : nonZeroRanges) {
            String range = lr.getRange();

            // Special language range ("*") is ignored in lookup.
            if (range.equals("*")) {
                continue;
            }

            String rangeForRegex = range.replace("*", "\\p{Alnum}*");
            while (!rangeForRegex.isEmpty()) {
                // Compile once per truncation step instead of once per tag;
                // matcher(..).matches() is what String.matches does anyway.
                Pattern pattern = Pattern.compile(rangeForRegex);
                for (String tag : tags) {
                    // change to lowercase for case-insensitive matching
                    String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
                    if (pattern.matcher(lowerCaseTag).matches()
                            && !shouldIgnoreLookupMatch(zeroRanges, lowerCaseTag)) {
                        return tag; // preserve the case of the input tag
                    }
                }

                // Truncate from the end....
                rangeForRegex = truncateRange(rangeForRegex);
            }
        }

        return null;
    }

    /**
     * The tag which is falling in the exclusion range(s) should not be
     * considered as the matching tag. Ignores the tag matching with the
     * non zero range(s), if the tag also matches with one of the exclusion
     * range(s) i.e. range(s) having quality weight q=0.
     *
     * <p>Each q=0 range is tried in full and then in progressively truncated
     * form, the same way non-zero ranges are tried in {@code lookupTag}.
     *
     * @param zeroRange the q=0 ranges
     * @param tag the candidate tag, already converted to lower case
     */
    private static boolean shouldIgnoreLookupMatch(List<LanguageRange> zeroRange,
            String tag) {
        for (LanguageRange lr : zeroRange) {
            String range = lr.getRange();

            // Special language range ("*") is ignored in lookup.
            if (range.equals("*")) {
                continue;
            }

            String rangeForRegex = range.replace("*", "\\p{Alnum}*");
            while (!rangeForRegex.isEmpty()) {
                if (tag.matches(rangeForRegex)) {
                    return true;
                }
                // Truncate from the end....
                rangeForRegex = truncateRange(rangeForRegex);
            }
        }

        return false;
    }

    /**
     * Truncate the range from end during the lookup match.
     *
     * <p>Drops everything from the last {@code '-'} onward. If what remains
     * ends with {@code '-'} followed by a single character, those two
     * characters are dropped as well. A range without {@code '-'} becomes
     * the empty string.
     */
    private static String truncateRange(String rangeForRegex) {
        int index = rangeForRegex.lastIndexOf('-');
        if (index >= 0) {
            rangeForRegex = rangeForRegex.substring(0, index);

            // if range ends with an extension key, truncate it.
            index = rangeForRegex.lastIndexOf('-');
            if (index >= 0 && index == rangeForRegex.length() - 2) {
                rangeForRegex
                        = rangeForRegex.substring(0, rangeForRegex.length() - 2);
            }
        } else {
            rangeForRegex = "";
        }

        return rangeForRegex;
    }

    /**
     * Returns the split index of the priority list, if it contains
     * language range(s) with quality weight as 0 i.e. q=0, else -1.
     * The split index is the index of the first range whose weight is 0.
     */
    private static int splitRanges(List<LanguageRange> priorityList) {
        int size = priorityList.size();
        for (int index = 0; index < size; index++) {
            LanguageRange range = priorityList.get(index);
            if (range.getWeight() == 0) {
                return index;
            }
        }

        return -1; // no q=0 range exists
    }

    /**
     * Parses a comma-separated list of language ranges, each optionally
     * followed by {@code ";q=weight"}, into a list ordered by descending
     * weight.
     *
     * <p>Spaces are removed and the input is lower-cased first; a leading
     * {@code "accept-language:"} is dropped. A range without a weight gets
     * {@code MAX_WEIGHT}. Repeated ranges are ignored after their first
     * occurrence. Ranges of equal weight keep their input order. For every
     * accepted range, its equivalents from {@code LocaleEquivalentMaps} are
     * inserted right after it with the same weight, unless already present.
     *
     * @param ranges the text to parse
     * @return a new mutable list of language ranges
     * @throws IllegalArgumentException if a weight is not a parsable number
     *         (the {@link NumberFormatException} is attached as the cause)
     *         or is outside {@code MIN_WEIGHT}..{@code MAX_WEIGHT}
     */
    public static List<LanguageRange> parse(String ranges) {
        ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
        if (ranges.startsWith("accept-language:")) {
            ranges = ranges.substring(16); // delete unnecessary prefix
        }

        String[] langRanges = ranges.split(",");
        List<LanguageRange> list = new ArrayList<>(langRanges.length);
        // Ranges already placed in the list; a hash set keeps the
        // duplicate check constant-time.
        Set<String> seenRanges = new HashSet<>();
        int numOfRanges = 0;

        for (String range : langRanges) {
            int index;
            String r;
            double w;

            if ((index = range.indexOf(";q=")) == -1) {
                r = range;
                w = MAX_WEIGHT;
            } else {
                r = range.substring(0, index);
                index += 3;
                try {
                    w = Double.parseDouble(range.substring(index));
                }
                catch (NumberFormatException e) {
                    // keep the original failure as the cause
                    throw new IllegalArgumentException("weight=\""
                                  + range.substring(index)
                                  + "\" for language range \"" + r + "\"", e);
                }

                if (w < MIN_WEIGHT || w > MAX_WEIGHT) {
                    throw new IllegalArgumentException("weight=" + w
                                  + " for language range \"" + r
                                  + "\". It must be between " + MIN_WEIGHT
                                  + " and " + MAX_WEIGHT + ".");
                }
            }

            if (!seenRanges.contains(r)) {
                LanguageRange lr = new LanguageRange(r, w);
                // Insert before the first entry with a smaller weight so the
                // list stays sorted by descending weight.
                index = numOfRanges;
                for (int j = 0; j < numOfRanges; j++) {
                    if (list.get(j).getWeight() < w) {
                        index = j;
                        break;
                    }
                }
                list.add(index, lr);
                numOfRanges++;
                seenRanges.add(r);

                // Check if the range has an equivalent using IANA LSR data.
                // If yes, add it to the User's Language Priority List as well.

                // aa-XX -> aa-YY
                String equivalent;
                if ((equivalent = getEquivalentForRegionAndVariant(r)) != null
                    && !seenRanges.contains(equivalent)) {
                    list.add(index+1, new LanguageRange(equivalent, w));
                    numOfRanges++;
                    seenRanges.add(equivalent);
                }

                String[] equivalents;
                if ((equivalents = getEquivalentsForLanguage(r)) != null) {
                    for (String equiv: equivalents) {
                        // aa-XX -> bb-XX(, cc-XX)
                        if (!seenRanges.contains(equiv)) {
                            list.add(index+1, new LanguageRange(equiv, w));
                            numOfRanges++;
                            seenRanges.add(equiv);
                        }

                        // bb-XX -> bb-YY(, cc-YY)
                        equivalent = getEquivalentForRegionAndVariant(equiv);
                        if (equivalent != null
                            && !seenRanges.contains(equivalent)) {
                            list.add(index+1, new LanguageRange(equivalent, w));
                            numOfRanges++;
                            seenRanges.add(equivalent);
                        }
                    }
                }
            }
        }

        return list;
    }

    /**
     * A faster alternative approach to String.replaceFirst(), if the given
     * string is a literal String, not a regex.
     *
     * @return {@code range} with the first occurrence of {@code substr}
     *         replaced by {@code replacement}, or {@code range} itself if
     *         {@code substr} does not occur
     */
    private static String replaceFirstSubStringMatch(String range,
            String substr, String replacement) {
        int pos = range.indexOf(substr);
        if (pos == -1) {
            return range;
        } else {
            return range.substring(0, pos) + replacement
                    + range.substring(pos + substr.length());
        }
    }

    /**
     * Returns the ranges obtained by replacing the longest prefix of
     * {@code range} that has an entry in
     * {@code LocaleEquivalentMaps.singleEquivMap} or
     * {@code LocaleEquivalentMaps.multiEquivsMap} with its equivalent(s).
     * The prefix is shortened one trailing subtag at a time until an entry
     * is found.
     *
     * @return the equivalent range(s), or {@code null} if no prefix has an
     *         entry
     */
    private static String[] getEquivalentsForLanguage(String range) {
        String r = range;

        while (!r.isEmpty()) {
            if (LocaleEquivalentMaps.singleEquivMap.containsKey(r)) {
                String equiv = LocaleEquivalentMaps.singleEquivMap.get(r);
                // Return immediately for performance if the first matching
                // subtag is found.
                return new String[]{replaceFirstSubStringMatch(range,
                    r, equiv)};
            } else if (LocaleEquivalentMaps.multiEquivsMap.containsKey(r)) {
                String[] equivs = LocaleEquivalentMaps.multiEquivsMap.get(r);
                String[] result = new String[equivs.length];
                for (int i = 0; i < equivs.length; i++) {
                    result[i] = replaceFirstSubStringMatch(range,
                        r, equivs[i]);
                }
                return result;
            }

            // Truncate the last subtag simply.
            int index = r.lastIndexOf('-');
            if (index == -1) {
                break;
            }
            r = r.substring(0, index);
        }

        return null;
    }

    /**
     * Returns {@code range} with the first key of
     * {@code LocaleEquivalentMaps.regionVariantEquivMap} found in it
     * replaced by that key's value. An occurrence counts only if it ends at
     * the end of the range or right before a {@code '-'}, and does not start
     * after the extension key position reported by
     * {@link #getExtentionKeyIndex}.
     *
     * @return the equivalent range, or {@code null} if no key qualifies
     */
    private static String getEquivalentForRegionAndVariant(String range) {
        int extensionKeyIndex = getExtentionKeyIndex(range);

        for (String subtag : LocaleEquivalentMaps.regionVariantEquivMap.keySet()) {
            int index;
            if ((index = range.indexOf(subtag)) != -1) {
                // Check if the matching text is a valid region or variant.
                if (extensionKeyIndex != Integer.MIN_VALUE
                        && index > extensionKeyIndex) {
                    continue;
                }

                int len = index + subtag.length();
                if (range.length() == len || range.charAt(len) == '-') {
                    return replaceFirstSubStringMatch(range, subtag,
                            LocaleEquivalentMaps.regionVariantEquivMap
                                    .get(subtag));
                }
            }
        }

        return null;
    }

    /**
     * Returns the index of the first {@code '-'} that is followed by exactly
     * one character and another {@code '-'}, scanning from index 1.
     *
     * @return that index, or {@code Integer.MIN_VALUE} if there is none
     */
    private static int getExtentionKeyIndex(String s) {
        char[] c = s.toCharArray();
        int index = Integer.MIN_VALUE;
        for (int i = 1; i < c.length; i++) {
            if (c[i] == '-') {
                // previous '-' is two positions back: a one-character subtag
                if (i - index == 2) {
                    return index;
                } else {
                    index = i;
                }
            }
        }
        return Integer.MIN_VALUE;
    }

    /**
     * Builds a new priority list in which every range that has a prefix
     * present as a key of {@code map} (compared in lower case) is replaced
     * by one range per mapped equivalent. The prefix is shortened one
     * trailing subtag at a time until a key is found; the rest of the range
     * is appended to each lower-cased equivalent and the weight is kept.
     * A range with no matching key is copied unchanged. A range whose key
     * maps to {@code null} is dropped.
     *
     * @param priorityList the language priority list
     * @param map the equivalents, keyed by language range prefix
     * @return a new mutable list; a copy of {@code priorityList} if
     *         {@code map} is {@code null} or empty
     */
    public static List<LanguageRange> mapEquivalents(
                                          List<LanguageRange>priorityList,
                                          Map<String, List<String>> map) {
        if (priorityList.isEmpty()) {
            return new ArrayList<>(); // need to return an empty mutable List
        }
        if (map == null || map.isEmpty()) {
            return new ArrayList<LanguageRange>(priorityList);
        }

        // Create a map, key=originalKey.toLowerCase(), value=originalKey
        Map<String, String> keyMap = new HashMap<>();
        for (String key : map.keySet()) {
            keyMap.put(key.toLowerCase(Locale.ROOT), key);
        }

        List<LanguageRange> list = new ArrayList<>();
        for (LanguageRange lr : priorityList) {
            String range = lr.getRange();
            String r = range;
            boolean hasEquivalent = false;

            while (!r.isEmpty()) {
                if (keyMap.containsKey(r)) {
                    hasEquivalent = true;
                    List<String> equivalents = map.get(keyMap.get(r));
                    if (equivalents != null) {
                        int len = r.length();
                        for (String equivalent : equivalents) {
                            list.add(new LanguageRange(equivalent.toLowerCase(Locale.ROOT)
                                     + range.substring(len),
                                     lr.getWeight()));
                        }
                    }
                    // Return immediately if the first matching subtag is found.
                    break;
                }

                // Truncate the last subtag simply.
                int index = r.lastIndexOf('-');
                if (index == -1) {
                    break;
                }
                r = r.substring(0, index);
            }

            if (!hasEquivalent) {
                list.add(lr);
            }
        }

        return list;
    }

    private LocaleMatcher() {}

}