1 /*
   2  * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.util.locale;
  27 
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Locale.*;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import static java.util.Locale.FilteringMode.*;
import static java.util.Locale.LanguageRange.*;
  37 
  38 /**
  39  * Implementation for BCP47 Locale matching
  40  *
  41  */
  42 public final class LocaleMatcher {
  43 
  44     public static List<Locale> filter(List<LanguageRange> priorityList,
  45                                       Collection<Locale> locales,
  46                                       FilteringMode mode) {
  47         if (priorityList.isEmpty() || locales.isEmpty()) {
  48             return new ArrayList<>(); // need to return a empty mutable List
  49         }
  50 
  51         // Create a list of language tags to be matched.
  52         List<String> tags = new ArrayList<>();
  53         for (Locale locale : locales) {
  54             tags.add(locale.toLanguageTag());
  55         }
  56 
  57         // Filter language tags.
  58         List<String> filteredTags = filterTags(priorityList, tags, mode);
  59 
  60         // Create a list of matching locales.
  61         List<Locale> filteredLocales = new ArrayList<>(filteredTags.size());
  62         for (String tag : filteredTags) {
  63               filteredLocales.add(Locale.forLanguageTag(tag));
  64         }
  65 
  66         return filteredLocales;
  67     }
  68 
  69     public static List<String> filterTags(List<LanguageRange> priorityList,
  70                                           Collection<String> tags,
  71                                           FilteringMode mode) {
  72         if (priorityList.isEmpty() || tags.isEmpty()) {
  73             return new ArrayList<>(); // need to return a empty mutable List
  74         }
  75 
  76         ArrayList<LanguageRange> list;
  77         if (mode == EXTENDED_FILTERING) {
  78             return filterExtended(priorityList, tags);
  79         } else {
  80             list = new ArrayList<>();
  81             for (LanguageRange lr : priorityList) {
  82                 String range = lr.getRange();
  83                 if (range.startsWith("*-")
  84                     || range.indexOf("-*") != -1) { // Extended range
  85                     if (mode == AUTOSELECT_FILTERING) {
  86                         return filterExtended(priorityList, tags);
  87                     } else if (mode == MAP_EXTENDED_RANGES) {
  88                         if (range.charAt(0) == '*') {
  89                             range = "*";
  90                         } else {
  91                             range = range.replaceAll("-[*]", "");
  92                         }
  93                         list.add(new LanguageRange(range, lr.getWeight()));
  94                     } else if (mode == REJECT_EXTENDED_RANGES) {
  95                         throw new IllegalArgumentException("An extended range \""
  96                                       + range
  97                                       + "\" found in REJECT_EXTENDED_RANGES mode.");
  98                     }
  99                 } else { // Basic range
 100                     list.add(lr);
 101                 }
 102             }
 103 
 104             return filterBasic(list, tags);
 105         }
 106     }
 107 
 108     private static List<String> filterBasic(List<LanguageRange> priorityList,
 109                                             Collection<String> tags) {
 110         int splitIndex = splitRanges(priorityList);
 111         List<LanguageRange> nonZeroRanges;
 112         List<LanguageRange> zeroRanges;
 113         if (splitIndex != -1) {
 114             nonZeroRanges = priorityList.subList(0, splitIndex);
 115             zeroRanges = priorityList.subList(splitIndex, priorityList.size());
 116         } else {
 117             nonZeroRanges = priorityList;
 118             zeroRanges = List.of();
 119         }
 120 
 121         List<String> list = new ArrayList<>();
 122         for (LanguageRange lr : nonZeroRanges) {
 123             String range = lr.getRange();
 124             if (range.equals("*")) {
 125                 tags = removeTagsMatchingBasicZeroRange(zeroRanges, tags);
 126                 return new ArrayList<String>(tags);
 127             } else {
 128                 for (String tag : tags) {
 129                     tag = tag.toLowerCase(Locale.ROOT);
 130                     if (tag.startsWith(range)) {
 131                         int len = range.length();
 132                         if ((tag.length() == len || tag.charAt(len) == '-')
 133                             && !list.contains(tag)
 134                             && !shouldIgnoreFilterBasicMatch(zeroRanges, tag)) {
 135                             list.add(tag);
 136                         }
 137                     }
 138                 }
 139             }
 140         }
 141 
 142         return list;
 143     }
 144 
 145     /**
 146      * Removes the tag(s) which are falling in the basic exclusion range(s) i.e
 147      * range(s) with q=0 and returns the updated collection. If the basic
 148      * language ranges contains '*' as one of its non zero range then instead of
 149      * returning all the tags, remove those which are matching the range with
 150      * quality weight q=0.
 151      */
 152     private static Collection<String> removeTagsMatchingBasicZeroRange(
 153             List<LanguageRange> zeroRange, Collection<String> tags) {
 154         if (zeroRange.isEmpty()) {
 155             return tags;
 156         }
 157 
 158         List<String> matchingTags = new ArrayList<>();
 159         for (String tag : tags) {
 160             tag = tag.toLowerCase(Locale.ROOT);
 161             if (!shouldIgnoreFilterBasicMatch(zeroRange, tag)) {
 162                 matchingTags.add(tag);
 163             }
 164         }
 165 
 166         return matchingTags;
 167     }
 168 
 169     /**
 170      * The tag which is falling in the basic exclusion range(s) should not
 171      * be considered as the matching tag. Ignores the tag matching with the
 172      * non-zero ranges, if the tag also matches with one of the basic exclusion
 173      * ranges i.e. range(s) having quality weight q=0
 174      */
 175     private static boolean shouldIgnoreFilterBasicMatch(
 176             List<LanguageRange> zeroRange, String tag) {
 177         if (zeroRange.isEmpty()) {
 178             return false;
 179         }
 180 
 181         for (LanguageRange lr : zeroRange) {
 182             String range = lr.getRange();
 183             if (range.equals("*")) {
 184                 return true;
 185             }
 186             if (tag.startsWith(range)) {
 187                 int len = range.length();
 188                 if ((tag.length() == len || tag.charAt(len) == '-')) {
 189                     return true;
 190                 }
 191             }
 192         }
 193 
 194         return false;
 195     }
 196 
 197     private static List<String> filterExtended(List<LanguageRange> priorityList,
 198                                                Collection<String> tags) {
 199         int splitIndex = splitRanges(priorityList);
 200         List<LanguageRange> nonZeroRanges;
 201         List<LanguageRange> zeroRanges;
 202         if (splitIndex != -1) {
 203             nonZeroRanges = priorityList.subList(0, splitIndex);
 204             zeroRanges = priorityList.subList(splitIndex, priorityList.size());
 205         } else {
 206             nonZeroRanges = priorityList;
 207             zeroRanges = List.of();
 208         }
 209 
 210         List<String> list = new ArrayList<>();
 211         for (LanguageRange lr : nonZeroRanges) {
 212             String range = lr.getRange();
 213             if (range.equals("*")) {
 214                 tags = removeTagsMatchingExtendedZeroRange(zeroRanges, tags);
 215                 return new ArrayList<String>(tags);
 216             }
 217             String[] rangeSubtags = range.split("-");
 218             for (String tag : tags) {
 219                 tag = tag.toLowerCase(Locale.ROOT);
 220                 String[] tagSubtags = tag.split("-");
 221                 if (!rangeSubtags[0].equals(tagSubtags[0])
 222                     && !rangeSubtags[0].equals("*")) {
 223                     continue;
 224                 }
 225 
 226                 int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
 227                         tagSubtags);
 228                 if (rangeSubtags.length == rangeIndex && !list.contains(tag)
 229                         && !shouldIgnoreFilterExtendedMatch(zeroRanges, tag)) {
 230                     list.add(tag);
 231                 }
 232             }
 233         }
 234 
 235         return list;
 236     }
 237 
 238     /**
 239      * Removes the tag(s) which are falling in the extended exclusion range(s)
 240      * i.e range(s) with q=0 and returns the updated collection. If the extended
 241      * language ranges contains '*' as one of its non zero range then instead of
 242      * returning all the tags, remove those which are matching the range with
 243      * quality weight q=0.
 244      */
 245     private static Collection<String> removeTagsMatchingExtendedZeroRange(
 246             List<LanguageRange> zeroRange, Collection<String> tags) {
 247         if (zeroRange.isEmpty()) {
 248             return tags;
 249         }
 250 
 251         List<String> matchingTags = new ArrayList<>();
 252         for (String tag : tags) {
 253             tag = tag.toLowerCase(Locale.ROOT);
 254             if (!shouldIgnoreFilterExtendedMatch(zeroRange, tag)) {
 255                 matchingTags.add(tag);
 256             }
 257         }
 258 
 259         return matchingTags;
 260     }
 261 
 262     /**
 263      * The tag which is falling in the extended exclusion range(s) should
 264      * not be considered as the matching tag. Ignores the tag matching with the
 265      * non zero range(s), if the tag also matches with one of the extended
 266      * exclusion range(s) i.e. range(s) having quality weight q=0
 267      */
 268     private static boolean shouldIgnoreFilterExtendedMatch(
 269             List<LanguageRange> zeroRange, String tag) {
 270         if (zeroRange.isEmpty()) {
 271             return false;
 272         }
 273 
 274         String[] tagSubtags = tag.split("-");
 275         for (LanguageRange lr : zeroRange) {
 276             String range = lr.getRange();
 277             if (range.equals("*")) {
 278                 return true;
 279             }
 280 
 281             String[] rangeSubtags = range.split("-");
 282 
 283             if (!rangeSubtags[0].equals(tagSubtags[0])
 284                     && !rangeSubtags[0].equals("*")) {
 285                 continue;
 286             }
 287 
 288             int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
 289                     tagSubtags);
 290             if (rangeSubtags.length == rangeIndex) {
 291                 return true;
 292             }
 293         }
 294 
 295         return false;
 296     }
 297 
 298     private static int matchFilterExtendedSubtags(String[] rangeSubtags,
 299             String[] tagSubtags) {
 300         int rangeIndex = 1;
 301         int tagIndex = 1;
 302 
 303         while (rangeIndex < rangeSubtags.length
 304                 && tagIndex < tagSubtags.length) {
 305             if (rangeSubtags[rangeIndex].equals("*")) {
 306                 rangeIndex++;
 307             } else if (rangeSubtags[rangeIndex]
 308                     .equals(tagSubtags[tagIndex])) {
 309                 rangeIndex++;
 310                 tagIndex++;
 311             } else if (tagSubtags[tagIndex].length() == 1
 312                     && !tagSubtags[tagIndex].equals("*")) {
 313                 break;
 314             } else {
 315                 tagIndex++;
 316             }
 317         }
 318         return rangeIndex;
 319     }
 320 
 321     public static Locale lookup(List<LanguageRange> priorityList,
 322                                 Collection<Locale> locales) {
 323         if (priorityList.isEmpty() || locales.isEmpty()) {
 324             return null;
 325         }
 326 
 327         // Create a list of language tags to be matched.
 328         List<String> tags = new ArrayList<>();
 329         for (Locale locale : locales) {
 330             tags.add(locale.toLanguageTag());
 331         }
 332 
 333         // Look up a language tags.
 334         String lookedUpTag = lookupTag(priorityList, tags);
 335 
 336         if (lookedUpTag == null) {
 337             return null;
 338         } else {
 339             return Locale.forLanguageTag(lookedUpTag);
 340         }
 341     }
 342 
 343     public static String lookupTag(List<LanguageRange> priorityList,
 344                                    Collection<String> tags) {
 345         if (priorityList.isEmpty() || tags.isEmpty()) {
 346             return null;
 347         }
 348 
 349         int splitIndex = splitRanges(priorityList);
 350         List<LanguageRange> nonZeroRanges;
 351         List<LanguageRange> zeroRanges;
 352         if (splitIndex != -1) {
 353             nonZeroRanges = priorityList.subList(0, splitIndex);
 354             zeroRanges = priorityList.subList(splitIndex, priorityList.size());
 355         } else {
 356             nonZeroRanges = priorityList;
 357             zeroRanges = List.of();
 358         }
 359 
 360         for (LanguageRange lr : nonZeroRanges) {
 361             String range = lr.getRange();
 362 
 363             // Special language range ("*") is ignored in lookup.
 364             if (range.equals("*")) {
 365                 continue;
 366             }
 367 
 368             String rangeForRegex = range.replace("*", "\\p{Alnum}*");
 369             while (rangeForRegex.length() > 0) {
 370                 for (String tag : tags) {
 371                     tag = tag.toLowerCase(Locale.ROOT);
 372                     if (tag.matches(rangeForRegex)
 373                             && !shouldIgnoreLookupMatch(zeroRanges, tag)) {
 374                         return tag;
 375                     }
 376                 }
 377 
 378                 // Truncate from the end....
 379                 rangeForRegex = truncateRange(rangeForRegex);
 380             }
 381         }
 382 
 383         return null;
 384     }
 385 
 386     /**
 387      * The tag which is falling in the exclusion range(s) should not be
 388      * considered as the matching tag. Ignores the tag matching with the
 389      * non zero range(s), if the tag also matches with one of the exclusion
 390      * range(s) i.e. range(s) having quality weight q=0.
 391      */
 392     private static boolean shouldIgnoreLookupMatch(List<LanguageRange> zeroRange,
 393             String tag) {
 394         for (LanguageRange lr : zeroRange) {
 395             String range = lr.getRange();
 396 
 397             // Special language range ("*") is ignored in lookup.
 398             if (range.equals("*")) {
 399                 continue;
 400             }
 401 
 402             String rangeForRegex = range.replace("*", "\\p{Alnum}*");
 403             while (rangeForRegex.length() > 0) {
 404                 if (tag.matches(rangeForRegex)) {
 405                     return true;
 406                 }
 407                 // Truncate from the end....
 408                 rangeForRegex = truncateRange(rangeForRegex);
 409             }
 410         }
 411 
 412         return false;
 413     }
 414 
 415     /* Truncate the range from end during the lookup match */
 416     private static String truncateRange(String rangeForRegex) {
 417         int index = rangeForRegex.lastIndexOf('-');
 418         if (index >= 0) {
 419             rangeForRegex = rangeForRegex.substring(0, index);
 420 
 421             // if range ends with an extension key, truncate it.
 422             index = rangeForRegex.lastIndexOf('-');
 423             if (index >= 0 && index == rangeForRegex.length() - 2) {
 424                 rangeForRegex
 425                         = rangeForRegex.substring(0, rangeForRegex.length() - 2);
 426             }
 427         } else {
 428             rangeForRegex = "";
 429         }
 430 
 431         return rangeForRegex;
 432     }
 433 
 434     /* Returns the split index of the priority list, if it contains
 435      * language range(s) with quality weight as 0 i.e. q=0, else -1
 436      */
 437     private static int splitRanges(List<LanguageRange> priorityList) {
 438         int size = priorityList.size();
 439         for (int index = 0; index < size; index++) {
 440             LanguageRange range = priorityList.get(index);
 441             if (range.getWeight() == 0) {
 442                 return index;
 443             }
 444         }
 445 
 446         return -1; // no q=0 range exists
 447     }
 448 
 449     public static List<LanguageRange> parse(String ranges) {
 450         ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
 451         if (ranges.startsWith("accept-language:")) {
 452             ranges = ranges.substring(16); // delete unnecessary prefix
 453         }
 454 
 455         String[] langRanges = ranges.split(",");
 456         List<LanguageRange> list = new ArrayList<>(langRanges.length);
 457         List<String> tempList = new ArrayList<>();
 458         int numOfRanges = 0;
 459 
 460         for (String range : langRanges) {
 461             int index;
 462             String r;
 463             double w;
 464 
 465             if ((index = range.indexOf(";q=")) == -1) {
 466                 r = range;
 467                 w = MAX_WEIGHT;
 468             } else {
 469                 r = range.substring(0, index);
 470                 index += 3;
 471                 try {
 472                     w = Double.parseDouble(range.substring(index));
 473                 }
 474                 catch (Exception e) {
 475                     throw new IllegalArgumentException("weight=\""
 476                                   + range.substring(index)
 477                                   + "\" for language range \"" + r + "\"");
 478                 }
 479 
 480                 if (w < MIN_WEIGHT || w > MAX_WEIGHT) {
 481                     throw new IllegalArgumentException("weight=" + w
 482                                   + " for language range \"" + r
 483                                   + "\". It must be between " + MIN_WEIGHT
 484                                   + " and " + MAX_WEIGHT + ".");
 485                 }
 486             }
 487 
 488             if (!tempList.contains(r)) {
 489                 LanguageRange lr = new LanguageRange(r, w);
 490                 index = numOfRanges;
 491                 for (int j = 0; j < numOfRanges; j++) {
 492                     if (list.get(j).getWeight() < w) {
 493                         index = j;
 494                         break;
 495                     }
 496                 }
 497                 list.add(index, lr);
 498                 numOfRanges++;
 499                 tempList.add(r);
 500 
 501                 // Check if the range has an equivalent using IANA LSR data.
 502                 // If yes, add it to the User's Language Priority List as well.
 503 
 504                 // aa-XX -> aa-YY
 505                 String equivalent;
 506                 if ((equivalent = getEquivalentForRegionAndVariant(r)) != null
 507                     && !tempList.contains(equivalent)) {
 508                     list.add(index+1, new LanguageRange(equivalent, w));
 509                     numOfRanges++;
 510                     tempList.add(equivalent);
 511                 }
 512 
 513                 String[] equivalents;
 514                 if ((equivalents = getEquivalentsForLanguage(r)) != null) {
 515                     for (String equiv: equivalents) {
 516                         // aa-XX -> bb-XX(, cc-XX)
 517                         if (!tempList.contains(equiv)) {
 518                             list.add(index+1, new LanguageRange(equiv, w));
 519                             numOfRanges++;
 520                             tempList.add(equiv);
 521                         }
 522 
 523                         // bb-XX -> bb-YY(, cc-YY)
 524                         equivalent = getEquivalentForRegionAndVariant(equiv);
 525                         if (equivalent != null
 526                             && !tempList.contains(equivalent)) {
 527                             list.add(index+1, new LanguageRange(equivalent, w));
 528                             numOfRanges++;
 529                             tempList.add(equivalent);
 530                         }
 531                     }
 532                 }
 533             }
 534         }
 535 
 536         return list;
 537     }
 538 
 539     /**
 540      * A faster alternative approach to String.replaceFirst(), if the given
 541      * string is a literal String, not a regex.
 542      */
 543     private static String replaceFirstSubStringMatch(String range,
 544             String substr, String replacement) {
 545         int pos = range.indexOf(substr);
 546         if (pos == -1) {
 547             return range;
 548         } else {
 549             return range.substring(0, pos) + replacement
 550                     + range.substring(pos + substr.length());
 551         }
 552     }
 553 
 554     private static String[] getEquivalentsForLanguage(String range) {
 555         String r = range;
 556 
 557         while (r.length() > 0) {
 558             if (LocaleEquivalentMaps.singleEquivMap.containsKey(r)) {
 559                 String equiv = LocaleEquivalentMaps.singleEquivMap.get(r);
 560                 // Return immediately for performance if the first matching
 561                 // subtag is found.
 562                 return new String[]{replaceFirstSubStringMatch(range,
 563                     r, equiv)};
 564             } else if (LocaleEquivalentMaps.multiEquivsMap.containsKey(r)) {
 565                 String[] equivs = LocaleEquivalentMaps.multiEquivsMap.get(r);
 566                 String[] result = new String[equivs.length];
 567                 for (int i = 0; i < equivs.length; i++) {
 568                     result[i] = replaceFirstSubStringMatch(range,
 569                             r, equivs[i]);
 570                 }
 571                 return result;
 572             }
 573 
 574             // Truncate the last subtag simply.
 575             int index = r.lastIndexOf('-');
 576             if (index == -1) {
 577                 break;
 578             }
 579             r = r.substring(0, index);
 580         }
 581 
 582         return null;
 583     }
 584 
 585     private static String getEquivalentForRegionAndVariant(String range) {
 586         int extensionKeyIndex = getExtentionKeyIndex(range);
 587 
 588         for (String subtag : LocaleEquivalentMaps.regionVariantEquivMap.keySet()) {
 589             int index;
 590             if ((index = range.indexOf(subtag)) != -1) {
 591                 // Check if the matching text is a valid region or variant.
 592                 if (extensionKeyIndex != Integer.MIN_VALUE
 593                     && index > extensionKeyIndex) {
 594                     continue;
 595                 }
 596 
 597                 int len = index + subtag.length();
 598                 if (range.length() == len || range.charAt(len) == '-') {
 599                     return replaceFirstSubStringMatch(range, subtag,
 600                             LocaleEquivalentMaps.regionVariantEquivMap
 601                                     .get(subtag));
 602                 }
 603             }
 604         }
 605 
 606         return null;
 607     }
 608 
 609     private static int getExtentionKeyIndex(String s) {
 610         char[] c = s.toCharArray();
 611         int index = Integer.MIN_VALUE;
 612         for (int i = 1; i < c.length; i++) {
 613             if (c[i] == '-') {
 614                 if (i - index == 2) {
 615                     return index;
 616                 } else {
 617                     index = i;
 618                 }
 619             }
 620         }
 621         return Integer.MIN_VALUE;
 622     }
 623 
 624     public static List<LanguageRange> mapEquivalents(
 625                                           List<LanguageRange>priorityList,
 626                                           Map<String, List<String>> map) {
 627         if (priorityList.isEmpty()) {
 628             return new ArrayList<>(); // need to return a empty mutable List
 629         }
 630         if (map == null || map.isEmpty()) {
 631             return new ArrayList<LanguageRange>(priorityList);
 632         }
 633 
 634         // Create a map, key=originalKey.toLowerCaes(), value=originalKey
 635         Map<String, String> keyMap = new HashMap<>();
 636         for (String key : map.keySet()) {
 637             keyMap.put(key.toLowerCase(Locale.ROOT), key);
 638         }
 639 
 640         List<LanguageRange> list = new ArrayList<>();
 641         for (LanguageRange lr : priorityList) {
 642             String range = lr.getRange();
 643             String r = range;
 644             boolean hasEquivalent = false;
 645 
 646             while (r.length() > 0) {
 647                 if (keyMap.containsKey(r)) {
 648                     hasEquivalent = true;
 649                     List<String> equivalents = map.get(keyMap.get(r));
 650                     if (equivalents != null) {
 651                         int len = r.length();
 652                         for (String equivalent : equivalents) {
 653                             list.add(new LanguageRange(equivalent.toLowerCase(Locale.ROOT)
 654                                      + range.substring(len),
 655                                      lr.getWeight()));
 656                         }
 657                     }
 658                     // Return immediately if the first matching subtag is found.
 659                     break;
 660                 }
 661 
 662                 // Truncate the last subtag simply.
 663                 int index = r.lastIndexOf('-');
 664                 if (index == -1) {
 665                     break;
 666                 }
 667                 r = r.substring(0, index);
 668             }
 669 
 670             if (!hasEquivalent) {
 671                 list.add(lr);
 672             }
 673         }
 674 
 675         return list;
 676     }
 677 
 678     private LocaleMatcher() {}
 679 
 680 }