1 /*
   2  * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.tools.jlink.internal.plugins;
  26 
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import static java.util.ResourceBundle.Control;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import jdk.internal.org.objectweb.asm.ClassReader;
import jdk.tools.jlink.internal.ResourcePrevisitor;
import jdk.tools.jlink.internal.StringTable;
import jdk.tools.jlink.plugin.ResourcePoolModule;
import jdk.tools.jlink.plugin.PluginException;
import jdk.tools.jlink.plugin.ResourcePool;
import jdk.tools.jlink.plugin.ResourcePoolBuilder;
import jdk.tools.jlink.plugin.ResourcePoolEntry;
import jdk.tools.jlink.plugin.Plugin;
import sun.util.cldr.CLDRBaseLocaleDataMetaInfo;
import sun.util.locale.provider.LocaleProviderAdapter;
import sun.util.locale.provider.LocaleProviderAdapter.Type;
import sun.util.locale.provider.ResourceBundleBasedAdapter;
  56 
  57 /**
  58  * Plugin to explicitly specify the locale data included in jdk.localedata
  59  * module. This plugin provides a jlink command line option "--include-locales"
  60  * with an argument. The argument is a list of BCP 47 language tags separated
  61  * by a comma. E.g.,
  62  *
  63  *  "jlink --include-locales en,ja,*-IN"
  64  *
  65  * This option will include locale data for all available English and Japanese
  66  * languages, and ones for the country of India. All other locale data are
  67  * filtered out on the image creation.
  68  *
  69  * Here are a few assumptions:
  70  *
  71  *  0. All locale data in java.base are unconditionally included.
  72  *  1. All the selective locale data are in jdk.localedata module
  73  *  2. Their package names are constructed by appending ".ext" to
  74  *     the corresponding ones in java.base module.
  75  *  3. Available locales string in LocaleDataMetaInfo class should
  76  *     start with at least one white space character, e.g., " ar ar-EG ..."
  77  *                                                           ^
  78  */
  79 public final class IncludeLocalesPlugin implements Plugin, ResourcePrevisitor {
  80 
  81     public static final String NAME = "include-locales";
  82     private static final String MODULENAME = "jdk.localedata";
  83     private static final Set<String> LOCALEDATA_PACKAGES = Set.of(
  84         "sun.text.resources.cldr.ext",
  85         "sun.text.resources.ext",
  86         "sun.util.resources.cldr.ext",
  87         "sun.util.resources.cldr.provider",
  88         "sun.util.resources.ext",
  89         "sun.util.resources.provider");
  90     private static final String METAINFONAME = "LocaleDataMetaInfo";
  91     private static final List<String> META_FILES = List.of(
  92         ".+module-info.class",
  93         ".+LocaleDataProvider.class",
  94         ".+" + METAINFONAME + ".class");
  95     private static final List<String> INCLUDE_LOCALE_FILES = List.of(
  96         ".+sun/text/resources/ext/[^_]+_",
  97         ".+sun/util/resources/ext/[^_]+_",
  98         ".+sun/text/resources/cldr/ext/[^_]+_",
  99         ".+sun/util/resources/cldr/ext/[^_]+_");
 100     private Predicate<String> predicate;
 101     private String userParam;
 102     private List<Locale.LanguageRange> priorityList;
 103     private List<Locale> available;
 104     private List<String> filtered;
 105 
 106     private static final ResourceBundleBasedAdapter CLDR_ADAPTER =
 107         (ResourceBundleBasedAdapter)LocaleProviderAdapter.forType(Type.CLDR);
 108     private static final Map<Locale, String[]> CLDR_PARENT_LOCALES =
 109         new CLDRBaseLocaleDataMetaInfo().parentLocales();
 110 
 111     // Equivalent map
 112     private static final Map<String, List<String>> EQUIV_MAP =
 113         Stream.concat(
 114             // COMPAT equivalence
 115             Map.of(
 116                 "zh-Hans", List.of("zh-Hans", "zh-CN", "zh-SG"),
 117                 "zh-Hant", List.of("zh-Hant", "zh-HK", "zh-MO", "zh-TW"))
 118                 .entrySet()
 119                 .stream(),
 120 
 121             // CLDR parent locales
 122             CLDR_PARENT_LOCALES.entrySet().stream()
 123                 .map(entry -> {
 124                     String parent = entry.getKey().toLanguageTag();
 125                     List<String> children = new ArrayList<>();
 126                     children.add(parent);
 127 
 128                     Arrays.stream(entry.getValue())
 129                         .filter(child -> !child.isEmpty())
 130                         .flatMap(child ->
 131                             Stream.concat(
 132                                 Arrays.stream(CLDR_PARENT_LOCALES.getOrDefault(
 133                                     Locale.forLanguageTag(child), new String[0]))
 134                                         .filter(grandchild -> !grandchild.isEmpty()),
 135                                 List.of(child).stream()))
 136                         .distinct()
 137                         .forEach(children::add);
 138                     return new AbstractMap.SimpleEntry<String, List<String>>(parent, children);
 139                 })
 140         ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
 141 
 142     // Special COMPAT provider locales
 143     private static final String jaJPJPTag = "ja-JP-JP";
 144     private static final String noNONYTag = "no-NO-NY";
 145     private static final String thTHTHTag = "th-TH-TH";
 146     private static final Locale jaJPJP = new Locale("ja", "JP", "JP");
 147     private static final Locale noNONY = new Locale("no", "NO", "NY");
 148     private static final Locale thTHTH = new Locale("th", "TH", "TH");
 149 
 150     @Override
 151     public String getName() {
 152         return NAME;
 153     }
 154 
 155     @Override
 156     public ResourcePool transform(ResourcePool in, ResourcePoolBuilder out) {
 157         in.transformAndCopy((resource) -> {
 158             if (resource.moduleName().equals(MODULENAME)) {
 159                 String path = resource.path();
 160                 resource = predicate.test(path) ? resource: null;
 161                 if (resource != null &&
 162                     resource.type().equals(ResourcePoolEntry.Type.CLASS_OR_RESOURCE)) {
 163                     byte[] bytes = resource.contentBytes();
 164                     ClassReader cr = new ClassReader(bytes);
 165                     if (Arrays.stream(cr.getInterfaces())
 166                         .anyMatch(i -> i.contains(METAINFONAME)) &&
 167                         stripUnsupportedLocales(bytes, cr)) {
 168                         resource = resource.copyWithContent(bytes);
 169                     }
 170                 }
 171             }
 172             return resource;
 173         }, out);
 174 
 175         return out.build();
 176     }
 177 
 178     @Override
 179     public Category getType() {
 180         return Category.FILTER;
 181     }
 182 
 183     @Override
 184     public String getDescription() {
 185         return PluginsResourceBundle.getDescription(NAME);
 186     }
 187 
 188     @Override
 189     public boolean hasArguments() {
 190         return true;
 191     }
 192 
 193     @Override
 194     public String getArgumentsDescription() {
 195        return PluginsResourceBundle.getArgument(NAME);
 196     }
 197 
 198     @Override
 199     public void configure(Map<String, String> config) {
 200         userParam = config.get(NAME);
 201 
 202         try {
 203             priorityList = Locale.LanguageRange.parse(userParam, EQUIV_MAP);
 204         } catch (IllegalArgumentException iae) {
 205             throw new IllegalArgumentException(String.format(
 206                 PluginsResourceBundle.getMessage(NAME + ".invalidtag"),
 207                     iae.getMessage().replaceFirst("^range=", "")));
 208         }
 209     }
 210 
 211     @Override
 212     public void previsit(ResourcePool resources, StringTable strings) {
 213         final Pattern p = Pattern.compile(".*((Data_)|(Names_))(?<tag>.*)\\.class");
 214         Optional<ResourcePoolModule> optMod = resources.moduleView().findModule(MODULENAME);
 215 
 216         // jdk.localedata module validation
 217         if (optMod.isPresent()) {
 218             ResourcePoolModule module = optMod.get();
 219             Set<String> packages = module.packages();
 220             if (!packages.containsAll(LOCALEDATA_PACKAGES)) {
 221                 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".missingpackages") +
 222                     LOCALEDATA_PACKAGES.stream()
 223                         .filter(pn -> !packages.contains(pn))
 224                         .collect(Collectors.joining(",\n\t")));
 225             }
 226 
 227             available = Stream.concat(module.entries()
 228                                         .map(md -> p.matcher(md.path()))
 229                                         .filter(m -> m.matches())
 230                                         .map(m -> m.group("tag").replaceAll("_", "-")),
 231                                     Stream.concat(Stream.of(jaJPJPTag), Stream.of(thTHTHTag)))
 232                 .distinct()
 233                 .sorted()
 234                 .map(IncludeLocalesPlugin::tagToLocale)
 235                 .collect(Collectors.toList());
 236         } else {
 237             // jdk.localedata is not added.
 238             throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound"));
 239         }
 240 
 241         filtered = filterLocales(available);
 242 
 243         if (filtered.isEmpty()) {
 244             throw new PluginException(
 245                 String.format(PluginsResourceBundle.getMessage(NAME + ".nomatchinglocales"), userParam));
 246         }
 247 
 248         List<String> value = Stream.concat(
 249                 META_FILES.stream(),
 250                 filtered.stream().flatMap(s -> includeLocaleFilePatterns(s).stream()))
 251             .map(s -> "regex:" + s)
 252             .collect(Collectors.toList());
 253 
 254         predicate = ResourceFilter.includeFilter(value);
 255     }
 256 
 257     private List<String> includeLocaleFilePatterns(String tag) {
 258         // Ignore extension variations
 259         if (tag.matches(".+-[a-z]-.+")) {
 260             return List.of();
 261         }
 262 
 263         List<String> files = new ArrayList<>(includeLocaleFiles(tag.replaceAll("-", "_")));
 264 
 265         // Add Thai BreakIterator related data files
 266         if (tag.equals("th")) {
 267             files.add(".+sun/text/resources/ext/thai_dict");
 268             files.add(".+sun/text/resources/ext/[^_]+BreakIteratorData_th");
 269         }
 270 
 271         // Add Taiwan resource bundles for Hong Kong
 272         if (tag.equals("zh-HK")) {
 273             files.addAll(includeLocaleFiles("zh_TW"));
 274         }
 275 
 276         return files;
 277     }
 278 
 279     private List<String> includeLocaleFiles(String localeStr) {
 280         return INCLUDE_LOCALE_FILES.stream()
 281             .map(s -> s + localeStr + ".class")
 282             .collect(Collectors.toList());
 283     }
 284 
 285     private boolean stripUnsupportedLocales(byte[] bytes, ClassReader cr) {
 286         char[] buf = new char[cr.getMaxStringLength()];
 287         boolean[] modified = new boolean[1];
 288 
 289         IntStream.range(1, cr.getItemCount())
 290             .map(item -> cr.getItem(item))
 291             .forEach(itemIndex -> {
 292                 if (bytes[itemIndex - 1] == 1 &&         // UTF-8
 293                     bytes[itemIndex + 2] == (byte)' ') { // fast check for leading space
 294                     int length = cr.readUnsignedShort(itemIndex);
 295                     byte[] b = new byte[length];
 296                     System.arraycopy(bytes, itemIndex + 2, b, 0, length);
 297                     if (filterOutUnsupportedTags(b)) {
 298                         // copy back
 299                         System.arraycopy(b, 0, bytes, itemIndex + 2, length);
 300                         modified[0] = true;
 301                     }
 302                 }
 303             });
 304 
 305         return modified[0];
 306     }
 307 
 308     private boolean filterOutUnsupportedTags(byte[] b) {
 309         List<Locale> locales;
 310         String original = new String(b);
 311 
 312         try {
 313             locales = Arrays.asList(original.split(" ")).stream()
 314                 .filter(tag -> !tag.isEmpty())
 315                 .map(IncludeLocalesPlugin::tagToLocale)
 316                 .collect(Collectors.toList());
 317         } catch (IllformedLocaleException ile) {
 318             // Seems not an available locales string literal.
 319             return false;
 320         }
 321 
 322         byte[] filteredBytes = filterLocales(locales).stream()
 323             // Make sure the filtered language tags do exist in the
 324             // original supported tags for compatibility codes, e.g., "iw"
 325             .filter(t -> original.indexOf(t) != -1)
 326             .collect(Collectors.joining(" "))
 327             .getBytes();
 328 
 329         if (filteredBytes.length > b.length) {
 330             throw new InternalError("Size of filtered locales is bigger than the original one");
 331         }
 332 
 333         System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length);
 334         Arrays.fill(b, filteredBytes.length, b.length, (byte)' ');
 335         return true;
 336     }
 337 
 338     /*
 339      * Filter list of locales according to the secified priorityList. Note
 340      * that returned list of language tags may include extra ones, such as
 341      * compatibility ones (e.g., "iw" -> "iw", "he").
 342      */
 343     private List<String> filterLocales(List<Locale> locales) {
 344         List<String> ret =
 345             Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream()
 346                 .flatMap(loc -> Stream.concat(Control.getNoFallbackControl(Control.FORMAT_DEFAULT)
 347                                      .getCandidateLocales("", loc).stream(),
 348                                 CLDR_ADAPTER.getCandidateLocales("", loc).stream()))
 349                 .map(loc ->
 350                     // Locale.filter() does not preserve the case, which is
 351                     // significant for "variant" equality. Retrieve the original
 352                     // locales from the pre-filtered list.
 353                     locales.stream()
 354                         .filter(l -> l.toString().equalsIgnoreCase(loc.toString()))
 355                         .findAny())
 356                 .flatMap(Optional::stream)
 357                 .flatMap(IncludeLocalesPlugin::localeToTags)
 358                 .distinct()
 359                 .collect(Collectors.toList());
 360 
 361         return ret;
 362     }
 363 
 364     private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder();
 365     private static Locale tagToLocale(String tag) {
 366         // ISO3166 compatibility
 367         tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id");
 368 
 369         // Special COMPAT provider locales
 370         switch (tag) {
 371             case jaJPJPTag:
 372                 return jaJPJP;
 373             case noNONYTag:
 374                 return noNONY;
 375             case thTHTHTag:
 376                 return thTHTH;
 377             default:
 378                 LOCALE_BUILDER.clear();
 379                 LOCALE_BUILDER.setLanguageTag(tag);
 380                 return LOCALE_BUILDER.build();
 381         }
 382     }
 383 
 384     private static Stream<String> localeToTags(Locale loc) {
 385         Objects.requireNonNull(loc);
 386 
 387         String tag = loc.toLanguageTag();
 388         List<String> tags = null;
 389 
 390         switch (loc.getLanguage()) {
 391             // ISO3166 compatibility
 392             case "iw":
 393                 tags = List.of(tag, tag.replaceFirst("^he", "iw"));
 394                 break;
 395             case "in":
 396                 tags = List.of(tag, tag.replaceFirst("^id", "in"));
 397                 break;
 398             case "ji":
 399                 tags = List.of(tag, tag.replaceFirst("^yi", "ji"));
 400                 break;
 401 
 402             // Special COMPAT provider locales
 403             case "ja":
 404                 if (loc.getCountry() == "JP") {
 405                     tags = List.of(tag, jaJPJPTag);
 406                 }
 407                 break;
 408             case "no":
 409             case "nn":
 410                 if (loc.getCountry() == "NO") {
 411                     tags = List.of(tag, noNONYTag);
 412                 }
 413                 break;
 414             case "th":
 415                 if (loc.getCountry() == "TH") {
 416                     tags = List.of(tag, thTHTHTag);
 417                 }
 418                 break;
 419         }
 420 
 421         return tags == null ? List.of(tag).stream() : tags.stream();
 422     }
 423 }