1 /*
   2  * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.tools.jlink.internal.plugins;
  26 
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.IllformedLocaleException;
import java.util.Locale;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import static java.util.ResourceBundle.Control;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import jdk.internal.org.objectweb.asm.ClassReader;
import jdk.tools.jlink.internal.ResourcePrevisitor;
import jdk.tools.jlink.internal.StringTable;
import jdk.tools.jlink.plugin.ResourcePoolModule;
import jdk.tools.jlink.plugin.PluginException;
import jdk.tools.jlink.plugin.ResourcePool;
import jdk.tools.jlink.plugin.ResourcePoolBuilder;
import jdk.tools.jlink.plugin.ResourcePoolEntry;
import jdk.tools.jlink.plugin.Plugin;
import sun.util.cldr.CLDRBaseLocaleDataMetaInfo;
import sun.util.locale.provider.LocaleProviderAdapter;
import sun.util.locale.provider.LocaleProviderAdapter.Type;
import sun.util.locale.provider.ResourceBundleBasedAdapter;
  56 
  57 /**
  58  * Plugin to explicitly specify the locale data included in jdk.localedata
  59  * module. This plugin provides a jlink command line option "--include-locales"
  60  * with an argument. The argument is a list of BCP 47 language tags separated
  61  * by a comma. E.g.,
  62  *
  63  *  "jlink --include-locales en,ja,*-IN"
  64  *
  65  * This option will include locale data for all available English and Japanese
  66  * languages, and ones for the country of India. All other locale data are
  67  * filtered out on the image creation.
  68  *
  69  * Here are a few assumptions:
  70  *
  71  *  0. All locale data in java.base are unconditionally included.
  72  *  1. All the selective locale data are in jdk.localedata module
  73  *  2. Their package names are constructed by appending ".ext" to
  74  *     the corresponding ones in java.base module.
  75  *  3. Available locales string in LocaleDataMetaInfo class should
  76  *     start with at least one white space character, e.g., " ar ar-EG ..."
  77  *                                                           ^
  78  */
  79 public final class IncludeLocalesPlugin implements Plugin, ResourcePrevisitor {
  80 
  81     public static final String NAME = "include-locales";
  82     private static final String MODULENAME = "jdk.localedata";
  83     private static final Set<String> LOCALEDATA_PACKAGES = Set.of(
  84         "sun.text.resources.cldr.ext",
  85         "sun.text.resources.ext",
  86         "sun.util.resources.cldr.ext",
  87         "sun.util.resources.cldr.provider",
  88         "sun.util.resources.ext",
  89         "sun.util.resources.provider");
  90     private static final String METAINFONAME = "LocaleDataMetaInfo";
  91     private static final List<String> META_FILES = List.of(
  92         ".+module-info.class",
  93         ".+LocaleDataProvider.class",
  94         ".+" + METAINFONAME + ".class");
  95     private static final List<String> INCLUDE_LOCALE_FILES = List.of(
  96         ".+sun/text/resources/ext/[^_]+_",
  97         ".+sun/util/resources/ext/[^_]+_",
  98         ".+sun/text/resources/cldr/ext/[^_]+_",
  99         ".+sun/util/resources/cldr/ext/[^_]+_");
 100     private Predicate<String> predicate;
 101     private String userParam;
 102     private List<Locale.LanguageRange> priorityList;
 103     private List<Locale> available;
 104     private List<String> filtered;
 105 
 106     private static final ResourceBundleBasedAdapter CLDR_ADAPTER =
 107         (ResourceBundleBasedAdapter)LocaleProviderAdapter.forType(Type.CLDR);
 108     private static final Map<Locale, String[]> CLDR_PARENT_LOCALES =
 109         new CLDRBaseLocaleDataMetaInfo().parentLocales();
 110 
 111     // Equivalent map
 112     private static final Map<String, List<String>> EQUIV_MAP =
 113         Stream.concat(
 114             // COMPAT equivalence
 115             Map.of(
 116                 "zh-Hans", List.of("zh-Hans", "zh-CN", "zh-SG"),
 117                 "zh-Hant", List.of("zh-Hant", "zh-HK", "zh-MO", "zh-TW"))
 118                 .entrySet()
 119                 .stream(),
 120 
 121             // CLDR parent locales
 122             CLDR_PARENT_LOCALES.entrySet().stream()
 123                 .map(entry -> {
 124                     String parent = entry.getKey().toLanguageTag();
 125                     List<String> children = new ArrayList<>();
 126                     children.add(parent);
 127 
 128                     Arrays.stream(entry.getValue())
 129                         .filter(child -> !child.isEmpty())
 130                         .flatMap(child ->
 131                             Stream.concat(
 132                                 Arrays.stream(CLDR_PARENT_LOCALES.getOrDefault(
 133                                     Locale.forLanguageTag(child), new String[0]))
 134                                         .filter(grandchild -> !grandchild.isEmpty()),
 135                                 List.of(child).stream()))
 136                         .distinct()
 137                         .forEach(children::add);
 138                     return new AbstractMap.SimpleEntry<String, List<String>>(parent, children);
 139                 })
 140         ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
 141 
 142     // Special COMPAT provider locales
 143     private static final String jaJPJPTag = "ja-JP-JP";
 144     private static final String noNONYTag = "no-NO-NY";
 145     private static final String thTHTHTag = "th-TH-TH";
 146     private static final Locale jaJPJP = new Locale("ja", "JP", "JP");
 147     private static final Locale noNONY = new Locale("no", "NO", "NY");
 148     private static final Locale thTHTH = new Locale("th", "TH", "TH");
 149 
 150     @Override
 151     public String getName() {
 152         return NAME;
 153     }
 154 
 155     @Override
 156     public ResourcePool transform(ResourcePool in, ResourcePoolBuilder out) {
 157         in.transformAndCopy((resource) -> {
 158             if (resource.moduleName().equals(MODULENAME)) {
 159                 String path = resource.path();
 160                 resource = predicate.test(path) ? resource: null;
 161                 if (resource != null &&
 162                     resource.type().equals(ResourcePoolEntry.Type.CLASS_OR_RESOURCE)) {
 163                     byte[] bytes = resource.contentBytes();
 164                     ClassReader cr = new ClassReader(bytes);
 165                     if (Arrays.stream(cr.getInterfaces())
 166                         .anyMatch(i -> i.contains(METAINFONAME)) &&
 167                         stripUnsupportedLocales(bytes, cr)) {
 168                         resource = resource.copyWithContent(bytes);
 169                     }
 170                 }
 171             }
 172             return resource;
 173         }, out);
 174 
 175         return out.build();
 176     }
 177 
 178     @Override
 179     public Category getType() {
 180         return Category.FILTER;
 181     }
 182 
 183     @Override
 184     public String getDescription() {
 185         return PluginsResourceBundle.getDescription(NAME);
 186     }
 187 
 188     @Override
 189     public boolean hasArguments() {
 190         return true;
 191     }
 192 
 193     @Override
 194     public String getArgumentsDescription() {
 195        return PluginsResourceBundle.getArgument(NAME);
 196     }
 197 
 198     @Override
 199     public void configure(Map<String, String> config) {
 200         userParam = config.get(NAME);
 201 
 202         try {
 203             priorityList = Locale.LanguageRange.parse(userParam, EQUIV_MAP);
 204         } catch (IllegalArgumentException iae) {
 205             throw new IllegalArgumentException(String.format(
 206                 PluginsResourceBundle.getMessage(NAME + ".invalidtag"),
 207                     iae.getMessage().replaceFirst("^range=", "")));
 208         }
 209     }
 210 
 211     @Override
 212     public void previsit(ResourcePool resources, StringTable strings) {
 213         final Pattern p = Pattern.compile(".*((Data_)|(Names_))(?<tag>.*)\\.class");
 214         Optional<ResourcePoolModule> optMod = resources.moduleView().findModule(MODULENAME);
 215 
 216         // jdk.localedata module validation
 217         if (optMod.isPresent()) {
 218             ResourcePoolModule module = optMod.get();
 219             Set<String> packages = module.packages();
 220             if (!packages.containsAll(LOCALEDATA_PACKAGES)) {
 221                 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".missingpackages") +
 222                     LOCALEDATA_PACKAGES.stream()
 223                         .filter(pn -> !packages.contains(pn))
 224                         .collect(Collectors.joining(",\n\t")));
 225             }
 226 
 227             available = Stream.concat(module.entries()
 228                                         .map(md -> p.matcher(md.path()))
 229                                         .filter(m -> m.matches())
 230                                         .map(m -> m.group("tag").replaceAll("_", "-")),
 231                                     Stream.concat(Stream.of(jaJPJPTag), Stream.of(thTHTHTag)))
 232                 .distinct()
 233                 .sorted()
 234                 .map(IncludeLocalesPlugin::tagToLocale)
 235                 .collect(Collectors.toList());
 236         } else {
 237             // jdk.localedata is not added.
 238             throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound"));
 239         }
 240 
 241         filtered = filterLocales(available);
 242 
 243         if (filtered.isEmpty()) {
 244             throw new PluginException(
 245                 String.format(PluginsResourceBundle.getMessage(NAME + ".nomatchinglocales"), userParam));
 246         }
 247 
 248         List<String> value = Stream.concat(
 249                 META_FILES.stream(),
 250                 filtered.stream().flatMap(s -> includeLocaleFilePatterns(s).stream()))
 251             .map(s -> "regex:" + s)
 252             .collect(Collectors.toList());
 253 
 254         predicate = ResourceFilter.includeFilter(value);
 255     }
 256 
 257     private List<String> includeLocaleFilePatterns(String tag) {
 258         // Ignore extension variations
 259         if (tag.matches(".+-[a-z]-.+")) {
 260             return List.of();
 261         }
 262 
 263         List<String> files = new ArrayList<>(includeLocaleFiles(tag.replaceAll("-", "_")));
 264 
 265         // Add Thai BreakIterator related data files
 266         if (tag.equals("th")) {
 267             files.add(".+sun/text/resources/ext/thai_dict");
 268             files.add(".+sun/text/resources/ext/[^_]+BreakIteratorData_th");
 269         }
 270 
 271         // Add Taiwan resource bundles for Hong Kong
 272         if (tag.equals("zh-HK")) {
 273             files.addAll(includeLocaleFiles("zh_TW"));
 274         }
 275 
 276         return files;
 277     }
 278 
 279     private List<String> includeLocaleFiles(String localeStr) {
 280         return INCLUDE_LOCALE_FILES.stream()
 281             .map(s -> s + localeStr + ".class")
 282             .collect(Collectors.toList());
 283     }
 284 
 285     private boolean stripUnsupportedLocales(byte[] bytes, ClassReader cr) {
 286         char[] buf = new char[cr.getMaxStringLength()];
 287         boolean[] modified = new boolean[1];
 288 
 289         IntStream.range(1, cr.getItemCount())
 290             .map(item -> cr.getItem(item))
 291             .forEach(itemIndex -> {
 292                 if (bytes[itemIndex - 1] == 1 &&         // UTF-8
 293                     bytes[itemIndex + 2] == (byte)' ') { // fast check for leading space
 294                     int length = cr.readUnsignedShort(itemIndex);
 295                     byte[] b = new byte[length];
 296                     System.arraycopy(bytes, itemIndex + 2, b, 0, length);
 297                     if (filterOutUnsupportedTags(b)) {
 298                         // copy back
 299                         System.arraycopy(b, 0, bytes, itemIndex + 2, length);
 300                         modified[0] = true;
 301                     }
 302                 }
 303             });
 304 
 305         return modified[0];
 306     }
 307 
 308     private boolean filterOutUnsupportedTags(byte[] b) {
 309         List<Locale> locales;
 310 
 311         try {
 312             locales = Arrays.asList(new String(b).split(" ")).stream()
 313                 .filter(tag -> !tag.isEmpty())
 314                 .map(IncludeLocalesPlugin::tagToLocale)
 315                 .collect(Collectors.toList());
 316         } catch (IllformedLocaleException ile) {
 317             // Seems not an available locales string literal.
 318             return false;
 319         }
 320 
 321         byte[] filteredBytes = filterLocales(locales).stream()
 322             .collect(Collectors.joining(" "))
 323             .getBytes();
 324 
 325         if (filteredBytes.length > b.length) {
 326             throw new InternalError("Size of filtered locales is bigger than the original one");
 327         }
 328 
 329         System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length);
 330         Arrays.fill(b, filteredBytes.length, b.length, (byte)' ');
 331         return true;
 332     }
 333 
 334     private List<String> filterLocales(List<Locale> locales) {
 335         List<String> ret =
 336             Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream()
 337                 .flatMap(loc -> Stream.concat(Control.getNoFallbackControl(Control.FORMAT_DEFAULT)
 338                                      .getCandidateLocales("", loc).stream(),
 339                                 CLDR_ADAPTER.getCandidateLocales("", loc).stream()))
 340                 .map(loc ->
 341                     // Locale.filter() does not preserve the case, which is
 342                     // significant for "variant" equality. Retrieve the original
 343                     // locales from the pre-filtered list.
 344                     locales.stream()
 345                         .filter(l -> l.toString().equalsIgnoreCase(loc.toString()))
 346                         .findAny())
 347                 .flatMap(Optional::stream)
 348                 .flatMap(IncludeLocalesPlugin::localeToTags)
 349                 .distinct()
 350                 .collect(Collectors.toList());
 351 
 352         return ret;
 353     }
 354 
 355     private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder();
 356     private static Locale tagToLocale(String tag) {
 357         // ISO3166 compatibility
 358         tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id");
 359 
 360         // Special COMPAT provider locales
 361         switch (tag) {
 362             case jaJPJPTag:
 363                 return jaJPJP;
 364             case noNONYTag:
 365                 return noNONY;
 366             case thTHTHTag:
 367                 return thTHTH;
 368             default:
 369                 LOCALE_BUILDER.clear();
 370                 LOCALE_BUILDER.setLanguageTag(tag);
 371                 return LOCALE_BUILDER.build();
 372         }
 373     }
 374 
 375     private static Stream<String> localeToTags(Locale loc) {
 376         Objects.requireNonNull(loc);
 377 
 378         String tag = loc.toLanguageTag();
 379         List<String> tags = null;
 380 
 381         switch (loc.getLanguage()) {
 382             // ISO3166 compatibility
 383             case "iw":
 384                 tags = List.of(tag, tag.replaceFirst("^he", "iw"));
 385                 break;
 386             case "in":
 387                 tags = List.of(tag, tag.replaceFirst("^id", "in"));
 388                 break;
 389             case "ji":
 390                 tags = List.of(tag, tag.replaceFirst("^yi", "ji"));
 391                 break;
 392 
 393             // Special COMPAT provider locales
 394             case "ja":
 395                 if (loc.getCountry() == "JP") {
 396                     tags = List.of(tag, jaJPJPTag);
 397                 }
 398                 break;
 399             case "no":
 400             case "nn":
 401                 if (loc.getCountry() == "NO") {
 402                     tags = List.of(tag, noNONYTag);
 403                 }
 404                 break;
 405             case "th":
 406                 if (loc.getCountry() == "TH") {
 407                     tags = List.of(tag, thTHTHTag);
 408                 }
 409                 break;
 410         }
 411 
 412         return tags == null ? List.of(tag).stream() : tags.stream();
 413     }
 414 }