1 /*
   2  * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.tools.jlink.internal.plugins;
  26 
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import static java.util.ResourceBundle.Control;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import jdk.internal.org.objectweb.asm.ClassReader;
import jdk.tools.jlink.internal.ResourcePrevisitor;
import jdk.tools.jlink.internal.StringTable;
import jdk.tools.jlink.plugin.LinkModule;
import jdk.tools.jlink.plugin.ModuleEntry;
import jdk.tools.jlink.plugin.PluginException;
import jdk.tools.jlink.plugin.ModulePool;
import jdk.tools.jlink.plugin.Plugin;
import sun.util.cldr.CLDRBaseLocaleDataMetaInfo;
import sun.util.locale.provider.LocaleProviderAdapter;
import sun.util.locale.provider.LocaleProviderAdapter.Type;
import sun.util.locale.provider.ResourceBundleBasedAdapter;
  54 
  55 /**
  56  * Plugin to explicitly specify the locale data included in jdk.localedata
  57  * module. This plugin provides a jlink command line option "--include-locales"
  58  * with an argument. The argument is a list of BCP 47 language tags separated
  59  * by a comma. E.g.,
  60  *
  61  *  "jlink --include-locales en,ja,*-IN"
  62  *
  63  * This option will include locale data for all available English and Japanese
  64  * languages, and ones for the country of India. All other locale data are
  65  * filtered out on the image creation.
  66  *
  67  * Here are a few assumptions:
  68  *
  69  *  0. All locale data in java.base are unconditionally included.
  70  *  1. All the selective locale data are in jdk.localedata module
  71  *  2. Their package names are constructed by appending ".ext" to
  72  *     the corresponding ones in java.base module.
  73  *  3. Available locales string in LocaleDataMetaInfo class should
  74  *     start with at least one white space character, e.g., " ar ar-EG ..."
  75  *                                                           ^
  76  */
  77 public final class IncludeLocalesPlugin implements Plugin, ResourcePrevisitor {
  78 
  79     public static final String NAME = "include-locales";
  80     private static final String MODULENAME = "jdk.localedata";
  81     private static final Set<String> LOCALEDATA_PACKAGES = Set.of(
  82         "sun.text.resources.cldr.ext",
  83         "sun.text.resources.ext",
  84         "sun.util.resources.cldr.ext",
  85         "sun.util.resources.cldr.provider",
  86         "sun.util.resources.ext",
  87         "sun.util.resources.provider");
  88     private static final String METAINFONAME = "LocaleDataMetaInfo";
  89     private static final List<String> META_FILES = List.of(
  90         ".+module-info.class",
  91         ".+LocaleDataProvider.class",
  92         ".+" + METAINFONAME + ".class");
  93     private static final List<String> INCLUDE_LOCALE_FILES = List.of(
  94         ".+sun/text/resources/ext/[^_]+_",
  95         ".+sun/util/resources/ext/[^_]+_",
  96         ".+sun/text/resources/cldr/ext/[^_]+_",
  97         ".+sun/util/resources/cldr/ext/[^_]+_");
  98     private Predicate<String> predicate;
  99     private String userParam;
 100     private List<Locale.LanguageRange> priorityList;
 101     private List<Locale> available;
 102     private List<String> filtered;
 103 
 104     private static final ResourceBundleBasedAdapter CLDR_ADAPTER =
 105         (ResourceBundleBasedAdapter)LocaleProviderAdapter.forType(Type.CLDR);
 106     private static final Map<Locale, String[]> CLDR_PARENT_LOCALES =
 107         new CLDRBaseLocaleDataMetaInfo().parentLocales();
 108 
 109     // Equivalent map
 110     private static final Map<String, List<String>> EQUIV_MAP =
 111         Stream.concat(
 112             // COMPAT equivalence
 113             Map.of(
 114                 "zh-Hans", List.of("zh-Hans", "zh-CN", "zh-SG"),
 115                 "zh-Hant", List.of("zh-Hant", "zh-HK", "zh-MO", "zh-TW"))
 116                 .entrySet()
 117                 .stream(),
 118 
 119             // CLDR parent locales
 120             CLDR_PARENT_LOCALES.entrySet().stream()
 121                 .map(entry -> {
 122                     String parent = entry.getKey().toLanguageTag();
 123                     List<String> children = new ArrayList<>();
 124                     children.add(parent);
 125 
 126                     Arrays.stream(entry.getValue())
 127                         .filter(child -> !child.isEmpty())
 128                         .flatMap(child -> 
 129                             Stream.concat(
 130                                 Arrays.stream(CLDR_PARENT_LOCALES.getOrDefault(
 131                                     Locale.forLanguageTag(child), new String[0]))
 132                                         .filter(grandchild -> !grandchild.isEmpty()),
 133                                 List.of(child).stream()))
 134                         .distinct()
 135                         .forEach(children::add);
 136                     return new AbstractMap.SimpleEntry<String, List<String>>(parent, children);
 137                 })
 138         ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
 139 
 140     // Special COMPAT provider locales
 141     private static final String jaJPJPTag = "ja-JP-JP";
 142     private static final String noNONYTag = "no-NO-NY";
 143     private static final String thTHTHTag = "th-TH-TH";
 144     private static final Locale jaJPJP = new Locale("ja", "JP", "JP");
 145     private static final Locale noNONY = new Locale("no", "NO", "NY");
 146     private static final Locale thTHTH = new Locale("th", "TH", "TH");
 147 
 148     @Override
 149     public String getName() {
 150         return NAME;
 151     }
 152 
 153     @Override
 154     public void visit(ModulePool in, ModulePool out) {
 155         in.transformAndCopy((resource) -> {
 156             if (resource.getModule().equals(MODULENAME)) {
 157                 String path = resource.getPath();
 158                 resource = predicate.test(path) ? resource: null;
 159                 if (resource != null &&
 160                     resource.getType().equals(ModuleEntry.Type.CLASS_OR_RESOURCE)) {
 161                     byte[] bytes = resource.getBytes();
 162                     ClassReader cr = new ClassReader(bytes);
 163                     if (Arrays.stream(cr.getInterfaces())
 164                         .anyMatch(i -> i.contains(METAINFONAME)) &&
 165                         stripUnsupportedLocales(bytes, cr)) {
 166                         resource = resource.create(bytes);
 167                     }
 168                 }
 169             }
 170             return resource;
 171         }, out);
 172     }
 173 
 174     @Override
 175     public Category getType() {
 176         return Category.FILTER;
 177     }
 178 
 179     @Override
 180     public String getDescription() {
 181         return PluginsResourceBundle.getDescription(NAME);
 182     }
 183 
 184     @Override
 185     public boolean hasArguments() {
 186         return true;
 187     }
 188 
 189     @Override
 190     public String getArgumentsDescription() {
 191        return PluginsResourceBundle.getArgument(NAME);
 192     }
 193 
 194     @Override
 195     public void configure(Map<String, String> config) {
 196         userParam = config.get(NAME);
 197 
 198         try {
 199             priorityList = Locale.LanguageRange.parse(userParam, EQUIV_MAP);
 200         } catch (IllegalArgumentException iae) {
 201             throw new IllegalArgumentException(String.format(
 202                 PluginsResourceBundle.getMessage(NAME + ".invalidtag"),
 203                     iae.getMessage().replaceFirst("^range=", "")));
 204         }
 205     }
 206 
 207     @Override
 208     public void previsit(ModulePool resources, StringTable strings) {
 209         final Pattern p = Pattern.compile(".*((Data_)|(Names_))(?<tag>.*)\\.class");
 210         Optional<LinkModule> optMod = resources.findModule(MODULENAME);
 211 
 212         // jdk.localedata module validation
 213         if (optMod.isPresent()) {
 214             LinkModule module = optMod.get();
 215             Set<String> packages = module.getAllPackages();
 216             if (!packages.containsAll(LOCALEDATA_PACKAGES)) {
 217                 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".missingpackages") +
 218                     LOCALEDATA_PACKAGES.stream()
 219                         .filter(pn -> !packages.contains(pn))
 220                         .collect(Collectors.joining(",\n\t")));
 221             }
 222 
 223             available = Stream.concat(module.entries()
 224                                         .map(md -> p.matcher(md.getPath()))
 225                                         .filter(m -> m.matches())
 226                                         .map(m -> m.group("tag").replaceAll("_", "-")),
 227                                     Stream.concat(Stream.of(jaJPJPTag), Stream.of(thTHTHTag)))
 228                 .distinct()
 229                 .sorted()
 230                 .map(IncludeLocalesPlugin::tagToLocale)
 231                 .collect(Collectors.toList());
 232         } else {
 233             // jdk.localedata is not added.
 234             throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound"));
 235         }
 236 
 237         filtered = filterLocales(available);
 238 
 239         if (filtered.isEmpty()) {
 240             throw new PluginException(
 241                 String.format(PluginsResourceBundle.getMessage(NAME + ".nomatchinglocales"), userParam));
 242         }
 243 
 244         List<String> value = Stream.concat(
 245                 META_FILES.stream(),
 246                 filtered.stream().flatMap(s -> includeLocaleFilePatterns(s).stream()))
 247             .map(s -> "regex:" + s)
 248             .collect(Collectors.toList());
 249 
 250         predicate = ResourceFilter.includeFilter(value);
 251     }
 252 
 253     private List<String> includeLocaleFilePatterns(String tag) {
 254         // Ignore extension variations
 255         if (tag.matches(".+-[a-z]-.+")) {
 256             return List.of();
 257         }
 258 
 259         List<String> files = new ArrayList<>(includeLocaleFiles(tag.replaceAll("-", "_")));
 260 
 261         // Add Thai BreakIterator related data files
 262         if (tag.equals("th")) {
 263             files.add(".+sun/text/resources/thai_dict");
 264             files.add(".+sun/text/resources/[^_]+BreakIteratorData_th");
 265         }
 266 
 267         // Add Taiwan resource bundles for Hong Kong
 268         if (tag.equals("zh-HK")) {
 269             files.addAll(includeLocaleFiles("zh_TW"));
 270         }
 271 
 272         return files;
 273     }
 274 
 275     private List<String> includeLocaleFiles(String localeStr) {
 276         return INCLUDE_LOCALE_FILES.stream()
 277             .map(s -> s + localeStr + ".class")
 278             .collect(Collectors.toList());
 279     }
 280 
 281     private boolean stripUnsupportedLocales(byte[] bytes, ClassReader cr) {
 282         char[] buf = new char[cr.getMaxStringLength()];
 283         boolean[] modified = new boolean[1];
 284 
 285         IntStream.range(1, cr.getItemCount())
 286             .map(item -> cr.getItem(item))
 287             .forEach(itemIndex -> {
 288                 if (bytes[itemIndex - 1] == 1 &&         // UTF-8
 289                     bytes[itemIndex + 2] == (byte)' ') { // fast check for leading space
 290                     int length = cr.readUnsignedShort(itemIndex);
 291                     byte[] b = new byte[length];
 292                     System.arraycopy(bytes, itemIndex + 2, b, 0, length);
 293                     if (filterOutUnsupportedTags(b)) {
 294                         // copy back
 295                         System.arraycopy(b, 0, bytes, itemIndex + 2, length);
 296                         modified[0] = true;
 297                     }
 298                 }
 299             });
 300 
 301         return modified[0];
 302     }
 303 
 304     private boolean filterOutUnsupportedTags(byte[] b) {
 305         List<Locale> locales;
 306 
 307         try {
 308             locales = Arrays.asList(new String(b).split(" ")).stream()
 309                 .filter(tag -> !tag.isEmpty())
 310                 .map(IncludeLocalesPlugin::tagToLocale)
 311                 .collect(Collectors.toList());
 312         } catch (IllformedLocaleException ile) {
 313             // Seems not an available locales string literal.
 314             return false;
 315         }
 316 
 317         byte[] filteredBytes = filterLocales(locales).stream()
 318             .collect(Collectors.joining(" "))
 319             .getBytes();
 320 
 321         if (filteredBytes.length > b.length) {
 322             throw new InternalError("Size of filtered locales is bigger than the original one");
 323         }
 324 
 325         System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length);
 326         Arrays.fill(b, filteredBytes.length, b.length, (byte)' ');
 327         return true;
 328     }
 329 
 330     private List<String> filterLocales(List<Locale> locales) {
 331         List<String> ret =
 332             Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream()
 333                 .flatMap(loc -> Stream.concat(Control.getNoFallbackControl(Control.FORMAT_DEFAULT)
 334                                      .getCandidateLocales("", loc).stream(),
 335                                 CLDR_ADAPTER.getCandidateLocales("", loc).stream()))
 336                 .map(loc ->
 337                     // Locale.filter() does not preserve the case, which is
 338                     // significant for "variant" equality. Retrieve the original
 339                     // locales from the pre-filtered list.
 340                     locales.stream()
 341                         .filter(l -> l.toString().equalsIgnoreCase(loc.toString()))
 342                         .findAny()
 343                         .orElse(Locale.ROOT))
 344                 .filter(loc -> !loc.equals(Locale.ROOT))
 345                 .flatMap(IncludeLocalesPlugin::localeToTags)
 346                 .distinct()
 347                 .collect(Collectors.toList());
 348 
 349         return ret;
 350     }
 351 
 352     private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder();
 353     private static Locale tagToLocale(String tag) {
 354         // ISO3166 compatibility
 355         tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id");
 356 
 357         // Special COMPAT provider locales
 358         switch (tag) {
 359             case jaJPJPTag:
 360                 return jaJPJP;
 361             case noNONYTag:
 362                 return noNONY;
 363             case thTHTHTag:
 364                 return thTHTH;
 365             default:
 366                 LOCALE_BUILDER.clear();
 367                 LOCALE_BUILDER.setLanguageTag(tag);
 368                 return LOCALE_BUILDER.build();
 369         }
 370     }
 371 
 372     private static Stream<String> localeToTags(Locale loc) {
 373         String tag = loc.toLanguageTag();
 374         Stream<String> ret = null;
 375         
 376         switch (loc.getLanguage()) {
 377             // ISO3166 compatibility
 378             case "iw":
 379                 ret = List.of(tag, tag.replaceFirst("^he", "iw")).stream();
 380                 break;
 381             case "in":
 382                 ret = List.of(tag, tag.replaceFirst("^id", "in")).stream();
 383                 break;
 384             case "ji":
 385                 ret = List.of(tag, tag.replaceFirst("^yi", "ji")).stream();
 386                 break;
 387                 
 388             // Special COMPAT provider locales
 389             case "ja":
 390                 if (loc.getCountry() == "JP") {
 391                     ret = List.of(tag, jaJPJPTag).stream();
 392                 }
 393                 break;
 394             case "no":
 395             case "nn":
 396                 if (loc.getCountry() == "NO") {
 397                     ret = List.of(tag, noNONYTag).stream();
 398                 }
 399                 break;
 400             case "th":
 401                 if (loc.getCountry() == "TH") {
 402                     ret = List.of(tag, thTHTHTag).stream();
 403                 }
 404                 break;
 405         }
 406 
 407         return ret == null ? List.of(tag).stream() : ret;
 408     }
 409 }