1 /*
   2  * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.tools.jlink.internal.plugins;
  26 
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import jdk.internal.org.objectweb.asm.ClassReader;
import jdk.tools.jlink.internal.ResourcePrevisitor;
import jdk.tools.jlink.internal.StringTable;
import jdk.tools.jlink.plugin.LinkModule;
import jdk.tools.jlink.plugin.ModuleEntry;
import jdk.tools.jlink.plugin.ModulePool;
import jdk.tools.jlink.plugin.PluginException;
import jdk.tools.jlink.plugin.TransformerPlugin;
  50 
  51 /**
  52  * Plugin to explicitly specify the locale data included in jdk.localedata
  53  * module. This plugin provides a jlink command line option "--include-locales"
  54  * with an argument. The argument is a list of BCP 47 language tags separated
  55  * by a comma. E.g.,
  56  *
  57  *  "jlink --include-locales en,ja,*-IN"
  58  *
  59  * This option will include locale data for all available English and Japanese
  60  * languages, and ones for the country of India. All other locale data are
  61  * filtered out on the image creation.
  62  *
  63  * Here are a few assumptions:
  64  *
  65  *  0. All locale data in java.base are unconditionally included.
  66  *  1. All the selective locale data are in jdk.localedata module
  67  *  2. Their package names are constructed by appending ".ext" to
  68  *     the corresponding ones in java.base module.
  69  *  3. Available locales string in LocaleDataMetaInfo class should
  70  *     start with at least one white space character, e.g., " ar ar-EG ..."
  71  *                                                           ^
  72  */
  73 public final class IncludeLocalesPlugin implements TransformerPlugin, ResourcePrevisitor {
  74 
  75     public static final String NAME = "include-locales";
  76     private static final String MODULENAME = "jdk.localedata";
  77     private static final Set<String> LOCALEDATA_PACKAGES = Set.of(
  78         "sun.text.resources.cldr.ext",
  79         "sun.text.resources.ext",
  80         "sun.util.resources.cldr.ext",
  81         "sun.util.resources.cldr.provider",
  82         "sun.util.resources.ext",
  83         "sun.util.resources.provider");
  84     private static final String METAINFONAME = "LocaleDataMetaInfo";
  85     private static final List<String> META_FILES = List.of(
  86         ".+module-info.class",
  87         ".+LocaleDataProvider.class",
  88         ".+" + METAINFONAME + ".class");
  89     private static final List<String> INCLUDE_LOCALE_FILES = List.of(
  90         ".+sun/text/resources/ext/[^_]+_",
  91         ".+sun/util/resources/ext/[^_]+_",
  92         ".+sun/text/resources/cldr/ext/[^_]+_",
  93         ".+sun/util/resources/cldr/ext/[^_]+_");
  94     private Predicate<String> predicate;
  95     private String userParam;
  96     private List<Locale.LanguageRange> priorityList;
  97     private List<Locale> available;
  98     private List<String> filtered;
  99 
 100     // Special COMPAT provider locales
 101     private static final String jaJPJPTag = "ja-JP-JP";
 102     private static final String noNONYTag = "no-NO-NY";
 103     private static final String thTHTHTag = "th-TH-TH";
 104     private static final Locale jaJPJP = new Locale("ja", "JP", "JP");
 105     private static final Locale noNONY = new Locale("no", "NO", "NY");
 106     private static final Locale thTHTH = new Locale("th", "TH", "TH");
 107 
 108     @Override
 109     public String getName() {
 110         return NAME;
 111     }
 112 
 113     @Override
 114     public void visit(ModulePool in, ModulePool out) {
 115         in.transformAndCopy((resource) -> {
 116             if (resource.getModule().equals(MODULENAME)) {
 117                 String path = resource.getPath();
 118                 resource = predicate.test(path) ? resource: null;
 119                 if (resource != null &&
 120                     resource.getType().equals(ModuleEntry.Type.CLASS_OR_RESOURCE)) {
 121                     byte[] bytes = resource.getBytes();
 122                     ClassReader cr = new ClassReader(bytes);
 123                     if (Arrays.stream(cr.getInterfaces())
 124                         .anyMatch(i -> i.contains(METAINFONAME)) &&
 125                         stripUnsupportedLocales(bytes, cr)) {
 126                         resource = ModuleEntry.create(MODULENAME, path,
 127                             resource.getType(),
 128                             new ByteArrayInputStream(bytes), bytes.length);
 129                     }
 130                 }
 131             }
 132             return resource;
 133         }, out);
 134     }
 135 
 136     @Override
 137     public Category getType() {
 138         return Category.FILTER;
 139     }
 140 
 141     @Override
 142     public String getDescription() {
 143         return PluginsResourceBundle.getDescription(NAME);
 144     }
 145 
 146     @Override
 147     public boolean hasArguments() {
 148         return true;
 149     }
 150 
 151     @Override
 152     public String getArgumentsDescription() {
 153        return PluginsResourceBundle.getArgument(NAME);
 154     }
 155 
 156     @Override
 157     public void configure(Map<String, String> config) {
 158         userParam = config.get(NAME);
 159         priorityList = Arrays.stream(userParam.split(","))
 160             .map(s -> {
 161                 try {
 162                     return new Locale.LanguageRange(s);
 163                 } catch (IllegalArgumentException iae) {
 164                     throw new IllegalArgumentException(String.format(
 165                         PluginsResourceBundle.getMessage(NAME + ".invalidtag"), s));
 166                 }
 167             })
 168             .collect(Collectors.toList());
 169     }
 170 
 171     @Override
 172     public void previsit(ModulePool resources, StringTable strings) {
 173         final Pattern p = Pattern.compile(".*((Data_)|(Names_))(?<tag>.*)\\.class");
 174         Optional<LinkModule> optMod = resources.findModule(MODULENAME);
 175 
 176         // jdk.localedata module validation
 177         if (optMod.isPresent()) {
 178             LinkModule module = optMod.get();
 179             Set<String> packages = module.getAllPackages();
 180             if (!packages.containsAll(LOCALEDATA_PACKAGES)) {
 181                 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".missingpackages") +
 182                     LOCALEDATA_PACKAGES.stream()
 183                         .filter(pn -> !packages.contains(pn))
 184                         .collect(Collectors.joining(",\n\t")));
 185             }
 186 
 187             available = Stream.concat(module.entries()
 188                                         .map(md -> p.matcher(md.getPath()))
 189                                         .filter(m -> m.matches())
 190                                         .map(m -> m.group("tag").replaceAll("_", "-")),
 191                                     Stream.concat(Stream.of(jaJPJPTag), Stream.of(thTHTHTag)))
 192                 .distinct()
 193                 .sorted()
 194                 .map(IncludeLocalesPlugin::tagToLocale)
 195                 .collect(Collectors.toList());
 196         } else {
 197             // jdk.localedata is not added.
 198             throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound"));
 199         }
 200         filtered = filterLocales(available);
 201 
 202         if (filtered.isEmpty()) {
 203             throw new PluginException(
 204                 String.format(PluginsResourceBundle.getMessage(NAME + ".nomatchinglocales"), userParam));
 205         }
 206 
 207         List<String> value = Stream.concat(
 208                 META_FILES.stream(),
 209                 filtered.stream().flatMap(s -> includeLocaleFilePatterns(s).stream()))
 210             .map(s -> "regex:" + s)
 211             .collect(Collectors.toList());
 212         predicate = ResourceFilter.includeFilter(value);
 213     }
 214 
 215     private List<String> includeLocaleFilePatterns(String tag) {
 216         List<String> files = new ArrayList<>();
 217         String pTag = tag.replaceAll("-", "_");
 218         int lastDelimiter = tag.length();
 219         String isoSpecial = pTag.matches("^(he|yi|id).*") ?
 220                             pTag.replaceFirst("he", "iw")
 221                                 .replaceFirst("yi", "ji")
 222                                 .replaceFirst("id", "in") : "";
 223 
 224         // Add tag patterns including parents
 225         while (true) {
 226             pTag = pTag.substring(0, lastDelimiter);
 227             files.addAll(includeLocaleFiles(pTag));
 228 
 229             if (!isoSpecial.isEmpty()) {
 230                 isoSpecial = isoSpecial.substring(0, lastDelimiter);
 231                 files.addAll(includeLocaleFiles(isoSpecial));
 232             }
 233 
 234             lastDelimiter = pTag.lastIndexOf('_');
 235             if (lastDelimiter == -1) {
 236                 break;
 237             }
 238         }
 239 
 240         final String lang = pTag;
 241 
 242         // Add possible special locales of the COMPAT provider
 243         files.addAll(Set.of(jaJPJPTag, noNONYTag, thTHTHTag).stream()
 244             .filter(stag -> lang.equals(stag.substring(0,2)))
 245             .flatMap(t -> includeLocaleFiles(t.replaceAll("-", "_")).stream())
 246             .collect(Collectors.toList()));
 247 
 248         // Add possible UN.M49 files (unconditional for now) for each language
 249         files.addAll(includeLocaleFiles(lang + "_[0-9]{3}"));
 250         if (!isoSpecial.isEmpty()) {
 251             files.addAll(includeLocaleFiles(isoSpecial + "_[0-9]{3}"));
 252         }
 253 
 254         // Add Thai BreakIterator related data files
 255         if (lang.equals("th")) {
 256             files.add(".+sun/text/resources/thai_dict");
 257             files.add(".+sun/text/resources/[^_]+BreakIteratorData_th");
 258         }
 259 
 260         // Add Taiwan resource bundles for Hong Kong
 261         if (tag.startsWith("zh-HK")) {
 262             files.addAll(includeLocaleFiles("zh_TW"));
 263         }
 264 
 265         return files;
 266     }
 267 
 268     private List<String> includeLocaleFiles(String localeStr) {
 269         return INCLUDE_LOCALE_FILES.stream()
 270             .map(s -> s + localeStr + ".class")
 271             .collect(Collectors.toList());
 272     }
 273 
 274     private boolean stripUnsupportedLocales(byte[] bytes, ClassReader cr) {
 275         char[] buf = new char[cr.getMaxStringLength()];
 276         boolean[] modified = new boolean[1];
 277 
 278         IntStream.range(1, cr.getItemCount())
 279             .map(item -> cr.getItem(item))
 280             .forEach(itemIndex -> {
 281                 if (bytes[itemIndex - 1] == 1 &&         // UTF-8
 282                     bytes[itemIndex + 2] == (byte)' ') { // fast check for leading space
 283                     int length = cr.readUnsignedShort(itemIndex);
 284                     byte[] b = new byte[length];
 285                     System.arraycopy(bytes, itemIndex + 2, b, 0, length);
 286                     if (filterOutUnsupportedTags(b)) {
 287                         // copy back
 288                         System.arraycopy(b, 0, bytes, itemIndex + 2, length);
 289                         modified[0] = true;
 290                     }
 291                 }
 292             });
 293 
 294         return modified[0];
 295     }
 296 
 297     private boolean filterOutUnsupportedTags(byte[] b) {
 298         List<Locale> locales;
 299 
 300         try {
 301             locales = Arrays.asList(new String(b).split(" ")).stream()
 302                 .filter(tag -> !tag.isEmpty())
 303                 .map(IncludeLocalesPlugin::tagToLocale)
 304                 .collect(Collectors.toList());
 305         } catch (IllformedLocaleException ile) {
 306             // Seems not an available locales string literal.
 307             return false;
 308         }
 309 
 310         byte[] filteredBytes = filterLocales(locales).stream()
 311             .collect(Collectors.joining(" "))
 312             .getBytes();
 313         System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length);
 314         Arrays.fill(b, filteredBytes.length, b.length, (byte)' ');
 315         return true;
 316     }
 317 
 318     private List<String> filterLocales(List<Locale> locales) {
 319         List<String> ret =
 320             Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream()
 321                 .map(loc ->
 322                     // Locale.filter() does not preserve the case, which is
 323                     // significant for "variant" equality. Retrieve the original
 324                     // locales from the pre-filtered list.
 325                     locales.stream()
 326                         .filter(l -> l.toString().equalsIgnoreCase(loc.toString()))
 327                         .findAny()
 328                         .orElse(Locale.ROOT)
 329                         .toLanguageTag())
 330                 .collect(Collectors.toList());
 331 
 332         // no-NO-NY.toLanguageTag() returns "nn-NO", so specially handle it here
 333         if (ret.contains("no-NO")) {
 334             ret.add(noNONYTag);
 335         }
 336 
 337         return ret;
 338     }
 339 
 340     private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder();
 341     private static Locale tagToLocale(String tag) {
 342         // ISO3166 compatibility
 343         tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id");
 344 
 345         switch (tag) {
 346             case jaJPJPTag:
 347                 return jaJPJP;
 348             case noNONYTag:
 349                 return noNONY;
 350             case thTHTHTag:
 351                 return thTHTH;
 352             default:
 353                 LOCALE_BUILDER.clear();
 354                 LOCALE_BUILDER.setLanguageTag(tag);
 355                 return LOCALE_BUILDER.build();
 356         }
 357     }
 358 }