1 /* 2 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package jdk.tools.jlink.internal.plugins; 26 27 import java.util.AbstractMap; 28 import java.util.ArrayList; 29 import java.util.Arrays; 30 import java.util.IllformedLocaleException; 31 import java.util.Locale; 32 import java.util.List; 33 import java.util.Map; 34 import java.util.Optional; 35 import static java.util.ResourceBundle.Control; 36 import java.util.Set; 37 import java.util.function.Predicate; 38 import java.util.regex.Pattern; 39 import java.util.stream.Collectors; 40 import java.util.stream.IntStream; 41 import java.util.stream.Stream; 42 import jdk.internal.org.objectweb.asm.ClassReader; 43 import jdk.tools.jlink.internal.ResourcePrevisitor; 44 import jdk.tools.jlink.internal.StringTable; 45 import jdk.tools.jlink.plugin.LinkModule; 46 import jdk.tools.jlink.plugin.ModuleEntry; 47 import jdk.tools.jlink.plugin.PluginException; 48 import jdk.tools.jlink.plugin.ModulePool; 49 import jdk.tools.jlink.plugin.Plugin; 50 import sun.util.cldr.CLDRBaseLocaleDataMetaInfo; 51 import sun.util.locale.provider.LocaleProviderAdapter; 52 import sun.util.locale.provider.LocaleProviderAdapter.Type; 53 import sun.util.locale.provider.ResourceBundleBasedAdapter; 54 55 /** 56 * Plugin to explicitly specify the locale data included in jdk.localedata 57 * module. This plugin provides a jlink command line option "--include-locales" 58 * with an argument. The argument is a list of BCP 47 language tags separated 59 * by a comma. E.g., 60 * 61 * "jlink --include-locales en,ja,*-IN" 62 * 63 * This option will include locale data for all available English and Japanese 64 * languages, and ones for the country of India. All other locale data are 65 * filtered out on the image creation. 66 * 67 * Here are a few assumptions: 68 * 69 * 0. All locale data in java.base are unconditionally included. 70 * 1. All the selective locale data are in jdk.localedata module 71 * 2. Their package names are constructed by appending ".ext" to 72 * the corresponding ones in java.base module. 73 * 3. Available locales string in LocaleDataMetaInfo class should 74 * start with at least one white space character, e.g., " ar ar-EG ..." 75 * ^ 76 */ 77 public final class IncludeLocalesPlugin implements Plugin, ResourcePrevisitor { 78 79 public static final String NAME = "include-locales"; 80 private static final String MODULENAME = "jdk.localedata"; 81 private static final Set<String> LOCALEDATA_PACKAGES = Set.of( 82 "sun.text.resources.cldr.ext", 83 "sun.text.resources.ext", 84 "sun.util.resources.cldr.ext", 85 "sun.util.resources.cldr.provider", 86 "sun.util.resources.ext", 87 "sun.util.resources.provider"); 88 private static final String METAINFONAME = "LocaleDataMetaInfo"; 89 private static final List<String> META_FILES = List.of( 90 ".+module-info.class", 91 ".+LocaleDataProvider.class", 92 ".+" + METAINFONAME + ".class"); 93 private static final List<String> INCLUDE_LOCALE_FILES = List.of( 94 ".+sun/text/resources/ext/[^_]+_", 95 ".+sun/util/resources/ext/[^_]+_", 96 ".+sun/text/resources/cldr/ext/[^_]+_", 97 ".+sun/util/resources/cldr/ext/[^_]+_"); 98 private Predicate<String> predicate; 99 private String userParam; 100 private List<Locale.LanguageRange> priorityList; 101 private List<Locale> available; 102 private List<String> filtered; 103 104 private static final ResourceBundleBasedAdapter CLDR_ADAPTER = 105 (ResourceBundleBasedAdapter)LocaleProviderAdapter.forType(Type.CLDR); 106 private static final Map<Locale, String[]> CLDR_PARENT_LOCALES = 107 new CLDRBaseLocaleDataMetaInfo().parentLocales(); 108 109 // Equivalent map 110 private static final Map<String, List<String>> EQUIV_MAP = 111 Stream.concat( 112 // COMPAT equivalence 113 Map.of( 114 "zh-Hans", List.of("zh-Hans", "zh-CN", "zh-SG"), 115 "zh-Hant", List.of("zh-Hant", "zh-HK", "zh-MO", "zh-TW")) 116 .entrySet() 117 .stream(), 118 119 // CLDR parent locales 120 CLDR_PARENT_LOCALES.entrySet().stream() 121 .map(entry -> { 122 String parent = entry.getKey().toLanguageTag(); 123 List<String> children = new ArrayList<>(); 124 children.add(parent); 125 126 Arrays.stream(entry.getValue()) 127 .filter(child -> !child.isEmpty()) 128 .flatMap(child -> 129 Stream.concat( 130 Arrays.stream(CLDR_PARENT_LOCALES.getOrDefault( 131 Locale.forLanguageTag(child), new String[0])) 132 .filter(grandchild -> !grandchild.isEmpty()), 133 List.of(child).stream())) 134 .distinct() 135 .forEach(children::add); 136 return new AbstractMap.SimpleEntry<String, List<String>>(parent, children); 137 }) 138 ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 139 140 // Special COMPAT provider locales 141 private static final String jaJPJPTag = "ja-JP-JP"; 142 private static final String noNONYTag = "no-NO-NY"; 143 private static final String thTHTHTag = "th-TH-TH"; 144 private static final Locale jaJPJP = new Locale("ja", "JP", "JP"); 145 private static final Locale noNONY = new Locale("no", "NO", "NY"); 146 private static final Locale thTHTH = new Locale("th", "TH", "TH"); 147 148 @Override 149 public String getName() { 150 return NAME; 151 } 152 153 @Override 154 public void visit(ModulePool in, ModulePool out) { 155 in.transformAndCopy((resource) -> { 156 if (resource.getModule().equals(MODULENAME)) { 157 String path = resource.getPath(); 158 resource = predicate.test(path) ? resource: null; 159 if (resource != null && 160 resource.getType().equals(ModuleEntry.Type.CLASS_OR_RESOURCE)) { 161 byte[] bytes = resource.getBytes(); 162 ClassReader cr = new ClassReader(bytes); 163 if (Arrays.stream(cr.getInterfaces()) 164 .anyMatch(i -> i.contains(METAINFONAME)) && 165 stripUnsupportedLocales(bytes, cr)) { 166 resource = resource.create(bytes); 167 } 168 } 169 } 170 return resource; 171 }, out); 172 } 173 174 @Override 175 public Category getType() { 176 return Category.FILTER; 177 } 178 179 @Override 180 public String getDescription() { 181 return PluginsResourceBundle.getDescription(NAME); 182 } 183 184 @Override 185 public boolean hasArguments() { 186 return true; 187 } 188 189 @Override 190 public String getArgumentsDescription() { 191 return PluginsResourceBundle.getArgument(NAME); 192 } 193 194 @Override 195 public void configure(Map<String, String> config) { 196 userParam = config.get(NAME); 197 198 try { 199 priorityList = Locale.LanguageRange.parse(userParam, EQUIV_MAP); 200 } catch (IllegalArgumentException iae) { 201 throw new IllegalArgumentException(String.format( 202 PluginsResourceBundle.getMessage(NAME + ".invalidtag"), 203 iae.getMessage().replaceFirst("^range=", ""))); 204 } 205 } 206 207 @Override 208 public void previsit(ModulePool resources, StringTable strings) { 209 final Pattern p = Pattern.compile(".*((Data_)|(Names_))(?<tag>.*)\\.class"); 210 Optional<LinkModule> optMod = resources.findModule(MODULENAME); 211 212 // jdk.localedata module validation 213 if (optMod.isPresent()) { 214 LinkModule module = optMod.get(); 215 Set<String> packages = module.getAllPackages(); 216 if (!packages.containsAll(LOCALEDATA_PACKAGES)) { 217 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".missingpackages") + 218 LOCALEDATA_PACKAGES.stream() 219 .filter(pn -> !packages.contains(pn)) 220 .collect(Collectors.joining(",\n\t"))); 221 } 222 223 available = Stream.concat(module.entries() 224 .map(md -> p.matcher(md.getPath())) 225 .filter(m -> m.matches()) 226 .map(m -> m.group("tag").replaceAll("_", "-")), 227 Stream.concat(Stream.of(jaJPJPTag), Stream.of(thTHTHTag))) 228 .distinct() 229 .sorted() 230 .map(IncludeLocalesPlugin::tagToLocale) 231 .collect(Collectors.toList()); 232 } else { 233 // jdk.localedata is not added. 234 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound")); 235 } 236 237 filtered = filterLocales(available); 238 239 if (filtered.isEmpty()) { 240 throw new PluginException( 241 String.format(PluginsResourceBundle.getMessage(NAME + ".nomatchinglocales"), userParam)); 242 } 243 244 List<String> value = Stream.concat( 245 META_FILES.stream(), 246 filtered.stream().flatMap(s -> includeLocaleFilePatterns(s).stream())) 247 .map(s -> "regex:" + s) 248 .collect(Collectors.toList()); 249 250 predicate = ResourceFilter.includeFilter(value); 251 } 252 253 private List<String> includeLocaleFilePatterns(String tag) { 254 // Ignore extension variations 255 if (tag.matches(".+-[a-z]-.+")) { 256 return List.of(); 257 } 258 259 List<String> files = new ArrayList<>(includeLocaleFiles(tag.replaceAll("-", "_"))); 260 261 // Add Thai BreakIterator related data files 262 if (tag.equals("th")) { 263 files.add(".+sun/text/resources/thai_dict"); 264 files.add(".+sun/text/resources/[^_]+BreakIteratorData_th"); 265 } 266 267 // Add Taiwan resource bundles for Hong Kong 268 if (tag.equals("zh-HK")) { 269 files.addAll(includeLocaleFiles("zh_TW")); 270 } 271 272 return files; 273 } 274 275 private List<String> includeLocaleFiles(String localeStr) { 276 return INCLUDE_LOCALE_FILES.stream() 277 .map(s -> s + localeStr + ".class") 278 .collect(Collectors.toList()); 279 } 280 281 private boolean stripUnsupportedLocales(byte[] bytes, ClassReader cr) { 282 char[] buf = new char[cr.getMaxStringLength()]; 283 boolean[] modified = new boolean[1]; 284 285 IntStream.range(1, cr.getItemCount()) 286 .map(item -> cr.getItem(item)) 287 .forEach(itemIndex -> { 288 if (bytes[itemIndex - 1] == 1 && // UTF-8 289 bytes[itemIndex + 2] == (byte)' ') { // fast check for leading space 290 int length = cr.readUnsignedShort(itemIndex); 291 byte[] b = new byte[length]; 292 System.arraycopy(bytes, itemIndex + 2, b, 0, length); 293 if (filterOutUnsupportedTags(b)) { 294 // copy back 295 System.arraycopy(b, 0, bytes, itemIndex + 2, length); 296 modified[0] = true; 297 } 298 } 299 }); 300 301 return modified[0]; 302 } 303 304 private boolean filterOutUnsupportedTags(byte[] b) { 305 List<Locale> locales; 306 307 try { 308 locales = Arrays.asList(new String(b).split(" ")).stream() 309 .filter(tag -> !tag.isEmpty()) 310 .map(IncludeLocalesPlugin::tagToLocale) 311 .collect(Collectors.toList()); 312 } catch (IllformedLocaleException ile) { 313 // Seems not an available locales string literal. 314 return false; 315 } 316 317 byte[] filteredBytes = filterLocales(locales).stream() 318 .collect(Collectors.joining(" ")) 319 .getBytes(); 320 321 if (filteredBytes.length > b.length) { 322 throw new InternalError("Size of filtered locales is bigger than the original one"); 323 } 324 325 System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length); 326 Arrays.fill(b, filteredBytes.length, b.length, (byte)' '); 327 return true; 328 } 329 330 private List<String> filterLocales(List<Locale> locales) { 331 List<String> ret = 332 Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream() 333 .flatMap(loc -> Stream.concat(Control.getNoFallbackControl(Control.FORMAT_DEFAULT) 334 .getCandidateLocales("", loc).stream(), 335 CLDR_ADAPTER.getCandidateLocales("", loc).stream())) 336 .map(loc -> 337 // Locale.filter() does not preserve the case, which is 338 // significant for "variant" equality. Retrieve the original 339 // locales from the pre-filtered list. 340 locales.stream() 341 .filter(l -> l.toString().equalsIgnoreCase(loc.toString())) 342 .findAny() 343 .orElse(Locale.ROOT)) 344 .filter(loc -> !loc.equals(Locale.ROOT)) 345 .flatMap(IncludeLocalesPlugin::localeToTags) 346 .distinct() 347 .collect(Collectors.toList()); 348 349 return ret; 350 } 351 352 private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder(); 353 private static Locale tagToLocale(String tag) { 354 // ISO3166 compatibility 355 tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id"); 356 357 // Special COMPAT provider locales 358 switch (tag) { 359 case jaJPJPTag: 360 return jaJPJP; 361 case noNONYTag: 362 return noNONY; 363 case thTHTHTag: 364 return thTHTH; 365 default: 366 LOCALE_BUILDER.clear(); 367 LOCALE_BUILDER.setLanguageTag(tag); 368 return LOCALE_BUILDER.build(); 369 } 370 } 371 372 private static Stream<String> localeToTags(Locale loc) { 373 String tag = loc.toLanguageTag(); 374 Stream<String> ret = null; 375 376 switch (loc.getLanguage()) { 377 // ISO3166 compatibility 378 case "iw": 379 ret = List.of(tag, tag.replaceFirst("^he", "iw")).stream(); 380 break; 381 case "in": 382 ret = List.of(tag, tag.replaceFirst("^id", "in")).stream(); 383 break; 384 case "ji": 385 ret = List.of(tag, tag.replaceFirst("^yi", "ji")).stream(); 386 break; 387 388 // Special COMPAT provider locales 389 case "ja": 390 if (loc.getCountry() == "JP") { 391 ret = List.of(tag, jaJPJPTag).stream(); 392 } 393 break; 394 case "no": 395 case "nn": 396 if (loc.getCountry() == "NO") { 397 ret = List.of(tag, noNONYTag).stream(); 398 } 399 break; 400 case "th": 401 if (loc.getCountry() == "TH") { 402 ret = List.of(tag, thTHTHTag).stream(); 403 } 404 break; 405 } 406 407 return ret == null ? List.of(tag).stream() : ret; 408 } 409 }