1 /* 2 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package jdk.tools.jlink.internal.plugins; 26 27 import java.io.ByteArrayInputStream; 28 import java.util.ArrayList; 29 import java.util.Arrays; 30 import java.util.Collections; 31 import java.util.IllformedLocaleException; 32 import java.util.Locale; 33 import java.util.List; 34 import java.util.Map; 35 import java.util.Optional; 36 import java.util.Set; 37 import java.util.function.Predicate; 38 import java.util.regex.Pattern; 39 import java.util.stream.Collectors; 40 import java.util.stream.IntStream; 41 import java.util.stream.Stream; 42 import jdk.internal.org.objectweb.asm.ClassReader; 43 import jdk.tools.jlink.internal.ResourcePrevisitor; 44 import jdk.tools.jlink.internal.StringTable; 45 import jdk.tools.jlink.plugin.LinkModule; 46 import jdk.tools.jlink.plugin.ModuleEntry; 47 import jdk.tools.jlink.plugin.PluginException; 48 import jdk.tools.jlink.plugin.ModulePool; 49 import jdk.tools.jlink.plugin.TransformerPlugin; 50 51 /** 52 * Plugin to explicitly specify the locale data included in jdk.localedata 53 * module. This plugin provides a jlink command line option "--include-locales" 54 * with an argument. The argument is a list of BCP 47 language tags separated 55 * by a comma. E.g., 56 * 57 * "jlink --include-locales en,ja,*-IN" 58 * 59 * This option will include locale data for all available English and Japanese 60 * languages, and ones for the country of India. All other locale data are 61 * filtered out on the image creation. 62 * 63 * Here are a few assumptions: 64 * 65 * 0. All locale data in java.base are unconditionally included. 66 * 1. All the selective locale data are in jdk.localedata module 67 * 2. Their package names are constructed by appending ".ext" to 68 * the corresponding ones in java.base module. 69 * 3. Available locales string in LocaleDataMetaInfo class should 70 * start with at least one white space character, e.g., " ar ar-EG ..." 71 * ^ 72 */ 73 public final class IncludeLocalesPlugin implements TransformerPlugin, ResourcePrevisitor { 74 75 public static final String NAME = "include-locales"; 76 private static final String MODULENAME = "jdk.localedata"; 77 private static final Set<String> LOCALEDATA_PACKAGES = Set.of( 78 "sun.text.resources.cldr.ext", 79 "sun.text.resources.ext", 80 "sun.util.resources.cldr.ext", 81 "sun.util.resources.cldr.provider", 82 "sun.util.resources.ext", 83 "sun.util.resources.provider"); 84 private static final String METAINFONAME = "LocaleDataMetaInfo"; 85 private static final List<String> META_FILES = List.of( 86 ".+module-info.class", 87 ".+LocaleDataProvider.class", 88 ".+" + METAINFONAME + ".class"); 89 private static final List<String> INCLUDE_LOCALE_FILES = List.of( 90 ".+sun/text/resources/ext/[^_]+_", 91 ".+sun/util/resources/ext/[^_]+_", 92 ".+sun/text/resources/cldr/ext/[^_]+_", 93 ".+sun/util/resources/cldr/ext/[^_]+_"); 94 private Predicate<String> predicate; 95 private String userParam; 96 private List<Locale.LanguageRange> priorityList; 97 private List<Locale> available; 98 private List<String> filtered; 99 100 // Special COMPAT provider locales 101 private static final String jaJPJPTag = "ja-JP-JP"; 102 private static final String noNONYTag = "no-NO-NY"; 103 private static final String thTHTHTag = "th-TH-TH"; 104 private static final Locale jaJPJP = new Locale("ja", "JP", "JP"); 105 private static final Locale noNONY = new Locale("no", "NO", "NY"); 106 private static final Locale thTHTH = new Locale("th", "TH", "TH"); 107 108 @Override 109 public String getName() { 110 return NAME; 111 } 112 113 @Override 114 public void visit(ModulePool in, ModulePool out) { 115 in.transformAndCopy((resource) -> { 116 if (resource.getModule().equals(MODULENAME)) { 117 String path = resource.getPath(); 118 resource = predicate.test(path) ? resource: null; 119 if (resource != null && 120 resource.getType().equals(ModuleEntry.Type.CLASS_OR_RESOURCE)) { 121 byte[] bytes = resource.getBytes(); 122 ClassReader cr = new ClassReader(bytes); 123 if (Arrays.stream(cr.getInterfaces()) 124 .anyMatch(i -> i.contains(METAINFONAME)) && 125 stripUnsupportedLocales(bytes, cr)) { 126 resource = ModuleEntry.create(MODULENAME, path, 127 resource.getType(), 128 new ByteArrayInputStream(bytes), bytes.length); 129 } 130 } 131 } 132 return resource; 133 }, out); 134 } 135 136 @Override 137 public Category getType() { 138 return Category.FILTER; 139 } 140 141 @Override 142 public String getDescription() { 143 return PluginsResourceBundle.getDescription(NAME); 144 } 145 146 @Override 147 public boolean hasArguments() { 148 return true; 149 } 150 151 @Override 152 public String getArgumentsDescription() { 153 return PluginsResourceBundle.getArgument(NAME); 154 } 155 156 @Override 157 public void configure(Map<String, String> config) { 158 userParam = config.get(NAME); 159 priorityList = Arrays.stream(userParam.split(",")) 160 .map(s -> { 161 try { 162 return new Locale.LanguageRange(s); 163 } catch (IllegalArgumentException iae) { 164 throw new IllegalArgumentException(String.format( 165 PluginsResourceBundle.getMessage(NAME + ".invalidtag"), s)); 166 } 167 }) 168 .collect(Collectors.toList()); 169 } 170 171 @Override 172 public void previsit(ModulePool resources, StringTable strings) { 173 final Pattern p = Pattern.compile(".*((Data_)|(Names_))(?<tag>.*)\\.class"); 174 Optional<LinkModule> optMod = resources.findModule(MODULENAME); 175 176 // jdk.localedata module validation 177 if (optMod.isPresent()) { 178 LinkModule module = optMod.get(); 179 Set<String> packages = module.getAllPackages(); 180 if (!packages.containsAll(LOCALEDATA_PACKAGES)) { 181 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".missingpackages") + 182 LOCALEDATA_PACKAGES.stream() 183 .filter(pn -> !packages.contains(pn)) 184 .collect(Collectors.joining(",\n\t"))); 185 } 186 187 available = Stream.concat(module.entries() 188 .map(md -> p.matcher(md.getPath())) 189 .filter(m -> m.matches()) 190 .map(m -> m.group("tag").replaceAll("_", "-")), 191 Stream.concat(Stream.of(jaJPJPTag), Stream.of(thTHTHTag))) 192 .distinct() 193 .sorted() 194 .map(IncludeLocalesPlugin::tagToLocale) 195 .collect(Collectors.toList()); 196 } else { 197 // jdk.localedata is not added. 198 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound")); 199 } 200 filtered = filterLocales(available); 201 202 if (filtered.isEmpty()) { 203 throw new PluginException( 204 String.format(PluginsResourceBundle.getMessage(NAME + ".nomatchinglocales"), userParam)); 205 } 206 207 List<String> value = Stream.concat( 208 META_FILES.stream(), 209 filtered.stream().flatMap(s -> includeLocaleFilePatterns(s).stream())) 210 .map(s -> "regex:" + s) 211 .collect(Collectors.toList()); 212 predicate = ResourceFilter.includeFilter(value); 213 } 214 215 private List<String> includeLocaleFilePatterns(String tag) { 216 List<String> files = new ArrayList<>(); 217 String pTag = tag.replaceAll("-", "_"); 218 int lastDelimiter = tag.length(); 219 String isoSpecial = pTag.matches("^(he|yi|id).*") ? 220 pTag.replaceFirst("he", "iw") 221 .replaceFirst("yi", "ji") 222 .replaceFirst("id", "in") : ""; 223 224 // Add tag patterns including parents 225 while (true) { 226 pTag = pTag.substring(0, lastDelimiter); 227 files.addAll(includeLocaleFiles(pTag)); 228 229 if (!isoSpecial.isEmpty()) { 230 isoSpecial = isoSpecial.substring(0, lastDelimiter); 231 files.addAll(includeLocaleFiles(isoSpecial)); 232 } 233 234 lastDelimiter = pTag.lastIndexOf('_'); 235 if (lastDelimiter == -1) { 236 break; 237 } 238 } 239 240 final String lang = pTag; 241 242 // Add possible special locales of the COMPAT provider 243 files.addAll(Set.of(jaJPJPTag, noNONYTag, thTHTHTag).stream() 244 .filter(stag -> lang.equals(stag.substring(0,2))) 245 .flatMap(t -> includeLocaleFiles(t.replaceAll("-", "_")).stream()) 246 .collect(Collectors.toList())); 247 248 // Add possible UN.M49 files (unconditional for now) for each language 249 files.addAll(includeLocaleFiles(lang + "_[0-9]{3}")); 250 if (!isoSpecial.isEmpty()) { 251 files.addAll(includeLocaleFiles(isoSpecial + "_[0-9]{3}")); 252 } 253 254 // Add Thai BreakIterator related data files 255 if (lang.equals("th")) { 256 files.add(".+sun/text/resources/thai_dict"); 257 files.add(".+sun/text/resources/[^_]+BreakIteratorData_th"); 258 } 259 260 // Add Taiwan resource bundles for Hong Kong 261 if (tag.startsWith("zh-HK")) { 262 files.addAll(includeLocaleFiles("zh_TW")); 263 } 264 265 return files; 266 } 267 268 private List<String> includeLocaleFiles(String localeStr) { 269 return INCLUDE_LOCALE_FILES.stream() 270 .map(s -> s + localeStr + ".class") 271 .collect(Collectors.toList()); 272 } 273 274 private boolean stripUnsupportedLocales(byte[] bytes, ClassReader cr) { 275 char[] buf = new char[cr.getMaxStringLength()]; 276 boolean[] modified = new boolean[1]; 277 278 IntStream.range(1, cr.getItemCount()) 279 .map(item -> cr.getItem(item)) 280 .forEach(itemIndex -> { 281 if (bytes[itemIndex - 1] == 1 && // UTF-8 282 bytes[itemIndex + 2] == (byte)' ') { // fast check for leading space 283 int length = cr.readUnsignedShort(itemIndex); 284 byte[] b = new byte[length]; 285 System.arraycopy(bytes, itemIndex + 2, b, 0, length); 286 if (filterOutUnsupportedTags(b)) { 287 // copy back 288 System.arraycopy(b, 0, bytes, itemIndex + 2, length); 289 modified[0] = true; 290 } 291 } 292 }); 293 294 return modified[0]; 295 } 296 297 private boolean filterOutUnsupportedTags(byte[] b) { 298 List<Locale> locales; 299 300 try { 301 locales = Arrays.asList(new String(b).split(" ")).stream() 302 .filter(tag -> !tag.isEmpty()) 303 .map(IncludeLocalesPlugin::tagToLocale) 304 .collect(Collectors.toList()); 305 } catch (IllformedLocaleException ile) { 306 // Seems not an available locales string literal. 307 return false; 308 } 309 310 byte[] filteredBytes = filterLocales(locales).stream() 311 .collect(Collectors.joining(" ")) 312 .getBytes(); 313 System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length); 314 Arrays.fill(b, filteredBytes.length, b.length, (byte)' '); 315 return true; 316 } 317 318 private List<String> filterLocales(List<Locale> locales) { 319 List<String> ret = 320 Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream() 321 .map(loc -> 322 // Locale.filter() does not preserve the case, which is 323 // significant for "variant" equality. Retrieve the original 324 // locales from the pre-filtered list. 325 locales.stream() 326 .filter(l -> l.toString().equalsIgnoreCase(loc.toString())) 327 .findAny() 328 .orElse(Locale.ROOT) 329 .toLanguageTag()) 330 .collect(Collectors.toList()); 331 332 // no-NO-NY.toLanguageTag() returns "nn-NO", so specially handle it here 333 if (ret.contains("no-NO")) { 334 ret.add(noNONYTag); 335 } 336 337 return ret; 338 } 339 340 private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder(); 341 private static Locale tagToLocale(String tag) { 342 // ISO3166 compatibility 343 tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id"); 344 345 switch (tag) { 346 case jaJPJPTag: 347 return jaJPJP; 348 case noNONYTag: 349 return noNONY; 350 case thTHTHTag: 351 return thTHTH; 352 default: 353 LOCALE_BUILDER.clear(); 354 LOCALE_BUILDER.setLanguageTag(tag); 355 return LOCALE_BUILDER.build(); 356 } 357 } 358 }