1 /* 2 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package jdk.tools.jlink.internal.plugins; 26 27 import java.io.ByteArrayInputStream; 28 import java.util.Arrays; 29 import java.util.Collections; 30 import java.util.IllformedLocaleException; 31 import java.util.Locale; 32 import java.util.List; 33 import java.util.Map; 34 import java.util.Optional; 35 import java.util.Set; 36 import java.util.function.Predicate; 37 import java.util.regex.Pattern; 38 import java.util.stream.Collectors; 39 import java.util.stream.IntStream; 40 import java.util.stream.Stream; 41 import jdk.internal.org.objectweb.asm.ClassReader; 42 import jdk.tools.jlink.internal.ResourcePrevisitor; 43 import jdk.tools.jlink.internal.StringTable; 44 import jdk.tools.jlink.plugin.LinkModule; 45 import jdk.tools.jlink.plugin.ModuleEntry; 46 import jdk.tools.jlink.plugin.PluginException; 47 import jdk.tools.jlink.plugin.ModulePool; 48 import jdk.tools.jlink.plugin.TransformerPlugin; 49 50 /** 51 * Plugin to explicitly specify the locale data included in jdk.localedata 52 * module. This plugin provides a jlink command line option "--include-locales" 53 * with an argument. The argument is a list of BCP 47 language tags separated 54 * by a comma. E.g., 55 * 56 * "jlink --include-locales en,ja,*-IN" 57 * 58 * This option will include locale data for all available English and Japanese 59 * languages, and ones for the country of India. All other locale data are 60 * filtered out on the image creation. 61 * 62 * Here are a few assumptions: 63 * 64 * 0. All locale data in java.base are unconditionally included. 65 * 1. All the selective locale data are in jdk.localedata module 66 * 2. Their package names are constructed by appending ".ext" to 67 * the corresponding ones in java.base module. 68 * 3. Available locales string in LocaleDataMetaInfo class should 69 * start with at least one white space character, e.g., " ar ar-EG ..." 70 * ^ 71 */ 72 public final class IncludeLocalesPlugin implements TransformerPlugin, ResourcePrevisitor { 73 74 public static final String NAME = "include-locales"; 75 private static final String MODULENAME = "jdk.localedata"; 76 private static final Set<String> LOCALEDATA_PACKAGES = Set.of( 77 "sun.text.resources.cldr.ext", 78 "sun.text.resources.ext", 79 "sun.util.resources.cldr.ext", 80 "sun.util.resources.cldr.provider", 81 "sun.util.resources.ext", 82 "sun.util.resources.provider"); 83 private static final String METAINFONAME = "LocaleDataMetaInfo"; 84 private static final String META_FILES = 85 "*module-info.class," + 86 "*LocaleDataProvider.class," + 87 "*" + METAINFONAME + ".class,"; 88 private static final String INCLUDE_LOCALE_FILES = 89 "*sun/text/resources/ext/[^\\/]+_%%.class," + 90 "*sun/util/resources/ext/[^\\/]+_%%.class," + 91 "*sun/text/resources/cldr/ext/[^\\/]+_%%.class," + 92 "*sun/util/resources/cldr/ext/[^\\/]+_%%.class,"; 93 private Predicate<String> predicate; 94 private String userParam; 95 private List<Locale.LanguageRange> priorityList; 96 private List<Locale> available; 97 private List<String> filtered; 98 99 // Special COMPAT provider locales 100 private static final String jaJPJPTag = "ja-JP-JP"; 101 private static final String noNONYTag = "no-NO-NY"; 102 private static final String thTHTHTag = "th-TH-TH"; 103 private static final Locale jaJPJP = new Locale("ja", "JP", "JP"); 104 private static final Locale noNONY = new Locale("no", "NO", "NY"); 105 private static final Locale thTHTH = new Locale("th", "TH", "TH"); 106 107 @Override 108 public String getName() { 109 return NAME; 110 } 111 112 @Override 113 public void visit(ModulePool in, ModulePool out) { 114 in.transformAndCopy((resource) -> { 115 if (resource.getModule().equals(MODULENAME)) { 116 String path = resource.getPath(); 117 resource = predicate.test(path) ? resource: null; 118 if (resource != null && 119 resource.getType().equals(ModuleEntry.Type.CLASS_OR_RESOURCE)) { 120 byte[] bytes = resource.getBytes(); 121 ClassReader cr = new ClassReader(bytes); 122 if (Arrays.stream(cr.getInterfaces()) 123 .anyMatch(i -> i.contains(METAINFONAME)) && 124 stripUnsupportedLocales(bytes, cr)) { 125 resource = ModuleEntry.create(MODULENAME, path, 126 resource.getType(), 127 new ByteArrayInputStream(bytes), bytes.length); 128 } 129 } 130 } 131 return resource; 132 }, out); 133 } 134 135 @Override 136 public Category getType() { 137 return Category.FILTER; 138 } 139 140 @Override 141 public String getDescription() { 142 return PluginsResourceBundle.getDescription(NAME); 143 } 144 145 @Override 146 public boolean hasArguments() { 147 return true; 148 } 149 150 @Override 151 public String getArgumentsDescription() { 152 return PluginsResourceBundle.getArgument(NAME); 153 } 154 155 @Override 156 public void configure(Map<String, String> config) { 157 userParam = config.get(NAME); 158 priorityList = Arrays.stream(userParam.split(",")) 159 .map(s -> { 160 try { 161 return new Locale.LanguageRange(s); 162 } catch (IllegalArgumentException iae) { 163 throw new IllegalArgumentException(String.format( 164 PluginsResourceBundle.getMessage(NAME + ".invalidtag"), s)); 165 } 166 }) 167 .collect(Collectors.toList()); 168 } 169 170 @Override 171 public void previsit(ModulePool resources, StringTable strings) { 172 final Pattern p = Pattern.compile(".*((Data_)|(Names_))(?<tag>.*)\\.class"); 173 Optional<LinkModule> optMod = resources.findModule(MODULENAME); 174 175 // jdk.localedata module validation 176 if (optMod.isPresent()) { 177 LinkModule module = optMod.get(); 178 Set<String> packages = module.getAllPackages(); 179 if (!packages.containsAll(LOCALEDATA_PACKAGES)) { 180 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".missingpackages") + 181 LOCALEDATA_PACKAGES.stream() 182 .filter(pn -> !packages.contains(pn)) 183 .collect(Collectors.joining(",\n\t"))); 184 } 185 186 available = Stream.concat(module.entries() 187 .map(md -> p.matcher(md.getPath())) 188 .filter(m -> m.matches()) 189 .map(m -> m.group("tag").replaceAll("_", "-")), 190 Stream.concat(Stream.of(jaJPJPTag), Stream.of(thTHTHTag))) 191 .distinct() 192 .sorted() 193 .map(IncludeLocalesPlugin::tagToLocale) 194 .collect(Collectors.toList()); 195 } else { 196 // jdk.localedata is not added. 197 throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound")); 198 } 199 filtered = filterLocales(available); 200 201 if (filtered.isEmpty()) { 202 throw new PluginException( 203 String.format(PluginsResourceBundle.getMessage(NAME + ".nomatchinglocales"), userParam)); 204 } 205 206 String value = META_FILES + filtered.stream() 207 .map(s -> includeLocaleFilePatterns(s)) 208 .collect(Collectors.joining(",")); 209 predicate = ResourceFilter.includeFilter(value); 210 } 211 212 private String includeLocaleFilePatterns(String tag) { 213 String pTag = tag.replaceAll("-", "_"); 214 String files = ""; 215 int lastDelimiter = tag.length(); 216 String isoSpecial = pTag.matches("^(he|yi|id).*") ? 217 pTag.replaceFirst("he", "iw") 218 .replaceFirst("yi", "ji") 219 .replaceFirst("id", "in") : ""; 220 221 // Add tag patterns including parents 222 while (true) { 223 pTag = pTag.substring(0, lastDelimiter); 224 files += INCLUDE_LOCALE_FILES.replaceAll("%%", pTag); 225 226 if (!isoSpecial.isEmpty()) { 227 isoSpecial = isoSpecial.substring(0, lastDelimiter); 228 files += INCLUDE_LOCALE_FILES.replaceAll("%%", isoSpecial); 229 } 230 231 lastDelimiter = pTag.lastIndexOf('_'); 232 if (lastDelimiter == -1) { 233 break; 234 } 235 } 236 237 final String lang = pTag; 238 239 // Add possible special locales of the COMPAT provider 240 files += Set.of(jaJPJPTag, noNONYTag, thTHTHTag).stream() 241 .filter(stag -> lang.equals(stag.substring(0,2))) 242 .map(t -> INCLUDE_LOCALE_FILES.replaceAll("%%", t.replaceAll("-", "_"))) 243 .collect(Collectors.joining(",")); 244 245 // Add possible UN.M49 files (unconditional for now) for each language 246 files += INCLUDE_LOCALE_FILES.replaceAll("%%", lang + "_[0-9]{3}"); 247 if (!isoSpecial.isEmpty()) { 248 files += INCLUDE_LOCALE_FILES.replaceAll("%%", isoSpecial + "_[0-9]{3}"); 249 } 250 251 // Add Thai BreakIterator related data files 252 if (lang.equals("th")) { 253 files += "*sun/text/resources/thai_dict," + 254 "*sun/text/resources/[^\\/]+BreakIteratorData_th,"; 255 } 256 257 // Add Taiwan resource bundles for Hong Kong 258 if (tag.startsWith("zh-HK")) { 259 files += INCLUDE_LOCALE_FILES.replaceAll("%%", "zh_TW"); 260 } 261 262 return files; 263 } 264 265 private boolean stripUnsupportedLocales(byte[] bytes, ClassReader cr) { 266 char[] buf = new char[cr.getMaxStringLength()]; 267 boolean[] modified = new boolean[1]; 268 269 IntStream.range(1, cr.getItemCount()) 270 .map(item -> cr.getItem(item)) 271 .forEach(itemIndex -> { 272 if (bytes[itemIndex - 1] == 1 && // UTF-8 273 bytes[itemIndex + 2] == (byte)' ') { // fast check for leading space 274 int length = cr.readUnsignedShort(itemIndex); 275 byte[] b = new byte[length]; 276 System.arraycopy(bytes, itemIndex + 2, b, 0, length); 277 if (filterOutUnsupportedTags(b)) { 278 // copy back 279 System.arraycopy(b, 0, bytes, itemIndex + 2, length); 280 modified[0] = true; 281 } 282 } 283 }); 284 285 return modified[0]; 286 } 287 288 private boolean filterOutUnsupportedTags(byte[] b) { 289 List<Locale> locales; 290 291 try { 292 locales = Arrays.asList(new String(b).split(" ")).stream() 293 .filter(tag -> !tag.isEmpty()) 294 .map(IncludeLocalesPlugin::tagToLocale) 295 .collect(Collectors.toList()); 296 } catch (IllformedLocaleException ile) { 297 // Seems not an available locales string literal. 298 return false; 299 } 300 301 byte[] filteredBytes = filterLocales(locales).stream() 302 .collect(Collectors.joining(" ")) 303 .getBytes(); 304 System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length); 305 Arrays.fill(b, filteredBytes.length, b.length, (byte)' '); 306 return true; 307 } 308 309 private List<String> filterLocales(List<Locale> locales) { 310 List<String> ret = 311 Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream() 312 .map(loc -> 313 // Locale.filter() does not preserve the case, which is 314 // significant for "variant" equality. Retrieve the original 315 // locales from the pre-filtered list. 316 locales.stream() 317 .filter(l -> l.toString().equalsIgnoreCase(loc.toString())) 318 .findAny() 319 .orElse(Locale.ROOT) 320 .toLanguageTag()) 321 .collect(Collectors.toList()); 322 323 // no-NO-NY.toLanguageTag() returns "nn-NO", so specially handle it here 324 if (ret.contains("no-NO")) { 325 ret.add(noNONYTag); 326 } 327 328 return ret; 329 } 330 331 private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder(); 332 private static Locale tagToLocale(String tag) { 333 // ISO3166 compatibility 334 tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id"); 335 336 switch (tag) { 337 case jaJPJPTag: 338 return jaJPJP; 339 case noNONYTag: 340 return noNONY; 341 case thTHTHTag: 342 return thTHTH; 343 default: 344 LOCALE_BUILDER.clear(); 345 LOCALE_BUILDER.setLanguageTag(tag); 346 return LOCALE_BUILDER.build(); 347 } 348 } 349 }