1 /* 2 * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.cldrconverter; 27 28 import static build.tools.cldrconverter.Bundle.jreTimeZoneNames; 29 import build.tools.cldrconverter.BundleGenerator.BundleType; 30 import java.io.File; 31 import java.io.IOException; 32 import java.io.UncheckedIOException; 33 import java.nio.file.*; 34 import java.text.MessageFormat; 35 import java.time.*; 36 import java.util.*; 37 import java.util.ResourceBundle.Control; 38 import java.util.logging.Level; 39 import java.util.logging.Logger; 40 import java.util.stream.Collectors; 41 import java.util.stream.IntStream; 42 import java.util.stream.Stream; 43 import javax.xml.parsers.SAXParser; 44 import javax.xml.parsers.SAXParserFactory; 45 import org.xml.sax.SAXNotRecognizedException; 46 import org.xml.sax.SAXNotSupportedException; 47 48 49 /** 50 * Converts locale data from "Locale Data Markup Language" format to 51 * JRE resource bundle format. LDML is the format used by the Common 52 * Locale Data Repository maintained by the Unicode Consortium. 53 */ 54 public class CLDRConverter { 55 56 static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd"; 57 static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd"; 58 static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd"; 59 60 61 private static String CLDR_BASE; 62 static String LOCAL_LDML_DTD; 63 static String LOCAL_SPPL_LDML_DTD; 64 static String LOCAL_BCP47_LDML_DTD; 65 private static String SOURCE_FILE_DIR; 66 private static String SPPL_SOURCE_FILE; 67 private static String SPPL_META_SOURCE_FILE; 68 private static String NUMBERING_SOURCE_FILE; 69 private static String METAZONES_SOURCE_FILE; 70 private static String LIKELYSUBTAGS_SOURCE_FILE; 71 private static String TIMEZONE_SOURCE_FILE; 72 private static String WINZONES_SOURCE_FILE; 73 private static String PLURALS_SOURCE_FILE; 74 static String DESTINATION_DIR = "build/gensrc"; 75 76 static final String LOCALE_NAME_PREFIX = "locale.displayname."; 77 static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator"; 78 static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype"; 79 static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key."; 80 static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type."; 81 static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca."; 82 static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol."; 83 static final String CURRENCY_NAME_PREFIX = "currency.displayname."; 84 static final String CALENDAR_NAME_PREFIX = "calendarname."; 85 static final String CALENDAR_FIRSTDAY_PREFIX = "firstDay."; 86 static final String CALENDAR_MINDAYS_PREFIX = "minDays."; 87 static final String TIMEZONE_ID_PREFIX = "timezone.id."; 88 static final String EXEMPLAR_CITY_PREFIX = "timezone.excity."; 89 static final String ZONE_NAME_PREFIX = "timezone.displayname."; 90 static final String METAZONE_ID_PREFIX = "metazone.id."; 91 static final String PARENT_LOCALE_PREFIX = "parentLocale."; 92 static final String[] EMPTY_ZONE = {"", "", "", "", "", ""}; 93 94 private static SupplementDataParseHandler handlerSuppl; 95 private static LikelySubtagsParseHandler handlerLikelySubtags; 96 private static WinZonesParseHandler handlerWinZones; 97 static PluralsParseHandler handlerPlurals; 98 static SupplementalMetadataParseHandler handlerSupplMeta; 99 static NumberingSystemsParseHandler handlerNumbering; 100 static MetaZonesParseHandler handlerMetaZones; 101 static TimeZoneParseHandler handlerTimeZone; 102 private static BundleGenerator bundleGenerator; 103 104 // java.base module related 105 static boolean isBaseModule = false; 106 static final Set<Locale> BASE_LOCALES = new HashSet<>(); 107 108 // "parentLocales" map 109 private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>(); 110 private static final ResourceBundle.Control defCon = 111 ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT); 112 113 private static Set<String> AVAILABLE_TZIDS; 114 private static String zoneNameTempFile; 115 private static String tzDataDir; 116 private static final Map<String, String> canonicalTZMap = new HashMap<>(); 117 118 static enum DraftType { 119 UNCONFIRMED, 120 PROVISIONAL, 121 CONTRIBUTED, 122 APPROVED; 123 124 private static final Map<String, DraftType> map = new HashMap<>(); 125 static { 126 for (DraftType dt : values()) { 127 map.put(dt.getKeyword(), dt); 128 } 129 } 130 static private DraftType defaultType = CONTRIBUTED; 131 132 private final String keyword; 133 134 private DraftType() { 135 keyword = this.name().toLowerCase(Locale.ROOT); 136 137 } 138 139 static DraftType forKeyword(String keyword) { 140 return map.get(keyword); 141 } 142 143 static DraftType getDefault() { 144 return defaultType; 145 } 146 147 static void setDefault(String keyword) { 148 defaultType = Objects.requireNonNull(forKeyword(keyword)); 149 } 150 151 String getKeyword() { 152 return keyword; 153 } 154 } 155 156 static boolean USE_UTF8 = false; 157 private static boolean verbose; 158 159 private CLDRConverter() { 160 // no instantiation 161 } 162 163 @SuppressWarnings("AssignmentToForLoopParameter") 164 public static void main(String[] args) throws Exception { 165 if (args.length != 0) { 166 String currentArg = null; 167 try { 168 for (int i = 0; i < args.length; i++) { 169 currentArg = args[i]; 170 switch (currentArg) { 171 case "-draft": 172 String draftDataType = args[++i]; 173 try { 174 DraftType.setDefault(draftDataType); 175 } catch (NullPointerException e) { 176 severe("Error: incorrect draft value: %s%n", draftDataType); 177 System.exit(1); 178 } 179 info("Using the specified data type: %s%n", draftDataType); 180 break; 181 182 case "-base": 183 // base directory for input files 184 CLDR_BASE = args[++i]; 185 if (!CLDR_BASE.endsWith("/")) { 186 CLDR_BASE += "/"; 187 } 188 break; 189 190 case "-baselocales": 191 // base locales 192 setupBaseLocales(args[++i]); 193 break; 194 195 case "-basemodule": 196 // indicates java.base module resource generation 197 isBaseModule = true; 198 break; 199 200 case "-o": 201 // output directory 202 DESTINATION_DIR = args[++i]; 203 break; 204 205 case "-utf8": 206 USE_UTF8 = true; 207 break; 208 209 case "-verbose": 210 verbose = true; 211 break; 212 213 case "-zntempfile": 214 zoneNameTempFile = args[++i]; 215 break; 216 217 case "-tzdatadir": 218 tzDataDir = args[++i]; 219 break; 220 221 case "-help": 222 usage(); 223 System.exit(0); 224 break; 225 226 default: 227 throw new RuntimeException(); 228 } 229 } 230 } catch (RuntimeException e) { 231 severe("unknown or imcomplete arg(s): " + currentArg); 232 usage(); 233 System.exit(1); 234 } 235 } 236 237 // Set up path names 238 LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd"; 239 LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd"; 240 LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd"; 241 SOURCE_FILE_DIR = CLDR_BASE + "/main"; 242 SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml"; 243 LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml"; 244 NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml"; 245 METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml"; 246 TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml"; 247 SPPL_META_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalMetadata.xml"; 248 WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml"; 249 PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml"; 250 251 if (BASE_LOCALES.isEmpty()) { 252 setupBaseLocales("en-US"); 253 } 254 255 bundleGenerator = new ResourceBundleGenerator(); 256 257 // Parse data independent of locales 258 parseSupplemental(); 259 parseBCP47(); 260 261 List<Bundle> bundles = readBundleList(); 262 convertBundles(bundles); 263 264 if (isBaseModule) { 265 // Generate java.time.format.ZoneName.java 266 generateZoneName(); 267 268 // Generate Windows tzmappings 269 generateWindowsTZMappings(); 270 271 // Generate Plural rules 272 generatePluralRules(); 273 } 274 } 275 276 private static void usage() { 277 errout("Usage: java CLDRConverter [options]%n" 278 + "\t-help output this usage message and exit%n" 279 + "\t-verbose output information%n" 280 + "\t-draft [contributed | approved | provisional | unconfirmed]%n" 281 + "\t\t draft level for using data (default: contributed)%n" 282 + "\t-base dir base directory for CLDR input files%n" 283 + "\t-basemodule generates bundles that go into java.base module%n" 284 + "\t-baselocales loc(,loc)* locales that go into the base module%n" 285 + "\t-o dir output directory (default: ./build/gensrc)%n" 286 + "\t-zntempfile template file for java.time.format.ZoneName.java%n" 287 + "\t-tzdatadir tzdata directory for java.time.format.ZoneName.java%n" 288 + "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n"); 289 } 290 291 static void info(String fmt, Object... args) { 292 if (verbose) { 293 System.out.printf(fmt, args); 294 } 295 } 296 297 static void info(String msg) { 298 if (verbose) { 299 System.out.println(msg); 300 } 301 } 302 303 static void warning(String fmt, Object... args) { 304 System.err.print("Warning: "); 305 System.err.printf(fmt, args); 306 } 307 308 static void warning(String msg) { 309 System.err.print("Warning: "); 310 errout(msg); 311 } 312 313 static void severe(String fmt, Object... args) { 314 System.err.print("Error: "); 315 System.err.printf(fmt, args); 316 } 317 318 static void severe(String msg) { 319 System.err.print("Error: "); 320 errout(msg); 321 } 322 323 private static void errout(String msg) { 324 if (msg.contains("%n")) { 325 System.err.printf(msg); 326 } else { 327 System.err.println(msg); 328 } 329 } 330 331 /** 332 * Configure the parser to allow access to DTDs on the file system. 333 */ 334 private static void enableFileAccess(SAXParser parser) throws SAXNotSupportedException { 335 try { 336 parser.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "file"); 337 } catch (SAXNotRecognizedException ignore) { 338 // property requires >= JAXP 1.5 339 } 340 } 341 342 private static List<Bundle> readBundleList() throws Exception { 343 List<Bundle> retList = new ArrayList<>(); 344 Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR); 345 try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) { 346 for (Path entry : dirStr) { 347 String fileName = entry.getFileName().toString(); 348 if (fileName.endsWith(".xml")) { 349 String id = fileName.substring(0, fileName.indexOf('.')); 350 Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id)); 351 StringBuilder sb = getCandLocales(cldrLoc); 352 if (sb.indexOf("root") == -1) { 353 sb.append("root"); 354 } 355 retList.add(new Bundle(id, sb.toString(), null, null)); 356 } 357 } 358 } 359 360 // Sort the bundles based on id. This will make sure all the parent bundles are 361 // processed first, e.g., for en_GB bundle, en_001, and "root" comes before 362 // en_GB. In order for "root" to come at the beginning, "root" is replaced with 363 // empty string on comparison. 364 retList.sort((o1, o2) -> { 365 String id1 = o1.getID(); 366 String id2 = o2.getID(); 367 if(id1.equals("root")) { 368 id1 = ""; 369 } 370 if(id2.equals("root")) { 371 id2 = ""; 372 } 373 return id1.compareTo(id2); 374 }); 375 return retList; 376 } 377 378 private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>(); 379 380 private static Map<String, SortedSet<String>> metaInfo = new HashMap<>(); 381 382 static { 383 // For generating information on supported locales. 384 metaInfo.put("AvailableLocales", new TreeSet<>()); 385 } 386 387 static Map<String, Object> getCLDRBundle(String id) throws Exception { 388 Map<String, Object> bundle = cldrBundles.get(id); 389 if (bundle != null) { 390 return bundle; 391 } 392 File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml"); 393 if (!file.exists()) { 394 // Skip if the file doesn't exist. 395 return Collections.emptyMap(); 396 } 397 398 info("..... main directory ....."); 399 LDMLParseHandler handler = new LDMLParseHandler(id); 400 parseLDMLFile(file, handler); 401 402 bundle = handler.getData(); 403 cldrBundles.put(id, bundle); 404 405 if (id.equals("root")) { 406 // Calendar data (firstDayOfWeek & minDaysInFirstWeek) 407 bundle = handlerSuppl.getData("root"); 408 if (bundle != null) { 409 //merge two maps into one map 410 Map<String, Object> temp = cldrBundles.remove(id); 411 bundle.putAll(temp); 412 cldrBundles.put(id, bundle); 413 } 414 } 415 return bundle; 416 } 417 418 // Parsers for data in "supplemental" directory 419 // 420 private static void parseSupplemental() throws Exception { 421 // Parse SupplementalData file and store the information in the HashMap 422 // Calendar information such as firstDay and minDay are stored in 423 // supplementalData.xml as of CLDR1.4. Individual territory is listed 424 // with its ISO 3166 country code while default is listed using UNM49 425 // region and composition numerical code (001 for World.) 426 // 427 // SupplementalData file also provides the "parent" locales which 428 // are othrwise not to be fallen back. Process them here as well. 429 // 430 handlerSuppl = new SupplementDataParseHandler(); 431 parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl); 432 Map<String, Object> parentData = handlerSuppl.getData("root"); 433 parentData.keySet().stream() 434 .filter(key -> key.startsWith(PARENT_LOCALE_PREFIX)) 435 .forEach(key -> { 436 parentLocalesMap.put(key, new TreeSet( 437 Arrays.asList(((String)parentData.get(key)).split(" ")))); 438 }); 439 440 // Parse numberingSystems to get digit zero character information. 441 handlerNumbering = new NumberingSystemsParseHandler(); 442 parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering); 443 444 // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names 445 handlerMetaZones = new MetaZonesParseHandler(); 446 parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones); 447 448 // Parse likelySubtags 449 handlerLikelySubtags = new LikelySubtagsParseHandler(); 450 parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags); 451 452 // Parse supplementalMetadata 453 // Currently interested in deprecated time zone ids and language aliases. 454 handlerSupplMeta = new SupplementalMetadataParseHandler(); 455 parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta); 456 457 // Parse windowsZones 458 handlerWinZones = new WinZonesParseHandler(); 459 parseLDMLFile(new File(WINZONES_SOURCE_FILE), handlerWinZones); 460 461 // Parse plurals 462 handlerPlurals = new PluralsParseHandler(); 463 parseLDMLFile(new File(PLURALS_SOURCE_FILE), handlerPlurals); 464 } 465 466 // Parsers for data in "bcp47" directory 467 // 468 private static void parseBCP47() throws Exception { 469 // Parse timezone 470 handlerTimeZone = new TimeZoneParseHandler(); 471 parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone); 472 473 // canonical tz name map 474 // alias -> primary 475 handlerTimeZone.getData().forEach((k, v) -> { 476 String[] ids = ((String)v).split("\\s"); 477 for (int i = 1; i < ids.length; i++) { 478 canonicalTZMap.put(ids[i], ids[0]); 479 } 480 }); 481 } 482 483 private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception { 484 info("..... Parsing " + srcfile.getName() + " ....."); 485 SAXParserFactory pf = SAXParserFactory.newInstance(); 486 pf.setValidating(true); 487 SAXParser parser = pf.newSAXParser(); 488 enableFileAccess(parser); 489 parser.parse(srcfile, handler); 490 } 491 492 private static StringBuilder getCandLocales(Locale cldrLoc) { 493 List<Locale> candList = getCandidateLocales(cldrLoc); 494 StringBuilder sb = new StringBuilder(); 495 for (Locale loc : candList) { 496 if (!loc.equals(Locale.ROOT)) { 497 sb.append(toLocaleName(loc.toLanguageTag())); 498 sb.append(","); 499 } 500 } 501 return sb; 502 } 503 504 private static List<Locale> getCandidateLocales(Locale cldrLoc) { 505 List<Locale> candList = new ArrayList<>(); 506 candList = applyParentLocales("", defCon.getCandidateLocales("", cldrLoc)); 507 return candList; 508 } 509 510 private static void convertBundles(List<Bundle> bundles) throws Exception { 511 // parent locales map. The mappings are put in base metaInfo file 512 // for now. 513 if (isBaseModule) { 514 metaInfo.putAll(parentLocalesMap); 515 } 516 517 for (Bundle bundle : bundles) { 518 // Get the target map, which contains all the data that should be 519 // visible for the bundle's locale 520 521 Map<String, Object> targetMap = bundle.getTargetMap(); 522 523 EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes(); 524 525 if (bundle.isRoot()) { 526 // Add DateTimePatternChars because CLDR no longer supports localized patterns. 527 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ"); 528 } 529 530 // Now the map contains just the entries that need to be in the resources bundles. 531 // Go ahead and generate them. 532 if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) { 533 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID()); 534 if (!localeNamesMap.isEmpty() || bundle.isRoot()) { 535 bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN); 536 } 537 } 538 if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) { 539 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies()); 540 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) { 541 bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN); 542 } 543 } 544 if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) { 545 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID()); 546 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) { 547 bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE); 548 } 549 } 550 if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) { 551 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID()); 552 if (!calendarDataMap.isEmpty() || bundle.isRoot()) { 553 bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN); 554 } 555 } 556 if (bundleTypes.contains(Bundle.Type.FORMATDATA)) { 557 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID()); 558 if (!formatDataMap.isEmpty() || bundle.isRoot()) { 559 bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN); 560 } 561 } 562 563 // For AvailableLocales 564 metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID())); 565 addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID()); 566 } 567 bundleGenerator.generateMetaInfo(metaInfo); 568 } 569 570 static final Map<String, String> aliases = new HashMap<>(); 571 572 /** 573 * Translate the aliases into the real entries in the bundle map. 574 */ 575 static void handleAliases(Map<String, Object> bundleMap) { 576 Set bundleKeys = bundleMap.keySet(); 577 try { 578 for (String key : aliases.keySet()) { 579 String targetKey = aliases.get(key); 580 if (bundleKeys.contains(targetKey)) { 581 bundleMap.putIfAbsent(key, bundleMap.get(targetKey)); 582 } 583 } 584 } catch (Exception ex) { 585 Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex); 586 } 587 } 588 589 /* 590 * Returns the language portion of the given id. 591 * If id is "root", "" is returned. 592 */ 593 static String getLanguageCode(String id) { 594 return "root".equals(id) ? "" : Locale.forLanguageTag(id.replaceAll("_", "-")).getLanguage(); 595 } 596 597 /** 598 * Examine if the id includes the country (territory) code. If it does, it returns 599 * the country code. 600 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 601 * It does NOT return UN M.49 code, e.g., '001', as those three digit numbers cannot 602 * be translated into package names. 603 */ 604 static String getCountryCode(String id) { 605 String rgn = getRegionCode(id); 606 return rgn.length() == 2 ? rgn: null; 607 } 608 609 /** 610 * Examine if the id includes the region code. If it does, it returns 611 * the region code. 612 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 613 * It DOES return UN M.49 code, e.g., '001', as well as ISO 3166 two letter country codes. 614 */ 615 static String getRegionCode(String id) { 616 return Locale.forLanguageTag(id.replaceAll("_", "-")).getCountry(); 617 } 618 619 private static class KeyComparator implements Comparator<String> { 620 static KeyComparator INSTANCE = new KeyComparator(); 621 622 private KeyComparator() { 623 } 624 625 @Override 626 public int compare(String o1, String o2) { 627 int len1 = o1.length(); 628 int len2 = o2.length(); 629 if (!isDigit(o1.charAt(0)) && !isDigit(o2.charAt(0))) { 630 // Shorter string comes first unless either starts with a digit. 631 if (len1 < len2) { 632 return -1; 633 } 634 if (len1 > len2) { 635 return 1; 636 } 637 } 638 return o1.compareTo(o2); 639 } 640 641 private boolean isDigit(char c) { 642 return c >= '0' && c <= '9'; 643 } 644 } 645 646 private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) { 647 Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE); 648 for (String key : map.keySet()) { 649 if (key.startsWith(LOCALE_NAME_PREFIX)) { 650 switch (key) { 651 case LOCALE_SEPARATOR: 652 localeNames.put("ListCompositionPattern", map.get(key)); 653 break; 654 case LOCALE_KEYTYPE: 655 localeNames.put("ListKeyTypePattern", map.get(key)); 656 break; 657 default: 658 localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key)); 659 break; 660 } 661 } 662 } 663 664 if (id.equals("root")) { 665 // Add display name pattern, which is not in CLDR 666 localeNames.put("DisplayNamePattern", "{0,choice,0#|1#{1}|2#{1} ({2})}"); 667 } 668 669 return localeNames; 670 } 671 672 @SuppressWarnings("AssignmentToForLoopParameter") 673 private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names) 674 throws Exception { 675 Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE); 676 for (String key : map.keySet()) { 677 if (key.startsWith(CURRENCY_NAME_PREFIX)) { 678 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key)); 679 } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) { 680 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key)); 681 } 682 } 683 return currencyNames; 684 } 685 686 private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) { 687 Map<String, Object> names = new HashMap<>(); 688 689 // Copy over missing time zone ids from JRE for English locale 690 if (id.equals("en")) { 691 Map<String[], String> jreMetaMap = new HashMap<>(); 692 jreTimeZoneNames.stream().forEach(e -> { 693 String tzid = (String)e[0]; 694 String[] data = (String[])e[1]; 695 696 if (map.get(TIMEZONE_ID_PREFIX + tzid) == null && 697 handlerMetaZones.get(tzid) == null || 698 handlerMetaZones.get(tzid) != null && 699 map.get(METAZONE_ID_PREFIX + handlerMetaZones.get(tzid)) == null) { 700 701 // First, check the alias 702 String canonID = canonicalTZMap.get(tzid); 703 if (canonID != null && !tzid.equals(canonID)) { 704 Object value = map.get(TIMEZONE_ID_PREFIX + canonID); 705 if (value != null) { 706 names.put(tzid, value); 707 return; 708 } else { 709 String meta = handlerMetaZones.get(canonID); 710 if (meta != null) { 711 value = map.get(METAZONE_ID_PREFIX + meta); 712 if (value != null) { 713 names.put(tzid, meta); 714 return; 715 } 716 } 717 } 718 } 719 720 // Check the CLDR meta key 721 Optional<Map.Entry<String, String>> cldrMeta = 722 handlerMetaZones.getData().entrySet().stream() 723 .filter(me -> 724 Arrays.deepEquals(data, 725 (String[])map.get(METAZONE_ID_PREFIX + me.getValue()))) 726 .findAny(); 727 cldrMeta.ifPresentOrElse(meta -> names.put(tzid, meta.getValue()), () -> { 728 // Check the JRE meta key, add if there is not. 729 Optional<Map.Entry<String[], String>> jreMeta = 730 jreMetaMap.entrySet().stream() 731 .filter(jm -> Arrays.deepEquals(data, jm.getKey())) 732 .findAny(); 733 jreMeta.ifPresentOrElse(meta -> names.put(tzid, meta.getValue()), () -> { 734 String metaName = "JRE_" + tzid.replaceAll("[/-]", "_"); 735 names.put(METAZONE_ID_PREFIX + metaName, data); 736 names.put(tzid, metaName); 737 }); 738 }); 739 } 740 }); 741 } 742 743 getAvailableZoneIds().stream().forEach(tzid -> { 744 // If the tzid is deprecated, get the data for the replacement id 745 String tzKey = Optional.ofNullable((String)handlerSupplMeta.get(tzid)) 746 .orElse(tzid); 747 Object data = map.get(TIMEZONE_ID_PREFIX + tzKey); 748 749 if (data instanceof String[]) { 750 names.put(tzid, data); 751 } else { 752 String meta = handlerMetaZones.get(tzKey); 753 if (meta != null) { 754 String metaKey = METAZONE_ID_PREFIX + meta; 755 data = map.get(metaKey); 756 if (data instanceof String[]) { 757 // Keep the metazone prefix here. 758 names.put(metaKey, data); 759 names.put(tzid, meta); 760 } 761 } 762 } 763 }); 764 765 // exemplar cities. 766 Map<String, Object> exCities = map.entrySet().stream() 767 .filter(e -> e.getKey().startsWith(CLDRConverter.EXEMPLAR_CITY_PREFIX)) 768 .collect(Collectors 769 .toMap(Map.Entry::getKey, Map.Entry::getValue)); 770 names.putAll(exCities); 771 772 if (!id.equals("en") && 773 !names.isEmpty()) { 774 // CLDR does not have UTC entry, so add it here. 775 names.put("UTC", EMPTY_ZONE); 776 777 // no metazone zones 778 Arrays.asList(handlerMetaZones.get(MetaZonesParseHandler.NO_METAZONE_KEY) 779 .split("\\s")).stream() 780 .forEach(tz -> { 781 names.put(tz, EMPTY_ZONE); 782 }); 783 } 784 785 return names; 786 } 787 788 /** 789 * Extracts the language independent calendar data. Each of the two keys, 790 * "firstDayOfWeek" and "minimalDaysInFirstWeek" has a string value consists of 791 * one or multiple occurrences of: 792 * i: rg1 rg2 ... rgn; 793 * where "i" is the data for the following regions (delimited by a space) after 794 * ":", and ends with a ";". 795 */ 796 private static Map<String, Object> extractCalendarData(Map<String, Object> map, String id) { 797 Map<String, Object> calendarData = new LinkedHashMap<>(); 798 if (id.equals("root")) { 799 calendarData.put("firstDayOfWeek", 800 IntStream.range(1, 8) 801 .mapToObj(String::valueOf) 802 .filter(d -> map.keySet().contains(CALENDAR_FIRSTDAY_PREFIX + d)) 803 .map(d -> d + ": " + map.get(CALENDAR_FIRSTDAY_PREFIX + d)) 804 .collect(Collectors.joining(";"))); 805 calendarData.put("minimalDaysInFirstWeek", 806 IntStream.range(0, 7) 807 .mapToObj(String::valueOf) 808 .filter(d -> map.keySet().contains(CALENDAR_MINDAYS_PREFIX + d)) 809 .map(d -> d + ": " + map.get(CALENDAR_MINDAYS_PREFIX + d)) 810 .collect(Collectors.joining(";"))); 811 } 812 return calendarData; 813 } 814 815 static final String[] FORMAT_DATA_ELEMENTS = { 816 "MonthNames", 817 "standalone.MonthNames", 818 "MonthAbbreviations", 819 "standalone.MonthAbbreviations", 820 "MonthNarrows", 821 "standalone.MonthNarrows", 822 "DayNames", 823 "standalone.DayNames", 824 "DayAbbreviations", 825 "standalone.DayAbbreviations", 826 "DayNarrows", 827 "standalone.DayNarrows", 828 "QuarterNames", 829 "standalone.QuarterNames", 830 "QuarterAbbreviations", 831 "standalone.QuarterAbbreviations", 832 "QuarterNarrows", 833 "standalone.QuarterNarrows", 834 "AmPmMarkers", 835 "narrow.AmPmMarkers", 836 "abbreviated.AmPmMarkers", 837 "long.Eras", 838 "Eras", 839 "narrow.Eras", 840 "field.era", 841 "field.year", 842 "field.month", 843 "field.week", 844 "field.weekday", 845 "field.dayperiod", 846 "field.hour", 847 "timezone.hourFormat", 848 "timezone.gmtFormat", 849 "timezone.gmtZeroFormat", 850 "timezone.regionFormat", 851 "timezone.regionFormat.daylight", 852 "timezone.regionFormat.standard", 853 "field.minute", 854 "field.second", 855 "field.zone", 856 "TimePatterns", 857 "DatePatterns", 858 "DateTimePatterns", 859 "DateTimePatternChars" 860 }; 861 862 private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) { 863 Map<String, Object> formatData = new LinkedHashMap<>(); 864 for (CalendarType calendarType : CalendarType.values()) { 865 if (calendarType == CalendarType.GENERIC) { 866 continue; 867 } 868 String prefix = calendarType.keyElementName(); 869 for (String element : FORMAT_DATA_ELEMENTS) { 870 String key = prefix + element; 871 copyIfPresent(map, "java.time." + key, formatData); 872 copyIfPresent(map, key, formatData); 873 } 874 } 875 876 for (String key : map.keySet()) { 877 // Copy available calendar names 878 if (key.startsWith(CLDRConverter.LOCALE_TYPE_PREFIX_CA)) { 879 String type = key.substring(CLDRConverter.LOCALE_TYPE_PREFIX_CA.length()); 880 for (CalendarType calendarType : CalendarType.values()) { 881 if (calendarType == CalendarType.GENERIC) { 882 continue; 883 } 884 if (type.equals(calendarType.lname())) { 885 Object value = map.get(key); 886 String dataKey = key.replace(LOCALE_TYPE_PREFIX_CA, 887 CALENDAR_NAME_PREFIX); 888 formatData.put(dataKey, value); 889 String ukey = CALENDAR_NAME_PREFIX + calendarType.uname(); 890 if (!dataKey.equals(ukey)) { 891 formatData.put(ukey, value); 892 } 893 } 894 } 895 } 896 } 897 898 copyIfPresent(map, "DefaultNumberingSystem", formatData); 899 900 @SuppressWarnings("unchecked") 901 List<String> numberingScripts = (List<String>) map.remove("numberingScripts"); 902 if (numberingScripts != null) { 903 for (String script : numberingScripts) { 904 copyIfPresent(map, script + ".NumberElements", formatData); 905 copyIfPresent(map, script + ".NumberPatterns", formatData); 906 } 907 } else { 908 copyIfPresent(map, "NumberElements", formatData); 909 copyIfPresent(map, "NumberPatterns", formatData); 910 } 911 copyIfPresent(map, "short.CompactNumberPatterns", formatData); 912 copyIfPresent(map, "long.CompactNumberPatterns", formatData); 913 914 // put extra number elements for available scripts into formatData, if it is "root" 915 if (id.equals("root")) { 916 handlerNumbering.keySet().stream() 917 .filter(k -> !numberingScripts.contains(k)) 918 .forEach(k -> { 919 String[] ne = (String[])map.get("latn.NumberElements"); 920 String[] neNew = Arrays.copyOf(ne, ne.length); 921 neNew[4] = handlerNumbering.get(k).substring(0, 1); 922 formatData.put(k + ".NumberElements", neNew); 923 }); 924 } 925 return formatData; 926 } 927 928 private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) { 929 Object value = src.get(key); 930 if (value != null) { 931 dest.put(key, value); 932 } 933 } 934 935 // --- code below here is adapted from java.util.Properties --- 936 private static final String specialSaveCharsJava = "\""; 937 private static final String specialSaveCharsProperties = "=: \t\r\n\f#!"; 938 939 /* 940 * Converts unicodes to encoded \uxxxx 941 * and writes out any of the characters in specialSaveChars 942 * with a preceding slash 943 */ 944 static String saveConvert(String theString, boolean useJava) { 945 if (theString == null) { 946 return ""; 947 } 948 949 String specialSaveChars; 950 if (useJava) { 951 specialSaveChars = specialSaveCharsJava; 952 } else { 953 specialSaveChars = specialSaveCharsProperties; 954 } 955 boolean escapeSpace = false; 956 957 int len = theString.length(); 958 StringBuilder outBuffer = new StringBuilder(len * 2); 959 Formatter formatter = new Formatter(outBuffer, Locale.ROOT); 960 961 for (int x = 0; x < len; x++) { 962 char aChar = theString.charAt(x); 963 switch (aChar) { 964 case ' ': 965 if (x == 0 || escapeSpace) { 966 outBuffer.append('\\'); 967 } 968 outBuffer.append(' '); 969 break; 970 case '\\': 971 outBuffer.append('\\'); 972 outBuffer.append('\\'); 973 break; 974 case '\t': 975 outBuffer.append('\\'); 976 outBuffer.append('t'); 977 break; 978 case '\n': 979 outBuffer.append('\\'); 980 outBuffer.append('n'); 981 break; 982 case '\r': 983 outBuffer.append('\\'); 984 outBuffer.append('r'); 985 break; 986 case '\f': 987 outBuffer.append('\\'); 988 outBuffer.append('f'); 989 break; 990 default: 991 if (aChar < 0x0020 || (!USE_UTF8 && aChar > 0x007e)) { 992 formatter.format("\\u%04x", (int)aChar); 993 } else { 994 if (specialSaveChars.indexOf(aChar) != -1) { 995 outBuffer.append('\\'); 996 } 997 outBuffer.append(aChar); 998 } 999 } 1000 } 1001 return outBuffer.toString(); 1002 } 1003 1004 private static String toLanguageTag(String locName) { 1005 if (locName.indexOf('_') == -1) { 1006 return locName; 1007 } 1008 String tag = locName.replaceAll("_", "-"); 1009 Locale loc = Locale.forLanguageTag(tag); 1010 return loc.toLanguageTag(); 1011 } 1012 1013 private static void addLikelySubtags(Map<String, SortedSet<String>> metaInfo, String category, String id) { 1014 String likelySubtag = handlerLikelySubtags.get(id); 1015 if (likelySubtag != null) { 1016 // Remove Script for now 1017 metaInfo.get(category).add(toLanguageTag(likelySubtag).replaceFirst("-[A-Z][a-z]{3}", "")); 1018 } 1019 } 1020 1021 private static String toLocaleName(String tag) { 1022 if (tag.indexOf('-') == -1) { 1023 return tag; 1024 } 1025 return tag.replaceAll("-", "_"); 1026 } 1027 1028 private static void setupBaseLocales(String localeList) { 1029 Arrays.stream(localeList.split(",")) 1030 .map(Locale::forLanguageTag) 1031 .map(l -> Control.getControl(Control.FORMAT_DEFAULT) 1032 .getCandidateLocales("", l)) 1033 .forEach(BASE_LOCALES::addAll); 1034 } 1035 1036 // applying parent locale rules to the passed candidates list 1037 // This has to match with the one in sun.util.cldr.CLDRLocaleProviderAdapter 1038 private static Map<Locale, Locale> childToParentLocaleMap = null; 1039 private static List<Locale> applyParentLocales(String baseName, List<Locale> candidates) { 1040 if (Objects.isNull(childToParentLocaleMap)) { 1041 childToParentLocaleMap = new HashMap<>(); 1042 parentLocalesMap.keySet().forEach(key -> { 1043 String parent = key.substring(PARENT_LOCALE_PREFIX.length()).replaceAll("_", "-"); 1044 parentLocalesMap.get(key).stream().forEach(child -> { 1045 childToParentLocaleMap.put(Locale.forLanguageTag(child), 1046 "root".equals(parent) ? Locale.ROOT : Locale.forLanguageTag(parent)); 1047 }); 1048 }); 1049 } 1050 1051 // check irregular parents 1052 for (int i = 0; i < candidates.size(); i++) { 1053 Locale l = candidates.get(i); 1054 Locale p = childToParentLocaleMap.get(l); 1055 if (!l.equals(Locale.ROOT) && 1056 Objects.nonNull(p) && 1057 !candidates.get(i+1).equals(p)) { 1058 List<Locale> applied = candidates.subList(0, i+1); 1059 applied.addAll(applyParentLocales(baseName, defCon.getCandidateLocales(baseName, p))); 1060 return applied; 1061 } 1062 } 1063 1064 return candidates; 1065 } 1066 1067 private static void generateZoneName() throws Exception { 1068 Files.createDirectories(Paths.get(DESTINATION_DIR, "java", "time", "format")); 1069 Files.write(Paths.get(DESTINATION_DIR, "java", "time", "format", "ZoneName.java"), 1070 Files.lines(Paths.get(zoneNameTempFile)) 1071 .flatMap(l -> { 1072 if (l.equals("%%%%ZIDMAP%%%%")) { 1073 return zidMapEntry(); 1074 } else if (l.equals("%%%%MZONEMAP%%%%")) { 1075 return handlerMetaZones.mzoneMapEntry(); 1076 } else if (l.equals("%%%%DEPRECATED%%%%")) { 1077 return handlerSupplMeta.deprecatedMap(); 1078 } else if (l.equals("%%%%TZDATALINK%%%%")) { 1079 return tzDataLinkEntry(); 1080 } else { 1081 return Stream.of(l); 1082 } 1083 }) 1084 .collect(Collectors.toList()), 1085 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1086 } 1087 1088 // This method assumes handlerMetaZones is already initialized 1089 private static Set<String> getAvailableZoneIds() { 1090 assert handlerMetaZones != null; 1091 if (AVAILABLE_TZIDS == null) { 1092 AVAILABLE_TZIDS = new HashSet<>(ZoneId.getAvailableZoneIds()); 1093 AVAILABLE_TZIDS.addAll(handlerMetaZones.keySet()); 1094 AVAILABLE_TZIDS.remove(MetaZonesParseHandler.NO_METAZONE_KEY); 1095 } 1096 1097 return AVAILABLE_TZIDS; 1098 } 1099 1100 private static Stream<String> zidMapEntry() { 1101 return getAvailableZoneIds().stream() 1102 .map(id -> { 1103 String canonId = canonicalTZMap.getOrDefault(id, id); 1104 String meta = handlerMetaZones.get(canonId); 1105 String zone001 = handlerMetaZones.zidMap().get(meta); 1106 return zone001 == null ? "" : 1107 String.format(" \"%s\", \"%s\", \"%s\",", 1108 id, meta, zone001); 1109 }) 1110 .filter(s -> !s.isEmpty()) 1111 .sorted(); 1112 } 1113 1114 private static Stream<String> tzDataLinkEntry() { 1115 try { 1116 return Files.walk(Paths.get(tzDataDir), 1) 1117 .filter(p -> !Files.isDirectory(p)) 1118 .flatMap(CLDRConverter::extractLinks) 1119 .sorted(); 1120 } catch (IOException e) { 1121 throw new UncheckedIOException(e); 1122 } 1123 } 1124 1125 private static Stream<String> extractLinks(Path tzFile) { 1126 try { 1127 return Files.lines(tzFile) 1128 .filter(l -> l.startsWith("Link")) 1129 .map(l -> l.replaceFirst("^Link[\\s]+(\\S+)\\s+(\\S+).*", 1130 " \"$2\", \"$1\",")); 1131 } catch (IOException e) { 1132 throw new UncheckedIOException(e); 1133 } 1134 } 1135 1136 // Generate tzmappings for Windows. The format is: 1137 // 1138 // (Windows Zone Name):(REGION):(Java TZID) 1139 // 1140 // where: 1141 // Windows Zone Name: arbitrary time zone name string used in Windows 1142 // REGION: ISO3166 or UN M.49 code 1143 // Java TZID: Java's time zone ID 1144 // 1145 // Note: the entries are alphabetically sorted, *except* the "world" region 1146 // code, i.e., "001". It should be the last entry for the same windows time 1147 // zone name entries. (cf. TimeZone_md.c) 1148 private static void generateWindowsTZMappings() throws Exception { 1149 Files.createDirectories(Paths.get(DESTINATION_DIR, "windows", "conf")); 1150 Files.write(Paths.get(DESTINATION_DIR, "windows", "conf", "tzmappings"), 1151 handlerWinZones.keySet().stream() 1152 .map(k -> k + ":" + handlerWinZones.get(k) + ":") 1153 .sorted(new Comparator<String>() { 1154 public int compare(String t1, String t2) { 1155 String[] s1 = t1.split(":"); 1156 String[] s2 = t2.split(":"); 1157 if (s1[0].equals(s2[0])) { 1158 if (s1[1].equals("001")) { 1159 return 1; 1160 } else if (s2[1].equals("001")) { 1161 return -1; 1162 } else { 1163 return s1[1].compareTo(s2[1]); 1164 } 1165 } else { 1166 return s1[0].compareTo(s2[0]); 1167 } 1168 } 1169 }) 1170 .collect(Collectors.toList()), 1171 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1172 } 1173 1174 /** 1175 * Generate ResourceBundle source file for plural rules. The generated 1176 * class is {@code sun.text.resources.PluralRules} which has one public 1177 * two dimensional array {@code rulesArray}. Each array element consists 1178 * of two elements that designate the locale and the locale's plural rules 1179 * string. The latter has the syntax from Unicode Consortium's 1180 * <a href="http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax"> 1181 * Plural rules syntax</a>. {@code samples} and {@code "other"} are being ommited. 1182 * 1183 * @throws Exception 1184 */ 1185 private static void generatePluralRules() throws Exception { 1186 Files.createDirectories(Paths.get(DESTINATION_DIR, "sun", "text", "resources")); 1187 Files.write(Paths.get(DESTINATION_DIR, "sun", "text", "resources", "PluralRules.java"), 1188 Stream.concat( 1189 Stream.concat( 1190 Stream.of( 1191 "package sun.text.resources;", 1192 "public final class PluralRules {", 1193 " public static final String[][] rulesArray = {" 1194 ), 1195 pluralRulesStream().sorted() 1196 ), 1197 Stream.of( 1198 " };", 1199 "}" 1200 ) 1201 ) 1202 .collect(Collectors.toList()), 1203 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1204 } 1205 1206 private static Stream<String> pluralRulesStream() { 1207 return handlerPlurals.getData().entrySet().stream() 1208 .filter(e -> !((Map<String, String>)e.getValue()).isEmpty()) 1209 .map(e -> { 1210 String loc = e.getKey(); 1211 Map<String, String> rules = (Map<String, String>)e.getValue(); 1212 return " {\"" + loc + "\", \"" + 1213 rules.entrySet().stream() 1214 .map(rule -> rule.getKey() + ":" + rule.getValue().replaceFirst("@.*", "")) 1215 .map(String::trim) 1216 .collect(Collectors.joining(";")) + "\"},"; 1217 }); 1218 } 1219 1220 // for debug 1221 static void dumpMap(Map<String, Object> map) { 1222 map.entrySet().stream() 1223 .sorted(Map.Entry.comparingByKey()) 1224 .map(e -> { 1225 Object val = e.getValue(); 1226 String valStr = null; 1227 1228 if (val instanceof String[]) { 1229 valStr = Arrays.asList((String[])val).toString(); 1230 } else if (val != null) { 1231 valStr = val.toString(); 1232 } 1233 return e.getKey() + " = " + valStr; 1234 }) 1235 .forEach(System.out::println); 1236 } 1237 } 1238