1 /* 2 * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.cldrconverter; 27 28 import build.tools.cldrconverter.BundleGenerator.BundleType; 29 import java.io.File; 30 import java.io.IOException; 31 import java.io.UncheckedIOException; 32 import java.nio.file.*; 33 import java.text.MessageFormat; 34 import java.time.*; 35 import java.util.*; 36 import java.util.ResourceBundle.Control; 37 import java.util.logging.Level; 38 import java.util.logging.Logger; 39 import java.util.stream.Collectors; 40 import java.util.stream.IntStream; 41 import java.util.stream.Stream; 42 import javax.xml.parsers.SAXParser; 43 import javax.xml.parsers.SAXParserFactory; 44 import org.xml.sax.SAXNotRecognizedException; 45 import org.xml.sax.SAXNotSupportedException; 46 47 48 /** 49 * Converts locale data from "Locale Data Markup Language" format to 50 * JRE resource bundle format. LDML is the format used by the Common 51 * Locale Data Repository maintained by the Unicode Consortium. 52 */ 53 public class CLDRConverter { 54 55 static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd"; 56 static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd"; 57 static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd"; 58 59 60 private static String CLDR_BASE; 61 static String LOCAL_LDML_DTD; 62 static String LOCAL_SPPL_LDML_DTD; 63 static String LOCAL_BCP47_LDML_DTD; 64 private static String SOURCE_FILE_DIR; 65 private static String SPPL_SOURCE_FILE; 66 private static String SPPL_META_SOURCE_FILE; 67 private static String NUMBERING_SOURCE_FILE; 68 private static String METAZONES_SOURCE_FILE; 69 private static String LIKELYSUBTAGS_SOURCE_FILE; 70 private static String TIMEZONE_SOURCE_FILE; 71 private static String WINZONES_SOURCE_FILE; 72 private static String PLURALS_SOURCE_FILE; 73 static String DESTINATION_DIR = "build/gensrc"; 74 75 static final String LOCALE_NAME_PREFIX = "locale.displayname."; 76 static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator"; 77 static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype"; 78 static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key."; 79 static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type."; 80 static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca."; 81 static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol."; 82 static final String CURRENCY_NAME_PREFIX = "currency.displayname."; 83 static final String CALENDAR_NAME_PREFIX = "calendarname."; 84 static final String CALENDAR_FIRSTDAY_PREFIX = "firstDay."; 85 static final String CALENDAR_MINDAYS_PREFIX = "minDays."; 86 static final String TIMEZONE_ID_PREFIX = "timezone.id."; 87 static final String EXEMPLAR_CITY_PREFIX = "timezone.excity."; 88 static final String ZONE_NAME_PREFIX = "timezone.displayname."; 89 static final String METAZONE_ID_PREFIX = "metazone.id."; 90 static final String PARENT_LOCALE_PREFIX = "parentLocale."; 91 static final String META_EMPTY_ZONE_NAME = "EMPTY_ZONE"; 92 static final String[] EMPTY_ZONE = {"", "", "", "", "", ""}; 93 static final String META_ETCUTC_ZONE_NAME = "ETC_UTC"; 94 95 private static SupplementDataParseHandler handlerSuppl; 96 private static LikelySubtagsParseHandler handlerLikelySubtags; 97 private static WinZonesParseHandler handlerWinZones; 98 static PluralsParseHandler handlerPlurals; 99 static SupplementalMetadataParseHandler handlerSupplMeta; 100 static NumberingSystemsParseHandler handlerNumbering; 101 static MetaZonesParseHandler handlerMetaZones; 102 static TimeZoneParseHandler handlerTimeZone; 103 private static BundleGenerator bundleGenerator; 104 105 // java.base module related 106 static boolean isBaseModule = false; 107 static final Set<Locale> BASE_LOCALES = new HashSet<>(); 108 109 // "parentLocales" map 110 private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>(); 111 private static final ResourceBundle.Control defCon = 112 ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT); 113 114 private static Set<String> AVAILABLE_TZIDS; 115 private static String zoneNameTempFile; 116 private static String tzDataDir; 117 private static final Map<String, String> canonicalTZMap = new HashMap<>(); 118 119 static enum DraftType { 120 UNCONFIRMED, 121 PROVISIONAL, 122 CONTRIBUTED, 123 APPROVED; 124 125 private static final Map<String, DraftType> map = new HashMap<>(); 126 static { 127 for (DraftType dt : values()) { 128 map.put(dt.getKeyword(), dt); 129 } 130 } 131 static private DraftType defaultType = CONTRIBUTED; 132 133 private final String keyword; 134 135 private DraftType() { 136 keyword = this.name().toLowerCase(Locale.ROOT); 137 138 } 139 140 static DraftType forKeyword(String keyword) { 141 return map.get(keyword); 142 } 143 144 static DraftType getDefault() { 145 return defaultType; 146 } 147 148 static void setDefault(String keyword) { 149 defaultType = Objects.requireNonNull(forKeyword(keyword)); 150 } 151 152 String getKeyword() { 153 return keyword; 154 } 155 } 156 157 static boolean USE_UTF8 = false; 158 private static boolean verbose; 159 160 private CLDRConverter() { 161 // no instantiation 162 } 163 164 @SuppressWarnings("AssignmentToForLoopParameter") 165 public static void main(String[] args) throws Exception { 166 if (args.length != 0) { 167 String currentArg = null; 168 try { 169 for (int i = 0; i < args.length; i++) { 170 currentArg = args[i]; 171 switch (currentArg) { 172 case "-draft": 173 String draftDataType = args[++i]; 174 try { 175 DraftType.setDefault(draftDataType); 176 } catch (NullPointerException e) { 177 severe("Error: incorrect draft value: %s%n", draftDataType); 178 System.exit(1); 179 } 180 info("Using the specified data type: %s%n", draftDataType); 181 break; 182 183 case "-base": 184 // base directory for input files 185 CLDR_BASE = args[++i]; 186 if (!CLDR_BASE.endsWith("/")) { 187 CLDR_BASE += "/"; 188 } 189 break; 190 191 case "-baselocales": 192 // base locales 193 setupBaseLocales(args[++i]); 194 break; 195 196 case "-basemodule": 197 // indicates java.base module resource generation 198 isBaseModule = true; 199 break; 200 201 case "-o": 202 // output directory 203 DESTINATION_DIR = args[++i]; 204 break; 205 206 case "-utf8": 207 USE_UTF8 = true; 208 break; 209 210 case "-verbose": 211 verbose = true; 212 break; 213 214 case "-zntempfile": 215 zoneNameTempFile = args[++i]; 216 break; 217 218 case "-tzdatadir": 219 tzDataDir = args[++i]; 220 break; 221 222 case "-help": 223 usage(); 224 System.exit(0); 225 break; 226 227 default: 228 throw new RuntimeException(); 229 } 230 } 231 } catch (RuntimeException e) { 232 severe("unknown or imcomplete arg(s): " + currentArg); 233 usage(); 234 System.exit(1); 235 } 236 } 237 238 // Set up path names 239 LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd"; 240 LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd"; 241 LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd"; 242 SOURCE_FILE_DIR = CLDR_BASE + "/main"; 243 SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml"; 244 LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml"; 245 NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml"; 246 METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml"; 247 TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml"; 248 SPPL_META_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalMetadata.xml"; 249 WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml"; 250 PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml"; 251 252 if (BASE_LOCALES.isEmpty()) { 253 setupBaseLocales("en-US"); 254 } 255 256 bundleGenerator = new ResourceBundleGenerator(); 257 258 // Parse data independent of locales 259 parseSupplemental(); 260 parseBCP47(); 261 262 List<Bundle> bundles = readBundleList(); 263 convertBundles(bundles); 264 265 if (isBaseModule) { 266 // Generate java.time.format.ZoneName.java 267 generateZoneName(); 268 269 // Generate Windows tzmappings 270 generateWindowsTZMappings(); 271 272 // Generate Plural rules 273 generatePluralRules(); 274 } 275 } 276 277 private static void usage() { 278 errout("Usage: java CLDRConverter [options]%n" 279 + "\t-help output this usage message and exit%n" 280 + "\t-verbose output information%n" 281 + "\t-draft [contributed | approved | provisional | unconfirmed]%n" 282 + "\t\t draft level for using data (default: contributed)%n" 283 + "\t-base dir base directory for CLDR input files%n" 284 + "\t-basemodule generates bundles that go into java.base module%n" 285 + "\t-baselocales loc(,loc)* locales that go into the base module%n" 286 + "\t-o dir output directory (default: ./build/gensrc)%n" 287 + "\t-zntempfile template file for java.time.format.ZoneName.java%n" 288 + "\t-tzdatadir tzdata directory for java.time.format.ZoneName.java%n" 289 + "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n"); 290 } 291 292 static void info(String fmt, Object... args) { 293 if (verbose) { 294 System.out.printf(fmt, args); 295 } 296 } 297 298 static void info(String msg) { 299 if (verbose) { 300 System.out.println(msg); 301 } 302 } 303 304 static void warning(String fmt, Object... args) { 305 System.err.print("Warning: "); 306 System.err.printf(fmt, args); 307 } 308 309 static void warning(String msg) { 310 System.err.print("Warning: "); 311 errout(msg); 312 } 313 314 static void severe(String fmt, Object... args) { 315 System.err.print("Error: "); 316 System.err.printf(fmt, args); 317 } 318 319 static void severe(String msg) { 320 System.err.print("Error: "); 321 errout(msg); 322 } 323 324 private static void errout(String msg) { 325 if (msg.contains("%n")) { 326 System.err.printf(msg); 327 } else { 328 System.err.println(msg); 329 } 330 } 331 332 /** 333 * Configure the parser to allow access to DTDs on the file system. 334 */ 335 private static void enableFileAccess(SAXParser parser) throws SAXNotSupportedException { 336 try { 337 parser.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "file"); 338 } catch (SAXNotRecognizedException ignore) { 339 // property requires >= JAXP 1.5 340 } 341 } 342 343 private static List<Bundle> readBundleList() throws Exception { 344 List<Bundle> retList = new ArrayList<>(); 345 Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR); 346 try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) { 347 for (Path entry : dirStr) { 348 String fileName = entry.getFileName().toString(); 349 if (fileName.endsWith(".xml")) { 350 String id = fileName.substring(0, fileName.indexOf('.')); 351 Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id)); 352 StringBuilder sb = getCandLocales(cldrLoc); 353 if (sb.indexOf("root") == -1) { 354 sb.append("root"); 355 } 356 retList.add(new Bundle(id, sb.toString(), null, null)); 357 } 358 } 359 } 360 361 // Sort the bundles based on id. This will make sure all the parent bundles are 362 // processed first, e.g., for en_GB bundle, en_001, and "root" comes before 363 // en_GB. In order for "root" to come at the beginning, "root" is replaced with 364 // empty string on comparison. 365 retList.sort((o1, o2) -> { 366 String id1 = o1.getID(); 367 String id2 = o2.getID(); 368 if(id1.equals("root")) { 369 id1 = ""; 370 } 371 if(id2.equals("root")) { 372 id2 = ""; 373 } 374 return id1.compareTo(id2); 375 }); 376 return retList; 377 } 378 379 private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>(); 380 381 private static Map<String, SortedSet<String>> metaInfo = new HashMap<>(); 382 383 static { 384 // For generating information on supported locales. 385 metaInfo.put("AvailableLocales", new TreeSet<>()); 386 } 387 388 static Map<String, Object> getCLDRBundle(String id) throws Exception { 389 Map<String, Object> bundle = cldrBundles.get(id); 390 if (bundle != null) { 391 return bundle; 392 } 393 File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml"); 394 if (!file.exists()) { 395 // Skip if the file doesn't exist. 396 return Collections.emptyMap(); 397 } 398 399 info("..... main directory ....."); 400 LDMLParseHandler handler = new LDMLParseHandler(id); 401 parseLDMLFile(file, handler); 402 403 bundle = handler.getData(); 404 cldrBundles.put(id, bundle); 405 406 if (id.equals("root")) { 407 // Calendar data (firstDayOfWeek & minDaysInFirstWeek) 408 bundle = handlerSuppl.getData("root"); 409 if (bundle != null) { 410 //merge two maps into one map 411 Map<String, Object> temp = cldrBundles.remove(id); 412 bundle.putAll(temp); 413 cldrBundles.put(id, bundle); 414 } 415 } 416 return bundle; 417 } 418 419 // Parsers for data in "supplemental" directory 420 // 421 @SuppressWarnings("unchecked") 422 private static void parseSupplemental() throws Exception { 423 // Parse SupplementalData file and store the information in the HashMap 424 // Calendar information such as firstDay and minDay are stored in 425 // supplementalData.xml as of CLDR1.4. Individual territory is listed 426 // with its ISO 3166 country code while default is listed using UNM49 427 // region and composition numerical code (001 for World.) 428 // 429 // SupplementalData file also provides the "parent" locales which 430 // are othrwise not to be fallen back. Process them here as well. 431 // 432 handlerSuppl = new SupplementDataParseHandler(); 433 parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl); 434 Map<String, Object> parentData = handlerSuppl.getData("root"); 435 parentData.keySet().stream() 436 .filter(key -> key.startsWith(PARENT_LOCALE_PREFIX)) 437 .forEach(key -> { 438 parentLocalesMap.put(key, new TreeSet<String>( 439 Arrays.asList(((String)parentData.get(key)).split(" ")))); 440 }); 441 442 // Parse numberingSystems to get digit zero character information. 443 handlerNumbering = new NumberingSystemsParseHandler(); 444 parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering); 445 446 // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names 447 handlerMetaZones = new MetaZonesParseHandler(); 448 parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones); 449 450 // Parse likelySubtags 451 handlerLikelySubtags = new LikelySubtagsParseHandler(); 452 parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags); 453 454 // Parse supplementalMetadata 455 // Currently interested in deprecated time zone ids and language aliases. 456 handlerSupplMeta = new SupplementalMetadataParseHandler(); 457 parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta); 458 459 // Parse windowsZones 460 handlerWinZones = new WinZonesParseHandler(); 461 parseLDMLFile(new File(WINZONES_SOURCE_FILE), handlerWinZones); 462 463 // Parse plurals 464 handlerPlurals = new PluralsParseHandler(); 465 parseLDMLFile(new File(PLURALS_SOURCE_FILE), handlerPlurals); 466 } 467 468 // Parsers for data in "bcp47" directory 469 // 470 private static void parseBCP47() throws Exception { 471 // Parse timezone 472 handlerTimeZone = new TimeZoneParseHandler(); 473 parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone); 474 475 // canonical tz name map 476 // alias -> primary 477 handlerTimeZone.getData().forEach((k, v) -> { 478 String[] ids = ((String)v).split("\\s"); 479 for (int i = 1; i < ids.length; i++) { 480 canonicalTZMap.put(ids[i], ids[0]); 481 } 482 }); 483 } 484 485 @SuppressWarnings("rawtypes") 486 private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception { 487 info("..... Parsing " + srcfile.getName() + " ....."); 488 SAXParserFactory pf = SAXParserFactory.newInstance(); 489 pf.setValidating(true); 490 SAXParser parser = pf.newSAXParser(); 491 enableFileAccess(parser); 492 parser.parse(srcfile, handler); 493 } 494 495 private static StringBuilder getCandLocales(Locale cldrLoc) { 496 List<Locale> candList = getCandidateLocales(cldrLoc); 497 StringBuilder sb = new StringBuilder(); 498 for (Locale loc : candList) { 499 if (!loc.equals(Locale.ROOT)) { 500 sb.append(toLocaleName(loc.toLanguageTag())); 501 sb.append(","); 502 } 503 } 504 return sb; 505 } 506 507 private static List<Locale> getCandidateLocales(Locale cldrLoc) { 508 List<Locale> candList = new ArrayList<>(); 509 candList = applyParentLocales("", defCon.getCandidateLocales("", cldrLoc)); 510 return candList; 511 } 512 513 private static void convertBundles(List<Bundle> bundles) throws Exception { 514 // parent locales map. The mappings are put in base metaInfo file 515 // for now. 516 if (isBaseModule) { 517 metaInfo.putAll(parentLocalesMap); 518 } 519 520 for (Bundle bundle : bundles) { 521 // Get the target map, which contains all the data that should be 522 // visible for the bundle's locale 523 524 Map<String, Object> targetMap = bundle.getTargetMap(); 525 526 EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes(); 527 528 if (bundle.isRoot()) { 529 // Add DateTimePatternChars because CLDR no longer supports localized patterns. 530 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ"); 531 } 532 533 // Now the map contains just the entries that need to be in the resources bundles. 534 // Go ahead and generate them. 535 if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) { 536 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID()); 537 if (!localeNamesMap.isEmpty() || bundle.isRoot()) { 538 bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN); 539 } 540 } 541 if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) { 542 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies()); 543 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) { 544 bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN); 545 } 546 } 547 if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) { 548 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID()); 549 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) { 550 bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE); 551 } 552 } 553 if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) { 554 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID()); 555 if (!calendarDataMap.isEmpty() || bundle.isRoot()) { 556 bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN); 557 } 558 } 559 if (bundleTypes.contains(Bundle.Type.FORMATDATA)) { 560 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID()); 561 if (!formatDataMap.isEmpty() || bundle.isRoot()) { 562 bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN); 563 } 564 } 565 566 // For AvailableLocales 567 metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID())); 568 addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID()); 569 } 570 bundleGenerator.generateMetaInfo(metaInfo); 571 } 572 573 static final Map<String, String> aliases = new HashMap<>(); 574 575 /** 576 * Translate the aliases into the real entries in the bundle map. 577 */ 578 static void handleAliases(Map<String, Object> bundleMap) { 579 Set<String> bundleKeys = bundleMap.keySet(); 580 try { 581 for (String key : aliases.keySet()) { 582 String targetKey = aliases.get(key); 583 if (bundleKeys.contains(targetKey)) { 584 bundleMap.putIfAbsent(key, bundleMap.get(targetKey)); 585 } 586 } 587 } catch (Exception ex) { 588 Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex); 589 } 590 } 591 592 /* 593 * Returns the language portion of the given id. 594 * If id is "root", "" is returned. 595 */ 596 static String getLanguageCode(String id) { 597 return "root".equals(id) ? "" : Locale.forLanguageTag(id.replaceAll("_", "-")).getLanguage(); 598 } 599 600 /** 601 * Examine if the id includes the country (territory) code. If it does, it returns 602 * the country code. 603 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 604 * It does NOT return UN M.49 code, e.g., '001', as those three digit numbers cannot 605 * be translated into package names. 606 */ 607 static String getCountryCode(String id) { 608 String rgn = getRegionCode(id); 609 return rgn.length() == 2 ? rgn: null; 610 } 611 612 /** 613 * Examine if the id includes the region code. If it does, it returns 614 * the region code. 615 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 616 * It DOES return UN M.49 code, e.g., '001', as well as ISO 3166 two letter country codes. 617 */ 618 static String getRegionCode(String id) { 619 return Locale.forLanguageTag(id.replaceAll("_", "-")).getCountry(); 620 } 621 622 private static class KeyComparator implements Comparator<String> { 623 static KeyComparator INSTANCE = new KeyComparator(); 624 625 private KeyComparator() { 626 } 627 628 @Override 629 public int compare(String o1, String o2) { 630 int len1 = o1.length(); 631 int len2 = o2.length(); 632 if (!isDigit(o1.charAt(0)) && !isDigit(o2.charAt(0))) { 633 // Shorter string comes first unless either starts with a digit. 634 if (len1 < len2) { 635 return -1; 636 } 637 if (len1 > len2) { 638 return 1; 639 } 640 } 641 return o1.compareTo(o2); 642 } 643 644 private boolean isDigit(char c) { 645 return c >= '0' && c <= '9'; 646 } 647 } 648 649 private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) { 650 Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE); 651 for (String key : map.keySet()) { 652 if (key.startsWith(LOCALE_NAME_PREFIX)) { 653 switch (key) { 654 case LOCALE_SEPARATOR: 655 localeNames.put("ListCompositionPattern", map.get(key)); 656 break; 657 case LOCALE_KEYTYPE: 658 localeNames.put("ListKeyTypePattern", map.get(key)); 659 break; 660 default: 661 localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key)); 662 break; 663 } 664 } 665 } 666 667 if (id.equals("root")) { 668 // Add display name pattern, which is not in CLDR 669 localeNames.put("DisplayNamePattern", "{0,choice,0#|1#{1}|2#{1} ({2})}"); 670 } 671 672 return localeNames; 673 } 674 675 @SuppressWarnings("AssignmentToForLoopParameter") 676 private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names) 677 throws Exception { 678 Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE); 679 for (String key : map.keySet()) { 680 if (key.startsWith(CURRENCY_NAME_PREFIX)) { 681 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key)); 682 } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) { 683 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key)); 684 } 685 } 686 return currencyNames; 687 } 688 689 private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) { 690 Map<String, Object> names = new TreeMap<>(KeyComparator.INSTANCE); 691 692 getAvailableZoneIds().stream().forEach(tzid -> { 693 // If the tzid is deprecated, get the data for the replacement id 694 String tzKey = Optional.ofNullable((String)handlerSupplMeta.get(tzid)) 695 .orElse(tzid); 696 Object data = map.get(TIMEZONE_ID_PREFIX + tzKey); 697 698 if (data instanceof String[]) { 699 // Hack for UTC. UTC is an alias to Etc/UTC in CLDR 700 if (tzid.equals("Etc/UTC") && !map.containsKey(TIMEZONE_ID_PREFIX + "UTC")) { 701 names.put(METAZONE_ID_PREFIX + META_ETCUTC_ZONE_NAME, data); 702 names.put(tzid, META_ETCUTC_ZONE_NAME); 703 names.put("UTC", META_ETCUTC_ZONE_NAME); 704 } else { 705 names.put(tzid, data); 706 } 707 } else { 708 String meta = handlerMetaZones.get(tzKey); 709 if (meta != null) { 710 String metaKey = METAZONE_ID_PREFIX + meta; 711 data = map.get(metaKey); 712 if (data instanceof String[]) { 713 // Keep the metazone prefix here. 714 names.put(metaKey, data); 715 names.put(tzid, meta); 716 } 717 } 718 } 719 }); 720 721 // exemplar cities. 722 Map<String, Object> exCities = map.entrySet().stream() 723 .filter(e -> e.getKey().startsWith(CLDRConverter.EXEMPLAR_CITY_PREFIX)) 724 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 725 names.putAll(exCities); 726 727 // If there's no UTC entry at this point, add an empty one 728 if (!names.isEmpty() && !names.containsKey("UTC")) { 729 names.putIfAbsent(METAZONE_ID_PREFIX + META_EMPTY_ZONE_NAME, EMPTY_ZONE); 730 names.put("UTC", META_EMPTY_ZONE_NAME); 731 } 732 733 // Finally some compatibility stuff 734 ZoneId.SHORT_IDS.entrySet().stream() 735 .filter(e -> !names.containsKey(e.getKey()) && names.containsKey(e.getValue())) 736 .forEach(e -> { 737 names.put(e.getKey(), names.get(e.getValue())); 738 }); 739 740 return names; 741 } 742 743 /** 744 * Extracts the language independent calendar data. Each of the two keys, 745 * "firstDayOfWeek" and "minimalDaysInFirstWeek" has a string value consists of 746 * one or multiple occurrences of: 747 * i: rg1 rg2 ... rgn; 748 * where "i" is the data for the following regions (delimited by a space) after 749 * ":", and ends with a ";". 750 */ 751 private static Map<String, Object> extractCalendarData(Map<String, Object> map, String id) { 752 Map<String, Object> calendarData = new LinkedHashMap<>(); 753 if (id.equals("root")) { 754 calendarData.put("firstDayOfWeek", 755 IntStream.range(1, 8) 756 .mapToObj(String::valueOf) 757 .filter(d -> map.keySet().contains(CALENDAR_FIRSTDAY_PREFIX + d)) 758 .map(d -> d + ": " + map.get(CALENDAR_FIRSTDAY_PREFIX + d)) 759 .collect(Collectors.joining(";"))); 760 calendarData.put("minimalDaysInFirstWeek", 761 IntStream.range(0, 7) 762 .mapToObj(String::valueOf) 763 .filter(d -> map.keySet().contains(CALENDAR_MINDAYS_PREFIX + d)) 764 .map(d -> d + ": " + map.get(CALENDAR_MINDAYS_PREFIX + d)) 765 .collect(Collectors.joining(";"))); 766 } 767 return calendarData; 768 } 769 770 static final String[] FORMAT_DATA_ELEMENTS = { 771 "MonthNames", 772 "standalone.MonthNames", 773 "MonthAbbreviations", 774 "standalone.MonthAbbreviations", 775 "MonthNarrows", 776 "standalone.MonthNarrows", 777 "DayNames", 778 "standalone.DayNames", 779 "DayAbbreviations", 780 "standalone.DayAbbreviations", 781 "DayNarrows", 782 "standalone.DayNarrows", 783 "QuarterNames", 784 "standalone.QuarterNames", 785 "QuarterAbbreviations", 786 "standalone.QuarterAbbreviations", 787 "QuarterNarrows", 788 "standalone.QuarterNarrows", 789 "AmPmMarkers", 790 "narrow.AmPmMarkers", 791 "abbreviated.AmPmMarkers", 792 "long.Eras", 793 "Eras", 794 "narrow.Eras", 795 "field.era", 796 "field.year", 797 "field.month", 798 "field.week", 799 "field.weekday", 800 "field.dayperiod", 801 "field.hour", 802 "timezone.hourFormat", 803 "timezone.gmtFormat", 804 "timezone.gmtZeroFormat", 805 "timezone.regionFormat", 806 "timezone.regionFormat.daylight", 807 "timezone.regionFormat.standard", 808 "field.minute", 809 "field.second", 810 "field.zone", 811 "TimePatterns", 812 "DatePatterns", 813 "DateTimePatterns", 814 "DateTimePatternChars" 815 }; 816 817 private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) { 818 Map<String, Object> formatData = new LinkedHashMap<>(); 819 for (CalendarType calendarType : CalendarType.values()) { 820 if (calendarType == CalendarType.GENERIC) { 821 continue; 822 } 823 String prefix = calendarType.keyElementName(); 824 for (String element : FORMAT_DATA_ELEMENTS) { 825 String key = prefix + element; 826 copyIfPresent(map, "java.time." + key, formatData); 827 copyIfPresent(map, key, formatData); 828 } 829 } 830 831 for (String key : map.keySet()) { 832 // Copy available calendar names 833 if (key.startsWith(CLDRConverter.LOCALE_TYPE_PREFIX_CA)) { 834 String type = key.substring(CLDRConverter.LOCALE_TYPE_PREFIX_CA.length()); 835 for (CalendarType calendarType : CalendarType.values()) { 836 if (calendarType == CalendarType.GENERIC) { 837 continue; 838 } 839 if (type.equals(calendarType.lname())) { 840 Object value = map.get(key); 841 String dataKey = key.replace(LOCALE_TYPE_PREFIX_CA, 842 CALENDAR_NAME_PREFIX); 843 formatData.put(dataKey, value); 844 String ukey = CALENDAR_NAME_PREFIX + calendarType.uname(); 845 if (!dataKey.equals(ukey)) { 846 formatData.put(ukey, value); 847 } 848 } 849 } 850 } 851 } 852 853 copyIfPresent(map, "DefaultNumberingSystem", formatData); 854 855 @SuppressWarnings("unchecked") 856 List<String> numberingScripts = (List<String>) map.remove("numberingScripts"); 857 if (numberingScripts != null) { 858 for (String script : numberingScripts) { 859 copyIfPresent(map, script + ".NumberElements", formatData); 860 copyIfPresent(map, script + ".NumberPatterns", formatData); 861 } 862 } else { 863 copyIfPresent(map, "NumberElements", formatData); 864 copyIfPresent(map, "NumberPatterns", formatData); 865 } 866 copyIfPresent(map, "short.CompactNumberPatterns", formatData); 867 copyIfPresent(map, "long.CompactNumberPatterns", formatData); 868 869 // put extra number elements for available scripts into formatData, if it is "root" 870 if (id.equals("root")) { 871 handlerNumbering.keySet().stream() 872 .filter(k -> !numberingScripts.contains(k)) 873 .forEach(k -> { 874 String[] ne = (String[])map.get("latn.NumberElements"); 875 String[] neNew = Arrays.copyOf(ne, ne.length); 876 neNew[4] = handlerNumbering.get(k).substring(0, 1); 877 formatData.put(k + ".NumberElements", neNew); 878 }); 879 } 880 return formatData; 881 } 882 883 private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) { 884 Object value = src.get(key); 885 if (value != null) { 886 dest.put(key, value); 887 } 888 } 889 890 // --- code below here is adapted from java.util.Properties --- 891 private static final String specialSaveCharsJava = "\""; 892 private static final String specialSaveCharsProperties = "=: \t\r\n\f#!"; 893 894 /* 895 * Converts unicodes to encoded \uxxxx 896 * and writes out any of the characters in specialSaveChars 897 * with a preceding slash 898 */ 899 static String saveConvert(String theString, boolean useJava) { 900 if (theString == null) { 901 return ""; 902 } 903 904 String specialSaveChars; 905 if (useJava) { 906 specialSaveChars = specialSaveCharsJava; 907 } else { 908 specialSaveChars = specialSaveCharsProperties; 909 } 910 boolean escapeSpace = false; 911 912 int len = theString.length(); 913 StringBuilder outBuffer = new StringBuilder(len * 2); 914 Formatter formatter = new Formatter(outBuffer, Locale.ROOT); 915 916 for (int x = 0; x < len; x++) { 917 char aChar = theString.charAt(x); 918 switch (aChar) { 919 case ' ': 920 if (x == 0 || escapeSpace) { 921 outBuffer.append('\\'); 922 } 923 outBuffer.append(' '); 924 break; 925 case '\\': 926 outBuffer.append('\\'); 927 outBuffer.append('\\'); 928 break; 929 case '\t': 930 outBuffer.append('\\'); 931 outBuffer.append('t'); 932 break; 933 case '\n': 934 outBuffer.append('\\'); 935 outBuffer.append('n'); 936 break; 937 case '\r': 938 outBuffer.append('\\'); 939 outBuffer.append('r'); 940 break; 941 case '\f': 942 outBuffer.append('\\'); 943 outBuffer.append('f'); 944 break; 945 default: 946 if (aChar < 0x0020 || (!USE_UTF8 && aChar > 0x007e)) { 947 formatter.format("\\u%04x", (int)aChar); 948 } else { 949 if (specialSaveChars.indexOf(aChar) != -1) { 950 outBuffer.append('\\'); 951 } 952 outBuffer.append(aChar); 953 } 954 } 955 } 956 return outBuffer.toString(); 957 } 958 959 private static String toLanguageTag(String locName) { 960 if (locName.indexOf('_') == -1) { 961 return locName; 962 } 963 String tag = locName.replaceAll("_", "-"); 964 Locale loc = Locale.forLanguageTag(tag); 965 return loc.toLanguageTag(); 966 } 967 968 private static void addLikelySubtags(Map<String, SortedSet<String>> metaInfo, String category, String id) { 969 String likelySubtag = handlerLikelySubtags.get(id); 970 if (likelySubtag != null) { 971 // Remove Script for now 972 metaInfo.get(category).add(toLanguageTag(likelySubtag).replaceFirst("-[A-Z][a-z]{3}", "")); 973 } 974 } 975 976 private static String toLocaleName(String tag) { 977 if (tag.indexOf('-') == -1) { 978 return tag; 979 } 980 return tag.replaceAll("-", "_"); 981 } 982 983 private static void setupBaseLocales(String localeList) { 984 Arrays.stream(localeList.split(",")) 985 .map(Locale::forLanguageTag) 986 .map(l -> Control.getControl(Control.FORMAT_DEFAULT) 987 .getCandidateLocales("", l)) 988 .forEach(BASE_LOCALES::addAll); 989 } 990 991 // applying parent locale rules to the passed candidates list 992 // This has to match with the one in sun.util.cldr.CLDRLocaleProviderAdapter 993 private static Map<Locale, Locale> childToParentLocaleMap = null; 994 private static List<Locale> applyParentLocales(String baseName, List<Locale> candidates) { 995 if (Objects.isNull(childToParentLocaleMap)) { 996 childToParentLocaleMap = new HashMap<>(); 997 parentLocalesMap.keySet().forEach(key -> { 998 String parent = key.substring(PARENT_LOCALE_PREFIX.length()).replaceAll("_", "-"); 999 parentLocalesMap.get(key).stream().forEach(child -> { 1000 childToParentLocaleMap.put(Locale.forLanguageTag(child), 1001 "root".equals(parent) ? Locale.ROOT : Locale.forLanguageTag(parent)); 1002 }); 1003 }); 1004 } 1005 1006 // check irregular parents 1007 for (int i = 0; i < candidates.size(); i++) { 1008 Locale l = candidates.get(i); 1009 Locale p = childToParentLocaleMap.get(l); 1010 if (!l.equals(Locale.ROOT) && 1011 Objects.nonNull(p) && 1012 !candidates.get(i+1).equals(p)) { 1013 List<Locale> applied = candidates.subList(0, i+1); 1014 applied.addAll(applyParentLocales(baseName, defCon.getCandidateLocales(baseName, p))); 1015 return applied; 1016 } 1017 } 1018 1019 return candidates; 1020 } 1021 1022 private static void generateZoneName() throws Exception { 1023 Files.createDirectories(Paths.get(DESTINATION_DIR, "java", "time", "format")); 1024 Files.write(Paths.get(DESTINATION_DIR, "java", "time", "format", "ZoneName.java"), 1025 Files.lines(Paths.get(zoneNameTempFile)) 1026 .flatMap(l -> { 1027 if (l.equals("%%%%ZIDMAP%%%%")) { 1028 return zidMapEntry(); 1029 } else if (l.equals("%%%%MZONEMAP%%%%")) { 1030 return handlerMetaZones.mzoneMapEntry(); 1031 } else if (l.equals("%%%%DEPRECATED%%%%")) { 1032 return handlerSupplMeta.deprecatedMap(); 1033 } else if (l.equals("%%%%TZDATALINK%%%%")) { 1034 return tzDataLinkEntry(); 1035 } else { 1036 return Stream.of(l); 1037 } 1038 }) 1039 .collect(Collectors.toList()), 1040 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1041 } 1042 1043 // This method assumes handlerMetaZones is already initialized 1044 private static Set<String> getAvailableZoneIds() { 1045 assert handlerMetaZones != null; 1046 if (AVAILABLE_TZIDS == null) { 1047 AVAILABLE_TZIDS = new HashSet<>(ZoneId.getAvailableZoneIds()); 1048 AVAILABLE_TZIDS.addAll(handlerMetaZones.keySet()); 1049 AVAILABLE_TZIDS.remove(MetaZonesParseHandler.NO_METAZONE_KEY); 1050 } 1051 1052 return AVAILABLE_TZIDS; 1053 } 1054 1055 private static Stream<String> zidMapEntry() { 1056 return getAvailableZoneIds().stream() 1057 .map(id -> { 1058 String canonId = canonicalTZMap.getOrDefault(id, id); 1059 String meta = handlerMetaZones.get(canonId); 1060 String zone001 = handlerMetaZones.zidMap().get(meta); 1061 return zone001 == null ? "" : 1062 String.format(" \"%s\", \"%s\", \"%s\",", 1063 id, meta, zone001); 1064 }) 1065 .filter(s -> !s.isEmpty()) 1066 .sorted(); 1067 } 1068 1069 private static Stream<String> tzDataLinkEntry() { 1070 try { 1071 return Files.walk(Paths.get(tzDataDir), 1) 1072 .filter(p -> !Files.isDirectory(p)) 1073 .flatMap(CLDRConverter::extractLinks) 1074 .sorted(); 1075 } catch (IOException e) { 1076 throw new UncheckedIOException(e); 1077 } 1078 } 1079 1080 private static Stream<String> extractLinks(Path tzFile) { 1081 try { 1082 return Files.lines(tzFile) 1083 .filter(l -> l.startsWith("Link")) 1084 .map(l -> l.replaceFirst("^Link[\\s]+(\\S+)\\s+(\\S+).*", 1085 " \"$2\", \"$1\",")); 1086 } catch (IOException e) { 1087 throw new UncheckedIOException(e); 1088 } 1089 } 1090 1091 // Generate tzmappings for Windows. The format is: 1092 // 1093 // (Windows Zone Name):(REGION):(Java TZID) 1094 // 1095 // where: 1096 // Windows Zone Name: arbitrary time zone name string used in Windows 1097 // REGION: ISO3166 or UN M.49 code 1098 // Java TZID: Java's time zone ID 1099 // 1100 // Note: the entries are alphabetically sorted, *except* the "world" region 1101 // code, i.e., "001". It should be the last entry for the same windows time 1102 // zone name entries. (cf. TimeZone_md.c) 1103 private static void generateWindowsTZMappings() throws Exception { 1104 Files.createDirectories(Paths.get(DESTINATION_DIR, "windows", "conf")); 1105 Files.write(Paths.get(DESTINATION_DIR, "windows", "conf", "tzmappings"), 1106 handlerWinZones.keySet().stream() 1107 .map(k -> k + ":" + handlerWinZones.get(k) + ":") 1108 .sorted(new Comparator<String>() { 1109 public int compare(String t1, String t2) { 1110 String[] s1 = t1.split(":"); 1111 String[] s2 = t2.split(":"); 1112 if (s1[0].equals(s2[0])) { 1113 if (s1[1].equals("001")) { 1114 return 1; 1115 } else if (s2[1].equals("001")) { 1116 return -1; 1117 } else { 1118 return s1[1].compareTo(s2[1]); 1119 } 1120 } else { 1121 return s1[0].compareTo(s2[0]); 1122 } 1123 } 1124 }) 1125 .collect(Collectors.toList()), 1126 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1127 } 1128 1129 /** 1130 * Generate ResourceBundle source file for plural rules. The generated 1131 * class is {@code sun.text.resources.PluralRules} which has one public 1132 * two dimensional array {@code rulesArray}. Each array element consists 1133 * of two elements that designate the locale and the locale's plural rules 1134 * string. The latter has the syntax from Unicode Consortium's 1135 * <a href="http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax"> 1136 * Plural rules syntax</a>. {@code samples} and {@code "other"} are being ommited. 1137 * 1138 * @throws Exception 1139 */ 1140 private static void generatePluralRules() throws Exception { 1141 Files.createDirectories(Paths.get(DESTINATION_DIR, "sun", "text", "resources")); 1142 Files.write(Paths.get(DESTINATION_DIR, "sun", "text", "resources", "PluralRules.java"), 1143 Stream.concat( 1144 Stream.concat( 1145 Stream.of( 1146 "package sun.text.resources;", 1147 "public final class PluralRules {", 1148 " public static final String[][] rulesArray = {" 1149 ), 1150 pluralRulesStream().sorted() 1151 ), 1152 Stream.of( 1153 " };", 1154 "}" 1155 ) 1156 ) 1157 .collect(Collectors.toList()), 1158 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1159 } 1160 1161 @SuppressWarnings("unchecked") 1162 private static Stream<String> pluralRulesStream() { 1163 return handlerPlurals.getData().entrySet().stream() 1164 .filter(e -> !((Map<String, String>)e.getValue()).isEmpty()) 1165 .map(e -> { 1166 String loc = e.getKey(); 1167 Map<String, String> rules = (Map<String, String>)e.getValue(); 1168 return " {\"" + loc + "\", \"" + 1169 rules.entrySet().stream() 1170 .map(rule -> rule.getKey() + ":" + rule.getValue().replaceFirst("@.*", "")) 1171 .map(String::trim) 1172 .collect(Collectors.joining(";")) + "\"},"; 1173 }); 1174 } 1175 1176 // for debug 1177 static void dumpMap(Map<String, Object> map) { 1178 map.entrySet().stream() 1179 .sorted(Map.Entry.comparingByKey()) 1180 .map(e -> { 1181 Object val = e.getValue(); 1182 String valStr = null; 1183 1184 if (val instanceof String[]) { 1185 valStr = Arrays.asList((String[])val).toString(); 1186 } else if (val != null) { 1187 valStr = val.toString(); 1188 } 1189 return e.getKey() + " = " + valStr; 1190 }) 1191 .forEach(System.out::println); 1192 } 1193 } 1194