1 /* 2 * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.cldrconverter; 27 28 import build.tools.cldrconverter.BundleGenerator.BundleType; 29 import java.io.File; 30 import java.io.IOException; 31 import java.io.UncheckedIOException; 32 import java.nio.file.*; 33 import java.text.MessageFormat; 34 import java.time.*; 35 import java.util.*; 36 import java.util.ResourceBundle.Control; 37 import java.util.logging.Level; 38 import java.util.logging.Logger; 39 import java.util.stream.Collectors; 40 import java.util.stream.IntStream; 41 import java.util.stream.Stream; 42 import javax.xml.parsers.SAXParser; 43 import javax.xml.parsers.SAXParserFactory; 44 import org.xml.sax.SAXNotRecognizedException; 45 import org.xml.sax.SAXNotSupportedException; 46 47 48 /** 49 * Converts locale data from "Locale Data Markup Language" format to 50 * JRE resource bundle format. LDML is the format used by the Common 51 * Locale Data Repository maintained by the Unicode Consortium. 52 */ 53 public class CLDRConverter { 54 55 static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd"; 56 static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd"; 57 static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd"; 58 59 60 private static String CLDR_BASE; 61 static String LOCAL_LDML_DTD; 62 static String LOCAL_SPPL_LDML_DTD; 63 static String LOCAL_BCP47_LDML_DTD; 64 private static String SOURCE_FILE_DIR; 65 private static String SPPL_SOURCE_FILE; 66 private static String SPPL_META_SOURCE_FILE; 67 private static String NUMBERING_SOURCE_FILE; 68 private static String METAZONES_SOURCE_FILE; 69 private static String LIKELYSUBTAGS_SOURCE_FILE; 70 private static String TIMEZONE_SOURCE_FILE; 71 private static String WINZONES_SOURCE_FILE; 72 private static String PLURALS_SOURCE_FILE; 73 static String DESTINATION_DIR = "build/gensrc"; 74 75 static final String LOCALE_NAME_PREFIX = "locale.displayname."; 76 static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator"; 77 static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype"; 78 static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key."; 79 static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type."; 80 static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca."; 81 static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol."; 82 static final String CURRENCY_NAME_PREFIX = "currency.displayname."; 83 static final String CALENDAR_NAME_PREFIX = "calendarname."; 84 static final String CALENDAR_FIRSTDAY_PREFIX = "firstDay."; 85 static final String CALENDAR_MINDAYS_PREFIX = "minDays."; 86 static final String TIMEZONE_ID_PREFIX = "timezone.id."; 87 static final String EXEMPLAR_CITY_PREFIX = "timezone.excity."; 88 static final String ZONE_NAME_PREFIX = "timezone.displayname."; 89 static final String METAZONE_ID_PREFIX = "metazone.id."; 90 static final String PARENT_LOCALE_PREFIX = "parentLocale."; 91 static final String META_EMPTY_ZONE_NAME = "EMPTY_ZONE"; 92 static final String[] EMPTY_ZONE = {"", "", "", "", "", ""}; 93 static final String META_ETCUTC_ZONE_NAME = "ETC_UTC"; 94 95 private static SupplementDataParseHandler handlerSuppl; 96 private static LikelySubtagsParseHandler handlerLikelySubtags; 97 private static WinZonesParseHandler handlerWinZones; 98 static PluralsParseHandler handlerPlurals; 99 static SupplementalMetadataParseHandler handlerSupplMeta; 100 static NumberingSystemsParseHandler handlerNumbering; 101 static MetaZonesParseHandler handlerMetaZones; 102 static TimeZoneParseHandler handlerTimeZone; 103 private static BundleGenerator bundleGenerator; 104 105 // java.base module related 106 static boolean isBaseModule = false; 107 static final Set<Locale> BASE_LOCALES = new HashSet<>(); 108 109 // "parentLocales" map 110 private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>(); 111 private static final ResourceBundle.Control defCon = 112 ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT); 113 114 private static Set<String> AVAILABLE_TZIDS; 115 private static String zoneNameTempFile; 116 private static String tzDataDir; 117 private static final Map<String, String> canonicalTZMap = new HashMap<>(); 118 119 static enum DraftType { 120 UNCONFIRMED, 121 PROVISIONAL, 122 CONTRIBUTED, 123 APPROVED; 124 125 private static final Map<String, DraftType> map = new HashMap<>(); 126 static { 127 for (DraftType dt : values()) { 128 map.put(dt.getKeyword(), dt); 129 } 130 } 131 static private DraftType defaultType = CONTRIBUTED; 132 133 private final String keyword; 134 135 private DraftType() { 136 keyword = this.name().toLowerCase(Locale.ROOT); 137 138 } 139 140 static DraftType forKeyword(String keyword) { 141 return map.get(keyword); 142 } 143 144 static DraftType getDefault() { 145 return defaultType; 146 } 147 148 static void setDefault(String keyword) { 149 defaultType = Objects.requireNonNull(forKeyword(keyword)); 150 } 151 152 String getKeyword() { 153 return keyword; 154 } 155 } 156 157 static boolean USE_UTF8 = false; 158 private static boolean verbose; 159 160 private CLDRConverter() { 161 // no instantiation 162 } 163 164 @SuppressWarnings("AssignmentToForLoopParameter") 165 public static void main(String[] args) throws Exception { 166 if (args.length != 0) { 167 String currentArg = null; 168 try { 169 for (int i = 0; i < args.length; i++) { 170 currentArg = args[i]; 171 switch (currentArg) { 172 case "-draft": 173 String draftDataType = args[++i]; 174 try { 175 DraftType.setDefault(draftDataType); 176 } catch (NullPointerException e) { 177 severe("Error: incorrect draft value: %s%n", draftDataType); 178 System.exit(1); 179 } 180 info("Using the specified data type: %s%n", draftDataType); 181 break; 182 183 case "-base": 184 // base directory for input files 185 CLDR_BASE = args[++i]; 186 if (!CLDR_BASE.endsWith("/")) { 187 CLDR_BASE += "/"; 188 } 189 break; 190 191 case "-baselocales": 192 // base locales 193 setupBaseLocales(args[++i]); 194 break; 195 196 case "-basemodule": 197 // indicates java.base module resource generation 198 isBaseModule = true; 199 break; 200 201 case "-o": 202 // output directory 203 DESTINATION_DIR = args[++i]; 204 break; 205 206 case "-utf8": 207 USE_UTF8 = true; 208 break; 209 210 case "-verbose": 211 verbose = true; 212 break; 213 214 case "-zntempfile": 215 zoneNameTempFile = args[++i]; 216 break; 217 218 case "-tzdatadir": 219 tzDataDir = args[++i]; 220 break; 221 222 case "-help": 223 usage(); 224 System.exit(0); 225 break; 226 227 default: 228 throw new RuntimeException(); 229 } 230 } 231 } catch (RuntimeException e) { 232 severe("unknown or imcomplete arg(s): " + currentArg); 233 usage(); 234 System.exit(1); 235 } 236 } 237 238 // Set up path names 239 LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd"; 240 LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd"; 241 LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd"; 242 SOURCE_FILE_DIR = CLDR_BASE + "/main"; 243 SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml"; 244 LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml"; 245 NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml"; 246 METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml"; 247 TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml"; 248 SPPL_META_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalMetadata.xml"; 249 WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml"; 250 PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml"; 251 252 if (BASE_LOCALES.isEmpty()) { 253 setupBaseLocales("en-US"); 254 } 255 256 bundleGenerator = new ResourceBundleGenerator(); 257 258 // Parse data independent of locales 259 parseSupplemental(); 260 parseBCP47(); 261 262 List<Bundle> bundles = readBundleList(); 263 convertBundles(bundles); 264 265 if (isBaseModule) { 266 // Generate java.time.format.ZoneName.java 267 generateZoneName(); 268 269 // Generate Windows tzmappings 270 generateWindowsTZMappings(); 271 272 // Generate Plural rules 273 generatePluralRules(); 274 } 275 } 276 277 private static void usage() { 278 errout("Usage: java CLDRConverter [options]%n" 279 + "\t-help output this usage message and exit%n" 280 + "\t-verbose output information%n" 281 + "\t-draft [contributed | approved | provisional | unconfirmed]%n" 282 + "\t\t draft level for using data (default: contributed)%n" 283 + "\t-base dir base directory for CLDR input files%n" 284 + "\t-basemodule generates bundles that go into java.base module%n" 285 + "\t-baselocales loc(,loc)* locales that go into the base module%n" 286 + "\t-o dir output directory (default: ./build/gensrc)%n" 287 + "\t-zntempfile template file for java.time.format.ZoneName.java%n" 288 + "\t-tzdatadir tzdata directory for java.time.format.ZoneName.java%n" 289 + "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n"); 290 } 291 292 static void info(String fmt, Object... args) { 293 if (verbose) { 294 System.out.printf(fmt, args); 295 } 296 } 297 298 static void info(String msg) { 299 if (verbose) { 300 System.out.println(msg); 301 } 302 } 303 304 static void warning(String fmt, Object... args) { 305 System.err.print("Warning: "); 306 System.err.printf(fmt, args); 307 } 308 309 static void warning(String msg) { 310 System.err.print("Warning: "); 311 errout(msg); 312 } 313 314 static void severe(String fmt, Object... args) { 315 System.err.print("Error: "); 316 System.err.printf(fmt, args); 317 } 318 319 static void severe(String msg) { 320 System.err.print("Error: "); 321 errout(msg); 322 } 323 324 private static void errout(String msg) { 325 if (msg.contains("%n")) { 326 System.err.printf(msg); 327 } else { 328 System.err.println(msg); 329 } 330 } 331 332 /** 333 * Configure the parser to allow access to DTDs on the file system. 334 */ 335 private static void enableFileAccess(SAXParser parser) throws SAXNotSupportedException { 336 try { 337 parser.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "file"); 338 } catch (SAXNotRecognizedException ignore) { 339 // property requires >= JAXP 1.5 340 } 341 } 342 343 private static List<Bundle> readBundleList() throws Exception { 344 List<Bundle> retList = new ArrayList<>(); 345 Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR); 346 try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) { 347 for (Path entry : dirStr) { 348 String fileName = entry.getFileName().toString(); 349 if (fileName.endsWith(".xml")) { 350 String id = fileName.substring(0, fileName.indexOf('.')); 351 Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id)); 352 StringBuilder sb = getCandLocales(cldrLoc); 353 if (sb.indexOf("root") == -1) { 354 sb.append("root"); 355 } 356 retList.add(new Bundle(id, sb.toString(), null, null)); 357 } 358 } 359 } 360 361 // Sort the bundles based on id. This will make sure all the parent bundles are 362 // processed first, e.g., for en_GB bundle, en_001, and "root" comes before 363 // en_GB. In order for "root" to come at the beginning, "root" is replaced with 364 // empty string on comparison. 365 retList.sort((o1, o2) -> { 366 String id1 = o1.getID(); 367 String id2 = o2.getID(); 368 if(id1.equals("root")) { 369 id1 = ""; 370 } 371 if(id2.equals("root")) { 372 id2 = ""; 373 } 374 return id1.compareTo(id2); 375 }); 376 return retList; 377 } 378 379 private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>(); 380 381 private static Map<String, SortedSet<String>> metaInfo = new HashMap<>(); 382 383 static { 384 // For generating information on supported locales. 385 metaInfo.put("AvailableLocales", new TreeSet<>()); 386 } 387 388 static Map<String, Object> getCLDRBundle(String id) throws Exception { 389 Map<String, Object> bundle = cldrBundles.get(id); 390 if (bundle != null) { 391 return bundle; 392 } 393 File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml"); 394 if (!file.exists()) { 395 // Skip if the file doesn't exist. 396 return Collections.emptyMap(); 397 } 398 399 info("..... main directory ....."); 400 LDMLParseHandler handler = new LDMLParseHandler(id); 401 parseLDMLFile(file, handler); 402 403 bundle = handler.getData(); 404 cldrBundles.put(id, bundle); 405 406 if (id.equals("root")) { 407 // Calendar data (firstDayOfWeek & minDaysInFirstWeek) 408 bundle = handlerSuppl.getData("root"); 409 if (bundle != null) { 410 //merge two maps into one map 411 Map<String, Object> temp = cldrBundles.remove(id); 412 bundle.putAll(temp); 413 cldrBundles.put(id, bundle); 414 } 415 } 416 return bundle; 417 } 418 419 // Parsers for data in "supplemental" directory 420 // 421 private static void parseSupplemental() throws Exception { 422 // Parse SupplementalData file and store the information in the HashMap 423 // Calendar information such as firstDay and minDay are stored in 424 // supplementalData.xml as of CLDR1.4. Individual territory is listed 425 // with its ISO 3166 country code while default is listed using UNM49 426 // region and composition numerical code (001 for World.) 427 // 428 // SupplementalData file also provides the "parent" locales which 429 // are othrwise not to be fallen back. Process them here as well. 430 // 431 handlerSuppl = new SupplementDataParseHandler(); 432 parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl); 433 Map<String, Object> parentData = handlerSuppl.getData("root"); 434 parentData.keySet().stream() 435 .filter(key -> key.startsWith(PARENT_LOCALE_PREFIX)) 436 .forEach(key -> { 437 parentLocalesMap.put(key, new TreeSet( 438 Arrays.asList(((String)parentData.get(key)).split(" ")))); 439 }); 440 441 // Parse numberingSystems to get digit zero character information. 442 handlerNumbering = new NumberingSystemsParseHandler(); 443 parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering); 444 445 // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names 446 handlerMetaZones = new MetaZonesParseHandler(); 447 parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones); 448 449 // Parse likelySubtags 450 handlerLikelySubtags = new LikelySubtagsParseHandler(); 451 parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags); 452 453 // Parse supplementalMetadata 454 // Currently interested in deprecated time zone ids and language aliases. 455 handlerSupplMeta = new SupplementalMetadataParseHandler(); 456 parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta); 457 458 // Parse windowsZones 459 handlerWinZones = new WinZonesParseHandler(); 460 parseLDMLFile(new File(WINZONES_SOURCE_FILE), handlerWinZones); 461 462 // Parse plurals 463 handlerPlurals = new PluralsParseHandler(); 464 parseLDMLFile(new File(PLURALS_SOURCE_FILE), handlerPlurals); 465 } 466 467 // Parsers for data in "bcp47" directory 468 // 469 private static void parseBCP47() throws Exception { 470 // Parse timezone 471 handlerTimeZone = new TimeZoneParseHandler(); 472 parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone); 473 474 // canonical tz name map 475 // alias -> primary 476 handlerTimeZone.getData().forEach((k, v) -> { 477 String[] ids = ((String)v).split("\\s"); 478 for (int i = 1; i < ids.length; i++) { 479 canonicalTZMap.put(ids[i], ids[0]); 480 } 481 }); 482 } 483 484 private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception { 485 info("..... Parsing " + srcfile.getName() + " ....."); 486 SAXParserFactory pf = SAXParserFactory.newInstance(); 487 pf.setValidating(true); 488 SAXParser parser = pf.newSAXParser(); 489 enableFileAccess(parser); 490 parser.parse(srcfile, handler); 491 } 492 493 private static StringBuilder getCandLocales(Locale cldrLoc) { 494 List<Locale> candList = getCandidateLocales(cldrLoc); 495 StringBuilder sb = new StringBuilder(); 496 for (Locale loc : candList) { 497 if (!loc.equals(Locale.ROOT)) { 498 sb.append(toLocaleName(loc.toLanguageTag())); 499 sb.append(","); 500 } 501 } 502 return sb; 503 } 504 505 private static List<Locale> getCandidateLocales(Locale cldrLoc) { 506 List<Locale> candList = new ArrayList<>(); 507 candList = applyParentLocales("", defCon.getCandidateLocales("", cldrLoc)); 508 return candList; 509 } 510 511 private static void convertBundles(List<Bundle> bundles) throws Exception { 512 // parent locales map. The mappings are put in base metaInfo file 513 // for now. 514 if (isBaseModule) { 515 metaInfo.putAll(parentLocalesMap); 516 } 517 518 for (Bundle bundle : bundles) { 519 // Get the target map, which contains all the data that should be 520 // visible for the bundle's locale 521 522 Map<String, Object> targetMap = bundle.getTargetMap(); 523 524 EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes(); 525 526 if (bundle.isRoot()) { 527 // Add DateTimePatternChars because CLDR no longer supports localized patterns. 528 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ"); 529 } 530 531 // Now the map contains just the entries that need to be in the resources bundles. 532 // Go ahead and generate them. 533 if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) { 534 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID()); 535 if (!localeNamesMap.isEmpty() || bundle.isRoot()) { 536 bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN); 537 } 538 } 539 if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) { 540 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies()); 541 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) { 542 bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN); 543 } 544 } 545 if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) { 546 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID()); 547 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) { 548 bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE); 549 } 550 } 551 if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) { 552 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID()); 553 if (!calendarDataMap.isEmpty() || bundle.isRoot()) { 554 bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN); 555 } 556 } 557 if (bundleTypes.contains(Bundle.Type.FORMATDATA)) { 558 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID()); 559 if (!formatDataMap.isEmpty() || bundle.isRoot()) { 560 bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN); 561 } 562 } 563 564 // For AvailableLocales 565 metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID())); 566 addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID()); 567 } 568 bundleGenerator.generateMetaInfo(metaInfo); 569 } 570 571 static final Map<String, String> aliases = new HashMap<>(); 572 573 /** 574 * Translate the aliases into the real entries in the bundle map. 575 */ 576 static void handleAliases(Map<String, Object> bundleMap) { 577 Set bundleKeys = bundleMap.keySet(); 578 try { 579 for (String key : aliases.keySet()) { 580 String targetKey = aliases.get(key); 581 if (bundleKeys.contains(targetKey)) { 582 bundleMap.putIfAbsent(key, bundleMap.get(targetKey)); 583 } 584 } 585 } catch (Exception ex) { 586 Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex); 587 } 588 } 589 590 /* 591 * Returns the language portion of the given id. 592 * If id is "root", "" is returned. 593 */ 594 static String getLanguageCode(String id) { 595 return "root".equals(id) ? "" : Locale.forLanguageTag(id.replaceAll("_", "-")).getLanguage(); 596 } 597 598 /** 599 * Examine if the id includes the country (territory) code. If it does, it returns 600 * the country code. 601 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 602 * It does NOT return UN M.49 code, e.g., '001', as those three digit numbers cannot 603 * be translated into package names. 604 */ 605 static String getCountryCode(String id) { 606 String rgn = getRegionCode(id); 607 return rgn.length() == 2 ? rgn: null; 608 } 609 610 /** 611 * Examine if the id includes the region code. If it does, it returns 612 * the region code. 613 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 614 * It DOES return UN M.49 code, e.g., '001', as well as ISO 3166 two letter country codes. 615 */ 616 static String getRegionCode(String id) { 617 return Locale.forLanguageTag(id.replaceAll("_", "-")).getCountry(); 618 } 619 620 private static class KeyComparator implements Comparator<String> { 621 static KeyComparator INSTANCE = new KeyComparator(); 622 623 private KeyComparator() { 624 } 625 626 @Override 627 public int compare(String o1, String o2) { 628 int len1 = o1.length(); 629 int len2 = o2.length(); 630 if (!isDigit(o1.charAt(0)) && !isDigit(o2.charAt(0))) { 631 // Shorter string comes first unless either starts with a digit. 632 if (len1 < len2) { 633 return -1; 634 } 635 if (len1 > len2) { 636 return 1; 637 } 638 } 639 return o1.compareTo(o2); 640 } 641 642 private boolean isDigit(char c) { 643 return c >= '0' && c <= '9'; 644 } 645 } 646 647 private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) { 648 Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE); 649 for (String key : map.keySet()) { 650 if (key.startsWith(LOCALE_NAME_PREFIX)) { 651 switch (key) { 652 case LOCALE_SEPARATOR: 653 localeNames.put("ListCompositionPattern", map.get(key)); 654 break; 655 case LOCALE_KEYTYPE: 656 localeNames.put("ListKeyTypePattern", map.get(key)); 657 break; 658 default: 659 localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key)); 660 break; 661 } 662 } 663 } 664 665 if (id.equals("root")) { 666 // Add display name pattern, which is not in CLDR 667 localeNames.put("DisplayNamePattern", "{0,choice,0#|1#{1}|2#{1} ({2})}"); 668 } 669 670 return localeNames; 671 } 672 673 @SuppressWarnings("AssignmentToForLoopParameter") 674 private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names) 675 throws Exception { 676 Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE); 677 for (String key : map.keySet()) { 678 if (key.startsWith(CURRENCY_NAME_PREFIX)) { 679 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key)); 680 } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) { 681 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key)); 682 } 683 } 684 return currencyNames; 685 } 686 687 private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) { 688 Map<String, Object> names = new TreeMap<>(KeyComparator.INSTANCE); 689 690 getAvailableZoneIds().stream().forEach(tzid -> { 691 // If the tzid is deprecated, get the data for the replacement id 692 String tzKey = Optional.ofNullable((String)handlerSupplMeta.get(tzid)) 693 .orElse(tzid); 694 Object data = map.get(TIMEZONE_ID_PREFIX + tzKey); 695 696 if (data instanceof String[]) { 697 // Hack for UTC. UTC is an alias to Etc/UTC in CLDR 698 if (tzid.equals("Etc/UTC") && !map.containsKey(TIMEZONE_ID_PREFIX + "UTC")) { 699 names.put(METAZONE_ID_PREFIX + META_ETCUTC_ZONE_NAME, data); 700 names.put(tzid, META_ETCUTC_ZONE_NAME); 701 names.put("UTC", META_ETCUTC_ZONE_NAME); 702 } else { 703 names.put(tzid, data); 704 } 705 } else { 706 String meta = handlerMetaZones.get(tzKey); 707 if (meta != null) { 708 String metaKey = METAZONE_ID_PREFIX + meta; 709 data = map.get(metaKey); 710 if (data instanceof String[]) { 711 // Keep the metazone prefix here. 712 names.put(metaKey, data); 713 names.put(tzid, meta); 714 } 715 } 716 } 717 }); 718 719 // exemplar cities. 720 Map<String, Object> exCities = map.entrySet().stream() 721 .filter(e -> e.getKey().startsWith(CLDRConverter.EXEMPLAR_CITY_PREFIX)) 722 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 723 names.putAll(exCities); 724 725 // If there's no UTC entry at this point, add an empty one 726 if (!names.isEmpty() && !names.containsKey("UTC")) { 727 names.putIfAbsent(METAZONE_ID_PREFIX + META_EMPTY_ZONE_NAME, EMPTY_ZONE); 728 names.put("UTC", META_EMPTY_ZONE_NAME); 729 } 730 731 // Finally some compatibility stuff 732 ZoneId.SHORT_IDS.entrySet().stream() 733 .filter(e -> !names.containsKey(e.getKey()) && names.containsKey(e.getValue())) 734 .forEach(e -> { 735 names.put(e.getKey(), names.get(e.getValue())); 736 }); 737 738 return names; 739 } 740 741 /** 742 * Extracts the language independent calendar data. Each of the two keys, 743 * "firstDayOfWeek" and "minimalDaysInFirstWeek" has a string value consists of 744 * one or multiple occurrences of: 745 * i: rg1 rg2 ... rgn; 746 * where "i" is the data for the following regions (delimited by a space) after 747 * ":", and ends with a ";". 748 */ 749 private static Map<String, Object> extractCalendarData(Map<String, Object> map, String id) { 750 Map<String, Object> calendarData = new LinkedHashMap<>(); 751 if (id.equals("root")) { 752 calendarData.put("firstDayOfWeek", 753 IntStream.range(1, 8) 754 .mapToObj(String::valueOf) 755 .filter(d -> map.keySet().contains(CALENDAR_FIRSTDAY_PREFIX + d)) 756 .map(d -> d + ": " + map.get(CALENDAR_FIRSTDAY_PREFIX + d)) 757 .collect(Collectors.joining(";"))); 758 calendarData.put("minimalDaysInFirstWeek", 759 IntStream.range(0, 7) 760 .mapToObj(String::valueOf) 761 .filter(d -> map.keySet().contains(CALENDAR_MINDAYS_PREFIX + d)) 762 .map(d -> d + ": " + map.get(CALENDAR_MINDAYS_PREFIX + d)) 763 .collect(Collectors.joining(";"))); 764 } 765 return calendarData; 766 } 767 768 static final String[] FORMAT_DATA_ELEMENTS = { 769 "MonthNames", 770 "standalone.MonthNames", 771 "MonthAbbreviations", 772 "standalone.MonthAbbreviations", 773 "MonthNarrows", 774 "standalone.MonthNarrows", 775 "DayNames", 776 "standalone.DayNames", 777 "DayAbbreviations", 778 "standalone.DayAbbreviations", 779 "DayNarrows", 780 "standalone.DayNarrows", 781 "QuarterNames", 782 "standalone.QuarterNames", 783 "QuarterAbbreviations", 784 "standalone.QuarterAbbreviations", 785 "QuarterNarrows", 786 "standalone.QuarterNarrows", 787 "AmPmMarkers", 788 "narrow.AmPmMarkers", 789 "abbreviated.AmPmMarkers", 790 "long.Eras", 791 "Eras", 792 "narrow.Eras", 793 "field.era", 794 "field.year", 795 "field.month", 796 "field.week", 797 "field.weekday", 798 "field.dayperiod", 799 "field.hour", 800 "timezone.hourFormat", 801 "timezone.gmtFormat", 802 "timezone.gmtZeroFormat", 803 "timezone.regionFormat", 804 "timezone.regionFormat.daylight", 805 "timezone.regionFormat.standard", 806 "field.minute", 807 "field.second", 808 "field.zone", 809 "TimePatterns", 810 "DatePatterns", 811 "DateTimePatterns", 812 "DateTimePatternChars" 813 }; 814 815 private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) { 816 Map<String, Object> formatData = new LinkedHashMap<>(); 817 for (CalendarType calendarType : CalendarType.values()) { 818 if (calendarType == CalendarType.GENERIC) { 819 continue; 820 } 821 String prefix = calendarType.keyElementName(); 822 for (String element : FORMAT_DATA_ELEMENTS) { 823 String key = prefix + element; 824 copyIfPresent(map, "java.time." + key, formatData); 825 copyIfPresent(map, key, formatData); 826 } 827 } 828 829 for (String key : map.keySet()) { 830 // Copy available calendar names 831 if (key.startsWith(CLDRConverter.LOCALE_TYPE_PREFIX_CA)) { 832 String type = key.substring(CLDRConverter.LOCALE_TYPE_PREFIX_CA.length()); 833 for (CalendarType calendarType : CalendarType.values()) { 834 if (calendarType == CalendarType.GENERIC) { 835 continue; 836 } 837 if (type.equals(calendarType.lname())) { 838 Object value = map.get(key); 839 String dataKey = key.replace(LOCALE_TYPE_PREFIX_CA, 840 CALENDAR_NAME_PREFIX); 841 formatData.put(dataKey, value); 842 String ukey = CALENDAR_NAME_PREFIX + calendarType.uname(); 843 if (!dataKey.equals(ukey)) { 844 formatData.put(ukey, value); 845 } 846 } 847 } 848 } 849 } 850 851 copyIfPresent(map, "DefaultNumberingSystem", formatData); 852 853 @SuppressWarnings("unchecked") 854 List<String> numberingScripts = (List<String>) map.remove("numberingScripts"); 855 if (numberingScripts != null) { 856 for (String script : numberingScripts) { 857 copyIfPresent(map, script + ".NumberElements", formatData); 858 copyIfPresent(map, script + ".NumberPatterns", formatData); 859 } 860 } else { 861 copyIfPresent(map, "NumberElements", formatData); 862 copyIfPresent(map, "NumberPatterns", formatData); 863 } 864 copyIfPresent(map, "short.CompactNumberPatterns", formatData); 865 copyIfPresent(map, "long.CompactNumberPatterns", formatData); 866 867 // put extra number elements for available scripts into formatData, if it is "root" 868 if (id.equals("root")) { 869 handlerNumbering.keySet().stream() 870 .filter(k -> !numberingScripts.contains(k)) 871 .forEach(k -> { 872 String[] ne = (String[])map.get("latn.NumberElements"); 873 String[] neNew = Arrays.copyOf(ne, ne.length); 874 neNew[4] = handlerNumbering.get(k).substring(0, 1); 875 formatData.put(k + ".NumberElements", neNew); 876 }); 877 } 878 return formatData; 879 } 880 881 private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) { 882 Object value = src.get(key); 883 if (value != null) { 884 dest.put(key, value); 885 } 886 } 887 888 // --- code below here is adapted from java.util.Properties --- 889 private static final String specialSaveCharsJava = "\""; 890 private static final String specialSaveCharsProperties = "=: \t\r\n\f#!"; 891 892 /* 893 * Converts unicodes to encoded \uxxxx 894 * and writes out any of the characters in specialSaveChars 895 * with a preceding slash 896 */ 897 static String saveConvert(String theString, boolean useJava) { 898 if (theString == null) { 899 return ""; 900 } 901 902 String specialSaveChars; 903 if (useJava) { 904 specialSaveChars = specialSaveCharsJava; 905 } else { 906 specialSaveChars = specialSaveCharsProperties; 907 } 908 boolean escapeSpace = false; 909 910 int len = theString.length(); 911 StringBuilder outBuffer = new StringBuilder(len * 2); 912 Formatter formatter = new Formatter(outBuffer, Locale.ROOT); 913 914 for (int x = 0; x < len; x++) { 915 char aChar = theString.charAt(x); 916 switch (aChar) { 917 case ' ': 918 if (x == 0 || escapeSpace) { 919 outBuffer.append('\\'); 920 } 921 outBuffer.append(' '); 922 break; 923 case '\\': 924 outBuffer.append('\\'); 925 outBuffer.append('\\'); 926 break; 927 case '\t': 928 outBuffer.append('\\'); 929 outBuffer.append('t'); 930 break; 931 case '\n': 932 outBuffer.append('\\'); 933 outBuffer.append('n'); 934 break; 935 case '\r': 936 outBuffer.append('\\'); 937 outBuffer.append('r'); 938 break; 939 case '\f': 940 outBuffer.append('\\'); 941 outBuffer.append('f'); 942 break; 943 default: 944 if (aChar < 0x0020 || (!USE_UTF8 && aChar > 0x007e)) { 945 formatter.format("\\u%04x", (int)aChar); 946 } else { 947 if (specialSaveChars.indexOf(aChar) != -1) { 948 outBuffer.append('\\'); 949 } 950 outBuffer.append(aChar); 951 } 952 } 953 } 954 return outBuffer.toString(); 955 } 956 957 private static String toLanguageTag(String locName) { 958 if (locName.indexOf('_') == -1) { 959 return locName; 960 } 961 String tag = locName.replaceAll("_", "-"); 962 Locale loc = Locale.forLanguageTag(tag); 963 return loc.toLanguageTag(); 964 } 965 966 private static void addLikelySubtags(Map<String, SortedSet<String>> metaInfo, String category, String id) { 967 String likelySubtag = handlerLikelySubtags.get(id); 968 if (likelySubtag != null) { 969 // Remove Script for now 970 metaInfo.get(category).add(toLanguageTag(likelySubtag).replaceFirst("-[A-Z][a-z]{3}", "")); 971 } 972 } 973 974 private static String toLocaleName(String tag) { 975 if (tag.indexOf('-') == -1) { 976 return tag; 977 } 978 return tag.replaceAll("-", "_"); 979 } 980 981 private static void setupBaseLocales(String localeList) { 982 Arrays.stream(localeList.split(",")) 983 .map(Locale::forLanguageTag) 984 .map(l -> Control.getControl(Control.FORMAT_DEFAULT) 985 .getCandidateLocales("", l)) 986 .forEach(BASE_LOCALES::addAll); 987 } 988 989 // applying parent locale rules to the passed candidates list 990 // This has to match with the one in sun.util.cldr.CLDRLocaleProviderAdapter 991 private static Map<Locale, Locale> childToParentLocaleMap = null; 992 private static List<Locale> applyParentLocales(String baseName, List<Locale> candidates) { 993 if (Objects.isNull(childToParentLocaleMap)) { 994 childToParentLocaleMap = new HashMap<>(); 995 parentLocalesMap.keySet().forEach(key -> { 996 String parent = key.substring(PARENT_LOCALE_PREFIX.length()).replaceAll("_", "-"); 997 parentLocalesMap.get(key).stream().forEach(child -> { 998 childToParentLocaleMap.put(Locale.forLanguageTag(child), 999 "root".equals(parent) ? Locale.ROOT : Locale.forLanguageTag(parent)); 1000 }); 1001 }); 1002 } 1003 1004 // check irregular parents 1005 for (int i = 0; i < candidates.size(); i++) { 1006 Locale l = candidates.get(i); 1007 Locale p = childToParentLocaleMap.get(l); 1008 if (!l.equals(Locale.ROOT) && 1009 Objects.nonNull(p) && 1010 !candidates.get(i+1).equals(p)) { 1011 List<Locale> applied = candidates.subList(0, i+1); 1012 applied.addAll(applyParentLocales(baseName, defCon.getCandidateLocales(baseName, p))); 1013 return applied; 1014 } 1015 } 1016 1017 return candidates; 1018 } 1019 1020 private static void generateZoneName() throws Exception { 1021 Files.createDirectories(Paths.get(DESTINATION_DIR, "java", "time", "format")); 1022 Files.write(Paths.get(DESTINATION_DIR, "java", "time", "format", "ZoneName.java"), 1023 Files.lines(Paths.get(zoneNameTempFile)) 1024 .flatMap(l -> { 1025 if (l.equals("%%%%ZIDMAP%%%%")) { 1026 return zidMapEntry(); 1027 } else if (l.equals("%%%%MZONEMAP%%%%")) { 1028 return handlerMetaZones.mzoneMapEntry(); 1029 } else if (l.equals("%%%%DEPRECATED%%%%")) { 1030 return handlerSupplMeta.deprecatedMap(); 1031 } else if (l.equals("%%%%TZDATALINK%%%%")) { 1032 return tzDataLinkEntry(); 1033 } else { 1034 return Stream.of(l); 1035 } 1036 }) 1037 .collect(Collectors.toList()), 1038 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1039 } 1040 1041 // This method assumes handlerMetaZones is already initialized 1042 private static Set<String> getAvailableZoneIds() { 1043 assert handlerMetaZones != null; 1044 if (AVAILABLE_TZIDS == null) { 1045 AVAILABLE_TZIDS = new HashSet<>(ZoneId.getAvailableZoneIds()); 1046 AVAILABLE_TZIDS.addAll(handlerMetaZones.keySet()); 1047 AVAILABLE_TZIDS.remove(MetaZonesParseHandler.NO_METAZONE_KEY); 1048 } 1049 1050 return AVAILABLE_TZIDS; 1051 } 1052 1053 private static Stream<String> zidMapEntry() { 1054 return getAvailableZoneIds().stream() 1055 .map(id -> { 1056 String canonId = canonicalTZMap.getOrDefault(id, id); 1057 String meta = handlerMetaZones.get(canonId); 1058 String zone001 = handlerMetaZones.zidMap().get(meta); 1059 return zone001 == null ? "" : 1060 String.format(" \"%s\", \"%s\", \"%s\",", 1061 id, meta, zone001); 1062 }) 1063 .filter(s -> !s.isEmpty()) 1064 .sorted(); 1065 } 1066 1067 private static Stream<String> tzDataLinkEntry() { 1068 try { 1069 return Files.walk(Paths.get(tzDataDir), 1) 1070 .filter(p -> !Files.isDirectory(p)) 1071 .flatMap(CLDRConverter::extractLinks) 1072 .sorted(); 1073 } catch (IOException e) { 1074 throw new UncheckedIOException(e); 1075 } 1076 } 1077 1078 private static Stream<String> extractLinks(Path tzFile) { 1079 try { 1080 return Files.lines(tzFile) 1081 .filter(l -> l.startsWith("Link")) 1082 .map(l -> l.replaceFirst("^Link[\\s]+(\\S+)\\s+(\\S+).*", 1083 " \"$2\", \"$1\",")); 1084 } catch (IOException e) { 1085 throw new UncheckedIOException(e); 1086 } 1087 } 1088 1089 // Generate tzmappings for Windows. The format is: 1090 // 1091 // (Windows Zone Name):(REGION):(Java TZID) 1092 // 1093 // where: 1094 // Windows Zone Name: arbitrary time zone name string used in Windows 1095 // REGION: ISO3166 or UN M.49 code 1096 // Java TZID: Java's time zone ID 1097 // 1098 // Note: the entries are alphabetically sorted, *except* the "world" region 1099 // code, i.e., "001". It should be the last entry for the same windows time 1100 // zone name entries. (cf. TimeZone_md.c) 1101 private static void generateWindowsTZMappings() throws Exception { 1102 Files.createDirectories(Paths.get(DESTINATION_DIR, "windows", "conf")); 1103 Files.write(Paths.get(DESTINATION_DIR, "windows", "conf", "tzmappings"), 1104 handlerWinZones.keySet().stream() 1105 .map(k -> k + ":" + handlerWinZones.get(k) + ":") 1106 .sorted(new Comparator<String>() { 1107 public int compare(String t1, String t2) { 1108 String[] s1 = t1.split(":"); 1109 String[] s2 = t2.split(":"); 1110 if (s1[0].equals(s2[0])) { 1111 if (s1[1].equals("001")) { 1112 return 1; 1113 } else if (s2[1].equals("001")) { 1114 return -1; 1115 } else { 1116 return s1[1].compareTo(s2[1]); 1117 } 1118 } else { 1119 return s1[0].compareTo(s2[0]); 1120 } 1121 } 1122 }) 1123 .collect(Collectors.toList()), 1124 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1125 } 1126 1127 /** 1128 * Generate ResourceBundle source file for plural rules. The generated 1129 * class is {@code sun.text.resources.PluralRules} which has one public 1130 * two dimensional array {@code rulesArray}. Each array element consists 1131 * of two elements that designate the locale and the locale's plural rules 1132 * string. The latter has the syntax from Unicode Consortium's 1133 * <a href="http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax"> 1134 * Plural rules syntax</a>. {@code samples} and {@code "other"} are being ommited. 1135 * 1136 * @throws Exception 1137 */ 1138 private static void generatePluralRules() throws Exception { 1139 Files.createDirectories(Paths.get(DESTINATION_DIR, "sun", "text", "resources")); 1140 Files.write(Paths.get(DESTINATION_DIR, "sun", "text", "resources", "PluralRules.java"), 1141 Stream.concat( 1142 Stream.concat( 1143 Stream.of( 1144 "package sun.text.resources;", 1145 "public final class PluralRules {", 1146 " public static final String[][] rulesArray = {" 1147 ), 1148 pluralRulesStream().sorted() 1149 ), 1150 Stream.of( 1151 " };", 1152 "}" 1153 ) 1154 ) 1155 .collect(Collectors.toList()), 1156 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1157 } 1158 1159 private static Stream<String> pluralRulesStream() { 1160 return handlerPlurals.getData().entrySet().stream() 1161 .filter(e -> !((Map<String, String>)e.getValue()).isEmpty()) 1162 .map(e -> { 1163 String loc = e.getKey(); 1164 Map<String, String> rules = (Map<String, String>)e.getValue(); 1165 return " {\"" + loc + "\", \"" + 1166 rules.entrySet().stream() 1167 .map(rule -> rule.getKey() + ":" + rule.getValue().replaceFirst("@.*", "")) 1168 .map(String::trim) 1169 .collect(Collectors.joining(";")) + "\"},"; 1170 }); 1171 } 1172 1173 // for debug 1174 static void dumpMap(Map<String, Object> map) { 1175 map.entrySet().stream() 1176 .sorted(Map.Entry.comparingByKey()) 1177 .map(e -> { 1178 Object val = e.getValue(); 1179 String valStr = null; 1180 1181 if (val instanceof String[]) { 1182 valStr = Arrays.asList((String[])val).toString(); 1183 } else if (val != null) { 1184 valStr = val.toString(); 1185 } 1186 return e.getKey() + " = " + valStr; 1187 }) 1188 .forEach(System.out::println); 1189 } 1190 } 1191