1 /* 2 * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.cldrconverter; 27 28 import build.tools.cldrconverter.BundleGenerator.BundleType; 29 import java.io.File; 30 import java.io.IOException; 31 import java.io.UncheckedIOException; 32 import java.nio.file.*; 33 import java.text.MessageFormat; 34 import java.time.*; 35 import java.util.*; 36 import java.util.ResourceBundle.Control; 37 import java.util.logging.Level; 38 import java.util.logging.Logger; 39 import java.util.stream.Collectors; 40 import java.util.stream.IntStream; 41 import java.util.stream.Stream; 42 import javax.xml.parsers.SAXParser; 43 import javax.xml.parsers.SAXParserFactory; 44 import org.xml.sax.SAXNotRecognizedException; 45 import org.xml.sax.SAXNotSupportedException; 46 47 48 /** 49 * Converts locale data from "Locale Data Markup Language" format to 50 * JRE resource bundle format. LDML is the format used by the Common 51 * Locale Data Repository maintained by the Unicode Consortium. 52 */ 53 public class CLDRConverter { 54 55 static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd"; 56 static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd"; 57 static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd"; 58 59 60 private static String CLDR_BASE; 61 static String LOCAL_LDML_DTD; 62 static String LOCAL_SPPL_LDML_DTD; 63 static String LOCAL_BCP47_LDML_DTD; 64 private static String SOURCE_FILE_DIR; 65 private static String SPPL_SOURCE_FILE; 66 private static String SPPL_META_SOURCE_FILE; 67 private static String NUMBERING_SOURCE_FILE; 68 private static String METAZONES_SOURCE_FILE; 69 private static String LIKELYSUBTAGS_SOURCE_FILE; 70 private static String TIMEZONE_SOURCE_FILE; 71 private static String WINZONES_SOURCE_FILE; 72 private static String PLURALS_SOURCE_FILE; 73 static String DESTINATION_DIR = "build/gensrc"; 74 75 static final String LOCALE_NAME_PREFIX = "locale.displayname."; 76 static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator"; 77 static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype"; 78 static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key."; 79 static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type."; 80 static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca."; 81 static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol."; 82 static final String CURRENCY_NAME_PREFIX = "currency.displayname."; 83 static final String CALENDAR_NAME_PREFIX = "calendarname."; 84 static final String CALENDAR_FIRSTDAY_PREFIX = "firstDay."; 85 static final String CALENDAR_MINDAYS_PREFIX = "minDays."; 86 static final String TIMEZONE_ID_PREFIX = "timezone.id."; 87 static final String EXEMPLAR_CITY_PREFIX = "timezone.excity."; 88 static final String ZONE_NAME_PREFIX = "timezone.displayname."; 89 static final String METAZONE_ID_PREFIX = "metazone.id."; 90 static final String PARENT_LOCALE_PREFIX = "parentLocale."; 91 static final String META_EMPTY_ZONE_NAME = "EMPTY_ZONE"; 92 static final String[] EMPTY_ZONE = {"", "", "", "", "", ""}; 93 static final String META_ETCUTC_ZONE_NAME = "ETC_UTC"; 94 95 // Old 3-letter-id mappings for compatibility. Copied from sun.util.calendar.ZoneInfoFile. 96 private static final String[][] oldMappings = new String[][] { 97 { "ACT", "Australia/Darwin" }, 98 { "AET", "Australia/Sydney" }, 99 { "AGT", "America/Argentina/Buenos_Aires" }, 100 { "ART", "Africa/Cairo" }, 101 { "AST", "America/Anchorage" }, 102 { "BET", "America/Sao_Paulo" }, 103 { "BST", "Asia/Dhaka" }, 104 { "CAT", "Africa/Harare" }, 105 { "CNT", "America/St_Johns" }, 106 { "CST", "America/Chicago" }, 107 { "CTT", "Asia/Shanghai" }, 108 { "EAT", "Africa/Addis_Ababa" }, 109 { "ECT", "Europe/Paris" }, 110 { "IET", "America/Indiana/Indianapolis" }, 111 { "IST", "Asia/Kolkata" }, 112 { "JST", "Asia/Tokyo" }, 113 { "MIT", "Pacific/Apia" }, 114 { "NET", "Asia/Yerevan" }, 115 { "NST", "Pacific/Auckland" }, 116 { "PLT", "Asia/Karachi" }, 117 { "PNT", "America/Phoenix" }, 118 { "PRT", "America/Puerto_Rico" }, 119 { "PST", "America/Los_Angeles" }, 120 { "SST", "Pacific/Guadalcanal" }, 121 { "VST", "Asia/Ho_Chi_Minh" }, 122 }; 123 124 private static SupplementDataParseHandler handlerSuppl; 125 private static LikelySubtagsParseHandler handlerLikelySubtags; 126 private static WinZonesParseHandler handlerWinZones; 127 static PluralsParseHandler handlerPlurals; 128 static SupplementalMetadataParseHandler handlerSupplMeta; 129 static NumberingSystemsParseHandler handlerNumbering; 130 static MetaZonesParseHandler handlerMetaZones; 131 static TimeZoneParseHandler handlerTimeZone; 132 private static BundleGenerator bundleGenerator; 133 134 // java.base module related 135 static boolean isBaseModule = false; 136 static final Set<Locale> BASE_LOCALES = new HashSet<>(); 137 138 // "parentLocales" map 139 private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>(); 140 private static final ResourceBundle.Control defCon = 141 ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT); 142 143 private static Set<String> AVAILABLE_TZIDS; 144 private static String zoneNameTempFile; 145 private static String tzDataDir; 146 private static final Map<String, String> canonicalTZMap = new HashMap<>(); 147 148 static enum DraftType { 149 UNCONFIRMED, 150 PROVISIONAL, 151 CONTRIBUTED, 152 APPROVED; 153 154 private static final Map<String, DraftType> map = new HashMap<>(); 155 static { 156 for (DraftType dt : values()) { 157 map.put(dt.getKeyword(), dt); 158 } 159 } 160 static private DraftType defaultType = CONTRIBUTED; 161 162 private final String keyword; 163 164 private DraftType() { 165 keyword = this.name().toLowerCase(Locale.ROOT); 166 167 } 168 169 static DraftType forKeyword(String keyword) { 170 return map.get(keyword); 171 } 172 173 static DraftType getDefault() { 174 return defaultType; 175 } 176 177 static void setDefault(String keyword) { 178 defaultType = Objects.requireNonNull(forKeyword(keyword)); 179 } 180 181 String getKeyword() { 182 return keyword; 183 } 184 } 185 186 static boolean USE_UTF8 = false; 187 private static boolean verbose; 188 189 private CLDRConverter() { 190 // no instantiation 191 } 192 193 @SuppressWarnings("AssignmentToForLoopParameter") 194 public static void main(String[] args) throws Exception { 195 if (args.length != 0) { 196 String currentArg = null; 197 try { 198 for (int i = 0; i < args.length; i++) { 199 currentArg = args[i]; 200 switch (currentArg) { 201 case "-draft": 202 String draftDataType = args[++i]; 203 try { 204 DraftType.setDefault(draftDataType); 205 } catch (NullPointerException e) { 206 severe("Error: incorrect draft value: %s%n", draftDataType); 207 System.exit(1); 208 } 209 info("Using the specified data type: %s%n", draftDataType); 210 break; 211 212 case "-base": 213 // base directory for input files 214 CLDR_BASE = args[++i]; 215 if (!CLDR_BASE.endsWith("/")) { 216 CLDR_BASE += "/"; 217 } 218 break; 219 220 case "-baselocales": 221 // base locales 222 setupBaseLocales(args[++i]); 223 break; 224 225 case "-basemodule": 226 // indicates java.base module resource generation 227 isBaseModule = true; 228 break; 229 230 case "-o": 231 // output directory 232 DESTINATION_DIR = args[++i]; 233 break; 234 235 case "-utf8": 236 USE_UTF8 = true; 237 break; 238 239 case "-verbose": 240 verbose = true; 241 break; 242 243 case "-zntempfile": 244 zoneNameTempFile = args[++i]; 245 break; 246 247 case "-tzdatadir": 248 tzDataDir = args[++i]; 249 break; 250 251 case "-help": 252 usage(); 253 System.exit(0); 254 break; 255 256 default: 257 throw new RuntimeException(); 258 } 259 } 260 } catch (RuntimeException e) { 261 severe("unknown or imcomplete arg(s): " + currentArg); 262 usage(); 263 System.exit(1); 264 } 265 } 266 267 // Set up path names 268 LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd"; 269 LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd"; 270 LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd"; 271 SOURCE_FILE_DIR = CLDR_BASE + "/main"; 272 SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml"; 273 LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml"; 274 NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml"; 275 METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml"; 276 TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml"; 277 SPPL_META_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalMetadata.xml"; 278 WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml"; 279 PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml"; 280 281 if (BASE_LOCALES.isEmpty()) { 282 setupBaseLocales("en-US"); 283 } 284 285 bundleGenerator = new ResourceBundleGenerator(); 286 287 // Parse data independent of locales 288 parseSupplemental(); 289 parseBCP47(); 290 291 List<Bundle> bundles = readBundleList(); 292 convertBundles(bundles); 293 294 if (isBaseModule) { 295 // Generate java.time.format.ZoneName.java 296 generateZoneName(); 297 298 // Generate Windows tzmappings 299 generateWindowsTZMappings(); 300 301 // Generate Plural rules 302 generatePluralRules(); 303 } 304 } 305 306 private static void usage() { 307 errout("Usage: java CLDRConverter [options]%n" 308 + "\t-help output this usage message and exit%n" 309 + "\t-verbose output information%n" 310 + "\t-draft [contributed | approved | provisional | unconfirmed]%n" 311 + "\t\t draft level for using data (default: contributed)%n" 312 + "\t-base dir base directory for CLDR input files%n" 313 + "\t-basemodule generates bundles that go into java.base module%n" 314 + "\t-baselocales loc(,loc)* locales that go into the base module%n" 315 + "\t-o dir output directory (default: ./build/gensrc)%n" 316 + "\t-zntempfile template file for java.time.format.ZoneName.java%n" 317 + "\t-tzdatadir tzdata directory for java.time.format.ZoneName.java%n" 318 + "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n"); 319 } 320 321 static void info(String fmt, Object... args) { 322 if (verbose) { 323 System.out.printf(fmt, args); 324 } 325 } 326 327 static void info(String msg) { 328 if (verbose) { 329 System.out.println(msg); 330 } 331 } 332 333 static void warning(String fmt, Object... args) { 334 System.err.print("Warning: "); 335 System.err.printf(fmt, args); 336 } 337 338 static void warning(String msg) { 339 System.err.print("Warning: "); 340 errout(msg); 341 } 342 343 static void severe(String fmt, Object... args) { 344 System.err.print("Error: "); 345 System.err.printf(fmt, args); 346 } 347 348 static void severe(String msg) { 349 System.err.print("Error: "); 350 errout(msg); 351 } 352 353 private static void errout(String msg) { 354 if (msg.contains("%n")) { 355 System.err.printf(msg); 356 } else { 357 System.err.println(msg); 358 } 359 } 360 361 /** 362 * Configure the parser to allow access to DTDs on the file system. 363 */ 364 private static void enableFileAccess(SAXParser parser) throws SAXNotSupportedException { 365 try { 366 parser.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "file"); 367 } catch (SAXNotRecognizedException ignore) { 368 // property requires >= JAXP 1.5 369 } 370 } 371 372 private static List<Bundle> readBundleList() throws Exception { 373 List<Bundle> retList = new ArrayList<>(); 374 Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR); 375 try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) { 376 for (Path entry : dirStr) { 377 String fileName = entry.getFileName().toString(); 378 if (fileName.endsWith(".xml")) { 379 String id = fileName.substring(0, fileName.indexOf('.')); 380 Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id)); 381 StringBuilder sb = getCandLocales(cldrLoc); 382 if (sb.indexOf("root") == -1) { 383 sb.append("root"); 384 } 385 retList.add(new Bundle(id, sb.toString(), null, null)); 386 } 387 } 388 } 389 390 // Sort the bundles based on id. This will make sure all the parent bundles are 391 // processed first, e.g., for en_GB bundle, en_001, and "root" comes before 392 // en_GB. In order for "root" to come at the beginning, "root" is replaced with 393 // empty string on comparison. 394 retList.sort((o1, o2) -> { 395 String id1 = o1.getID(); 396 String id2 = o2.getID(); 397 if(id1.equals("root")) { 398 id1 = ""; 399 } 400 if(id2.equals("root")) { 401 id2 = ""; 402 } 403 return id1.compareTo(id2); 404 }); 405 return retList; 406 } 407 408 private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>(); 409 410 private static Map<String, SortedSet<String>> metaInfo = new HashMap<>(); 411 412 static { 413 // For generating information on supported locales. 414 metaInfo.put("AvailableLocales", new TreeSet<>()); 415 } 416 417 static Map<String, Object> getCLDRBundle(String id) throws Exception { 418 Map<String, Object> bundle = cldrBundles.get(id); 419 if (bundle != null) { 420 return bundle; 421 } 422 File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml"); 423 if (!file.exists()) { 424 // Skip if the file doesn't exist. 425 return Collections.emptyMap(); 426 } 427 428 info("..... main directory ....."); 429 LDMLParseHandler handler = new LDMLParseHandler(id); 430 parseLDMLFile(file, handler); 431 432 bundle = handler.getData(); 433 cldrBundles.put(id, bundle); 434 435 if (id.equals("root")) { 436 // Calendar data (firstDayOfWeek & minDaysInFirstWeek) 437 bundle = handlerSuppl.getData("root"); 438 if (bundle != null) { 439 //merge two maps into one map 440 Map<String, Object> temp = cldrBundles.remove(id); 441 bundle.putAll(temp); 442 cldrBundles.put(id, bundle); 443 } 444 } 445 return bundle; 446 } 447 448 // Parsers for data in "supplemental" directory 449 // 450 private static void parseSupplemental() throws Exception { 451 // Parse SupplementalData file and store the information in the HashMap 452 // Calendar information such as firstDay and minDay are stored in 453 // supplementalData.xml as of CLDR1.4. Individual territory is listed 454 // with its ISO 3166 country code while default is listed using UNM49 455 // region and composition numerical code (001 for World.) 456 // 457 // SupplementalData file also provides the "parent" locales which 458 // are othrwise not to be fallen back. Process them here as well. 459 // 460 handlerSuppl = new SupplementDataParseHandler(); 461 parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl); 462 Map<String, Object> parentData = handlerSuppl.getData("root"); 463 parentData.keySet().stream() 464 .filter(key -> key.startsWith(PARENT_LOCALE_PREFIX)) 465 .forEach(key -> { 466 parentLocalesMap.put(key, new TreeSet( 467 Arrays.asList(((String)parentData.get(key)).split(" ")))); 468 }); 469 470 // Parse numberingSystems to get digit zero character information. 471 handlerNumbering = new NumberingSystemsParseHandler(); 472 parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering); 473 474 // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names 475 handlerMetaZones = new MetaZonesParseHandler(); 476 parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones); 477 478 // Parse likelySubtags 479 handlerLikelySubtags = new LikelySubtagsParseHandler(); 480 parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags); 481 482 // Parse supplementalMetadata 483 // Currently interested in deprecated time zone ids and language aliases. 484 handlerSupplMeta = new SupplementalMetadataParseHandler(); 485 parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta); 486 487 // Parse windowsZones 488 handlerWinZones = new WinZonesParseHandler(); 489 parseLDMLFile(new File(WINZONES_SOURCE_FILE), handlerWinZones); 490 491 // Parse plurals 492 handlerPlurals = new PluralsParseHandler(); 493 parseLDMLFile(new File(PLURALS_SOURCE_FILE), handlerPlurals); 494 } 495 496 // Parsers for data in "bcp47" directory 497 // 498 private static void parseBCP47() throws Exception { 499 // Parse timezone 500 handlerTimeZone = new TimeZoneParseHandler(); 501 parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone); 502 503 // canonical tz name map 504 // alias -> primary 505 handlerTimeZone.getData().forEach((k, v) -> { 506 String[] ids = ((String)v).split("\\s"); 507 for (int i = 1; i < ids.length; i++) { 508 canonicalTZMap.put(ids[i], ids[0]); 509 } 510 }); 511 } 512 513 private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception { 514 info("..... Parsing " + srcfile.getName() + " ....."); 515 SAXParserFactory pf = SAXParserFactory.newInstance(); 516 pf.setValidating(true); 517 SAXParser parser = pf.newSAXParser(); 518 enableFileAccess(parser); 519 parser.parse(srcfile, handler); 520 } 521 522 private static StringBuilder getCandLocales(Locale cldrLoc) { 523 List<Locale> candList = getCandidateLocales(cldrLoc); 524 StringBuilder sb = new StringBuilder(); 525 for (Locale loc : candList) { 526 if (!loc.equals(Locale.ROOT)) { 527 sb.append(toLocaleName(loc.toLanguageTag())); 528 sb.append(","); 529 } 530 } 531 return sb; 532 } 533 534 private static List<Locale> getCandidateLocales(Locale cldrLoc) { 535 List<Locale> candList = new ArrayList<>(); 536 candList = applyParentLocales("", defCon.getCandidateLocales("", cldrLoc)); 537 return candList; 538 } 539 540 private static void convertBundles(List<Bundle> bundles) throws Exception { 541 // parent locales map. The mappings are put in base metaInfo file 542 // for now. 543 if (isBaseModule) { 544 metaInfo.putAll(parentLocalesMap); 545 } 546 547 for (Bundle bundle : bundles) { 548 // Get the target map, which contains all the data that should be 549 // visible for the bundle's locale 550 551 Map<String, Object> targetMap = bundle.getTargetMap(); 552 553 EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes(); 554 555 if (bundle.isRoot()) { 556 // Add DateTimePatternChars because CLDR no longer supports localized patterns. 557 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ"); 558 } 559 560 // Now the map contains just the entries that need to be in the resources bundles. 561 // Go ahead and generate them. 562 if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) { 563 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID()); 564 if (!localeNamesMap.isEmpty() || bundle.isRoot()) { 565 bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN); 566 } 567 } 568 if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) { 569 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies()); 570 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) { 571 bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN); 572 } 573 } 574 if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) { 575 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID()); 576 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) { 577 bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE); 578 } 579 } 580 if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) { 581 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID()); 582 if (!calendarDataMap.isEmpty() || bundle.isRoot()) { 583 bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN); 584 } 585 } 586 if (bundleTypes.contains(Bundle.Type.FORMATDATA)) { 587 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID()); 588 if (!formatDataMap.isEmpty() || bundle.isRoot()) { 589 bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN); 590 } 591 } 592 593 // For AvailableLocales 594 metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID())); 595 addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID()); 596 } 597 bundleGenerator.generateMetaInfo(metaInfo); 598 } 599 600 static final Map<String, String> aliases = new HashMap<>(); 601 602 /** 603 * Translate the aliases into the real entries in the bundle map. 604 */ 605 static void handleAliases(Map<String, Object> bundleMap) { 606 Set bundleKeys = bundleMap.keySet(); 607 try { 608 for (String key : aliases.keySet()) { 609 String targetKey = aliases.get(key); 610 if (bundleKeys.contains(targetKey)) { 611 bundleMap.putIfAbsent(key, bundleMap.get(targetKey)); 612 } 613 } 614 } catch (Exception ex) { 615 Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex); 616 } 617 } 618 619 /* 620 * Returns the language portion of the given id. 621 * If id is "root", "" is returned. 622 */ 623 static String getLanguageCode(String id) { 624 return "root".equals(id) ? "" : Locale.forLanguageTag(id.replaceAll("_", "-")).getLanguage(); 625 } 626 627 /** 628 * Examine if the id includes the country (territory) code. If it does, it returns 629 * the country code. 630 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 631 * It does NOT return UN M.49 code, e.g., '001', as those three digit numbers cannot 632 * be translated into package names. 633 */ 634 static String getCountryCode(String id) { 635 String rgn = getRegionCode(id); 636 return rgn.length() == 2 ? rgn: null; 637 } 638 639 /** 640 * Examine if the id includes the region code. If it does, it returns 641 * the region code. 642 * Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG". 643 * It DOES return UN M.49 code, e.g., '001', as well as ISO 3166 two letter country codes. 644 */ 645 static String getRegionCode(String id) { 646 return Locale.forLanguageTag(id.replaceAll("_", "-")).getCountry(); 647 } 648 649 private static class KeyComparator implements Comparator<String> { 650 static KeyComparator INSTANCE = new KeyComparator(); 651 652 private KeyComparator() { 653 } 654 655 @Override 656 public int compare(String o1, String o2) { 657 int len1 = o1.length(); 658 int len2 = o2.length(); 659 if (!isDigit(o1.charAt(0)) && !isDigit(o2.charAt(0))) { 660 // Shorter string comes first unless either starts with a digit. 661 if (len1 < len2) { 662 return -1; 663 } 664 if (len1 > len2) { 665 return 1; 666 } 667 } 668 return o1.compareTo(o2); 669 } 670 671 private boolean isDigit(char c) { 672 return c >= '0' && c <= '9'; 673 } 674 } 675 676 private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) { 677 Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE); 678 for (String key : map.keySet()) { 679 if (key.startsWith(LOCALE_NAME_PREFIX)) { 680 switch (key) { 681 case LOCALE_SEPARATOR: 682 localeNames.put("ListCompositionPattern", map.get(key)); 683 break; 684 case LOCALE_KEYTYPE: 685 localeNames.put("ListKeyTypePattern", map.get(key)); 686 break; 687 default: 688 localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key)); 689 break; 690 } 691 } 692 } 693 694 if (id.equals("root")) { 695 // Add display name pattern, which is not in CLDR 696 localeNames.put("DisplayNamePattern", "{0,choice,0#|1#{1}|2#{1} ({2})}"); 697 } 698 699 return localeNames; 700 } 701 702 @SuppressWarnings("AssignmentToForLoopParameter") 703 private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names) 704 throws Exception { 705 Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE); 706 for (String key : map.keySet()) { 707 if (key.startsWith(CURRENCY_NAME_PREFIX)) { 708 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key)); 709 } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) { 710 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key)); 711 } 712 } 713 return currencyNames; 714 } 715 716 private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) { 717 Map<String, Object> names = new HashMap<>(); 718 719 getAvailableZoneIds().stream().forEach(tzid -> { 720 // If the tzid is deprecated, get the data for the replacement id 721 String tzKey = Optional.ofNullable((String)handlerSupplMeta.get(tzid)) 722 .orElse(tzid); 723 Object data = map.get(TIMEZONE_ID_PREFIX + tzKey); 724 725 if (data instanceof String[]) { 726 // Hack for UTC. UTC is an alias to Etc/UTC in CLDR 727 if (tzid.equals("Etc/UTC") && !map.containsKey(TIMEZONE_ID_PREFIX + "UTC")) { 728 names.put(METAZONE_ID_PREFIX + META_ETCUTC_ZONE_NAME, data); 729 names.put(tzid, META_ETCUTC_ZONE_NAME); 730 names.put("UTC", META_ETCUTC_ZONE_NAME); 731 } else { 732 names.put(tzid, data); 733 } 734 } else { 735 String meta = handlerMetaZones.get(tzKey); 736 if (meta != null) { 737 String metaKey = METAZONE_ID_PREFIX + meta; 738 data = map.get(metaKey); 739 if (data instanceof String[]) { 740 // Keep the metazone prefix here. 741 names.put(metaKey, data); 742 names.put(tzid, meta); 743 } 744 } 745 } 746 }); 747 748 // exemplar cities. 749 Map<String, Object> exCities = map.entrySet().stream() 750 .filter(e -> e.getKey().startsWith(CLDRConverter.EXEMPLAR_CITY_PREFIX)) 751 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 752 names.putAll(exCities); 753 754 // If there's no UTC entry at this point, add an empty one 755 if (!names.isEmpty() && !names.containsKey("UTC")) { 756 names.putIfAbsent(METAZONE_ID_PREFIX + META_EMPTY_ZONE_NAME, EMPTY_ZONE); 757 names.put("UTC", META_EMPTY_ZONE_NAME); 758 } 759 760 // Finally some compatibility stuff 761 Arrays.stream(oldMappings) 762 .filter(oldmap -> !names.containsKey(oldmap[0]) && names.containsKey(oldmap[1])) 763 .forEach(oldmap -> { 764 names.put(oldmap[0], names.get(oldmap[1])); 765 }); 766 767 return names; 768 } 769 770 /** 771 * Extracts the language independent calendar data. Each of the two keys, 772 * "firstDayOfWeek" and "minimalDaysInFirstWeek" has a string value consists of 773 * one or multiple occurrences of: 774 * i: rg1 rg2 ... rgn; 775 * where "i" is the data for the following regions (delimited by a space) after 776 * ":", and ends with a ";". 777 */ 778 private static Map<String, Object> extractCalendarData(Map<String, Object> map, String id) { 779 Map<String, Object> calendarData = new LinkedHashMap<>(); 780 if (id.equals("root")) { 781 calendarData.put("firstDayOfWeek", 782 IntStream.range(1, 8) 783 .mapToObj(String::valueOf) 784 .filter(d -> map.keySet().contains(CALENDAR_FIRSTDAY_PREFIX + d)) 785 .map(d -> d + ": " + map.get(CALENDAR_FIRSTDAY_PREFIX + d)) 786 .collect(Collectors.joining(";"))); 787 calendarData.put("minimalDaysInFirstWeek", 788 IntStream.range(0, 7) 789 .mapToObj(String::valueOf) 790 .filter(d -> map.keySet().contains(CALENDAR_MINDAYS_PREFIX + d)) 791 .map(d -> d + ": " + map.get(CALENDAR_MINDAYS_PREFIX + d)) 792 .collect(Collectors.joining(";"))); 793 } 794 return calendarData; 795 } 796 797 static final String[] FORMAT_DATA_ELEMENTS = { 798 "MonthNames", 799 "standalone.MonthNames", 800 "MonthAbbreviations", 801 "standalone.MonthAbbreviations", 802 "MonthNarrows", 803 "standalone.MonthNarrows", 804 "DayNames", 805 "standalone.DayNames", 806 "DayAbbreviations", 807 "standalone.DayAbbreviations", 808 "DayNarrows", 809 "standalone.DayNarrows", 810 "QuarterNames", 811 "standalone.QuarterNames", 812 "QuarterAbbreviations", 813 "standalone.QuarterAbbreviations", 814 "QuarterNarrows", 815 "standalone.QuarterNarrows", 816 "AmPmMarkers", 817 "narrow.AmPmMarkers", 818 "abbreviated.AmPmMarkers", 819 "long.Eras", 820 "Eras", 821 "narrow.Eras", 822 "field.era", 823 "field.year", 824 "field.month", 825 "field.week", 826 "field.weekday", 827 "field.dayperiod", 828 "field.hour", 829 "timezone.hourFormat", 830 "timezone.gmtFormat", 831 "timezone.gmtZeroFormat", 832 "timezone.regionFormat", 833 "timezone.regionFormat.daylight", 834 "timezone.regionFormat.standard", 835 "field.minute", 836 "field.second", 837 "field.zone", 838 "TimePatterns", 839 "DatePatterns", 840 "DateTimePatterns", 841 "DateTimePatternChars" 842 }; 843 844 private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) { 845 Map<String, Object> formatData = new LinkedHashMap<>(); 846 for (CalendarType calendarType : CalendarType.values()) { 847 if (calendarType == CalendarType.GENERIC) { 848 continue; 849 } 850 String prefix = calendarType.keyElementName(); 851 for (String element : FORMAT_DATA_ELEMENTS) { 852 String key = prefix + element; 853 copyIfPresent(map, "java.time." + key, formatData); 854 copyIfPresent(map, key, formatData); 855 } 856 } 857 858 for (String key : map.keySet()) { 859 // Copy available calendar names 860 if (key.startsWith(CLDRConverter.LOCALE_TYPE_PREFIX_CA)) { 861 String type = key.substring(CLDRConverter.LOCALE_TYPE_PREFIX_CA.length()); 862 for (CalendarType calendarType : CalendarType.values()) { 863 if (calendarType == CalendarType.GENERIC) { 864 continue; 865 } 866 if (type.equals(calendarType.lname())) { 867 Object value = map.get(key); 868 String dataKey = key.replace(LOCALE_TYPE_PREFIX_CA, 869 CALENDAR_NAME_PREFIX); 870 formatData.put(dataKey, value); 871 String ukey = CALENDAR_NAME_PREFIX + calendarType.uname(); 872 if (!dataKey.equals(ukey)) { 873 formatData.put(ukey, value); 874 } 875 } 876 } 877 } 878 } 879 880 copyIfPresent(map, "DefaultNumberingSystem", formatData); 881 882 @SuppressWarnings("unchecked") 883 List<String> numberingScripts = (List<String>) map.remove("numberingScripts"); 884 if (numberingScripts != null) { 885 for (String script : numberingScripts) { 886 copyIfPresent(map, script + ".NumberElements", formatData); 887 copyIfPresent(map, script + ".NumberPatterns", formatData); 888 } 889 } else { 890 copyIfPresent(map, "NumberElements", formatData); 891 copyIfPresent(map, "NumberPatterns", formatData); 892 } 893 copyIfPresent(map, "short.CompactNumberPatterns", formatData); 894 copyIfPresent(map, "long.CompactNumberPatterns", formatData); 895 896 // put extra number elements for available scripts into formatData, if it is "root" 897 if (id.equals("root")) { 898 handlerNumbering.keySet().stream() 899 .filter(k -> !numberingScripts.contains(k)) 900 .forEach(k -> { 901 String[] ne = (String[])map.get("latn.NumberElements"); 902 String[] neNew = Arrays.copyOf(ne, ne.length); 903 neNew[4] = handlerNumbering.get(k).substring(0, 1); 904 formatData.put(k + ".NumberElements", neNew); 905 }); 906 } 907 return formatData; 908 } 909 910 private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) { 911 Object value = src.get(key); 912 if (value != null) { 913 dest.put(key, value); 914 } 915 } 916 917 // --- code below here is adapted from java.util.Properties --- 918 private static final String specialSaveCharsJava = "\""; 919 private static final String specialSaveCharsProperties = "=: \t\r\n\f#!"; 920 921 /* 922 * Converts unicodes to encoded \uxxxx 923 * and writes out any of the characters in specialSaveChars 924 * with a preceding slash 925 */ 926 static String saveConvert(String theString, boolean useJava) { 927 if (theString == null) { 928 return ""; 929 } 930 931 String specialSaveChars; 932 if (useJava) { 933 specialSaveChars = specialSaveCharsJava; 934 } else { 935 specialSaveChars = specialSaveCharsProperties; 936 } 937 boolean escapeSpace = false; 938 939 int len = theString.length(); 940 StringBuilder outBuffer = new StringBuilder(len * 2); 941 Formatter formatter = new Formatter(outBuffer, Locale.ROOT); 942 943 for (int x = 0; x < len; x++) { 944 char aChar = theString.charAt(x); 945 switch (aChar) { 946 case ' ': 947 if (x == 0 || escapeSpace) { 948 outBuffer.append('\\'); 949 } 950 outBuffer.append(' '); 951 break; 952 case '\\': 953 outBuffer.append('\\'); 954 outBuffer.append('\\'); 955 break; 956 case '\t': 957 outBuffer.append('\\'); 958 outBuffer.append('t'); 959 break; 960 case '\n': 961 outBuffer.append('\\'); 962 outBuffer.append('n'); 963 break; 964 case '\r': 965 outBuffer.append('\\'); 966 outBuffer.append('r'); 967 break; 968 case '\f': 969 outBuffer.append('\\'); 970 outBuffer.append('f'); 971 break; 972 default: 973 if (aChar < 0x0020 || (!USE_UTF8 && aChar > 0x007e)) { 974 formatter.format("\\u%04x", (int)aChar); 975 } else { 976 if (specialSaveChars.indexOf(aChar) != -1) { 977 outBuffer.append('\\'); 978 } 979 outBuffer.append(aChar); 980 } 981 } 982 } 983 return outBuffer.toString(); 984 } 985 986 private static String toLanguageTag(String locName) { 987 if (locName.indexOf('_') == -1) { 988 return locName; 989 } 990 String tag = locName.replaceAll("_", "-"); 991 Locale loc = Locale.forLanguageTag(tag); 992 return loc.toLanguageTag(); 993 } 994 995 private static void addLikelySubtags(Map<String, SortedSet<String>> metaInfo, String category, String id) { 996 String likelySubtag = handlerLikelySubtags.get(id); 997 if (likelySubtag != null) { 998 // Remove Script for now 999 metaInfo.get(category).add(toLanguageTag(likelySubtag).replaceFirst("-[A-Z][a-z]{3}", "")); 1000 } 1001 } 1002 1003 private static String toLocaleName(String tag) { 1004 if (tag.indexOf('-') == -1) { 1005 return tag; 1006 } 1007 return tag.replaceAll("-", "_"); 1008 } 1009 1010 private static void setupBaseLocales(String localeList) { 1011 Arrays.stream(localeList.split(",")) 1012 .map(Locale::forLanguageTag) 1013 .map(l -> Control.getControl(Control.FORMAT_DEFAULT) 1014 .getCandidateLocales("", l)) 1015 .forEach(BASE_LOCALES::addAll); 1016 } 1017 1018 // applying parent locale rules to the passed candidates list 1019 // This has to match with the one in sun.util.cldr.CLDRLocaleProviderAdapter 1020 private static Map<Locale, Locale> childToParentLocaleMap = null; 1021 private static List<Locale> applyParentLocales(String baseName, List<Locale> candidates) { 1022 if (Objects.isNull(childToParentLocaleMap)) { 1023 childToParentLocaleMap = new HashMap<>(); 1024 parentLocalesMap.keySet().forEach(key -> { 1025 String parent = key.substring(PARENT_LOCALE_PREFIX.length()).replaceAll("_", "-"); 1026 parentLocalesMap.get(key).stream().forEach(child -> { 1027 childToParentLocaleMap.put(Locale.forLanguageTag(child), 1028 "root".equals(parent) ? Locale.ROOT : Locale.forLanguageTag(parent)); 1029 }); 1030 }); 1031 } 1032 1033 // check irregular parents 1034 for (int i = 0; i < candidates.size(); i++) { 1035 Locale l = candidates.get(i); 1036 Locale p = childToParentLocaleMap.get(l); 1037 if (!l.equals(Locale.ROOT) && 1038 Objects.nonNull(p) && 1039 !candidates.get(i+1).equals(p)) { 1040 List<Locale> applied = candidates.subList(0, i+1); 1041 applied.addAll(applyParentLocales(baseName, defCon.getCandidateLocales(baseName, p))); 1042 return applied; 1043 } 1044 } 1045 1046 return candidates; 1047 } 1048 1049 private static void generateZoneName() throws Exception { 1050 Files.createDirectories(Paths.get(DESTINATION_DIR, "java", "time", "format")); 1051 Files.write(Paths.get(DESTINATION_DIR, "java", "time", "format", "ZoneName.java"), 1052 Files.lines(Paths.get(zoneNameTempFile)) 1053 .flatMap(l -> { 1054 if (l.equals("%%%%ZIDMAP%%%%")) { 1055 return zidMapEntry(); 1056 } else if (l.equals("%%%%MZONEMAP%%%%")) { 1057 return handlerMetaZones.mzoneMapEntry(); 1058 } else if (l.equals("%%%%DEPRECATED%%%%")) { 1059 return handlerSupplMeta.deprecatedMap(); 1060 } else if (l.equals("%%%%TZDATALINK%%%%")) { 1061 return tzDataLinkEntry(); 1062 } else { 1063 return Stream.of(l); 1064 } 1065 }) 1066 .collect(Collectors.toList()), 1067 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1068 } 1069 1070 // This method assumes handlerMetaZones is already initialized 1071 private static Set<String> getAvailableZoneIds() { 1072 assert handlerMetaZones != null; 1073 if (AVAILABLE_TZIDS == null) { 1074 AVAILABLE_TZIDS = new HashSet<>(ZoneId.getAvailableZoneIds()); 1075 AVAILABLE_TZIDS.addAll(handlerMetaZones.keySet()); 1076 AVAILABLE_TZIDS.remove(MetaZonesParseHandler.NO_METAZONE_KEY); 1077 } 1078 1079 return AVAILABLE_TZIDS; 1080 } 1081 1082 private static Stream<String> zidMapEntry() { 1083 return getAvailableZoneIds().stream() 1084 .map(id -> { 1085 String canonId = canonicalTZMap.getOrDefault(id, id); 1086 String meta = handlerMetaZones.get(canonId); 1087 String zone001 = handlerMetaZones.zidMap().get(meta); 1088 return zone001 == null ? "" : 1089 String.format(" \"%s\", \"%s\", \"%s\",", 1090 id, meta, zone001); 1091 }) 1092 .filter(s -> !s.isEmpty()) 1093 .sorted(); 1094 } 1095 1096 private static Stream<String> tzDataLinkEntry() { 1097 try { 1098 return Files.walk(Paths.get(tzDataDir), 1) 1099 .filter(p -> !Files.isDirectory(p)) 1100 .flatMap(CLDRConverter::extractLinks) 1101 .sorted(); 1102 } catch (IOException e) { 1103 throw new UncheckedIOException(e); 1104 } 1105 } 1106 1107 private static Stream<String> extractLinks(Path tzFile) { 1108 try { 1109 return Files.lines(tzFile) 1110 .filter(l -> l.startsWith("Link")) 1111 .map(l -> l.replaceFirst("^Link[\\s]+(\\S+)\\s+(\\S+).*", 1112 " \"$2\", \"$1\",")); 1113 } catch (IOException e) { 1114 throw new UncheckedIOException(e); 1115 } 1116 } 1117 1118 // Generate tzmappings for Windows. The format is: 1119 // 1120 // (Windows Zone Name):(REGION):(Java TZID) 1121 // 1122 // where: 1123 // Windows Zone Name: arbitrary time zone name string used in Windows 1124 // REGION: ISO3166 or UN M.49 code 1125 // Java TZID: Java's time zone ID 1126 // 1127 // Note: the entries are alphabetically sorted, *except* the "world" region 1128 // code, i.e., "001". It should be the last entry for the same windows time 1129 // zone name entries. (cf. TimeZone_md.c) 1130 private static void generateWindowsTZMappings() throws Exception { 1131 Files.createDirectories(Paths.get(DESTINATION_DIR, "windows", "conf")); 1132 Files.write(Paths.get(DESTINATION_DIR, "windows", "conf", "tzmappings"), 1133 handlerWinZones.keySet().stream() 1134 .map(k -> k + ":" + handlerWinZones.get(k) + ":") 1135 .sorted(new Comparator<String>() { 1136 public int compare(String t1, String t2) { 1137 String[] s1 = t1.split(":"); 1138 String[] s2 = t2.split(":"); 1139 if (s1[0].equals(s2[0])) { 1140 if (s1[1].equals("001")) { 1141 return 1; 1142 } else if (s2[1].equals("001")) { 1143 return -1; 1144 } else { 1145 return s1[1].compareTo(s2[1]); 1146 } 1147 } else { 1148 return s1[0].compareTo(s2[0]); 1149 } 1150 } 1151 }) 1152 .collect(Collectors.toList()), 1153 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1154 } 1155 1156 /** 1157 * Generate ResourceBundle source file for plural rules. The generated 1158 * class is {@code sun.text.resources.PluralRules} which has one public 1159 * two dimensional array {@code rulesArray}. Each array element consists 1160 * of two elements that designate the locale and the locale's plural rules 1161 * string. The latter has the syntax from Unicode Consortium's 1162 * <a href="http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax"> 1163 * Plural rules syntax</a>. {@code samples} and {@code "other"} are being ommited. 1164 * 1165 * @throws Exception 1166 */ 1167 private static void generatePluralRules() throws Exception { 1168 Files.createDirectories(Paths.get(DESTINATION_DIR, "sun", "text", "resources")); 1169 Files.write(Paths.get(DESTINATION_DIR, "sun", "text", "resources", "PluralRules.java"), 1170 Stream.concat( 1171 Stream.concat( 1172 Stream.of( 1173 "package sun.text.resources;", 1174 "public final class PluralRules {", 1175 " public static final String[][] rulesArray = {" 1176 ), 1177 pluralRulesStream().sorted() 1178 ), 1179 Stream.of( 1180 " };", 1181 "}" 1182 ) 1183 ) 1184 .collect(Collectors.toList()), 1185 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); 1186 } 1187 1188 private static Stream<String> pluralRulesStream() { 1189 return handlerPlurals.getData().entrySet().stream() 1190 .filter(e -> !((Map<String, String>)e.getValue()).isEmpty()) 1191 .map(e -> { 1192 String loc = e.getKey(); 1193 Map<String, String> rules = (Map<String, String>)e.getValue(); 1194 return " {\"" + loc + "\", \"" + 1195 rules.entrySet().stream() 1196 .map(rule -> rule.getKey() + ":" + rule.getValue().replaceFirst("@.*", "")) 1197 .map(String::trim) 1198 .collect(Collectors.joining(";")) + "\"},"; 1199 }); 1200 } 1201 1202 // for debug 1203 static void dumpMap(Map<String, Object> map) { 1204 map.entrySet().stream() 1205 .sorted(Map.Entry.comparingByKey()) 1206 .map(e -> { 1207 Object val = e.getValue(); 1208 String valStr = null; 1209 1210 if (val instanceof String[]) { 1211 valStr = Arrays.asList((String[])val).toString(); 1212 } else if (val != null) { 1213 valStr = val.toString(); 1214 } 1215 return e.getKey() + " = " + valStr; 1216 }) 1217 .forEach(System.out::println); 1218 } 1219 } 1220