< prev index next >

make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java

Print this page
rev 47480 : [mq]: 8176841


  35 import java.util.*;
  36 import java.util.ResourceBundle.Control;
  37 import java.util.logging.Level;
  38 import java.util.logging.Logger;
  39 import java.util.stream.Collectors;
  40 import javax.xml.parsers.SAXParser;
  41 import javax.xml.parsers.SAXParserFactory;
  42 import org.xml.sax.SAXNotRecognizedException;
  43 import org.xml.sax.SAXNotSupportedException;
  44 
  45 
  46 /**
  47  * Converts locale data from "Locale Data Markup Language" format to
  48  * JRE resource bundle format. LDML is the format used by the Common
  49  * Locale Data Repository maintained by the Unicode Consortium.
  50  */
  51 public class CLDRConverter {
  52 
  53     static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd";
  54     static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd";
         // The two constants above are the system identifier URLs of the LDML
         // DTD and of the supplemental LDML DTD.


  55 
         // Base directory of the CLDR input. The LOCAL_* and *_SOURCE_FILE
         // fields that follow are not initialised here; the "Set up path names"
         // step that runs after argument parsing derives them from CLDR_BASE.
  56     private static String CLDR_BASE = "../CLDR/21.0.1/";
  57     static String LOCAL_LDML_DTD;
  58     static String LOCAL_SPPL_LDML_DTD;

  59     private static String SOURCE_FILE_DIR;
  60     private static String SPPL_SOURCE_FILE;
  61     private static String NUMBERING_SOURCE_FILE;
  62     private static String METAZONES_SOURCE_FILE;
  63     private static String LIKELYSUBTAGS_SOURCE_FILE;

         // Output directory; the usage text documents "-o dir" with this default.
  64     static String DESTINATION_DIR = "build/gensrc";
  65 
         // Key prefixes of entries in the parsed bundle maps. For example,
         // extractLocaleNames() and extractCurrencyNames() select entries by
         // these prefixes and strip the prefix from the key.
  66     static final String LOCALE_NAME_PREFIX = "locale.displayname.";





  67     static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol.";
  68     static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
  69     static final String CALENDAR_NAME_PREFIX = "calendarname.";
  70     static final String TIMEZONE_ID_PREFIX = "timezone.id.";
  71     static final String ZONE_NAME_PREFIX = "timezone.displayname.";
  72     static final String METAZONE_ID_PREFIX = "metazone.id.";
  73     static final String PARENT_LOCALE_PREFIX = "parentLocale.";
  74 
         // SAX handlers for the locale-independent files; all four are
         // created and run by parseSupplemental().
  75     private static SupplementDataParseHandler handlerSuppl;
  76     private static LikelySubtagsParseHandler handlerLikelySubtags;
  77     static NumberingSystemsParseHandler handlerNumbering;
  78     static MetaZonesParseHandler handlerMetaZones;

         // Assigned a ResourceBundleGenerator before any bundle is converted.
  79     private static BundleGenerator bundleGenerator;
  80 
  81     // java.base module related
  82     static boolean isBaseModule = false;
         // When still empty after argument parsing, setupBaseLocales("en-US")
         // is called to populate the default.
  83     static final Set<Locale> BASE_LOCALES = new HashSet<>();
  84 
  85     // "parentLocales" map
         // Filled by parseSupplemental() from the supplemental handler's "root"
         // data: each value is split on spaces and kept as a sorted set.
  86     private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
         // ResourceBundle.Control instance for the default format list.
  87     private static final ResourceBundle.Control defCon =
  88         ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
  89 
  90     static enum DraftType {
  91         UNCONFIRMED,
  92         PROVISIONAL,
  93         CONTRIBUTED,
  94         APPROVED;
  95 
  96         private static final Map<String, DraftType> map = new HashMap<>();
  97         static {
  98             for (DraftType dt : values()) {


 184 
 185                     case "-help":
 186                         usage();
 187                         System.exit(0);
 188                         break;
 189 
 190                     default:
 191                         throw new RuntimeException();
 192                     }
 193                 }
 194             } catch (RuntimeException e) {
 195                 severe("unknown or imcomplete arg(s): " + currentArg);
 196                 usage();
 197                 System.exit(1);
 198             }
 199         }
 200 
 201         // Set up path names
 202         LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd";
 203         LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd";

 204         SOURCE_FILE_DIR = CLDR_BASE + "/main";
 205         SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml";
 206         LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml";
 207         NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml";
 208         METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml";

 209 
 210         if (BASE_LOCALES.isEmpty()) {
 211             setupBaseLocales("en-US");
 212         }
 213 
 214         bundleGenerator = new ResourceBundleGenerator();
 215 
 216         // Parse data independent of locales
 217         parseSupplemental();

 218 
 219         List<Bundle> bundles = readBundleList();
 220         convertBundles(bundles);
 221         convertBundles(addedBundles);
 222     }
 223 
 224     private static void usage() {
             // Passes the command-line synopsis to errout(). Each option is
             // listed exactly once; the "-o dir" entry used to be printed a
             // second time with "default" misspelled.
             // NOTE(review): the %n sequences imply errout() treats its
             // argument as a format string - confirm against errout().
 225         errout("Usage: java CLDRConverter [options]%n"
 226                 + "\t-help          output this usage message and exit%n"
 227                 + "\t-verbose       output information%n"
 228                 + "\t-draft [contributed | approved | provisional | unconfirmed]%n"
 229                 + "\t\t       draft level for using data (default: contributed)%n"
 230                 + "\t-base dir      base directory for CLDR input files%n"
 231                 + "\t-basemodule    generates bundles that go into java.base module%n"
 232                 + "\t-baselocales loc(,loc)*      locales that go into the base module%n"
 233                 + "\t-o dir         output directory (default: ./build/gensrc)%n"
 235                 + "\t-utf8          use UTF-8 rather than \\uxxxx (for debug)%n");
 236     }
 237 


 304                     // processed first.
 305                     if ("root".equals(id)) {
 306                         retList.add(0, b);
 307                     } else {
 308                         retList.add(b);
 309                     }
 310                 }
 311             }
 312         }
 313         return retList;
 314     }
 315 
 316     private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
         // cldrBundles (above) caches parsed bundle data by locale id; it is
         // read and filled by getCLDRBundle().
 317     // this list will contain additional bundles to be generated for Region dependent Data.
 318     private static List<Bundle> addedBundles = new ArrayList<>();
 319 
         // Resource category name -> sorted set of language tags. The sets are
         // added to while bundles are converted and are finally handed to
         // bundleGenerator.generateMetaInfo().
 320     private static Map<String, SortedSet<String>> metaInfo = new HashMap<>();
 321 
 322     static {
 323         // For generating information on supported locales.
             // Every category that is later looked up with metaInfo.get(...)
             // gets its (initially empty) sorted set here.
 324         metaInfo.put("LocaleNames", new TreeSet<>());
 325         metaInfo.put("CurrencyNames", new TreeSet<>());
 326         metaInfo.put("TimeZoneNames", new TreeSet<>());
 327         metaInfo.put("CalendarData", new TreeSet<>());
 328         metaInfo.put("FormatData", new TreeSet<>());
 329         metaInfo.put("AvailableLocales", new TreeSet<>());
 330     }
 331 
 332 
         // NOTE(review): no use of this set is visible in this listing;
         // presumably it names the keys treated as calendar data - confirm.
 333     private static Set<String> calendarDataFields = Set.of("firstDayOfWeek", "minimalDaysInFirstWeek");
 334 
 335     static Map<String, Object> getCLDRBundle(String id) throws Exception {
             // Returns the data map for the locale file <SOURCE_FILE_DIR>/<id>.xml,
             // parsing it on first request and caching the result in cldrBundles.
             // A cached map is returned as is.
 336         Map<String, Object> bundle = cldrBundles.get(id);
 337         if (bundle != null) {
 338             return bundle;
 339         }
             // The validating parser and the handler are created before the
             // existence check below, so they are built even for a missing file.
 340         SAXParserFactory factory = SAXParserFactory.newInstance();
 341         factory.setValidating(true);
 342         SAXParser parser = factory.newSAXParser();
 343         enableFileAccess(parser);
 344         LDMLParseHandler handler = new LDMLParseHandler(id);
 345         File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml");
 346         if (!file.exists()) {
 347             // Skip if the file doesn't exist.
                 // The empty map is not cached, so the check repeats on every call.
 348             return Collections.emptyMap();
 349         }
 350 
 351         info("..... main directory .....");
 352         info("Reading file " + file);
 353         parser.parse(file, handler);
 354 
 355         bundle = handler.getData();
 356         cldrBundles.put(id, bundle);
 357         String country = getCountryCode(id);
 358         if (country != null) {
                 // From here on "bundle" refers to the supplemental data for the
                 // country, not to the map parsed from the locale file.
                 // NOTE(review): if getData(country) returns null this method
                 // returns null on the first call although the parsed map stays
                 // cached - confirm whether callers rely on that.
 359             bundle = handlerSuppl.getData(country);
 360             if (bundle != null) {
 361                 //merge two maps into one map
                     // The locale file's entries are copied over the supplemental
                     // map, so they win on key clashes. The map returned by
                     // handlerSuppl is modified in place and becomes the cached value.
 362                 Map<String, Object> temp = cldrBundles.remove(id);
 363                 bundle.putAll(temp);
 364                 cldrBundles.put(id, bundle);
 365             }
 366         }
 367         return bundle;
 368     }
 369 
 370     // Parsers for data in "supplemental" directory
 371     //
         // Runs four validating SAX parses, one per supplemental file, and keeps
         // each handler in its static field (handlerSuppl, handlerNumbering,
         // handlerMetaZones, handlerLikelySubtags). It also fills parentLocalesMap.
         // Declared to throw Exception: parser creation and parsing failures
         // are passed on to the caller.
 372     private static void parseSupplemental() throws Exception {
 373         // Parse SupplementalData file and store the information in the HashMap
 374         // Calendar information such as firstDay and minDay are stored in
 375         // supplementalData.xml as of CLDR1.4. Individual territory is listed
 376         // with its ISO 3166 country code while default is listed using UNM49
 377         // region and composition numerical code (001 for World.)
 378         //
 379         // SupplementalData file also provides the "parent" locales which
 380         // are otherwise not to be fallen back. Process them here as well.
 381         //
 382         info("..... Parsing supplementalData.xml .....");
 383         SAXParserFactory factorySuppl = SAXParserFactory.newInstance();
 384         factorySuppl.setValidating(true);
 385         SAXParser parserSuppl = factorySuppl.newSAXParser();
 386         enableFileAccess(parserSuppl);
 387         handlerSuppl = new SupplementDataParseHandler();
 388         File fileSupply = new File(SPPL_SOURCE_FILE);
 389         parserSuppl.parse(fileSupply, handlerSuppl);
             // Each value of the "root" data is a space-separated list; it is
             // split and stored as a sorted set. The diamond replaces the raw
             // TreeSet used before, which caused an unchecked conversion.
 390         Map<String, Object> parentData = handlerSuppl.getData("root");
 391         parentData.keySet().forEach(key -> {
 392                 parentLocalesMap.put(key, new TreeSet<>(
 393                     Arrays.asList(((String)parentData.get(key)).split(" "))));
 394             });
 395 
 396         // Parse numberingSystems to get digit zero character information.
 397         SAXParserFactory numberingParser = SAXParserFactory.newInstance();
 398         numberingParser.setValidating(true);
 399         SAXParser parserNumbering = numberingParser.newSAXParser();
 400         enableFileAccess(parserNumbering);
 401         handlerNumbering = new NumberingSystemsParseHandler();
 402         File fileNumbering = new File(NUMBERING_SOURCE_FILE);
 403         parserNumbering.parse(fileNumbering, handlerNumbering);
 404 
 405         // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
 406         info("..... Parsing metaZones.xml .....");
 407         SAXParserFactory metazonesParser = SAXParserFactory.newInstance();
 408         metazonesParser.setValidating(true);
 409         SAXParser parserMetaZones = metazonesParser.newSAXParser();
 410         enableFileAccess(parserMetaZones);
 411         handlerMetaZones = new MetaZonesParseHandler();
 412         File fileMetaZones = new File(METAZONES_SOURCE_FILE);
 413         parserMetaZones.parse(fileMetaZones, handlerMetaZones);
 414 
 415         // Parse likelySubtags
 416         info("..... Parsing likelySubtags.xml .....");
 417         SAXParserFactory likelySubtagsParser = SAXParserFactory.newInstance();
 418         likelySubtagsParser.setValidating(true);
 419         SAXParser parserLikelySubtags = likelySubtagsParser.newSAXParser();
 420         enableFileAccess(parserLikelySubtags);
 421         handlerLikelySubtags = new LikelySubtagsParseHandler();
 422         File fileLikelySubtags = new File(LIKELYSUBTAGS_SOURCE_FILE);
 423         parserLikelySubtags.parse(fileLikelySubtags, handlerLikelySubtags);
















 424     }
 425 
 426     /**
 427      * This method will check if a new region dependent Bundle needs to be
 428      * generated for this Locale id and targetMap. New Bundle will be generated
 429      * when Locale id has non empty script and country code and targetMap
 430      * contains region dependent data. This method will also remove region
 431      * dependent data from this targetMap after candidate locales check. E.g. It
 432      * will call genRegionDependentBundle() in case of az_Latn_AZ locale and
 433      * remove region dependent data from this targetMap so that az_Latn_AZ
 434      * bundle will not be created. For az_Cyrl_AZ, new Bundle will be generated
 435      * but region dependent data will not be removed from targetMap as its candidate
 436      * locales are [az_Cyrl_AZ, az_Cyrl, root], which does not include az_AZ for
 437      * fallback.
 438      *
 439      */
 440 
 441     private static void checkRegionDependentBundle(Map<String, Object> targetMap, String id) {
 442         if ((CLDRConverter.getScript(id) != "")
 443                 && (CLDRConverter.getCountryCode(id) != "")) {


 511         for (Bundle bundle : bundles) {
 512             // Get the target map, which contains all the data that should be
 513             // visible for the bundle's locale
 514 
 515             Map<String, Object> targetMap = bundle.getTargetMap();
 516 
 517             // check if new region DependentBundle needs to be generated for this Locale.
 518             checkRegionDependentBundle(targetMap, bundle.getID());
 519             EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
 520 
 521             if (bundle.isRoot()) {
 522                 // Add DateTimePatternChars because CLDR no longer supports localized patterns.
 523                 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
 524             }
 525 
 526             // Now the map contains just the entries that need to be in the resources bundles.
 527             // Go ahead and generate them.
 528             if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) {
 529                 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID());
 530                 if (!localeNamesMap.isEmpty() || bundle.isRoot()) {
 531                     metaInfo.get("LocaleNames").add(toLanguageTag(bundle.getID()));
 532                     addLikelySubtags(metaInfo, "LocaleNames", bundle.getID());
 533                     bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN);
 534                 }
 535             }
 536             if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) {
 537                 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies());
 538                 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) {
 539                     metaInfo.get("CurrencyNames").add(toLanguageTag(bundle.getID()));
 540                     addLikelySubtags(metaInfo, "CurrencyNames", bundle.getID());
 541                     bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN);
 542                 }
 543             }
 544             if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) {
 545                 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID());
 546                 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) {
 547                     metaInfo.get("TimeZoneNames").add(toLanguageTag(bundle.getID()));
 548                     addLikelySubtags(metaInfo, "TimeZoneNames", bundle.getID());
 549                     bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE);
 550                 }
 551             }
 552             if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) {
 553                 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID());
 554                 if (!calendarDataMap.isEmpty() || bundle.isRoot()) {
 555                     metaInfo.get("CalendarData").add(toLanguageTag(bundle.getID()));
 556                     addLikelySubtags(metaInfo, "CalendarData", bundle.getID());
 557                     bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN);
 558                 }
 559             }
 560             if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
 561                 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID());
 562                 if (!formatDataMap.isEmpty() || bundle.isRoot()) {
 563                     metaInfo.get("FormatData").add(toLanguageTag(bundle.getID()));
 564                     addLikelySubtags(metaInfo, "FormatData", bundle.getID());
 565                     bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN);
 566                 }
 567             }
 568 
 569             // For AvailableLocales
 570             metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID()));
 571             addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID());
 572         }
 573         addCldrImplicitLocales(metaInfo);
 574         bundleGenerator.generateMetaInfo(metaInfo);
 575     }
 576 
 577     /**
 578      * These are the Locales that are implicitly supported by CLDR.
 579      * Adding them explicitly as likelySubtags here, will ensure that
 580      * COMPAT locales do not precede them during ResourceBundle search path.
          * <p>
          * The same five language tags are added to each of the "LocaleNames",
          * "CurrencyNames", "TimeZoneNames", "CalendarData" and "FormatData"
          * sets; "TimeZoneNames" additionally receives "zh-HK".
          *
          * @param metaInfo category name to set of language tags; it must
          *                 already contain a set for each of the five categories
 581      */
 582     private static void addCldrImplicitLocales(Map<String, SortedSet<String>> metaInfo) {
 583         metaInfo.get("LocaleNames").add("zh-Hans-CN");
 584         metaInfo.get("LocaleNames").add("zh-Hans-SG");
 585         metaInfo.get("LocaleNames").add("zh-Hant-HK");
 586         metaInfo.get("LocaleNames").add("zh-Hant-MO");
 587         metaInfo.get("LocaleNames").add("zh-Hant-TW");
 588         metaInfo.get("CurrencyNames").add("zh-Hans-CN");
 589         metaInfo.get("CurrencyNames").add("zh-Hans-SG");
 590         metaInfo.get("CurrencyNames").add("zh-Hant-HK");
 591         metaInfo.get("CurrencyNames").add("zh-Hant-MO");
 592         metaInfo.get("CurrencyNames").add("zh-Hant-TW");
 593         metaInfo.get("TimeZoneNames").add("zh-Hans-CN");
 594         metaInfo.get("TimeZoneNames").add("zh-Hans-SG");
 595         metaInfo.get("TimeZoneNames").add("zh-Hant-HK");
 596         metaInfo.get("TimeZoneNames").add("zh-Hant-MO");
 597         metaInfo.get("TimeZoneNames").add("zh-Hant-TW");
             // Only the time zone names category gets the script-less zh-HK tag.
 598         metaInfo.get("TimeZoneNames").add("zh-HK");
 599         metaInfo.get("CalendarData").add("zh-Hans-CN");
 600         metaInfo.get("CalendarData").add("zh-Hans-SG");
 601         metaInfo.get("CalendarData").add("zh-Hant-HK");
 602         metaInfo.get("CalendarData").add("zh-Hant-MO");
 603         metaInfo.get("CalendarData").add("zh-Hant-TW");
 604         metaInfo.get("FormatData").add("zh-Hans-CN");
 605         metaInfo.get("FormatData").add("zh-Hans-SG");
 606         metaInfo.get("FormatData").add("zh-Hant-HK");
 607         metaInfo.get("FormatData").add("zh-Hant-MO");
 608         metaInfo.get("FormatData").add("zh-Hant-TW");
 609     }
 610     static final Map<String, String> aliases = new HashMap<>();
 611 
 612     /**
 613      * Translate the aliases into the real entries in the bundle map.
          * <p>
          * For every (alias key, target key) pair in {@link #aliases} whose
          * target key is present in {@code bundleMap}, the target's value is
          * stored under the alias key unless the alias key already has a value.
          * Any exception raised while doing so is logged at SEVERE level and
          * not rethrown.
          *
          * @param bundleMap the bundle data to complete; modified in place
 614      */
 615     static void handleAliases(Map<String, Object> bundleMap) {
             // keySet() is a live view of bundleMap, so entries added below are
             // visible to later contains() checks in the same loop.
 616         Set<String> bundleKeys = bundleMap.keySet();
 617         try {
 618             for (String key : aliases.keySet()) {
 619                 String targetKey = aliases.get(key);
 620                 if (bundleKeys.contains(targetKey)) {
 621                     bundleMap.putIfAbsent(key, bundleMap.get(targetKey));
 622                 }
 623             }
 624         } catch (Exception ex) {
 625             Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex);
 626         }
 627     }
 628 
 629     /*


 678                 // Shorter string comes first unless either starts with a digit.
 679                 if (len1 < len2) {
 680                     return -1;
 681                 }
 682                 if (len1 > len2) {
 683                     return 1;
 684                 }
 685             }
 686             return o1.compareTo(o2);
 687         }
 688 
 689         private boolean isDigit(char c) {
                 // True only for the ASCII digits '0' through '9'.
 690             return c >= '0' && c <= '9';
 691         }
 692     }
 693 
 694     private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) {
             // Collects every entry of "map" whose key starts with
             // LOCALE_NAME_PREFIX and stores it under the key with that prefix
             // removed. The result is a TreeMap ordered by KeyComparator.INSTANCE.
             // The "id" parameter is not used in this body.
 695         Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE);
 696         for (String key : map.keySet()) {
 697             if (key.startsWith(LOCALE_NAME_PREFIX)) {








 698                 localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key));


 699             }
 700         }






 701         return localeNames;
 702     }
 703 
 704     @SuppressWarnings("AssignmentToForLoopParameter")
 705     private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names)
 706             throws Exception {
             // Collects the currency display names and currency symbols from
             // "map", keyed by what follows the respective prefix, into a
             // TreeMap ordered by KeyComparator.INSTANCE. Both kinds of entry
             // share one result map.
             // NOTE(review): "id" and "names" are not read here, the loop
             // variable is never reassigned, and no statement in this body
             // throws a checked exception.
 707         Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE);
 708         for (String key : map.keySet()) {
 709             if (key.startsWith(CURRENCY_NAME_PREFIX)) {
 710                 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key));
 711             } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) {
 712                 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key));
 713             }
 714         }
 715         return currencyNames;
 716     }
 717 
 718     private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) {
 719         Map<String, Object> names = new HashMap<>();
 720 


 827         "DateTimePatterns",
 828         "DateTimePatternChars"
 829     };
 830 
 831     private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
             // Builds the FormatData content from "map": per-calendar format
             // elements, calendar names, and number formatting entries. The
             // result is a LinkedHashMap, so entries keep the order in which
             // they are copied. The "id" parameter is not used in this body.
 832         Map<String, Object> formatData = new LinkedHashMap<>();
             // For every calendar type except GENERIC, copy each known format
             // element, both under its "java.time."-prefixed key and its plain key.
 833         for (CalendarType calendarType : CalendarType.values()) {
 834             if (calendarType == CalendarType.GENERIC) {
 835                 continue;
 836             }
 837             String prefix = calendarType.keyElementName();
 838             for (String element : FORMAT_DATA_ELEMENTS) {
 839                 String key = prefix + element;
 840                 copyIfPresent(map, "java.time." + key, formatData);
 841                 copyIfPresent(map, key, formatData);
 842             }
 843         }
 844 
 845         for (String key : map.keySet()) {
 846         // Copy available calendar names
 847             if (key.startsWith(CLDRConverter.CALENDAR_NAME_PREFIX)) {
 848                 String type = key.substring(CLDRConverter.CALENDAR_NAME_PREFIX.length());
 849                 for (CalendarType calendarType : CalendarType.values()) {
 850                     if (calendarType == CalendarType.GENERIC) {
 851                         continue;
 852                     }
 853                     if (type.equals(calendarType.lname())) {
 854                         Object value = map.get(key);
 855                         formatData.put(key, value);
                             // Also store the name under the key built from
                             // uname() when that key differs from the original.
 856                         String ukey = CLDRConverter.CALENDAR_NAME_PREFIX + calendarType.uname();
 857                         if (!key.equals(ukey)) {


 858                             formatData.put(ukey, value);
 859                         }
 860                     }
 861                 }
 862             }
 863         }
 864 
 865         copyIfPresent(map, "DefaultNumberingSystem", formatData);
 866 
             // Note that remove() takes "numberingScripts" out of the caller's
             // map. When the list exists, the script-qualified NumberElements
             // entries are copied; otherwise the plain "NumberElements" entry.
 867         @SuppressWarnings("unchecked")
 868         List<String> numberingScripts = (List<String>) map.remove("numberingScripts");
 869         if (numberingScripts != null) {
 870             for (String script : numberingScripts) {
 871                 copyIfPresent(map, script + "." + "NumberElements", formatData);
 872             }
 873         } else {
 874             copyIfPresent(map, "NumberElements", formatData);
 875         }
 876         copyIfPresent(map, "NumberPatterns", formatData);












 877         return formatData;
 878     }
 879 
 880     private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) {
             // Copies the value stored under "key" from src to dest under the
             // same key. Nothing is copied when src has no value for the key
             // or maps it to null.
 881         Object value = src.get(key);
 882         if (value != null) {
 883             dest.put(key, value);
 884         }
 885     }
 886 
 887     // --- code below here is adapted from java.util.Properties ---
         // NOTE(review): the characters in these two strings are presumably the
         // ones saveConvert() writes with a preceding backslash, one set per
         // output flavour chosen by its useJava flag - confirm in saveConvert().
 888     private static final String specialSaveCharsJava = "\"";
 889     private static final String specialSaveCharsProperties = "=: \t\r\n\f#!";
 890 
 891     /*
 892      * Converts unicodes to encoded \uxxxx
 893      * and writes out any of the characters in specialSaveChars
 894      * with a preceding slash
 895      */
 896     static String saveConvert(String theString, boolean useJava) {




  35 import java.util.*;
  36 import java.util.ResourceBundle.Control;
  37 import java.util.logging.Level;
  38 import java.util.logging.Logger;
  39 import java.util.stream.Collectors;
  40 import javax.xml.parsers.SAXParser;
  41 import javax.xml.parsers.SAXParserFactory;
  42 import org.xml.sax.SAXNotRecognizedException;
  43 import org.xml.sax.SAXNotSupportedException;
  44 
  45 
  46 /**
  47  * Converts locale data from "Locale Data Markup Language" format to
  48  * JRE resource bundle format. LDML is the format used by the Common
  49  * Locale Data Repository maintained by the Unicode Consortium.
  50  */
  51 public class CLDRConverter {
  52 
  53     static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd";
  54     static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd";
  55     static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd";
         // The three constants above are the system identifier URLs of the
         // LDML, supplemental LDML and BCP 47 LDML DTDs.
  56   
  57 
         // Base directory of the CLDR input. The LOCAL_* and *_SOURCE_FILE
         // fields that follow are not initialised here; the "Set up path names"
         // step that runs after argument parsing derives them from CLDR_BASE.
  58     private static String CLDR_BASE = "../CLDR/21.0.1/";
  59     static String LOCAL_LDML_DTD;
  60     static String LOCAL_SPPL_LDML_DTD;
  61     static String LOCAL_BCP47_LDML_DTD;  
  62     private static String SOURCE_FILE_DIR;
  63     private static String SPPL_SOURCE_FILE;
  64     private static String NUMBERING_SOURCE_FILE;
  65     private static String METAZONES_SOURCE_FILE;
  66     private static String LIKELYSUBTAGS_SOURCE_FILE;
  67     private static String TIMEZONE_SOURCE_FILE;
         // Output directory; the usage text documents "-o dir" with this default.
  68     static String DESTINATION_DIR = "build/gensrc";
  69 
         // Key prefixes of entries in the parsed bundle maps. The LOCALE_*
         // constants below are all built on top of LOCALE_NAME_PREFIX.
  70     static final String LOCALE_NAME_PREFIX = "locale.displayname.";
  71     static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator";
  72     static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype";    
  73     static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key.";
  74     static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type.";
  75     static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca.";
  76     static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol.";
  77     static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
  78     static final String CALENDAR_NAME_PREFIX = "calendarname.";
  79     static final String TIMEZONE_ID_PREFIX = "timezone.id.";
  80     static final String ZONE_NAME_PREFIX = "timezone.displayname.";
  81     static final String METAZONE_ID_PREFIX = "metazone.id.";
  82     static final String PARENT_LOCALE_PREFIX = "parentLocale.";
  83 
         // SAX handlers for the locale-independent files.
         // NOTE(review): handlerTimeZone is presumably set by parseBCP47(),
         // which is not part of this listing - confirm.
  84     private static SupplementDataParseHandler handlerSuppl;
  85     private static LikelySubtagsParseHandler handlerLikelySubtags;
  86     static NumberingSystemsParseHandler handlerNumbering;
  87     static MetaZonesParseHandler handlerMetaZones;
  88     static TimeZoneParseHandler handlerTimeZone;
         // Assigned a ResourceBundleGenerator before any bundle is converted.
  89     private static BundleGenerator bundleGenerator;
  90 
  91     // java.base module related
  92     static boolean isBaseModule = false;
         // When still empty after argument parsing, setupBaseLocales("en-US")
         // is called to populate the default.
  93     static final Set<Locale> BASE_LOCALES = new HashSet<>();
  94 
  95     // "parentLocales" map
  96     private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
         // ResourceBundle.Control instance for the default format list.
  97     private static final ResourceBundle.Control defCon =
  98         ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
  99 
 100     static enum DraftType {
 101         UNCONFIRMED,
 102         PROVISIONAL,
 103         CONTRIBUTED,
 104         APPROVED;
 105 
 106         private static final Map<String, DraftType> map = new HashMap<>();
 107         static {
 108             for (DraftType dt : values()) {


 194 
 195                     case "-help":
 196                         usage();
 197                         System.exit(0);
 198                         break;
 199 
 200                     default:
 201                         throw new RuntimeException();
 202                     }
 203                 }
 204             } catch (RuntimeException e) {
 205                 severe("unknown or imcomplete arg(s): " + currentArg);
 206                 usage();
 207                 System.exit(1);
 208             }
 209         }
 210 
 211         // Set up path names
 212         LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd";
 213         LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd";
 214         LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd";
 215         SOURCE_FILE_DIR = CLDR_BASE + "/main";
 216         SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml";
 217         LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml";
 218         NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml";
 219         METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml";
 220         TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml";
 221 
 222         if (BASE_LOCALES.isEmpty()) {
 223             setupBaseLocales("en-US");
 224         }
 225 
 226         bundleGenerator = new ResourceBundleGenerator();
 227 
 228         // Parse data independent of locales
 229         parseSupplemental();
 230         parseBCP47();
 231 
 232         List<Bundle> bundles = readBundleList();
 233         convertBundles(bundles);
 234         convertBundles(addedBundles);
 235     }
 236 
 237     private static void usage() {
             // Passes the command-line synopsis to errout(). Each option is
             // listed exactly once; the "-o dir" entry used to be printed a
             // second time with "default" misspelled.
             // NOTE(review): the %n sequences imply errout() treats its
             // argument as a format string - confirm against errout().
 238         errout("Usage: java CLDRConverter [options]%n"
 239                 + "\t-help          output this usage message and exit%n"
 240                 + "\t-verbose       output information%n"
 241                 + "\t-draft [contributed | approved | provisional | unconfirmed]%n"
 242                 + "\t\t       draft level for using data (default: contributed)%n"
 243                 + "\t-base dir      base directory for CLDR input files%n"
 244                 + "\t-basemodule    generates bundles that go into java.base module%n"
 245                 + "\t-baselocales loc(,loc)*      locales that go into the base module%n"
 246                 + "\t-o dir         output directory (default: ./build/gensrc)%n"
 248                 + "\t-utf8          use UTF-8 rather than \\uxxxx (for debug)%n");
 249     }
 250 


 317                     // processed first.
 318                     if ("root".equals(id)) {
 319                         retList.add(0, b);
 320                     } else {
 321                         retList.add(b);
 322                     }
 323                 }
 324             }
 325         }
 326         return retList;
 327     }
 328 
 329     private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
         // cldrBundles (above) caches parsed bundle data by locale id; it is
         // read and filled by getCLDRBundle().
 330     // this list will contain additional bundles to be generated for Region dependent Data.
 331     private static List<Bundle> addedBundles = new ArrayList<>();
 332 
         // Category name -> sorted set of language tags.
 333     private static Map<String, SortedSet<String>> metaInfo = new HashMap<>();
 334 
 335     static {
 336         // For generating information on supported locales.
             // Only the "AvailableLocales" category is registered in this version.





 337         metaInfo.put("AvailableLocales", new TreeSet<>());
 338     }
 339 
 340 
         // NOTE(review): no use of this set is visible in this listing;
         // presumably it names the keys treated as calendar data - confirm.
 341     private static Set<String> calendarDataFields = Set.of("firstDayOfWeek", "minimalDaysInFirstWeek");
 342 
 343     static Map<String, Object> getCLDRBundle(String id) throws Exception {
 344         Map<String, Object> bundle = cldrBundles.get(id);
 345         if (bundle != null) {
 346             return bundle;
 347         }





 348         File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml");
 349         if (!file.exists()) {
 350             // Skip if the file doesn't exist.
 351             return Collections.emptyMap();
 352         }
 353         
 354         info("..... main directory .....");
 355         LDMLParseHandler handler = new LDMLParseHandler(id);
 356         parseLDMLFile(file, handler);
 357 
 358         bundle = handler.getData();
 359         cldrBundles.put(id, bundle);
 360         String country = getCountryCode(id);
 361         if (country != null) {
 362             bundle = handlerSuppl.getData(country);
 363             if (bundle != null) {
 364                 //merge two maps into one map
 365                 Map<String, Object> temp = cldrBundles.remove(id);
 366                 bundle.putAll(temp);
 367                 cldrBundles.put(id, bundle);
 368             }
 369         }
 370         return bundle;
 371     }
 372 
 373     // Parsers for data in "supplemental" directory
 374     //
 375     private static void parseSupplemental() throws Exception {
 376         // Parse SupplementalData file and store the information in the HashMap
 377         // Calendar information such as firstDay and minDay are stored in
 378         // supplementalData.xml as of CLDR1.4. Individual territory is listed
 379         // with its ISO 3166 country code while default is listed using UNM49
 380         // region and composition numerical code (001 for World.)
 381         //
 382         // SupplementalData file also provides the "parent" locales which
 383         // are othrwise not to be fallen back. Process them here as well.
 384         //





 385         handlerSuppl = new SupplementDataParseHandler();
 386         parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl);

 387         Map<String, Object> parentData = handlerSuppl.getData("root");
 388         parentData.keySet().forEach(key -> {
 389                 parentLocalesMap.put(key, new TreeSet(
 390                     Arrays.asList(((String)parentData.get(key)).split(" "))));
 391             });
 392 
 393         // Parse numberingSystems to get digit zero character information.




 394         handlerNumbering = new NumberingSystemsParseHandler();
 395         parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering);

 396 
 397         // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names





 398         handlerMetaZones = new MetaZonesParseHandler();
 399         parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones);

 400 
 401         // Parse likelySubtags





 402         handlerLikelySubtags = new LikelySubtagsParseHandler();
 403         parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
 404     }
 405     
 406     // Parsers for data in "bcp47" directory
 407     //
 408     private static void parseBCP47() throws Exception {
 409         // Parse timezone
 410         handlerTimeZone = new TimeZoneParseHandler();
 411         parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone);
 412     }
 413     
 414     private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception {
 415         info("..... Parsing " + srcfile.getName() + " .....");
 416         SAXParserFactory pf = SAXParserFactory.newInstance();
 417         pf.setValidating(true);
 418         SAXParser parser = pf.newSAXParser();
 419         enableFileAccess(parser);
 420         parser.parse(srcfile, handler);       
 421     }
 422 
 423     /**
 424      * This method will check if a new region dependent Bundle needs to be
 425      * generated for this Locale id and targetMap. New Bundle will be generated
 426      * when Locale id has non empty script and country code and targetMap
 427      * contains region dependent data. This method will also remove region
 428      * dependent data from this targetMap after candidate locales check. E.g. It
 429      * will call genRegionDependentBundle() in case of az_Latn_AZ locale and
 430      * remove region dependent data from this targetMap so that az_Latn_AZ
 431      * bundle will not be created. For az_Cyrl_AZ, new Bundle will be generated
 432      * but region dependent data will not be removed from targetMap as its candidate
 433      * locales are [az_Cyrl_AZ, az_Cyrl, root], which does not include az_AZ for
 434      * fallback.
 435      *
 436      */
 437 
 438     private static void checkRegionDependentBundle(Map<String, Object> targetMap, String id) {
 439         if ((CLDRConverter.getScript(id) != "")
 440                 && (CLDRConverter.getCountryCode(id) != "")) {


 508         for (Bundle bundle : bundles) {
 509             // Get the target map, which contains all the data that should be
 510             // visible for the bundle's locale
 511 
 512             Map<String, Object> targetMap = bundle.getTargetMap();
 513 
 514             // check if new region DependentBundle needs to be generated for this Locale.
 515             checkRegionDependentBundle(targetMap, bundle.getID());
 516             EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
 517 
 518             if (bundle.isRoot()) {
 519                 // Add DateTimePatternChars because CLDR no longer supports localized patterns.
 520                 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
 521             }
 522 
 523             // Now the map contains just the entries that need to be in the resources bundles.
 524             // Go ahead and generate them.
 525             if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) {
 526                 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID());
 527                 if (!localeNamesMap.isEmpty() || bundle.isRoot()) {


 528                     bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN);
 529                 }
 530             }
 531             if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) {
 532                 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies());
 533                 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) {


 534                     bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN);
 535                 }
 536             }
 537             if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) {
 538                 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID());
 539                 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) {


 540                     bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE);
 541                 }
 542             }
 543             if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) {
 544                 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID());
 545                 if (!calendarDataMap.isEmpty() || bundle.isRoot()) {


 546                     bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN);
 547                 }
 548             }
 549             if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
 550                 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID());
 551                 if (!formatDataMap.isEmpty() || bundle.isRoot()) {


 552                     bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN);
 553                 }
 554             }
 555 
 556             // For AvailableLocales
 557             metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID()));
 558             addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID());
 559         }

 560         bundleGenerator.generateMetaInfo(metaInfo);
 561     }
 562 

































 563     static final Map<String, String> aliases = new HashMap<>();
 564 
 565     /**
 566      * Translate the aliases into the real entries in the bundle map.
 567      */
 568     static void handleAliases(Map<String, Object> bundleMap) {
 569         Set bundleKeys = bundleMap.keySet();
 570         try {
 571             for (String key : aliases.keySet()) {
 572                 String targetKey = aliases.get(key);
 573                 if (bundleKeys.contains(targetKey)) {
 574                     bundleMap.putIfAbsent(key, bundleMap.get(targetKey));
 575                 }
 576             }
 577         } catch (Exception ex) {
 578             Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex);
 579         }
 580     }
 581 
 582     /*


 631                 // Shorter string comes first unless either starts with a digit.
 632                 if (len1 < len2) {
 633                     return -1;
 634                 }
 635                 if (len1 > len2) {
 636                     return 1;
 637                 }
 638             }
 639             return o1.compareTo(o2);
 640         }
 641 
 642         private boolean isDigit(char c) {
 643             return c >= '0' && c <= '9';
 644         }
 645     }
 646 
 647     private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) {
 648         Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE);
 649         for (String key : map.keySet()) {
 650             if (key.startsWith(LOCALE_NAME_PREFIX)) {
 651                 switch (key) {
 652                     case LOCALE_SEPARATOR:
 653                         localeNames.put("ListCompositionPattern", map.get(key));
 654                         break;
 655                     case LOCALE_KEYTYPE:
 656                         localeNames.put("ListKeyTypePattern", map.get(key));
 657                         break;
 658                     default:
 659                         localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key));
 660                         break;
 661                 }
 662             }
 663         }
 664 
 665         if (id.equals("root")) {
 666             // Add display name pattern, which is not in CLDR
 667             localeNames.put("DisplayNamePattern", "{0,choice,0#|1#{1}|2#{1} ({2})}");
 668         }
 669 
 670         return localeNames;
 671     }
 672 
 673     @SuppressWarnings("AssignmentToForLoopParameter")
 674     private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names)
 675             throws Exception {
 676         Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE);
 677         for (String key : map.keySet()) {
 678             if (key.startsWith(CURRENCY_NAME_PREFIX)) {
 679                 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key));
 680             } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) {
 681                 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key));
 682             }
 683         }
 684         return currencyNames;
 685     }
 686 
 687     private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) {
 688         Map<String, Object> names = new HashMap<>();
 689 


 796         "DateTimePatterns",
 797         "DateTimePatternChars"
 798     };
 799 
 800     private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
 801         Map<String, Object> formatData = new LinkedHashMap<>();
 802         for (CalendarType calendarType : CalendarType.values()) {
 803             if (calendarType == CalendarType.GENERIC) {
 804                 continue;
 805             }
 806             String prefix = calendarType.keyElementName();
 807             for (String element : FORMAT_DATA_ELEMENTS) {
 808                 String key = prefix + element;
 809                 copyIfPresent(map, "java.time." + key, formatData);
 810                 copyIfPresent(map, key, formatData);
 811             }
 812         }
 813 
 814         for (String key : map.keySet()) {
 815         // Copy available calendar names
 816             if (key.startsWith(CLDRConverter.LOCALE_TYPE_PREFIX_CA)) {
 817                 String type = key.substring(CLDRConverter.LOCALE_TYPE_PREFIX_CA.length());
 818                 for (CalendarType calendarType : CalendarType.values()) {
 819                     if (calendarType == CalendarType.GENERIC) {
 820                         continue;
 821                     }
 822                     if (type.equals(calendarType.lname())) {
 823                         Object value = map.get(key);
 824                         String dataKey = key.replace(LOCALE_TYPE_PREFIX_CA,
 825                                 CALENDAR_NAME_PREFIX);
 826                         formatData.put(dataKey, value);
 827                         String ukey = CALENDAR_NAME_PREFIX + calendarType.uname();
 828                         if (!dataKey.equals(ukey)) {
 829                             formatData.put(ukey, value);
 830                         }
 831                     }
 832                 }
 833             }
 834         }
 835 
 836         copyIfPresent(map, "DefaultNumberingSystem", formatData);
 837 
 838         @SuppressWarnings("unchecked")
 839         List<String> numberingScripts = (List<String>) map.remove("numberingScripts");
 840         if (numberingScripts != null) {
 841             for (String script : numberingScripts) {
 842                 copyIfPresent(map, script + "." + "NumberElements", formatData);
 843             }
 844         } else {
 845             copyIfPresent(map, "NumberElements", formatData);
 846         }
 847         copyIfPresent(map, "NumberPatterns", formatData);
 848 
 849         // put extra number elements for available scripts into formatData, if it is "root"
 850         if (id.equals("root")) {
 851             handlerNumbering.keySet().stream()
 852                 .filter(k -> !numberingScripts.contains(k))
 853                 .forEach(k -> {
 854                     String[] ne = (String[])map.get("latn.NumberElements");
 855                     String[] neNew = Arrays.copyOf(ne, ne.length);
 856                     neNew[4] = handlerNumbering.get(k).substring(0, 1);
 857                     formatData.put(k + ".NumberElements", neNew);
 858                 });
 859         }
 860         return formatData;
 861     }
 862 
 863     private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) {
 864         Object value = src.get(key);
 865         if (value != null) {
 866             dest.put(key, value);
 867         }
 868     }
 869 
 870     // --- code below here is adapted from java.util.Properties ---
 871     private static final String specialSaveCharsJava = "\"";
 872     private static final String specialSaveCharsProperties = "=: \t\r\n\f#!";
 873 
 874     /*
 875      * Converts unicodes to encoded \uxxxx
 876      * and writes out any of the characters in specialSaveChars
 877      * with a preceding slash
 878      */
 879     static String saveConvert(String theString, boolean useJava) {


< prev index next >