make/src/classes/build/tools/cldrconverter/CLDRConverter.java
Print this page
*** 1,7 ****
/*
! * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 30,39 ****
--- 30,42 ----
import java.nio.file.DirectoryStream;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
+ import java.util.ResourceBundle.Control;
+ import java.util.logging.Level;
+ import java.util.logging.Logger;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
*** 62,77 ****
--- 65,90 ----
// Key prefixes for entries in the converter's data maps. CALENDAR_NAME_PREFIX,
// for example, is matched against map keys in extractFormatData(); presumably
// the other prefixes play the same role for their categories.
static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
static final String CALENDAR_NAME_PREFIX = "calendarname.";
static final String TIMEZONE_ID_PREFIX = "timezone.id.";
static final String ZONE_NAME_PREFIX = "timezone.displayname.";
static final String METAZONE_ID_PREFIX = "metazone.id.";
// Prefix of the keys in parentLocalesMap; applyParentLocales() strips it to
// obtain the parent locale name.
+ static final String PARENT_LOCALE_PREFIX = "parentLocale.";
// Parse handlers for supplementalData.xml, numberingSystems.xml and
// metaZones.xml; all three are assigned in parseSupplemental().
private static SupplementDataParseHandler handlerSuppl;
static NumberingSystemsParseHandler handlerNumbering;
static MetaZonesParseHandler handlerMetaZones;
// Generator that convertBundles() uses to emit the bundles and the meta info.
private static BundleGenerator bundleGenerator;
+ // java.base module related
// isBaseModule is switched on by the "-basemodule" option; when set,
// convertBundles() also puts the parent locale mappings into the meta info.
+ static boolean isBaseModule = false;
// Filled by setupBaseLocales() with the candidate locales of every locale
// given to "-baselocales" (or of "en-US" when the option is absent).
+ static final Set<Locale> BASE_LOCALES = new HashSet<>();
+
+ // "parentLocales" map
// Filled in parseSupplemental(): each key maps to the sorted set of the
// space separated entries of its value.
+ private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
// Default-format Control used to compute candidate locale lists.
+ private static final ResourceBundle.Control defCon =
+ ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
+
static enum DraftType {
UNCONFIRMED,
PROVISIONAL,
CONTRIBUTED,
APPROVED;
*** 140,149 ****
--- 153,172 ----
if (!CLDR_BASE.endsWith("/")) {
CLDR_BASE += "/";
}
break;
+ case "-baselocales":
+ // base locales
+ setupBaseLocales(args[++i]);
+ break;
+
+ case "-basemodule":
+ // indicates java.base module resource generation
+ isBaseModule = true;
+ break;
+
case "-o":
// output directory
DESTINATION_DIR = args[++i];
break;
*** 177,188 ****
--- 200,218 ----
SOURCE_FILE_DIR = CLDR_BASE + "common/main";
SPPL_SOURCE_FILE = CLDR_BASE + "common/supplemental/supplementalData.xml";
NUMBERING_SOURCE_FILE = CLDR_BASE + "common/supplemental/numberingSystems.xml";
METAZONES_SOURCE_FILE = CLDR_BASE + "common/supplemental/metaZones.xml";
+ if (BASE_LOCALES.isEmpty()) {
+ setupBaseLocales("en-US");
+ }
+
bundleGenerator = new ResourceBundleGenerator();
+ // Parse data independent of locales
+ parseSupplemental();
+
List<Bundle> bundles = readBundleList();
convertBundles(bundles);
}
private static void usage() {
*** 190,199 ****
--- 220,232 ----
+ "\t-help output this usage message and exit%n"
+ "\t-verbose output information%n"
+ "\t-draft [approved | provisional | unconfirmed]%n"
+ "\t\t draft level for using data (default: approved)%n"
+ "\t-base dir base directory for CLDR input files%n"
+ + "\t-basemodule generates bundles that go into java.base module%n"
+ + "\t-baselocales loc(,loc)* locales that go into the base module%n"
+ + "\t-o dir output directory (default: ./build/gensrc)%n"
+ "\t-o dir output directory (defaut: ./build/gensrc)%n"
+ "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n");
}
static void info(String fmt, Object... args) {
*** 246,265 ****
// property requires >= JAXP 1.5
}
}
private static List<Bundle> readBundleList() throws Exception {
- ResourceBundle.Control defCon = ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
List<Bundle> retList = new ArrayList<>();
Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR);
try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) {
for (Path entry : dirStr) {
String fileName = entry.getFileName().toString();
if (fileName.endsWith(".xml")) {
String id = fileName.substring(0, fileName.indexOf('.'));
Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id));
! List<Locale> candList = defCon.getCandidateLocales("", cldrLoc);
StringBuilder sb = new StringBuilder();
for (Locale loc : candList) {
if (!loc.equals(Locale.ROOT)) {
sb.append(toLocaleName(loc.toLanguageTag()));
sb.append(",");
--- 279,297 ----
// property requires >= JAXP 1.5
}
}
private static List<Bundle> readBundleList() throws Exception {
List<Bundle> retList = new ArrayList<>();
Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR);
try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) {
for (Path entry : dirStr) {
String fileName = entry.getFileName().toString();
if (fileName.endsWith(".xml")) {
String id = fileName.substring(0, fileName.indexOf('.'));
Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id));
! List<Locale> candList = applyParentLocales("", defCon.getCandidateLocales("", cldrLoc));
StringBuilder sb = new StringBuilder();
for (Locale loc : candList) {
if (!loc.equals(Locale.ROOT)) {
sb.append(toLocaleName(loc.toLanguageTag()));
sb.append(",");
*** 267,290 ****
}
if (sb.indexOf("root") == -1) {
sb.append("root");
}
Bundle b = new Bundle(id, sb.toString(), null, null);
! // Insert the bundle for en at the top so that it will get
// processed first.
! if ("en".equals(id)) {
retList.add(0, b);
} else {
retList.add(b);
}
}
}
}
return retList;
}
! private static Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
static Map<String, Object> getCLDRBundle(String id) throws Exception {
Map<String, Object> bundle = cldrBundles.get(id);
if (bundle != null) {
return bundle;
--- 299,322 ----
}
if (sb.indexOf("root") == -1) {
sb.append("root");
}
Bundle b = new Bundle(id, sb.toString(), null, null);
! // Insert the bundle for root at the top so that it will get
// processed first.
! if ("root".equals(id)) {
retList.add(0, b);
} else {
retList.add(b);
}
}
}
}
return retList;
}
! private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
static Map<String, Object> getCLDRBundle(String id) throws Exception {
Map<String, Object> bundle = cldrBundles.get(id);
if (bundle != null) {
return bundle;
*** 317,389 ****
}
}
return bundle;
}
! private static void convertBundles(List<Bundle> bundles) throws Exception {
// Parse SupplementalData file and store the information in the HashMap
// Calendar information such as firstDay and minDay are stored in
// supplementalData.xml as of CLDR1.4. Individual territory is listed
// with its ISO 3166 country code while default is listed using UNM49
// region and composition numerical code (001 for World.)
SAXParserFactory factorySuppl = SAXParserFactory.newInstance();
factorySuppl.setValidating(true);
SAXParser parserSuppl = factorySuppl.newSAXParser();
enableFileAccess(parserSuppl);
handlerSuppl = new SupplementDataParseHandler();
File fileSupply = new File(SPPL_SOURCE_FILE);
parserSuppl.parse(fileSupply, handlerSuppl);
// Parse numberingSystems to get digit zero character information.
SAXParserFactory numberingParser = SAXParserFactory.newInstance();
numberingParser.setValidating(true);
SAXParser parserNumbering = numberingParser.newSAXParser();
enableFileAccess(parserNumbering);
handlerNumbering = new NumberingSystemsParseHandler();
File fileNumbering = new File(NUMBERING_SOURCE_FILE);
parserNumbering.parse(fileNumbering, handlerNumbering);
// Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
SAXParserFactory metazonesParser = SAXParserFactory.newInstance();
metazonesParser.setValidating(true);
SAXParser parserMetaZones = metazonesParser.newSAXParser();
enableFileAccess(parserMetaZones);
handlerMetaZones = new MetaZonesParseHandler();
File fileMetaZones = new File(METAZONES_SOURCE_FILE);
parserNumbering.parse(fileMetaZones, handlerMetaZones);
// For generating information on supported locales.
Map<String, SortedSet<String>> metaInfo = new HashMap<>();
! metaInfo.put("LocaleNames", new TreeSet<String>());
! metaInfo.put("CurrencyNames", new TreeSet<String>());
! metaInfo.put("TimeZoneNames", new TreeSet<String>());
! metaInfo.put("CalendarData", new TreeSet<String>());
! metaInfo.put("FormatData", new TreeSet<String>());
for (Bundle bundle : bundles) {
// Get the target map, which contains all the data that should be
// visible for the bundle's locale
Map<String, Object> targetMap = bundle.getTargetMap();
EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
- // Fill in any missing resources in the base bundle from en and en-US data.
- // This is because CLDR root.xml is supposed to be language neutral and doesn't
- // provide some resource data. Currently, the runtime assumes that there are all
- // resources though the parent resource bundle chain.
if (bundle.isRoot()) {
- Map<String, Object> enData = new HashMap<>();
- // Create a superset of en-US and en bundles data in order to
- // fill in any missing resources in the base bundle.
- enData.putAll(Bundle.getBundle("en").getTargetMap());
- enData.putAll(Bundle.getBundle("en_US").getTargetMap());
- for (String key : enData.keySet()) {
- if (!targetMap.containsKey(key)) {
- targetMap.put(key, enData.get(key));
- }
- }
// Add DateTimePatternChars because CLDR no longer supports localized patterns.
targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
}
// Now the map contains just the entries that need to be in the resources bundles.
--- 349,430 ----
}
}
return bundle;
}
! // Parsers for data in "supplemental" directory
! //
! private static void parseSupplemental() throws Exception {
// Parse SupplementalData file and store the information in the HashMap
// Calendar information such as firstDay and minDay are stored in
// supplementalData.xml as of CLDR1.4. Individual territory is listed
// with its ISO 3166 country code while default is listed using UNM49
// region and composition numerical code (001 for World.)
+ //
+ // SupplementalData file also provides the "parent" locales which
+ // are othrwise not to be fallen back. Process them here as well.
+ //
+ info("..... Parsing supplementalData.xml .....");
SAXParserFactory factorySuppl = SAXParserFactory.newInstance();
factorySuppl.setValidating(true);
SAXParser parserSuppl = factorySuppl.newSAXParser();
enableFileAccess(parserSuppl);
handlerSuppl = new SupplementDataParseHandler();
File fileSupply = new File(SPPL_SOURCE_FILE);
parserSuppl.parse(fileSupply, handlerSuppl);
+ Map<String, Object> parentData = handlerSuppl.getData("root");
+ parentData.keySet().forEach(key -> {
+ parentLocalesMap.put(key, new TreeSet(
+ Arrays.asList(((String)parentData.get(key)).split(" "))));
+ });
// Parse numberingSystems to get digit zero character information.
+ info("..... Parsing numberingSystem.xml .....");
SAXParserFactory numberingParser = SAXParserFactory.newInstance();
numberingParser.setValidating(true);
SAXParser parserNumbering = numberingParser.newSAXParser();
enableFileAccess(parserNumbering);
handlerNumbering = new NumberingSystemsParseHandler();
File fileNumbering = new File(NUMBERING_SOURCE_FILE);
parserNumbering.parse(fileNumbering, handlerNumbering);
// Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
+ info("..... Parsing metaZones.xml .....");
SAXParserFactory metazonesParser = SAXParserFactory.newInstance();
metazonesParser.setValidating(true);
SAXParser parserMetaZones = metazonesParser.newSAXParser();
enableFileAccess(parserMetaZones);
handlerMetaZones = new MetaZonesParseHandler();
File fileMetaZones = new File(METAZONES_SOURCE_FILE);
parserNumbering.parse(fileMetaZones, handlerMetaZones);
+ }
+ private static void convertBundles(List<Bundle> bundles) throws Exception {
// For generating information on supported locales.
Map<String, SortedSet<String>> metaInfo = new HashMap<>();
! metaInfo.put("LocaleNames", new TreeSet<>());
! metaInfo.put("CurrencyNames", new TreeSet<>());
! metaInfo.put("TimeZoneNames", new TreeSet<>());
! metaInfo.put("CalendarData", new TreeSet<>());
! metaInfo.put("FormatData", new TreeSet<>());
! metaInfo.put("AvailableLocales", new TreeSet<>());
!
! // parent locales map. The mappings are put in base metaInfo file
! // for now.
! if (isBaseModule) {
! metaInfo.putAll(parentLocalesMap);
! }
for (Bundle bundle : bundles) {
// Get the target map, which contains all the data that should be
// visible for the bundle's locale
Map<String, Object> targetMap = bundle.getTargetMap();
EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
if (bundle.isRoot()) {
// Add DateTimePatternChars because CLDR no longer supports localized patterns.
targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
}
// Now the map contains just the entries that need to be in the resources bundles.
*** 416,479 ****
bundleGenerator.generateBundle("util", "CalendarData", bundle.getID(), true, calendarDataMap, BundleType.PLAIN);
}
}
if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID());
- // LocaleData.getAvailableLocales depends on having FormatData bundles around
if (!formatDataMap.isEmpty() || bundle.isRoot()) {
metaInfo.get("FormatData").add(toLanguageTag(bundle.getID()));
bundleGenerator.generateBundle("text", "FormatData", bundle.getID(), true, formatDataMap, BundleType.PLAIN);
}
}
! // For testing
! SortedSet<String> allLocales = new TreeSet<>();
! allLocales.addAll(metaInfo.get("CurrencyNames"));
! allLocales.addAll(metaInfo.get("LocaleNames"));
! allLocales.addAll(metaInfo.get("CalendarData"));
! allLocales.addAll(metaInfo.get("FormatData"));
! metaInfo.put("AvailableLocales", allLocales);
}
bundleGenerator.generateMetaInfo(metaInfo);
}
/*
* Returns the language portion of the given id.
* If id is "root", "" is returned.
*/
static String getLanguageCode(String id) {
! int index = id.indexOf('_');
! String lang = null;
! if (index != -1) {
! lang = id.substring(0, index);
! } else {
! lang = "root".equals(id) ? "" : id;
! }
! return lang;
}
/**
* Examine if the id includes the country (territory) code. If it does, it returns
* the country code.
* Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG".
*/
! private static String getCountryCode(String id) {
! //Truncate a variant code with '@' if there is any
! //(eg. de_DE@collation=phonebook,currency=DOM)
! if (id.indexOf('@') != -1) {
! id = id.substring(0, id.indexOf('@'));
! }
! String[] tokens = id.split("_");
! for (int index = 1; index < tokens.length; ++index) {
! if (tokens[index].length() == 2
! && Character.isLetter(tokens[index].charAt(0))
! && Character.isLetter(tokens[index].charAt(1))) {
! return tokens[index];
! }
! }
! return null;
}
private static class KeyComparator implements Comparator<String> {
static KeyComparator INSTANCE = new KeyComparator();
--- 457,516 ----
bundleGenerator.generateBundle("util", "CalendarData", bundle.getID(), true, calendarDataMap, BundleType.PLAIN);
}
}
if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID());
if (!formatDataMap.isEmpty() || bundle.isRoot()) {
metaInfo.get("FormatData").add(toLanguageTag(bundle.getID()));
bundleGenerator.generateBundle("text", "FormatData", bundle.getID(), true, formatDataMap, BundleType.PLAIN);
}
}
! // For AvailableLocales
! metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID()));
}
bundleGenerator.generateMetaInfo(metaInfo);
}
+ static final Map<String, String> aliases = new HashMap<>();
+
+ /**
+ * Translate the aliases into the real entries in the bundle map.
+ */
+ static void handleAliases(Map<String, Object> bundleMap) {
+ Set bundleKeys = bundleMap.keySet();
+ try {
+ for (String key : aliases.keySet()) {
+ String targetKey = aliases.get(key);
+ if (bundleKeys.contains(targetKey)) {
+ bundleMap.putIfAbsent(key, bundleMap.get(targetKey));
+ }
+ }
+ } catch (Exception ex) {
+ Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+
/*
* Returns the language portion of the given id.
* If id is "root", "" is returned.
*/
static String getLanguageCode(String id) {
! return "root".equals(id) ? "" : Locale.forLanguageTag(id.replaceAll("_", "-")).getLanguage();
}
/**
* Examine if the id includes the country (territory) code. If it does, it returns
* the country code.
* Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG".
+ * For now, it does not return US M.49 code, e.g., '001', as those three digit numbers cannot
+ * be translated into package names.
*/
! static String getCountryCode(String id) {
! String ctry = Locale.forLanguageTag(id.replaceAll("_", "-")).getCountry();
! return ctry.length() == 2 ? ctry : null;
}
private static class KeyComparator implements Comparator<String> {
static KeyComparator INSTANCE = new KeyComparator();
*** 596,634 ****
};
private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
Map<String, Object> formatData = new LinkedHashMap<>();
for (CalendarType calendarType : CalendarType.values()) {
String prefix = calendarType.keyElementName();
for (String element : FORMAT_DATA_ELEMENTS) {
String key = prefix + element;
copyIfPresent(map, "java.time." + key, formatData);
copyIfPresent(map, key, formatData);
}
}
! // Workaround for islamic-umalqura name support (JDK-8015986)
! switch (id) {
! case "ar":
! map.put(CLDRConverter.CALENDAR_NAME_PREFIX
! + CalendarType.ISLAMIC_UMALQURA.lname(),
! // derived from CLDR 24 draft
! "\u0627\u0644\u062a\u0642\u0648\u064a\u0645 "
! +"\u0627\u0644\u0625\u0633\u0644\u0627\u0645\u064a "
! +"[\u0623\u0645 \u0627\u0644\u0642\u0631\u0649]");
! break;
! case "en":
! map.put(CLDRConverter.CALENDAR_NAME_PREFIX
! + CalendarType.ISLAMIC_UMALQURA.lname(),
! // derived from CLDR 24 draft
! "Islamic Calendar [Umm al-Qura]");
! break;
! }
! // Copy available calendar names
for (String key : map.keySet()) {
if (key.startsWith(CLDRConverter.CALENDAR_NAME_PREFIX)) {
String type = key.substring(CLDRConverter.CALENDAR_NAME_PREFIX.length());
for (CalendarType calendarType : CalendarType.values()) {
if (type.equals(calendarType.lname())) {
Object value = map.get(key);
formatData.put(key, value);
String ukey = CLDRConverter.CALENDAR_NAME_PREFIX + calendarType.uname();
if (!key.equals(ukey)) {
--- 633,661 ----
};
private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
Map<String, Object> formatData = new LinkedHashMap<>();
for (CalendarType calendarType : CalendarType.values()) {
+ if (calendarType == CalendarType.GENERIC) {
+ continue;
+ }
String prefix = calendarType.keyElementName();
for (String element : FORMAT_DATA_ELEMENTS) {
String key = prefix + element;
copyIfPresent(map, "java.time." + key, formatData);
copyIfPresent(map, key, formatData);
}
}
!
for (String key : map.keySet()) {
+ // Copy available calendar names
if (key.startsWith(CLDRConverter.CALENDAR_NAME_PREFIX)) {
String type = key.substring(CLDRConverter.CALENDAR_NAME_PREFIX.length());
for (CalendarType calendarType : CalendarType.values()) {
+ if (calendarType == CalendarType.GENERIC) {
+ continue;
+ }
if (type.equals(calendarType.lname())) {
Object value = map.get(key);
formatData.put(key, value);
String ukey = CLDRConverter.CALENDAR_NAME_PREFIX + calendarType.uname();
if (!key.equals(ukey)) {
*** 743,748 ****
--- 770,814 ----
if (tag.indexOf('-') == -1) {
return tag;
}
return tag.replaceAll("-", "_");
}
+
+ private static void setupBaseLocales(String localeList) {
+ Arrays.stream(localeList.split(","))
+ .map(Locale::forLanguageTag)
+ .map(l -> Control.getControl(Control.FORMAT_DEFAULT)
+ .getCandidateLocales("", l))
+ .forEach(BASE_LOCALES::addAll);
+ }
+
+ // applying parent locale rules to the passed candidates list
+ // This has to match with the one in sun.util.cldr.CLDRLocaleProviderAdapter
+ private static Map<Locale, Locale> childToParentLocaleMap = null;
+ private static List<Locale> applyParentLocales(String baseName, List<Locale> candidates) {
+ if (Objects.isNull(childToParentLocaleMap)) {
+ childToParentLocaleMap = new HashMap<>();
+ parentLocalesMap.keySet().forEach(key -> {
+ String parent = key.substring(PARENT_LOCALE_PREFIX.length()).replaceAll("_", "-");
+ parentLocalesMap.get(key).stream().forEach(child -> {
+ childToParentLocaleMap.put(Locale.forLanguageTag(child),
+ "root".equals(parent) ? Locale.ROOT : Locale.forLanguageTag(parent));
+ });
+ });
+ }
+
+ // check irregular parents
+ for (int i = 0; i < candidates.size(); i++) {
+ Locale l = candidates.get(i);
+ Locale p = childToParentLocaleMap.get(l);
+ if (!l.equals(Locale.ROOT) &&
+ Objects.nonNull(p) &&
+ !candidates.get(i+1).equals(p)) {
+ List<Locale> applied = candidates.subList(0, i+1);
+ applied.addAll(applyParentLocales(baseName, defCon.getCandidateLocales(baseName, p)));
+ return applied;
+ }
+ }
+
+ return candidates;
+ }
}