make/src/classes/build/tools/cldrconverter/CLDRConverter.java
Print this page
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -30,10 +30,13 @@
import java.nio.file.DirectoryStream;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
+import java.util.ResourceBundle.Control;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
@@ -62,16 +65,26 @@
static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
static final String CALENDAR_NAME_PREFIX = "calendarname.";
static final String TIMEZONE_ID_PREFIX = "timezone.id.";
static final String ZONE_NAME_PREFIX = "timezone.displayname.";
static final String METAZONE_ID_PREFIX = "metazone.id.";
+ static final String PARENT_LOCALE_PREFIX = "parentLocale.";
private static SupplementDataParseHandler handlerSuppl;
static NumberingSystemsParseHandler handlerNumbering;
static MetaZonesParseHandler handlerMetaZones;
private static BundleGenerator bundleGenerator;
+ // java.base module related
+ static boolean isBaseModule = false;
+ static final Set<Locale> BASE_LOCALES = new HashSet<>();
+
+ // "parentLocales" map
+ private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
+ private static final ResourceBundle.Control defCon =
+ ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
+
static enum DraftType {
UNCONFIRMED,
PROVISIONAL,
CONTRIBUTED,
APPROVED;
@@ -140,10 +153,20 @@
if (!CLDR_BASE.endsWith("/")) {
CLDR_BASE += "/";
}
break;
+ case "-baselocales":
+ // base locales
+ setupBaseLocales(args[++i]);
+ break;
+
+ case "-basemodule":
+ // indicates java.base module resource generation
+ isBaseModule = true;
+ break;
+
case "-o":
// output directory
DESTINATION_DIR = args[++i];
break;
@@ -177,12 +200,19 @@
SOURCE_FILE_DIR = CLDR_BASE + "common/main";
SPPL_SOURCE_FILE = CLDR_BASE + "common/supplemental/supplementalData.xml";
NUMBERING_SOURCE_FILE = CLDR_BASE + "common/supplemental/numberingSystems.xml";
METAZONES_SOURCE_FILE = CLDR_BASE + "common/supplemental/metaZones.xml";
+ if (BASE_LOCALES.isEmpty()) {
+ setupBaseLocales("en-US");
+ }
+
bundleGenerator = new ResourceBundleGenerator();
+ // Parse data independent of locales
+ parseSupplemental();
+
List<Bundle> bundles = readBundleList();
convertBundles(bundles);
}
private static void usage() {
@@ -190,10 +220,13 @@
+ "\t-help output this usage message and exit%n"
+ "\t-verbose output information%n"
+ "\t-draft [approved | provisional | unconfirmed]%n"
+ "\t\t draft level for using data (default: approved)%n"
+ "\t-base dir base directory for CLDR input files%n"
+ + "\t-basemodule generates bundles that go into java.base module%n"
+ + "\t-baselocales loc(,loc)* locales that go into the base module%n"
+ + "\t-o dir output directory (default: ./build/gensrc)%n"
+ "\t-o dir output directory (defaut: ./build/gensrc)%n"
+ "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n");
}
static void info(String fmt, Object... args) {
@@ -246,20 +279,19 @@
// property requires >= JAXP 1.5
}
}
private static List<Bundle> readBundleList() throws Exception {
- ResourceBundle.Control defCon = ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
List<Bundle> retList = new ArrayList<>();
Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR);
try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) {
for (Path entry : dirStr) {
String fileName = entry.getFileName().toString();
if (fileName.endsWith(".xml")) {
String id = fileName.substring(0, fileName.indexOf('.'));
Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id));
- List<Locale> candList = defCon.getCandidateLocales("", cldrLoc);
+ List<Locale> candList = applyParentLocales("", defCon.getCandidateLocales("", cldrLoc));
StringBuilder sb = new StringBuilder();
for (Locale loc : candList) {
if (!loc.equals(Locale.ROOT)) {
sb.append(toLocaleName(loc.toLanguageTag()));
sb.append(",");
@@ -267,24 +299,24 @@
}
if (sb.indexOf("root") == -1) {
sb.append("root");
}
Bundle b = new Bundle(id, sb.toString(), null, null);
- // Insert the bundle for en at the top so that it will get
+ // Insert the bundle for root at the top so that it will get
// processed first.
- if ("en".equals(id)) {
+ if ("root".equals(id)) {
retList.add(0, b);
} else {
retList.add(b);
}
}
}
}
return retList;
}
- private static Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
+ private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
static Map<String, Object> getCLDRBundle(String id) throws Exception {
Map<String, Object> bundle = cldrBundles.get(id);
if (bundle != null) {
return bundle;
@@ -317,73 +349,82 @@
}
}
return bundle;
}
- private static void convertBundles(List<Bundle> bundles) throws Exception {
+ // Parsers for data in "supplemental" directory
+ //
+ private static void parseSupplemental() throws Exception {
// Parse SupplementalData file and store the information in the HashMap
// Calendar information such as firstDay and minDay are stored in
// supplementalData.xml as of CLDR1.4. Individual territory is listed
// with its ISO 3166 country code while default is listed using UNM49
// region and composition numerical code (001 for World.)
+ //
+ // SupplementalData file also provides the "parent" locales which
+ // are othrwise not to be fallen back. Process them here as well.
+ //
+ info("..... Parsing supplementalData.xml .....");
SAXParserFactory factorySuppl = SAXParserFactory.newInstance();
factorySuppl.setValidating(true);
SAXParser parserSuppl = factorySuppl.newSAXParser();
enableFileAccess(parserSuppl);
handlerSuppl = new SupplementDataParseHandler();
File fileSupply = new File(SPPL_SOURCE_FILE);
parserSuppl.parse(fileSupply, handlerSuppl);
+ Map<String, Object> parentData = handlerSuppl.getData("root");
+ parentData.keySet().forEach(key -> {
+ parentLocalesMap.put(key, new TreeSet(
+ Arrays.asList(((String)parentData.get(key)).split(" "))));
+ });
// Parse numberingSystems to get digit zero character information.
+ info("..... Parsing numberingSystem.xml .....");
SAXParserFactory numberingParser = SAXParserFactory.newInstance();
numberingParser.setValidating(true);
SAXParser parserNumbering = numberingParser.newSAXParser();
enableFileAccess(parserNumbering);
handlerNumbering = new NumberingSystemsParseHandler();
File fileNumbering = new File(NUMBERING_SOURCE_FILE);
parserNumbering.parse(fileNumbering, handlerNumbering);
// Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
+ info("..... Parsing metaZones.xml .....");
SAXParserFactory metazonesParser = SAXParserFactory.newInstance();
metazonesParser.setValidating(true);
SAXParser parserMetaZones = metazonesParser.newSAXParser();
enableFileAccess(parserMetaZones);
handlerMetaZones = new MetaZonesParseHandler();
File fileMetaZones = new File(METAZONES_SOURCE_FILE);
parserNumbering.parse(fileMetaZones, handlerMetaZones);
+ }
+ private static void convertBundles(List<Bundle> bundles) throws Exception {
// For generating information on supported locales.
Map<String, SortedSet<String>> metaInfo = new HashMap<>();
- metaInfo.put("LocaleNames", new TreeSet<String>());
- metaInfo.put("CurrencyNames", new TreeSet<String>());
- metaInfo.put("TimeZoneNames", new TreeSet<String>());
- metaInfo.put("CalendarData", new TreeSet<String>());
- metaInfo.put("FormatData", new TreeSet<String>());
+ metaInfo.put("LocaleNames", new TreeSet<>());
+ metaInfo.put("CurrencyNames", new TreeSet<>());
+ metaInfo.put("TimeZoneNames", new TreeSet<>());
+ metaInfo.put("CalendarData", new TreeSet<>());
+ metaInfo.put("FormatData", new TreeSet<>());
+ metaInfo.put("AvailableLocales", new TreeSet<>());
+
+ // parent locales map. The mappings are put in base metaInfo file
+ // for now.
+ if (isBaseModule) {
+ metaInfo.putAll(parentLocalesMap);
+ }
for (Bundle bundle : bundles) {
// Get the target map, which contains all the data that should be
// visible for the bundle's locale
Map<String, Object> targetMap = bundle.getTargetMap();
EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
- // Fill in any missing resources in the base bundle from en and en-US data.
- // This is because CLDR root.xml is supposed to be language neutral and doesn't
- // provide some resource data. Currently, the runtime assumes that there are all
- // resources though the parent resource bundle chain.
if (bundle.isRoot()) {
- Map<String, Object> enData = new HashMap<>();
- // Create a superset of en-US and en bundles data in order to
- // fill in any missing resources in the base bundle.
- enData.putAll(Bundle.getBundle("en").getTargetMap());
- enData.putAll(Bundle.getBundle("en_US").getTargetMap());
- for (String key : enData.keySet()) {
- if (!targetMap.containsKey(key)) {
- targetMap.put(key, enData.get(key));
- }
- }
// Add DateTimePatternChars because CLDR no longer supports localized patterns.
targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
}
// Now the map contains just the entries that need to be in the resources bundles.
@@ -416,64 +457,60 @@
bundleGenerator.generateBundle("util", "CalendarData", bundle.getID(), true, calendarDataMap, BundleType.PLAIN);
}
}
if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID());
- // LocaleData.getAvailableLocales depends on having FormatData bundles around
if (!formatDataMap.isEmpty() || bundle.isRoot()) {
metaInfo.get("FormatData").add(toLanguageTag(bundle.getID()));
bundleGenerator.generateBundle("text", "FormatData", bundle.getID(), true, formatDataMap, BundleType.PLAIN);
}
}
- // For testing
- SortedSet<String> allLocales = new TreeSet<>();
- allLocales.addAll(metaInfo.get("CurrencyNames"));
- allLocales.addAll(metaInfo.get("LocaleNames"));
- allLocales.addAll(metaInfo.get("CalendarData"));
- allLocales.addAll(metaInfo.get("FormatData"));
- metaInfo.put("AvailableLocales", allLocales);
+ // For AvailableLocales
+ metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID()));
}
bundleGenerator.generateMetaInfo(metaInfo);
}
+ static final Map<String, String> aliases = new HashMap<>();
+
+ /**
+ * Translate the aliases into the real entries in the bundle map.
+ */
+ static void handleAliases(Map<String, Object> bundleMap) {
+ Set bundleKeys = bundleMap.keySet();
+ try {
+ for (String key : aliases.keySet()) {
+ String targetKey = aliases.get(key);
+ if (bundleKeys.contains(targetKey)) {
+ bundleMap.putIfAbsent(key, bundleMap.get(targetKey));
+ }
+ }
+ } catch (Exception ex) {
+ Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+
/*
* Returns the language portion of the given id.
* If id is "root", "" is returned.
*/
static String getLanguageCode(String id) {
- int index = id.indexOf('_');
- String lang = null;
- if (index != -1) {
- lang = id.substring(0, index);
- } else {
- lang = "root".equals(id) ? "" : id;
- }
- return lang;
+ return "root".equals(id) ? "" : Locale.forLanguageTag(id.replaceAll("_", "-")).getLanguage();
}
/**
* Examine if the id includes the country (territory) code. If it does, it returns
* the country code.
* Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it return "SG".
+ * For now, it does not return US M.49 code, e.g., '001', as those three digit numbers cannot
+ * be translated into package names.
*/
- private static String getCountryCode(String id) {
- //Truncate a variant code with '@' if there is any
- //(eg. de_DE@collation=phonebook,currency=DOM)
- if (id.indexOf('@') != -1) {
- id = id.substring(0, id.indexOf('@'));
- }
- String[] tokens = id.split("_");
- for (int index = 1; index < tokens.length; ++index) {
- if (tokens[index].length() == 2
- && Character.isLetter(tokens[index].charAt(0))
- && Character.isLetter(tokens[index].charAt(1))) {
- return tokens[index];
- }
- }
- return null;
+ static String getCountryCode(String id) {
+ String ctry = Locale.forLanguageTag(id.replaceAll("_", "-")).getCountry();
+ return ctry.length() == 2 ? ctry : null;
}
private static class KeyComparator implements Comparator<String> {
static KeyComparator INSTANCE = new KeyComparator();
@@ -596,39 +633,29 @@
};
private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
Map<String, Object> formatData = new LinkedHashMap<>();
for (CalendarType calendarType : CalendarType.values()) {
+ if (calendarType == CalendarType.GENERIC) {
+ continue;
+ }
String prefix = calendarType.keyElementName();
for (String element : FORMAT_DATA_ELEMENTS) {
String key = prefix + element;
copyIfPresent(map, "java.time." + key, formatData);
copyIfPresent(map, key, formatData);
}
}
- // Workaround for islamic-umalqura name support (JDK-8015986)
- switch (id) {
- case "ar":
- map.put(CLDRConverter.CALENDAR_NAME_PREFIX
- + CalendarType.ISLAMIC_UMALQURA.lname(),
- // derived from CLDR 24 draft
- "\u0627\u0644\u062a\u0642\u0648\u064a\u0645 "
- +"\u0627\u0644\u0625\u0633\u0644\u0627\u0645\u064a "
- +"[\u0623\u0645 \u0627\u0644\u0642\u0631\u0649]");
- break;
- case "en":
- map.put(CLDRConverter.CALENDAR_NAME_PREFIX
- + CalendarType.ISLAMIC_UMALQURA.lname(),
- // derived from CLDR 24 draft
- "Islamic Calendar [Umm al-Qura]");
- break;
- }
- // Copy available calendar names
+
for (String key : map.keySet()) {
+ // Copy available calendar names
if (key.startsWith(CLDRConverter.CALENDAR_NAME_PREFIX)) {
String type = key.substring(CLDRConverter.CALENDAR_NAME_PREFIX.length());
for (CalendarType calendarType : CalendarType.values()) {
+ if (calendarType == CalendarType.GENERIC) {
+ continue;
+ }
if (type.equals(calendarType.lname())) {
Object value = map.get(key);
formatData.put(key, value);
String ukey = CLDRConverter.CALENDAR_NAME_PREFIX + calendarType.uname();
if (!key.equals(ukey)) {
@@ -743,6 +770,45 @@
if (tag.indexOf('-') == -1) {
return tag;
}
return tag.replaceAll("-", "_");
}
+
+ private static void setupBaseLocales(String localeList) {
+ Arrays.stream(localeList.split(","))
+ .map(Locale::forLanguageTag)
+ .map(l -> Control.getControl(Control.FORMAT_DEFAULT)
+ .getCandidateLocales("", l))
+ .forEach(BASE_LOCALES::addAll);
+ }
+
+ // applying parent locale rules to the passed candidates list
+ // This has to match with the one in sun.util.cldr.CLDRLocaleProviderAdapter
+ private static Map<Locale, Locale> childToParentLocaleMap = null;
+ private static List<Locale> applyParentLocales(String baseName, List<Locale> candidates) {
+ if (Objects.isNull(childToParentLocaleMap)) {
+ childToParentLocaleMap = new HashMap<>();
+ parentLocalesMap.keySet().forEach(key -> {
+ String parent = key.substring(PARENT_LOCALE_PREFIX.length()).replaceAll("_", "-");
+ parentLocalesMap.get(key).stream().forEach(child -> {
+ childToParentLocaleMap.put(Locale.forLanguageTag(child),
+ "root".equals(parent) ? Locale.ROOT : Locale.forLanguageTag(parent));
+ });
+ });
+ }
+
+ // check irregular parents
+ for (int i = 0; i < candidates.size(); i++) {
+ Locale l = candidates.get(i);
+ Locale p = childToParentLocaleMap.get(l);
+ if (!l.equals(Locale.ROOT) &&
+ Objects.nonNull(p) &&
+ !candidates.get(i+1).equals(p)) {
+ List<Locale> applied = candidates.subList(0, i+1);
+ applied.addAll(applyParentLocales(baseName, defCon.getCandidateLocales(baseName, p)));
+ return applied;
+ }
+ }
+
+ return candidates;
+ }
}