35 import java.util.*;
36 import java.util.ResourceBundle.Control;
37 import java.util.logging.Level;
38 import java.util.logging.Logger;
39 import java.util.stream.Collectors;
40 import javax.xml.parsers.SAXParser;
41 import javax.xml.parsers.SAXParserFactory;
42 import org.xml.sax.SAXNotRecognizedException;
43 import org.xml.sax.SAXNotSupportedException;
44
45
46 /**
47 * Converts locale data from "Locale Data Markup Language" format to
48 * JRE resource bundle format. LDML is the format used by the Common
49 * Locale Data Repository maintained by the Unicode Consortium.
50 */
51 public class CLDRConverter {
52
// String constants holding the unicode.org URLs of the LDML DTDs
// (named "system id"; where they are matched is not visible in this chunk).
53 static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd";
54 static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd";
55
// Default CLDR base directory. The "Set up path names" section derives the
// LOCAL_* DTD paths and all *_SOURCE_FILE / SOURCE_FILE_DIR values from it.
// The usage text lists "-base dir" as the base directory for CLDR input files.
56 private static String CLDR_BASE = "../CLDR/21.0.1/";
57 static String LOCAL_LDML_DTD;
58 static String LOCAL_SPPL_LDML_DTD;
59 private static String SOURCE_FILE_DIR;
60 private static String SPPL_SOURCE_FILE;
61 private static String NUMBERING_SOURCE_FILE;
62 private static String METAZONES_SOURCE_FILE;
63 private static String LIKELYSUBTAGS_SOURCE_FILE;
// Default output directory; the usage text lists "-o dir" as the output directory.
64 static String DESTINATION_DIR = "build/gensrc";
65
// Key prefixes used in the parsed data maps. extractLocaleNames(),
// extractCurrencyNames() and extractFormatData() select entries by the
// LOCALE_NAME, CURRENCY_* and CALENDAR_NAME prefixes respectively.
66 static final String LOCALE_NAME_PREFIX = "locale.displayname.";
67 static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol.";
68 static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
69 static final String CALENDAR_NAME_PREFIX = "calendarname.";
70 static final String TIMEZONE_ID_PREFIX = "timezone.id.";
71 static final String ZONE_NAME_PREFIX = "timezone.displayname.";
72 static final String METAZONE_ID_PREFIX = "metazone.id.";
73 static final String PARENT_LOCALE_PREFIX = "parentLocale.";
74
// SAX handlers created and populated by parseSupplemental().
75 private static SupplementDataParseHandler handlerSuppl;
76 private static LikelySubtagsParseHandler handlerLikelySubtags;
77 static NumberingSystemsParseHandler handlerNumbering;
78 static MetaZonesParseHandler handlerMetaZones;
// Assigned a ResourceBundleGenerator after the path names are set up.
79 private static BundleGenerator bundleGenerator;
80
81 // java.base module related
// The usage text lists "-basemodule" and "-baselocales" as the related options;
// when BASE_LOCALES is still empty after argument handling,
// setupBaseLocales("en-US") is called.
82 static boolean isBaseModule = false;
83 static final Set<Locale> BASE_LOCALES = new HashSet<>();
84
85 // "parentLocales" map
// Filled by parseSupplemental(): each value string from
// handlerSuppl.getData("root") is split on spaces into a sorted set.
86 private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
// Default-format ResourceBundle.Control; its use is not visible in this chunk.
87 private static final ResourceBundle.Control defCon =
88 ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
89
90 static enum DraftType {
91 UNCONFIRMED,
92 PROVISIONAL,
93 CONTRIBUTED,
94 APPROVED;
95
96 private static final Map<String, DraftType> map = new HashMap<>();
97 static {
98 for (DraftType dt : values()) {
184
185 case "-help":
186 usage();
187 System.exit(0);
188 break;
189
190 default:
191 throw new RuntimeException();
192 }
193 }
194 } catch (RuntimeException e) {
195 severe("unknown or imcomplete arg(s): " + currentArg);
196 usage();
197 System.exit(1);
198 }
199 }
200
201 // Set up path names
202 LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd";
203 LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd";
204 SOURCE_FILE_DIR = CLDR_BASE + "/main";
205 SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml";
206 LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml";
207 NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml";
208 METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml";
209
210 if (BASE_LOCALES.isEmpty()) {
211 setupBaseLocales("en-US");
212 }
213
214 bundleGenerator = new ResourceBundleGenerator();
215
216 // Parse data independent of locales
217 parseSupplemental();
218
219 List<Bundle> bundles = readBundleList();
220 convertBundles(bundles);
221 convertBundles(addedBundles);
222 }
223
224 private static void usage() {
225 errout("Usage: java CLDRConverter [options]%n"
226 + "\t-help output this usage message and exit%n"
227 + "\t-verbose output information%n"
228 + "\t-draft [contributed | approved | provisional | unconfirmed]%n"
229 + "\t\t draft level for using data (default: contributed)%n"
230 + "\t-base dir base directory for CLDR input files%n"
231 + "\t-basemodule generates bundles that go into java.base module%n"
232 + "\t-baselocales loc(,loc)* locales that go into the base module%n"
233 + "\t-o dir output directory (default: ./build/gensrc)%n"
234 + "\t-o dir output directory (defaut: ./build/gensrc)%n"
235 + "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n");
236 }
237
304 // processed first.
305 if ("root".equals(id)) {
306 retList.add(0, b);
307 } else {
308 retList.add(b);
309 }
310 }
311 }
312 }
313 return retList;
314 }
315
// Parsed CLDR data per locale id; getCLDRBundle() reads and fills this cache.
private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
// this list will contain additional bundles to be generated for Region dependent Data.
private static List<Bundle> addedBundles = new ArrayList<>();

// Sorted sets of language tags, keyed by resource category name.
private static Map<String, SortedSet<String>> metaInfo = new HashMap<>();

static {
    // For generating information on supported locales.
    final String[] categories = {
        "LocaleNames",
        "CurrencyNames",
        "TimeZoneNames",
        "CalendarData",
        "FormatData",
        "AvailableLocales"
    };
    for (String category : categories) {
        metaInfo.put(category, new TreeSet<>());
    }
}


private static Set<String> calendarDataFields = Set.of("firstDayOfWeek", "minimalDaysInFirstWeek");
334
335 static Map<String, Object> getCLDRBundle(String id) throws Exception {
336 Map<String, Object> bundle = cldrBundles.get(id);
337 if (bundle != null) {
338 return bundle;
339 }
340 SAXParserFactory factory = SAXParserFactory.newInstance();
341 factory.setValidating(true);
342 SAXParser parser = factory.newSAXParser();
343 enableFileAccess(parser);
344 LDMLParseHandler handler = new LDMLParseHandler(id);
345 File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml");
346 if (!file.exists()) {
347 // Skip if the file doesn't exist.
348 return Collections.emptyMap();
349 }
350
351 info("..... main directory .....");
352 info("Reading file " + file);
353 parser.parse(file, handler);
354
355 bundle = handler.getData();
356 cldrBundles.put(id, bundle);
357 String country = getCountryCode(id);
358 if (country != null) {
359 bundle = handlerSuppl.getData(country);
360 if (bundle != null) {
361 //merge two maps into one map
362 Map<String, Object> temp = cldrBundles.remove(id);
363 bundle.putAll(temp);
364 cldrBundles.put(id, bundle);
365 }
366 }
367 return bundle;
368 }
369
370 // Parsers for data in "supplemental" directory
371 //
372 private static void parseSupplemental() throws Exception {
373 // Parse SupplementalData file and store the information in the HashMap
374 // Calendar information such as firstDay and minDay are stored in
375 // supplementalData.xml as of CLDR1.4. Individual territory is listed
376 // with its ISO 3166 country code while default is listed using UNM49
377 // region and composition numerical code (001 for World.)
378 //
379 // SupplementalData file also provides the "parent" locales which
380 // are othrwise not to be fallen back. Process them here as well.
381 //
382 info("..... Parsing supplementalData.xml .....");
383 SAXParserFactory factorySuppl = SAXParserFactory.newInstance();
384 factorySuppl.setValidating(true);
385 SAXParser parserSuppl = factorySuppl.newSAXParser();
386 enableFileAccess(parserSuppl);
387 handlerSuppl = new SupplementDataParseHandler();
388 File fileSupply = new File(SPPL_SOURCE_FILE);
389 parserSuppl.parse(fileSupply, handlerSuppl);
390 Map<String, Object> parentData = handlerSuppl.getData("root");
391 parentData.keySet().forEach(key -> {
392 parentLocalesMap.put(key, new TreeSet(
393 Arrays.asList(((String)parentData.get(key)).split(" "))));
394 });
395
396 // Parse numberingSystems to get digit zero character information.
397 SAXParserFactory numberingParser = SAXParserFactory.newInstance();
398 numberingParser.setValidating(true);
399 SAXParser parserNumbering = numberingParser.newSAXParser();
400 enableFileAccess(parserNumbering);
401 handlerNumbering = new NumberingSystemsParseHandler();
402 File fileNumbering = new File(NUMBERING_SOURCE_FILE);
403 parserNumbering.parse(fileNumbering, handlerNumbering);
404
405 // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
406 info("..... Parsing metaZones.xml .....");
407 SAXParserFactory metazonesParser = SAXParserFactory.newInstance();
408 metazonesParser.setValidating(true);
409 SAXParser parserMetaZones = metazonesParser.newSAXParser();
410 enableFileAccess(parserMetaZones);
411 handlerMetaZones = new MetaZonesParseHandler();
412 File fileMetaZones = new File(METAZONES_SOURCE_FILE);
413 parserMetaZones.parse(fileMetaZones, handlerMetaZones);
414
415 // Parse likelySubtags
416 info("..... Parsing likelySubtags.xml .....");
417 SAXParserFactory likelySubtagsParser = SAXParserFactory.newInstance();
418 likelySubtagsParser.setValidating(true);
419 SAXParser parserLikelySubtags = likelySubtagsParser.newSAXParser();
420 enableFileAccess(parserLikelySubtags);
421 handlerLikelySubtags = new LikelySubtagsParseHandler();
422 File fileLikelySubtags = new File(LIKELYSUBTAGS_SOURCE_FILE);
423 parserLikelySubtags.parse(fileLikelySubtags, handlerLikelySubtags);
424 }
425
426 /**
427 * This method will check if a new region dependent Bundle needs to be
428 * generated for this Locale id and targetMap. New Bundle will be generated
429 * when Locale id has non empty script and country code and targetMap
430 * contains region dependent data. This method will also remove region
431 * dependent data from this targetMap after candidate locales check. E.g. It
432 * will call genRegionDependentBundle() in case of az_Latn_AZ locale and
433 * remove region dependent data from this targetMap so that az_Latn_AZ
434 * bundle will not be created. For az_Cyrl_AZ, new Bundle will be generated
435 * but region dependent data will not be removed from targetMap as its candidate
436 * locales are [az_Cyrl_AZ, az_Cyrl, root], which does not include az_AZ for
437 * fallback.
438 *
439 */
440
441 private static void checkRegionDependentBundle(Map<String, Object> targetMap, String id) {
442 if ((CLDRConverter.getScript(id) != "")
443 && (CLDRConverter.getCountryCode(id) != "")) {
511 for (Bundle bundle : bundles) {
512 // Get the target map, which contains all the data that should be
513 // visible for the bundle's locale
514
515 Map<String, Object> targetMap = bundle.getTargetMap();
516
517 // check if new region DependentBundle needs to be generated for this Locale.
518 checkRegionDependentBundle(targetMap, bundle.getID());
519 EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
520
521 if (bundle.isRoot()) {
522 // Add DateTimePatternChars because CLDR no longer supports localized patterns.
523 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
524 }
525
526 // Now the map contains just the entries that need to be in the resources bundles.
527 // Go ahead and generate them.
528 if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) {
529 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID());
530 if (!localeNamesMap.isEmpty() || bundle.isRoot()) {
531 metaInfo.get("LocaleNames").add(toLanguageTag(bundle.getID()));
532 addLikelySubtags(metaInfo, "LocaleNames", bundle.getID());
533 bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN);
534 }
535 }
536 if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) {
537 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies());
538 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) {
539 metaInfo.get("CurrencyNames").add(toLanguageTag(bundle.getID()));
540 addLikelySubtags(metaInfo, "CurrencyNames", bundle.getID());
541 bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN);
542 }
543 }
544 if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) {
545 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID());
546 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) {
547 metaInfo.get("TimeZoneNames").add(toLanguageTag(bundle.getID()));
548 addLikelySubtags(metaInfo, "TimeZoneNames", bundle.getID());
549 bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE);
550 }
551 }
552 if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) {
553 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID());
554 if (!calendarDataMap.isEmpty() || bundle.isRoot()) {
555 metaInfo.get("CalendarData").add(toLanguageTag(bundle.getID()));
556 addLikelySubtags(metaInfo, "CalendarData", bundle.getID());
557 bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN);
558 }
559 }
560 if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
561 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID());
562 if (!formatDataMap.isEmpty() || bundle.isRoot()) {
563 metaInfo.get("FormatData").add(toLanguageTag(bundle.getID()));
564 addLikelySubtags(metaInfo, "FormatData", bundle.getID());
565 bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN);
566 }
567 }
568
569 // For AvailableLocales
570 metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID()));
571 addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID());
572 }
573 addCldrImplicitLocales(metaInfo);
574 bundleGenerator.generateMetaInfo(metaInfo);
575 }
576
577 /**
578 * These are the Locales that are implicitly supported by CLDR.
579 * Adding them explicitly as likelySubtags here, will ensure that
580 * COMPAT locales do not precede them during ResourceBundle search path.
581 */
582 private static void addCldrImplicitLocales(Map<String, SortedSet<String>> metaInfo) {
583 metaInfo.get("LocaleNames").add("zh-Hans-CN");
584 metaInfo.get("LocaleNames").add("zh-Hans-SG");
585 metaInfo.get("LocaleNames").add("zh-Hant-HK");
586 metaInfo.get("LocaleNames").add("zh-Hant-MO");
587 metaInfo.get("LocaleNames").add("zh-Hant-TW");
588 metaInfo.get("CurrencyNames").add("zh-Hans-CN");
589 metaInfo.get("CurrencyNames").add("zh-Hans-SG");
590 metaInfo.get("CurrencyNames").add("zh-Hant-HK");
591 metaInfo.get("CurrencyNames").add("zh-Hant-MO");
592 metaInfo.get("CurrencyNames").add("zh-Hant-TW");
593 metaInfo.get("TimeZoneNames").add("zh-Hans-CN");
594 metaInfo.get("TimeZoneNames").add("zh-Hans-SG");
595 metaInfo.get("TimeZoneNames").add("zh-Hant-HK");
596 metaInfo.get("TimeZoneNames").add("zh-Hant-MO");
597 metaInfo.get("TimeZoneNames").add("zh-Hant-TW");
598 metaInfo.get("TimeZoneNames").add("zh-HK");
599 metaInfo.get("CalendarData").add("zh-Hans-CN");
600 metaInfo.get("CalendarData").add("zh-Hans-SG");
601 metaInfo.get("CalendarData").add("zh-Hant-HK");
602 metaInfo.get("CalendarData").add("zh-Hant-MO");
603 metaInfo.get("CalendarData").add("zh-Hant-TW");
604 metaInfo.get("FormatData").add("zh-Hans-CN");
605 metaInfo.get("FormatData").add("zh-Hans-SG");
606 metaInfo.get("FormatData").add("zh-Hant-HK");
607 metaInfo.get("FormatData").add("zh-Hant-MO");
608 metaInfo.get("FormatData").add("zh-Hant-TW");
609 }
610 static final Map<String, String> aliases = new HashMap<>();
611
612 /**
613 * Translate the aliases into the real entries in the bundle map.
614 */
615 static void handleAliases(Map<String, Object> bundleMap) {
616 Set bundleKeys = bundleMap.keySet();
617 try {
618 for (String key : aliases.keySet()) {
619 String targetKey = aliases.get(key);
620 if (bundleKeys.contains(targetKey)) {
621 bundleMap.putIfAbsent(key, bundleMap.get(targetKey));
622 }
623 }
624 } catch (Exception ex) {
625 Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex);
626 }
627 }
628
629 /*
678 // Shorter string comes first unless either starts with a digit.
679 if (len1 < len2) {
680 return -1;
681 }
682 if (len1 > len2) {
683 return 1;
684 }
685 }
686 return o1.compareTo(o2);
687 }
688
689 private boolean isDigit(char c) {
690 return c >= '0' && c <= '9';
691 }
692 }
693
694 private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) {
695 Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE);
696 for (String key : map.keySet()) {
697 if (key.startsWith(LOCALE_NAME_PREFIX)) {
698 localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key));
699 }
700 }
701 return localeNames;
702 }
703
704 @SuppressWarnings("AssignmentToForLoopParameter")
705 private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names)
706 throws Exception {
707 Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE);
708 for (String key : map.keySet()) {
709 if (key.startsWith(CURRENCY_NAME_PREFIX)) {
710 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key));
711 } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) {
712 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key));
713 }
714 }
715 return currencyNames;
716 }
717
718 private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) {
719 Map<String, Object> names = new HashMap<>();
720
827 "DateTimePatterns",
828 "DateTimePatternChars"
829 };
830
831 private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
832 Map<String, Object> formatData = new LinkedHashMap<>();
833 for (CalendarType calendarType : CalendarType.values()) {
834 if (calendarType == CalendarType.GENERIC) {
835 continue;
836 }
837 String prefix = calendarType.keyElementName();
838 for (String element : FORMAT_DATA_ELEMENTS) {
839 String key = prefix + element;
840 copyIfPresent(map, "java.time." + key, formatData);
841 copyIfPresent(map, key, formatData);
842 }
843 }
844
845 for (String key : map.keySet()) {
846 // Copy available calendar names
847 if (key.startsWith(CLDRConverter.CALENDAR_NAME_PREFIX)) {
848 String type = key.substring(CLDRConverter.CALENDAR_NAME_PREFIX.length());
849 for (CalendarType calendarType : CalendarType.values()) {
850 if (calendarType == CalendarType.GENERIC) {
851 continue;
852 }
853 if (type.equals(calendarType.lname())) {
854 Object value = map.get(key);
855 formatData.put(key, value);
856 String ukey = CLDRConverter.CALENDAR_NAME_PREFIX + calendarType.uname();
857 if (!key.equals(ukey)) {
858 formatData.put(ukey, value);
859 }
860 }
861 }
862 }
863 }
864
865 copyIfPresent(map, "DefaultNumberingSystem", formatData);
866
867 @SuppressWarnings("unchecked")
868 List<String> numberingScripts = (List<String>) map.remove("numberingScripts");
869 if (numberingScripts != null) {
870 for (String script : numberingScripts) {
871 copyIfPresent(map, script + "." + "NumberElements", formatData);
872 }
873 } else {
874 copyIfPresent(map, "NumberElements", formatData);
875 }
876 copyIfPresent(map, "NumberPatterns", formatData);
877 return formatData;
878 }
879
880 private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) {
881 Object value = src.get(key);
882 if (value != null) {
883 dest.put(key, value);
884 }
885 }
886
887 // --- code below here is adapted from java.util.Properties ---
// Character sets for the saveConvert() method that follows. Its comment says
// characters in "specialSaveChars" are written with a preceding slash.
// NOTE(review): presumably the Java set applies when useJava is true and the
// Properties set otherwise -- saveConvert's body is not visible here; confirm.
888 private static final String specialSaveCharsJava = "\"";
889 private static final String specialSaveCharsProperties = "=: \t\r\n\f#!";
890
891 /*
892 * Converts unicodes to encoded \uxxxx
893 * and writes out any of the characters in specialSaveChars
894 * with a preceding slash
895 */
896 static String saveConvert(String theString, boolean useJava) {
|
35 import java.util.*;
36 import java.util.ResourceBundle.Control;
37 import java.util.logging.Level;
38 import java.util.logging.Logger;
39 import java.util.stream.Collectors;
40 import javax.xml.parsers.SAXParser;
41 import javax.xml.parsers.SAXParserFactory;
42 import org.xml.sax.SAXNotRecognizedException;
43 import org.xml.sax.SAXNotSupportedException;
44
45
46 /**
47 * Converts locale data from "Locale Data Markup Language" format to
48 * JRE resource bundle format. LDML is the format used by the Common
49 * Locale Data Repository maintained by the Unicode Consortium.
50 */
51 public class CLDRConverter {
52
// String constants holding the unicode.org URLs of the LDML DTDs
// (named "system id"; where they are matched is not visible in this chunk).
53 static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd";
54 static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd";
55 static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd";
56
57
// Default CLDR base directory. The "Set up path names" section derives the
// LOCAL_* DTD paths and all *_SOURCE_FILE / SOURCE_FILE_DIR values from it.
// The usage text lists "-base dir" as the base directory for CLDR input files.
58 private static String CLDR_BASE = "../CLDR/21.0.1/";
59 static String LOCAL_LDML_DTD;
60 static String LOCAL_SPPL_LDML_DTD;
61 static String LOCAL_BCP47_LDML_DTD;
62 private static String SOURCE_FILE_DIR;
63 private static String SPPL_SOURCE_FILE;
64 private static String NUMBERING_SOURCE_FILE;
65 private static String METAZONES_SOURCE_FILE;
66 private static String LIKELYSUBTAGS_SOURCE_FILE;
67 private static String TIMEZONE_SOURCE_FILE;
// Default output directory; the usage text lists "-o dir" as the output directory.
68 static String DESTINATION_DIR = "build/gensrc";
69
// Key prefixes used in the parsed data maps. extractLocaleNames() stores the
// LOCALE_SEPARATOR entry as "ListCompositionPattern" and the LOCALE_KEYTYPE
// entry as "ListKeyTypePattern"; other LOCALE_NAME_PREFIX keys only lose the prefix.
70 static final String LOCALE_NAME_PREFIX = "locale.displayname.";
71 static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator";
72 static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype";
73 static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key.";
74 static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type.";
75 static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca.";
76 static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol.";
77 static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
78 static final String CALENDAR_NAME_PREFIX = "calendarname.";
79 static final String TIMEZONE_ID_PREFIX = "timezone.id.";
80 static final String ZONE_NAME_PREFIX = "timezone.displayname.";
81 static final String METAZONE_ID_PREFIX = "metazone.id.";
82 static final String PARENT_LOCALE_PREFIX = "parentLocale.";
83
// SAX handlers: the first four are created by parseSupplemental(),
// handlerTimeZone by parseBCP47().
84 private static SupplementDataParseHandler handlerSuppl;
85 private static LikelySubtagsParseHandler handlerLikelySubtags;
86 static NumberingSystemsParseHandler handlerNumbering;
87 static MetaZonesParseHandler handlerMetaZones;
88 static TimeZoneParseHandler handlerTimeZone;
// Assigned a ResourceBundleGenerator after the path names are set up.
89 private static BundleGenerator bundleGenerator;
90
91 // java.base module related
// The usage text lists "-basemodule" and "-baselocales" as the related options;
// when BASE_LOCALES is still empty after argument handling,
// setupBaseLocales("en-US") is called.
92 static boolean isBaseModule = false;
93 static final Set<Locale> BASE_LOCALES = new HashSet<>();
94
95 // "parentLocales" map
// Filled by parseSupplemental(): each value string from
// handlerSuppl.getData("root") is split on spaces into a sorted set.
96 private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
// Default-format ResourceBundle.Control; its use is not visible in this chunk.
97 private static final ResourceBundle.Control defCon =
98 ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
99
100 static enum DraftType {
101 UNCONFIRMED,
102 PROVISIONAL,
103 CONTRIBUTED,
104 APPROVED;
105
106 private static final Map<String, DraftType> map = new HashMap<>();
107 static {
108 for (DraftType dt : values()) {
194
195 case "-help":
196 usage();
197 System.exit(0);
198 break;
199
200 default:
201 throw new RuntimeException();
202 }
203 }
204 } catch (RuntimeException e) {
205 severe("unknown or imcomplete arg(s): " + currentArg);
206 usage();
207 System.exit(1);
208 }
209 }
210
211 // Set up path names
212 LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd";
213 LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd";
214 LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd";
215 SOURCE_FILE_DIR = CLDR_BASE + "/main";
216 SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml";
217 LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml";
218 NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml";
219 METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml";
220 TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml";
221
222 if (BASE_LOCALES.isEmpty()) {
223 setupBaseLocales("en-US");
224 }
225
226 bundleGenerator = new ResourceBundleGenerator();
227
228 // Parse data independent of locales
229 parseSupplemental();
230 parseBCP47();
231
232 List<Bundle> bundles = readBundleList();
233 convertBundles(bundles);
234 convertBundles(addedBundles);
235 }
236
237 private static void usage() {
238 errout("Usage: java CLDRConverter [options]%n"
239 + "\t-help output this usage message and exit%n"
240 + "\t-verbose output information%n"
241 + "\t-draft [contributed | approved | provisional | unconfirmed]%n"
242 + "\t\t draft level for using data (default: contributed)%n"
243 + "\t-base dir base directory for CLDR input files%n"
244 + "\t-basemodule generates bundles that go into java.base module%n"
245 + "\t-baselocales loc(,loc)* locales that go into the base module%n"
246 + "\t-o dir output directory (default: ./build/gensrc)%n"
247 + "\t-o dir output directory (defaut: ./build/gensrc)%n"
248 + "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n");
249 }
250
317 // processed first.
318 if ("root".equals(id)) {
319 retList.add(0, b);
320 } else {
321 retList.add(b);
322 }
323 }
324 }
325 }
326 return retList;
327 }
328
// Parsed CLDR data per locale id; getCLDRBundle() reads and fills this cache.
329 private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
330 // this list will contain additional bundles to be generated for Region dependent Data.
331 private static List<Bundle> addedBundles = new ArrayList<>();
332
// Sorted sets of language tags keyed by name; only "AvailableLocales" is
// registered below, and convertBundles() adds each bundle's tag to it before
// handing the map to bundleGenerator.generateMetaInfo().
333 private static Map<String, SortedSet<String>> metaInfo = new HashMap<>();
334
335 static {
336 // For generating information on supported locales.
337 metaInfo.put("AvailableLocales", new TreeSet<>());
338 }
339
340
// Immutable set of two field names; its use is not visible in this chunk.
341 private static Set<String> calendarDataFields = Set.of("firstDayOfWeek", "minimalDaysInFirstWeek");
342
343 static Map<String, Object> getCLDRBundle(String id) throws Exception {
344 Map<String, Object> bundle = cldrBundles.get(id);
345 if (bundle != null) {
346 return bundle;
347 }
348 File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml");
349 if (!file.exists()) {
350 // Skip if the file doesn't exist.
351 return Collections.emptyMap();
352 }
353
354 info("..... main directory .....");
355 LDMLParseHandler handler = new LDMLParseHandler(id);
356 parseLDMLFile(file, handler);
357
358 bundle = handler.getData();
359 cldrBundles.put(id, bundle);
360 String country = getCountryCode(id);
361 if (country != null) {
362 bundle = handlerSuppl.getData(country);
363 if (bundle != null) {
364 //merge two maps into one map
365 Map<String, Object> temp = cldrBundles.remove(id);
366 bundle.putAll(temp);
367 cldrBundles.put(id, bundle);
368 }
369 }
370 return bundle;
371 }
372
373 // Parsers for data in "supplemental" directory
374 //
375 private static void parseSupplemental() throws Exception {
376 // Parse SupplementalData file and store the information in the HashMap
377 // Calendar information such as firstDay and minDay are stored in
378 // supplementalData.xml as of CLDR1.4. Individual territory is listed
379 // with its ISO 3166 country code while default is listed using UNM49
380 // region and composition numerical code (001 for World.)
381 //
382 // SupplementalData file also provides the "parent" locales which
383 // are othrwise not to be fallen back. Process them here as well.
384 //
385 handlerSuppl = new SupplementDataParseHandler();
386 parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl);
387 Map<String, Object> parentData = handlerSuppl.getData("root");
388 parentData.keySet().forEach(key -> {
389 parentLocalesMap.put(key, new TreeSet(
390 Arrays.asList(((String)parentData.get(key)).split(" "))));
391 });
392
393 // Parse numberingSystems to get digit zero character information.
394 handlerNumbering = new NumberingSystemsParseHandler();
395 parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering);
396
397 // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
398 handlerMetaZones = new MetaZonesParseHandler();
399 parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones);
400
401 // Parse likelySubtags
402 handlerLikelySubtags = new LikelySubtagsParseHandler();
403 parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
404 }
405
406 // Parsers for data in "bcp47" directory
407 //
408 private static void parseBCP47() throws Exception {
409 // Parse timezone
410 handlerTimeZone = new TimeZoneParseHandler();
411 parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone);
412 }
413
414 private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception {
415 info("..... Parsing " + srcfile.getName() + " .....");
416 SAXParserFactory pf = SAXParserFactory.newInstance();
417 pf.setValidating(true);
418 SAXParser parser = pf.newSAXParser();
419 enableFileAccess(parser);
420 parser.parse(srcfile, handler);
421 }
422
423 /**
424 * This method will check if a new region dependent Bundle needs to be
425 * generated for this Locale id and targetMap. New Bundle will be generated
426 * when Locale id has non empty script and country code and targetMap
427 * contains region dependent data. This method will also remove region
428 * dependent data from this targetMap after candidate locales check. E.g. It
429 * will call genRegionDependentBundle() in case of az_Latn_AZ locale and
430 * remove region dependent data from this targetMap so that az_Latn_AZ
431 * bundle will not be created. For az_Cyrl_AZ, new Bundle will be generated
432 * but region dependent data will not be removed from targetMap as its candidate
433 * locales are [az_Cyrl_AZ, az_Cyrl, root], which does not include az_AZ for
434 * fallback.
435 *
436 */
437
438 private static void checkRegionDependentBundle(Map<String, Object> targetMap, String id) {
439 if ((CLDRConverter.getScript(id) != "")
440 && (CLDRConverter.getCountryCode(id) != "")) {
508 for (Bundle bundle : bundles) {
509 // Get the target map, which contains all the data that should be
510 // visible for the bundle's locale
511
512 Map<String, Object> targetMap = bundle.getTargetMap();
513
514 // check if new region DependentBundle needs to be generated for this Locale.
515 checkRegionDependentBundle(targetMap, bundle.getID());
516 EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
517
518 if (bundle.isRoot()) {
519 // Add DateTimePatternChars because CLDR no longer supports localized patterns.
520 targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
521 }
522
523 // Now the map contains just the entries that need to be in the resources bundles.
524 // Go ahead and generate them.
525 if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) {
526 Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, bundle.getID());
527 if (!localeNamesMap.isEmpty() || bundle.isRoot()) {
528 bundleGenerator.generateBundle("util", "LocaleNames", bundle.getJavaID(), true, localeNamesMap, BundleType.OPEN);
529 }
530 }
531 if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) {
532 Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, bundle.getID(), bundle.getCurrencies());
533 if (!currencyNamesMap.isEmpty() || bundle.isRoot()) {
534 bundleGenerator.generateBundle("util", "CurrencyNames", bundle.getJavaID(), true, currencyNamesMap, BundleType.OPEN);
535 }
536 }
537 if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) {
538 Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, bundle.getID());
539 if (!zoneNamesMap.isEmpty() || bundle.isRoot()) {
540 bundleGenerator.generateBundle("util", "TimeZoneNames", bundle.getJavaID(), true, zoneNamesMap, BundleType.TIMEZONE);
541 }
542 }
543 if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) {
544 Map<String, Object> calendarDataMap = extractCalendarData(targetMap, bundle.getID());
545 if (!calendarDataMap.isEmpty() || bundle.isRoot()) {
546 bundleGenerator.generateBundle("util", "CalendarData", bundle.getJavaID(), true, calendarDataMap, BundleType.PLAIN);
547 }
548 }
549 if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
550 Map<String, Object> formatDataMap = extractFormatData(targetMap, bundle.getID());
551 if (!formatDataMap.isEmpty() || bundle.isRoot()) {
552 bundleGenerator.generateBundle("text", "FormatData", bundle.getJavaID(), true, formatDataMap, BundleType.PLAIN);
553 }
554 }
555
556 // For AvailableLocales
557 metaInfo.get("AvailableLocales").add(toLanguageTag(bundle.getID()));
558 addLikelySubtags(metaInfo, "AvailableLocales", bundle.getID());
559 }
560 bundleGenerator.generateMetaInfo(metaInfo);
561 }
562
563 static final Map<String, String> aliases = new HashMap<>();
564
565 /**
566 * Translate the aliases into the real entries in the bundle map.
567 */
568 static void handleAliases(Map<String, Object> bundleMap) {
569 Set bundleKeys = bundleMap.keySet();
570 try {
571 for (String key : aliases.keySet()) {
572 String targetKey = aliases.get(key);
573 if (bundleKeys.contains(targetKey)) {
574 bundleMap.putIfAbsent(key, bundleMap.get(targetKey));
575 }
576 }
577 } catch (Exception ex) {
578 Logger.getLogger(CLDRConverter.class.getName()).log(Level.SEVERE, null, ex);
579 }
580 }
581
582 /*
631 // Shorter string comes first unless either starts with a digit.
632 if (len1 < len2) {
633 return -1;
634 }
635 if (len1 > len2) {
636 return 1;
637 }
638 }
639 return o1.compareTo(o2);
640 }
641
642 private boolean isDigit(char c) {
643 return c >= '0' && c <= '9';
644 }
645 }
646
647 private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) {
648 Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE);
649 for (String key : map.keySet()) {
650 if (key.startsWith(LOCALE_NAME_PREFIX)) {
651 switch (key) {
652 case LOCALE_SEPARATOR:
653 localeNames.put("ListCompositionPattern", map.get(key));
654 break;
655 case LOCALE_KEYTYPE:
656 localeNames.put("ListKeyTypePattern", map.get(key));
657 break;
658 default:
659 localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key));
660 break;
661 }
662 }
663 }
664
665 if (id.equals("root")) {
666 // Add display name pattern, which is not in CLDR
667 localeNames.put("DisplayNamePattern", "{0,choice,0#|1#{1}|2#{1} ({2})}");
668 }
669
670 return localeNames;
671 }
672
673 @SuppressWarnings("AssignmentToForLoopParameter")
674 private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names)
675 throws Exception {
676 Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE);
677 for (String key : map.keySet()) {
678 if (key.startsWith(CURRENCY_NAME_PREFIX)) {
679 currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key));
680 } else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) {
681 currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key));
682 }
683 }
684 return currencyNames;
685 }
686
687 private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) {
688 Map<String, Object> names = new HashMap<>();
689
796 "DateTimePatterns",
797 "DateTimePatternChars"
798 };
799
800 private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
801 Map<String, Object> formatData = new LinkedHashMap<>();
802 for (CalendarType calendarType : CalendarType.values()) {
803 if (calendarType == CalendarType.GENERIC) {
804 continue;
805 }
806 String prefix = calendarType.keyElementName();
807 for (String element : FORMAT_DATA_ELEMENTS) {
808 String key = prefix + element;
809 copyIfPresent(map, "java.time." + key, formatData);
810 copyIfPresent(map, key, formatData);
811 }
812 }
813
814 for (String key : map.keySet()) {
815 // Copy available calendar names
816 if (key.startsWith(CLDRConverter.LOCALE_TYPE_PREFIX_CA)) {
817 String type = key.substring(CLDRConverter.LOCALE_TYPE_PREFIX_CA.length());
818 for (CalendarType calendarType : CalendarType.values()) {
819 if (calendarType == CalendarType.GENERIC) {
820 continue;
821 }
822 if (type.equals(calendarType.lname())) {
823 Object value = map.get(key);
824 String dataKey = key.replace(LOCALE_TYPE_PREFIX_CA,
825 CALENDAR_NAME_PREFIX);
826 formatData.put(dataKey, value);
827 String ukey = CALENDAR_NAME_PREFIX + calendarType.uname();
828 if (!dataKey.equals(ukey)) {
829 formatData.put(ukey, value);
830 }
831 }
832 }
833 }
834 }
835
836 copyIfPresent(map, "DefaultNumberingSystem", formatData);
837
838 @SuppressWarnings("unchecked")
839 List<String> numberingScripts = (List<String>) map.remove("numberingScripts");
840 if (numberingScripts != null) {
841 for (String script : numberingScripts) {
842 copyIfPresent(map, script + "." + "NumberElements", formatData);
843 }
844 } else {
845 copyIfPresent(map, "NumberElements", formatData);
846 }
847 copyIfPresent(map, "NumberPatterns", formatData);
848
849 // put extra number elements for available scripts into formatData, if it is "root"
850 if (id.equals("root")) {
851 handlerNumbering.keySet().stream()
852 .filter(k -> !numberingScripts.contains(k))
853 .forEach(k -> {
854 String[] ne = (String[])map.get("latn.NumberElements");
855 String[] neNew = Arrays.copyOf(ne, ne.length);
856 neNew[4] = handlerNumbering.get(k).substring(0, 1);
857 formatData.put(k + ".NumberElements", neNew);
858 });
859 }
860 return formatData;
861 }
862
863 private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) {
864 Object value = src.get(key);
865 if (value != null) {
866 dest.put(key, value);
867 }
868 }
869
// --- code below here is adapted from java.util.Properties ---
// Contains only the double-quote character.
// NOTE(review): presumably the set saveConvert escapes when useJava is true -- confirm against its body.
private static final String specialSaveCharsJava = "\"";
// Contains '=', ':', space, tab, carriage return, line feed, form feed, '#' and '!'.
// NOTE(review): presumably the set saveConvert escapes for properties-file output -- confirm.
private static final String specialSaveCharsProperties = "=: \t\r\n\f#!";
873
874 /*
 * Converts Unicode characters to their encoded &#92;uxxxx form
 * and writes out any of the characters in specialSaveChars
 * with a preceding slash
878 */
879 static String saveConvert(String theString, boolean useJava) {
|