< prev index next >

make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java

Print this page
rev 51713 : [mq]: 8209167


  52  * Locale Data Repository maintained by the Unicode Consortium.
  53  */
  54 public class CLDRConverter {
  55 
  56     static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd";
  57     static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd";
  58     static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd";
  59 
  60 
  61     private static String CLDR_BASE;
  62     static String LOCAL_LDML_DTD;
  63     static String LOCAL_SPPL_LDML_DTD;
  64     static String LOCAL_BCP47_LDML_DTD;
  65     private static String SOURCE_FILE_DIR;
  66     private static String SPPL_SOURCE_FILE;
  67     private static String SPPL_META_SOURCE_FILE;
  68     private static String NUMBERING_SOURCE_FILE;
  69     private static String METAZONES_SOURCE_FILE;
  70     private static String LIKELYSUBTAGS_SOURCE_FILE;
  71     private static String TIMEZONE_SOURCE_FILE;

  72     static String DESTINATION_DIR = "build/gensrc";
  73 
  74     static final String LOCALE_NAME_PREFIX = "locale.displayname.";
  75     static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator";
  76     static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype";
  77     static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key.";
  78     static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type.";
  79     static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca.";
  80     static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol.";
  81     static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
  82     static final String CALENDAR_NAME_PREFIX = "calendarname.";
  83     static final String CALENDAR_FIRSTDAY_PREFIX = "firstDay.";
  84     static final String CALENDAR_MINDAYS_PREFIX = "minDays.";
  85     static final String TIMEZONE_ID_PREFIX = "timezone.id.";
  86     static final String EXEMPLAR_CITY_PREFIX = "timezone.excity.";
  87     static final String ZONE_NAME_PREFIX = "timezone.displayname.";
  88     static final String METAZONE_ID_PREFIX = "metazone.id.";
  89     static final String PARENT_LOCALE_PREFIX = "parentLocale.";
  90     static final String[] EMPTY_ZONE = {"", "", "", "", "", ""};
  91 
  92     private static SupplementDataParseHandler handlerSuppl;
  93     private static LikelySubtagsParseHandler handlerLikelySubtags;

  94     static SupplementalMetadataParseHandler handlerSupplMeta;
  95     static NumberingSystemsParseHandler handlerNumbering;
  96     static MetaZonesParseHandler handlerMetaZones;
  97     static TimeZoneParseHandler handlerTimeZone;
  98     private static BundleGenerator bundleGenerator;
  99 
 100     // java.base module related
 101     static boolean isBaseModule = false;
 102     static final Set<Locale> BASE_LOCALES = new HashSet<>();
 103 
 104     // "parentLocales" map
 105     private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
 106     private static final ResourceBundle.Control defCon =
 107         ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
 108 
 109     private static final String[] AVAILABLE_TZIDS = TimeZone.getAvailableIDs();
 110     private static String zoneNameTempFile;
 111     private static String tzDataDir;
 112     private static final Map<String, String> canonicalTZMap = new HashMap<>();
 113 


 224                     }
 225                 }
 226             } catch (RuntimeException e) {
 227                 severe("unknown or imcomplete arg(s): " + currentArg);
 228                 usage();
 229                 System.exit(1);
 230             }
 231         }
 232 
 233         // Set up path names
 234         LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd";
 235         LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd";
 236         LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd";
 237         SOURCE_FILE_DIR = CLDR_BASE + "/main";
 238         SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml";
 239         LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml";
 240         NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml";
 241         METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml";
 242         TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml";
 243         SPPL_META_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalMetadata.xml";

 244 
 245         if (BASE_LOCALES.isEmpty()) {
 246             setupBaseLocales("en-US");
 247         }
 248 
 249         bundleGenerator = new ResourceBundleGenerator();
 250 
 251         // Parse data independent of locales
 252         parseSupplemental();
 253         parseBCP47();
 254 
 255         List<Bundle> bundles = readBundleList();
 256         convertBundles(bundles);
 257 
 258         // Generate java.time.format.ZoneName.java
 259         if (isBaseModule) {

 260             generateZoneName();



 261         }
 262     }
 263 
 264     private static void usage() {
 265         errout("Usage: java CLDRConverter [options]%n"
 266                 + "\t-help          output this usage message and exit%n"
 267                 + "\t-verbose       output information%n"
 268                 + "\t-draft [contributed | approved | provisional | unconfirmed]%n"
 269                 + "\t\t       draft level for using data (default: contributed)%n"
 270                 + "\t-base dir      base directory for CLDR input files%n"
 271                 + "\t-basemodule    generates bundles that go into java.base module%n"
 272                 + "\t-baselocales loc(,loc)*      locales that go into the base module%n"
 273                 + "\t-o dir         output directory (default: ./build/gensrc)%n"
 274                 + "\t-zntempfile    template file for java.time.format.ZoneName.java%n"
 275                 + "\t-tzdatadir     tzdata directory for java.time.format.ZoneName.java%n"
 276                 + "\t-utf8          use UTF-8 rather than \\uxxxx (for debug)%n");
 277     }
 278 
 279     static void info(String fmt, Object... args) {
 280         if (verbose) {


 415                 parentLocalesMap.put(key, new TreeSet(
 416                     Arrays.asList(((String)parentData.get(key)).split(" "))));
 417             });
 418 
 419         // Parse numberingSystems to get digit zero character information.
 420         handlerNumbering = new NumberingSystemsParseHandler();
 421         parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering);
 422 
 423         // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
 424         handlerMetaZones = new MetaZonesParseHandler();
 425         parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones);
 426 
 427         // Parse likelySubtags
 428         handlerLikelySubtags = new LikelySubtagsParseHandler();
 429         parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
 430 
 431         // Parse supplementalMetadata
 432         // Currently interested in deprecated time zone ids and language aliases.
 433         handlerSupplMeta = new SupplementalMetadataParseHandler();
 434         parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta);




 435     }
 436 
 437     // Parsers for data in "bcp47" directory
 438     //
 439     private static void parseBCP47() throws Exception {
 440         // Parse timezone
 441         handlerTimeZone = new TimeZoneParseHandler();
 442         parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone);
 443 
 444         // canonical tz name map
 445         // alias -> primary
 446         handlerTimeZone.getData().forEach((k, v) -> {
 447             String[] ids = ((String)v).split("\\s");
 448             for (int i = 1; i < ids.length; i++) {
 449                 canonicalTZMap.put(ids[i], ids[0]);
 450             }
 451         });
 452     }
 453 
 454     private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception {


1071         try {
1072             return Files.walk(Paths.get(tzDataDir), 1)
1073                 .filter(p -> !Files.isDirectory(p))
1074                 .flatMap(CLDRConverter::extractLinks)
1075                 .sorted();
1076         } catch (IOException e) {
1077             throw new UncheckedIOException(e);
1078         }
1079     }
1080 
1081     private static Stream<String> extractLinks(Path tzFile) {
1082         try {
1083             return Files.lines(tzFile)
1084                 .filter(l -> l.startsWith("Link"))
1085                 .map(l -> l.replaceFirst("^Link[\\s]+(\\S+)\\s+(\\S+).*",
1086                                          "        \"$2\", \"$1\","));
1087         } catch (IOException e) {
1088             throw new UncheckedIOException(e);
1089         }
1090     }






































1091 }


  52  * Locale Data Repository maintained by the Unicode Consortium.
  53  */
  54 public class CLDRConverter {
  55 
  56     static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd";
  57     static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd";
  58     static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd";
  59 
  60 
  61     private static String CLDR_BASE;
  62     static String LOCAL_LDML_DTD;
  63     static String LOCAL_SPPL_LDML_DTD;
  64     static String LOCAL_BCP47_LDML_DTD;
  65     private static String SOURCE_FILE_DIR;
  66     private static String SPPL_SOURCE_FILE;
  67     private static String SPPL_META_SOURCE_FILE;
  68     private static String NUMBERING_SOURCE_FILE;
  69     private static String METAZONES_SOURCE_FILE;
  70     private static String LIKELYSUBTAGS_SOURCE_FILE;
  71     private static String TIMEZONE_SOURCE_FILE;
  72     private static String WINZONES_SOURCE_FILE;
  73     static String DESTINATION_DIR = "build/gensrc";
  74 
  75     static final String LOCALE_NAME_PREFIX = "locale.displayname.";
  76     static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator";
  77     static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype";
  78     static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key.";
  79     static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type.";
  80     static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca.";
  81     static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol.";
  82     static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
  83     static final String CALENDAR_NAME_PREFIX = "calendarname.";
  84     static final String CALENDAR_FIRSTDAY_PREFIX = "firstDay.";
  85     static final String CALENDAR_MINDAYS_PREFIX = "minDays.";
  86     static final String TIMEZONE_ID_PREFIX = "timezone.id.";
  87     static final String EXEMPLAR_CITY_PREFIX = "timezone.excity.";
  88     static final String ZONE_NAME_PREFIX = "timezone.displayname.";
  89     static final String METAZONE_ID_PREFIX = "metazone.id.";
  90     static final String PARENT_LOCALE_PREFIX = "parentLocale.";
  91     static final String[] EMPTY_ZONE = {"", "", "", "", "", ""};
  92 
  93     private static SupplementDataParseHandler handlerSuppl;
  94     private static LikelySubtagsParseHandler handlerLikelySubtags;
  95     private static WinZonesParseHandler handlerWinZones;
  96     static SupplementalMetadataParseHandler handlerSupplMeta;
  97     static NumberingSystemsParseHandler handlerNumbering;
  98     static MetaZonesParseHandler handlerMetaZones;
  99     static TimeZoneParseHandler handlerTimeZone;
 100     private static BundleGenerator bundleGenerator;
 101 
 102     // java.base module related
 103     static boolean isBaseModule = false;
 104     static final Set<Locale> BASE_LOCALES = new HashSet<>();
 105 
 106     // "parentLocales" map
 107     private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
 108     private static final ResourceBundle.Control defCon =
 109         ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
 110 
 111     private static final String[] AVAILABLE_TZIDS = TimeZone.getAvailableIDs();
 112     private static String zoneNameTempFile;
 113     private static String tzDataDir;
 114     private static final Map<String, String> canonicalTZMap = new HashMap<>();
 115 


 226                     }
 227                 }
 228             } catch (RuntimeException e) {
 229                 severe("unknown or imcomplete arg(s): " + currentArg);
 230                 usage();
 231                 System.exit(1);
 232             }
 233         }
 234 
 235         // Set up path names
 236         LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd";
 237         LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd";
 238         LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd";
 239         SOURCE_FILE_DIR = CLDR_BASE + "/main";
 240         SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml";
 241         LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml";
 242         NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml";
 243         METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml";
 244         TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml";
 245         SPPL_META_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalMetadata.xml";
 246         WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml";
 247 
 248         if (BASE_LOCALES.isEmpty()) {
 249             setupBaseLocales("en-US");
 250         }
 251 
 252         bundleGenerator = new ResourceBundleGenerator();
 253 
 254         // Parse data independent of locales
 255         parseSupplemental();
 256         parseBCP47();
 257 
 258         List<Bundle> bundles = readBundleList();
 259         convertBundles(bundles);
 260 

 261         if (isBaseModule) {
 262             // Generate java.time.format.ZoneName.java
 263             generateZoneName();
 264 
 265             // Generate Windows tzmappings
 266             generateWindowsTZMappings();
 267         }
 268     }
 269 
 270     private static void usage() {
 271         errout("Usage: java CLDRConverter [options]%n"
 272                 + "\t-help          output this usage message and exit%n"
 273                 + "\t-verbose       output information%n"
 274                 + "\t-draft [contributed | approved | provisional | unconfirmed]%n"
 275                 + "\t\t       draft level for using data (default: contributed)%n"
 276                 + "\t-base dir      base directory for CLDR input files%n"
 277                 + "\t-basemodule    generates bundles that go into java.base module%n"
 278                 + "\t-baselocales loc(,loc)*      locales that go into the base module%n"
 279                 + "\t-o dir         output directory (default: ./build/gensrc)%n"
 280                 + "\t-zntempfile    template file for java.time.format.ZoneName.java%n"
 281                 + "\t-tzdatadir     tzdata directory for java.time.format.ZoneName.java%n"
 282                 + "\t-utf8          use UTF-8 rather than \\uxxxx (for debug)%n");
 283     }
 284 
 285     static void info(String fmt, Object... args) {
 286         if (verbose) {


 421                 parentLocalesMap.put(key, new TreeSet(
 422                     Arrays.asList(((String)parentData.get(key)).split(" "))));
 423             });
 424 
 425         // Parse numberingSystems to get digit zero character information.
 426         handlerNumbering = new NumberingSystemsParseHandler();
 427         parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering);
 428 
 429         // Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
 430         handlerMetaZones = new MetaZonesParseHandler();
 431         parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones);
 432 
 433         // Parse likelySubtags
 434         handlerLikelySubtags = new LikelySubtagsParseHandler();
 435         parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
 436 
 437         // Parse supplementalMetadata
 438         // Currently interested in deprecated time zone ids and language aliases.
 439         handlerSupplMeta = new SupplementalMetadataParseHandler();
 440         parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta);
 441 
 442         // Parse windowsZones
 443         handlerWinZones = new WinZonesParseHandler();
 444         parseLDMLFile(new File(WINZONES_SOURCE_FILE), handlerWinZones);
 445     }
 446 
 447     // Parsers for data in "bcp47" directory
 448     //
 449     private static void parseBCP47() throws Exception {
 450         // Parse timezone
 451         handlerTimeZone = new TimeZoneParseHandler();
 452         parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone);
 453 
 454         // canonical tz name map
 455         // alias -> primary
 456         handlerTimeZone.getData().forEach((k, v) -> {
 457             String[] ids = ((String)v).split("\\s");
 458             for (int i = 1; i < ids.length; i++) {
 459                 canonicalTZMap.put(ids[i], ids[0]);
 460             }
 461         });
 462     }
 463 
 464     private static void parseLDMLFile(File srcfile, AbstractLDMLHandler handler) throws Exception {


1081         try {
1082             return Files.walk(Paths.get(tzDataDir), 1)
1083                 .filter(p -> !Files.isDirectory(p))
1084                 .flatMap(CLDRConverter::extractLinks)
1085                 .sorted();
1086         } catch (IOException e) {
1087             throw new UncheckedIOException(e);
1088         }
1089     }
1090 
1091     private static Stream<String> extractLinks(Path tzFile) {
1092         try {
1093             return Files.lines(tzFile)
1094                 .filter(l -> l.startsWith("Link"))
1095                 .map(l -> l.replaceFirst("^Link[\\s]+(\\S+)\\s+(\\S+).*",
1096                                          "        \"$2\", \"$1\","));
1097         } catch (IOException e) {
1098             throw new UncheckedIOException(e);
1099         }
1100     }
1101 
1102     // Generate tzmappings for Windows. The format is:
1103     //
1104     // (Windows Zone Name):(REGION):(Java TZID)
1105     //
1106     // where:
1107     //   Windows Zone Name: arbitrary time zone name string used in Windows
1108     //   REGION: ISO3166 or UN M.49 code
1109     //   Java TZID: Java's time zone ID
1110     //
1111     // Note: the entries are alphabetically sorted, *except* the "world" region
1112     // code, i.e., "001". It should be the last entry for the same windows time
1113     // zone name entries. (cf. TimeZone_md.c)
1114     private static void generateWindowsTZMappings() throws Exception {
1115         Files.createDirectories(Paths.get(DESTINATION_DIR, "windows", "conf"));
1116         Files.write(Paths.get(DESTINATION_DIR, "windows", "conf", "tzmappings"),
1117             handlerWinZones.keySet().stream()
1118                 .map(k -> k + ":" + handlerWinZones.get(k) + ":")
1119                 .sorted(new Comparator<String>() {
1120                     public int compare(String t1, String t2) {
1121                         String[] s1 = t1.split(":");
1122                         String[] s2 = t2.split(":");
1123                         if (s1[0].equals(s2[0])) {
1124                             if (s1[1].equals("001")) {
1125                                 return 1;
1126                             } else if (s2[1].equals("001")) {
1127                                 return -1;
1128                             } else {
1129                                 return s1[1].compareTo(s2[1]);
1130                             }
1131                         } else {
1132                             return s1[0].compareTo(s2[0]);
1133                         }
1134                     }
1135                 })
1136                 .collect(Collectors.toList()),
1137             StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
1138     }
1139 }
< prev index next >