1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.cldrconverter;
  27 
  28 import java.io.File;
  29 import java.io.IOException;
  30 import java.util.ArrayList;
  31 import java.util.HashMap;
  32 import java.util.List;
  33 import java.util.Locale;
  34 import java.util.Map;
  35 import org.xml.sax.Attributes;
  36 import org.xml.sax.InputSource;
  37 import org.xml.sax.SAXException;
  38 
  39 /**
  40  * Handles parsing of files in Locale Data Markup Language and produces a map
  41  * that uses the keys and values of JRE locale data.
  42  */
  43 class LDMLParseHandler extends AbstractLDMLHandler<Object> {
  44     private String defaultNumberingSystem;
  45     private String currentNumberingSystem = "";
  46     private CalendarType currentCalendarType;
  47     private String zoneNameStyle; // "long" or "short" for time zone names
  48     private String zonePrefix;
  49     private final String id;
  50 
  51     LDMLParseHandler(String id) {
  52         this.id = id;
  53     }
  54 
  55     @Override
  56     public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException {
  57         // avoid HTTP traffic to unicode.org
  58         if (systemID.startsWith(CLDRConverter.LDML_DTD_SYSTEM_ID)) {
  59             return new InputSource((new File(CLDRConverter.LOCAL_LDML_DTD)).toURI().toString());
  60         }
  61         return null;
  62     }
  63 
  64     @Override
  65     public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
  66         switch (qName) {
  67         //
  68         // Generic information
  69         //
  70         case "identity":
  71             // ignore this element - it has language and territory elements that aren't locale data
  72             pushIgnoredContainer(qName);
  73             break;
  74         case "language":
  75             // for LocaleNames
  76             // copy string
  77             pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type"));
  78             break;
  79         case "script":
  80             // for LocaleNames
  81             // copy string
  82             pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type"));
  83             break;
  84         case "territory":
  85             // for LocaleNames
  86             // copy string
  87             pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type"));
  88             break;
  89 
  90         //
  91         // Currency information
  92         //
  93         case "currency":
  94             // for CurrencyNames
  95             // stash away "type" value for nested <symbol>
  96             pushKeyContainer(qName, attributes, attributes.getValue("type"));
  97             break;
  98         case "symbol":
  99             // for CurrencyNames
 100             // need to get the key from the containing <currency> element
 101             pushStringEntry(qName, attributes, CLDRConverter.CURRENCY_SYMBOL_PREFIX + getContainerKey());
 102             break;
 103         case "displayName":
 104             // for CurrencyNames
 105             // need to get the key from the containing <currency> element
 106             // ignore if is has "count" attribute
 107             String containerKey = getContainerKey();
 108             if (containerKey != null && attributes.getValue("count") == null) {
 109                 pushStringEntry(qName, attributes,
 110                                 CLDRConverter.CURRENCY_NAME_PREFIX + containerKey.toLowerCase(Locale.ROOT),
 111                                 attributes.getValue("type"));
 112             } else {
 113                 pushIgnoredContainer(qName);
 114             }
 115             break;
 116 
 117         //
 118         // Calendar information
 119         //
 120         case "calendar":
 121             {
 122                 // mostly for FormatData (CalendarData items firstDay and minDays are also nested)
 123                 // use only if it's supported by java.util.Calendar.
 124                 String calendarName = attributes.getValue("type");
 125                 currentCalendarType = CalendarType.forName(calendarName);
 126                 if (currentCalendarType != null) {
 127                     pushContainer(qName, attributes);
 128                 } else {
 129                     pushIgnoredContainer(qName);
 130                 }
 131             }
 132             break;
 133         case "monthContext":
 134             {
 135                 // for FormatData
 136                 // need to keep stand-alone and format, to allow for inheritance in CLDR
 137                 String type = attributes.getValue("type");
 138                 if ("stand-alone".equals(type) || "format".equals(type)) {
 139                     pushKeyContainer(qName, attributes, type);
 140                 } else {
 141                     pushIgnoredContainer(qName);
 142                 }
 143             }
 144             break;
 145         case "monthWidth":
 146             {
 147                 // for FormatData
 148                 // create string array for the two types that the JRE knows
 149                 // keep info about the context type so we can sort out inheritance later
 150                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 151                 switch (attributes.getValue("type")) {
 152                 case "wide":
 153                     pushStringArrayEntry(qName, attributes, prefix + "MonthNames/" + getContainerKey(), 13);
 154                     break;
 155                 case "abbreviated":
 156                     pushStringArrayEntry(qName, attributes, prefix + "MonthAbbreviations/" + getContainerKey(), 13);
 157                     break;
 158                 default:
 159                     pushIgnoredContainer(qName);
 160                     break;
 161                 }
 162             }
 163             break;
 164         case "month":
 165             // for FormatData
 166             // add to string array entry of monthWidth element
 167             pushStringArrayElement(qName, attributes, Integer.parseInt(attributes.getValue("type")) - 1);
 168             break;
 169         case "dayContext":
 170             {
 171                 // for FormatData
 172                 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR
 173                 String type = attributes.getValue("type");
 174                 if ("stand-alone".equals(type) || "format".equals(type)) {
 175                     pushKeyContainer(qName, attributes, type);
 176                 } else {
 177                     pushIgnoredContainer(qName);
 178                 }
 179             }
 180             break;
 181         case "dayWidth":
 182             {
 183                 // for FormatData
 184                 // create string array for the two types that the JRE knows
 185                 // keep info about the context type so we can sort out inheritance later
 186                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 187                 switch (attributes.getValue("type")) {
 188                 case "wide":
 189                     pushStringArrayEntry(qName, attributes, prefix + "DayNames/" + getContainerKey(), 7);
 190                     break;
 191                 case "abbreviated":
 192                     pushStringArrayEntry(qName, attributes, prefix + "DayAbbreviations/" + getContainerKey(), 7);
 193                     break;
 194                 default:
 195                     pushIgnoredContainer(qName);
 196                     break;
 197                 }
 198             }
 199             break;
 200         case "day":
 201             // for FormatData
 202             // add to string array entry of monthWidth element
 203             pushStringArrayElement(qName, attributes, Integer.parseInt(DAY_OF_WEEK_MAP.get(attributes.getValue("type"))) - 1);
 204             break;
 205         case "dayPeriodContext":
 206             // for FormatData
 207             // need to keep stand-alone and format, to allow for multiple inheritance in CLDR
 208             // for FormatData
 209             // need to keep stand-alone and format, to allow for multiple inheritance in CLDR
 210             {
 211                 String type = attributes.getValue("type");
 212                 if ("stand-alone".equals(type) || "format".equals(type)) {
 213                     pushKeyContainer(qName, attributes, type);
 214                 } else {
 215                     pushIgnoredContainer(qName);
 216                 }
 217             }
 218             break;
 219         case "dayPeriodWidth":
 220             // for FormatData
 221             // create string array entry for am/pm. only keeping wide
 222             if ("wide".equals(attributes.getValue("type"))) {
 223                 pushStringArrayEntry(qName, attributes, "AmPmMarkers/" + getContainerKey(), 2);
 224             } else {
 225                 pushIgnoredContainer(qName);
 226             }
 227             break;
 228         case "dayPeriod":
 229             // for FormatData
 230             // add to string array entry of AmPmMarkers element
 231             switch (attributes.getValue("type")) {
 232             case "am":
 233                 pushStringArrayElement(qName, attributes, 0);
 234                 break;
 235             case "pm":
 236                 pushStringArrayElement(qName, attributes, 1);
 237                 break;
 238             default:
 239                 pushIgnoredContainer(qName);
 240                 break;
 241             }
 242             break;
 243         case "eraNames":
 244             // CLDR era names are inconsistent in terms of their lengths. For example,
 245             // the full names of Japanese imperial eras are eraAbbr, while the full names
 246             // of the Julian eras are eraNames.
 247             if (currentCalendarType == null) {
 248                 assert currentContainer instanceof IgnoredContainer;
 249                 pushIgnoredContainer(qName);
 250             } else {
 251                 String key = currentCalendarType.keyElementName() + "long.Eras"; // for now
 252                 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName));
 253             }
 254             break;
 255         case "eraAbbr":
 256             // for FormatData
 257             // create string array entry
 258             if (currentCalendarType == null) {
 259                 assert currentContainer instanceof IgnoredContainer;
 260                 pushIgnoredContainer(qName);
 261             } else {
 262                 String key = currentCalendarType.keyElementName() + "Eras";
 263                 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName));
 264             }
 265             break;
 266         case "eraNarrow":
 267             // mainly used for the Japanese imperial calendar
 268             if (currentCalendarType == null) {
 269                 assert currentContainer instanceof IgnoredContainer;
 270                 pushIgnoredContainer(qName);
 271             } else {
 272                 String key = currentCalendarType.keyElementName() + "short.Eras";
 273                 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName));
 274             }
 275             break;
 276         case "era":
 277             // for FormatData
 278             // add to string array entry of eraAbbr element
 279             if (currentCalendarType == null) {
 280                 assert currentContainer instanceof IgnoredContainer;
 281                 pushIgnoredContainer(qName);
 282             } else {
 283                 int index = Integer.parseInt(attributes.getValue("type"));
 284                 index = currentCalendarType.normalizeEraIndex(index);
 285                 if (index >= 0) {
 286                     pushStringArrayElement(qName, attributes, index);
 287                 } else {
 288                     pushIgnoredContainer(qName);
 289                 }
 290                 if (currentContainer.getParent() == null) {
 291                     throw new InternalError("currentContainer: null parent");
 292                 }
 293             }
 294             break;
 295 
 296         //
 297         // Time zone names
 298         //
 299         case "timeZoneNames":
 300             pushContainer(qName, attributes);
 301             break;
 302         case "zone":
 303             {
 304                 String zone = attributes.getValue("type");
 305                 zonePrefix = CLDRConverter.TIMEZONE_ID_PREFIX;
 306                 put(zonePrefix + zone, new HashMap<String, String>());
 307                 pushKeyContainer(qName, attributes, zone);
 308             }
 309             break;
 310         case "metazone":
 311             {
 312                 String zone = attributes.getValue("type");
 313                 zonePrefix = CLDRConverter.METAZONE_ID_PREFIX;
 314                 put(zonePrefix + zone, new HashMap<String, String>());
 315                 pushKeyContainer(qName, attributes, zone);
 316             }
 317             break;
 318         case "long":
 319             zoneNameStyle = "long";
 320             pushContainer(qName, attributes);
 321             break;
 322         case "short":
 323             zoneNameStyle = "short";
 324             pushContainer(qName, attributes);
 325             break;
 326         case "generic": // not used in JDK
 327             pushIgnoredContainer(qName);
 328             break;
 329         case "standard": // standard time
 330             pushStringEntry(qName, attributes, CLDRConverter.TIMEZONE_NAME_PREFIX + "standard." + zoneNameStyle);
 331             break;
 332         case "daylight":
 333             pushStringEntry(qName, attributes, CLDRConverter.TIMEZONE_NAME_PREFIX + "daylight." + zoneNameStyle);
 334             break;
 335         case "exemplarCity":
 336             pushIgnoredContainer(qName);
 337             break;
 338 
 339         //
 340         // Number format information
 341         //
 342         case "decimalFormatLength":
 343             if (attributes.getValue("type") == null) {
 344                 // skipping type="short" data
 345                 // for FormatData
 346                 // copy string for later assembly into NumberPatterns
 347                 pushStringEntry(qName, attributes, "NumberPatterns/decimal");
 348             } else {
 349                 pushIgnoredContainer(qName);
 350             }
 351             break;
 352         case "currencyFormat":
 353             // for FormatData
 354             // copy string for later assembly into NumberPatterns
 355             pushStringEntry(qName, attributes, "NumberPatterns/currency");
 356             break;
 357         case "percentFormat":
 358             // for FormatData
 359             // copy string for later assembly into NumberPatterns
 360             pushStringEntry(qName, attributes, "NumberPatterns/percent");
 361             break;
 362         case "defaultNumberingSystem":
 363             // default numbering system if multiple numbering systems are used.
 364             pushStringEntry(qName, attributes, "DefaultNumberingSystem");
 365             break;
 366         case "symbols":
 367             // for FormatData
 368             // look up numberingSystems
 369             symbols: {
 370                 String script = attributes.getValue("numberSystem");
 371                 if (script == null) {
 372                     // Has no script. Just ignore.
 373                     pushIgnoredContainer(qName);
 374                     break;
 375                 }
 376 
 377                 // Use keys as <script>."NumberElements/<symbol>"
 378                 currentNumberingSystem = script + ".";
 379                 String digits = CLDRConverter.handlerNumbering.get(script);
 380                 if (digits == null) {
 381                     throw new InternalError("null digits for " + script);
 382                 }
 383                 if (Character.isSurrogate(digits.charAt(0))) {
 384                     // DecimalFormatSymbols doesn't support supplementary characters as digit zero.
 385                     pushIgnoredContainer(qName);
 386                     break;
 387                 }
 388                 // in case digits are in the reversed order, reverse back the order.
 389                 if (digits.charAt(0) > digits.charAt(digits.length() - 1)) {
 390                     StringBuilder sb = new StringBuilder(digits);
 391                     digits = sb.reverse().toString();
 392                 }
 393                 // Check if the order is sequential.
 394                 char c0 = digits.charAt(0);
 395                 for (int i = 1; i < digits.length(); i++) {
 396                     if (digits.charAt(i) != c0 + i) {
 397                         pushIgnoredContainer(qName);
 398                         break symbols;
 399                     }
 400                 }
 401                 @SuppressWarnings("unchecked")
 402                 List<String> numberingScripts = (List<String>) get("numberingScripts");
 403                 if (numberingScripts == null) {
 404                     numberingScripts = new ArrayList<>();
 405                     put("numberingScripts", numberingScripts);
 406                 }
 407                 numberingScripts.add(script);
 408                 put(currentNumberingSystem + "NumberElements/zero", digits.substring(0, 1));
 409                 pushContainer(qName, attributes);
 410             }
 411             break;
 412         case "decimal":
 413             // for FormatData
 414             // copy string for later assembly into NumberElements
 415             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/decimal");
 416             break;
 417         case "group":
 418             // for FormatData
 419             // copy string for later assembly into NumberElements
 420             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/group");
 421             break;
 422         case "list":
 423             // for FormatData
 424             // copy string for later assembly into NumberElements
 425             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/list");
 426             break;
 427         case "percentSign":
 428             // for FormatData
 429             // copy string for later assembly into NumberElements
 430             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/percent");
 431             break;
 432         case "nativeZeroDigit":
 433             // for FormatData
 434             // copy string for later assembly into NumberElements
 435             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/zero");
 436             break;
 437         case "patternDigit":
 438             // for FormatData
 439             // copy string for later assembly into NumberElements
 440             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/pattern");
 441             break;
 442         case "plusSign":
 443             // TODO: DecimalFormatSymbols doesn't support plusSign
 444             pushIgnoredContainer(qName);
 445             break;
 446         case "minusSign":
 447             // for FormatData
 448             // copy string for later assembly into NumberElements
 449             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/minus");
 450             break;
 451         case "exponential":
 452             // for FormatData
 453             // copy string for later assembly into NumberElements
 454             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/exponential");
 455             break;
 456         case "perMille":
 457             // for FormatData
 458             // copy string for later assembly into NumberElements
 459             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/permille");
 460             break;
 461         case "infinity":
 462             // for FormatData
 463             // copy string for later assembly into NumberElements
 464             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/infinity");
 465             break;
 466         case "nan":
 467             // for FormatData
 468             // copy string for later assembly into NumberElements
 469             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/nan");
 470             break;
 471         case "timeFormatLength":
 472             {
 473                 // for FormatData
 474                 // copy string for later assembly into DateTimePatterns
 475                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 476                 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-time");
 477             }
 478             break;
 479         case "dateFormatLength":
 480             {
 481                 // for FormatData
 482                 // copy string for later assembly into DateTimePatterns
 483                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 484                 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-date");
 485             }
 486             break;
 487         case "dateTimeFormat":
 488             {
 489                 // for FormatData
 490                 // copy string for later assembly into DateTimePatterns
 491                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 492                 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/date-time");
 493             }
 494             break;
 495         case "localizedPatternChars":
 496             {
 497                 // for FormatData
 498                 // copy string for later adaptation to JRE use
 499                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 500                 pushStringEntry(qName, attributes, prefix + "DateTimePatternChars");
 501             }
 502             break;
 503 
 504         default:
 505             // treat anything else as a container
 506             pushContainer(qName, attributes);
 507             break;
 508         }
 509     }
 510 
 511     @Override
 512     public void endElement(String uri, String localName, String qName) throws SAXException {
 513         assert qName.equals(currentContainer.getqName()) : "current=" + currentContainer.getqName() + ", param=" + qName;
 514         switch (qName) {
 515         case "calendar":
 516             assert !(currentContainer instanceof Entry);
 517             currentCalendarType = null;
 518             break;
 519 
 520         case "defaultNumberingSystem":
 521             if (currentContainer instanceof StringEntry) {
 522                 defaultNumberingSystem = ((StringEntry) currentContainer).getValue();
 523                 assert defaultNumberingSystem != null;
 524                 put(((StringEntry) currentContainer).getKey(), defaultNumberingSystem);
 525             } else {
 526                 defaultNumberingSystem = null;
 527             }
 528             break;
 529 
 530         case "timeZoneNames":
 531             zonePrefix = null;
 532             break;
 533         case "standard":
 534         case "daylight":
 535             if (zonePrefix != null && (currentContainer instanceof Entry)) {
 536                 @SuppressWarnings("unchecked")
 537                 Map<String, String> valmap = (Map<String, String>) get(zonePrefix + getContainerKey());
 538                 Entry<?> entry = (Entry<?>) currentContainer;
 539                 valmap.put(entry.getKey(), (String) entry.getValue());
 540             }
 541             break;
 542         default:
 543             if (currentContainer instanceof Entry) {
 544                 Entry<?> entry = (Entry<?>) currentContainer;
 545                 Object value = entry.getValue();
 546                 if (value != null) {
 547                     put(entry.getKey(), value);
 548                 }
 549             }
 550         }
 551         currentContainer = currentContainer.getParent();
 552     }
 553 }