1 /*
   2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.cldrconverter;
  27 
  28 import java.io.File;
  29 import java.io.IOException;
  30 import java.util.ArrayList;
  31 import java.util.HashMap;
  32 import java.util.List;
  33 import java.util.Locale;
  34 import java.util.Map;
  35 import org.xml.sax.Attributes;
  36 import org.xml.sax.InputSource;
  37 import org.xml.sax.SAXException;
  38 
  39 /**
  40  * Handles parsing of files in Locale Data Markup Language and produces a map
  41  * that uses the keys and values of JRE locale data.
  42  */
  43 class LDMLParseHandler extends AbstractLDMLHandler<Object> {
  44     private String defaultNumberingSystem;
  45     private String currentNumberingSystem = "";
  46     private CalendarType currentCalendarType;
  47     private String zoneNameStyle; // "long" or "short" for time zone names
  48     private String zonePrefix;
  49     private final String id;
  50 
  51     LDMLParseHandler(String id) {
  52         this.id = id;
  53     }
  54 
  55     @Override
  56     public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException {
  57         // avoid HTTP traffic to unicode.org
  58         if (systemID.startsWith(CLDRConverter.LDML_DTD_SYSTEM_ID)) {
  59             return new InputSource((new File(CLDRConverter.LOCAL_LDML_DTD)).toURI().toString());
  60         }
  61         return null;
  62     }
  63 
  64     @Override
  65     public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
  66         switch (qName) {
  67         //
  68         // Generic information
  69         //
  70         case "identity":
  71             // ignore this element - it has language and territory elements that aren't locale data
  72             pushIgnoredContainer(qName);
  73             break;
  74         case "type":
  75             if ("calendar".equals(attributes.getValue("key"))) {
  76                 pushStringEntry(qName, attributes, CLDRConverter.CALENDAR_NAME_PREFIX + attributes.getValue("type"));
  77             } else {
  78                 pushIgnoredContainer(qName);
  79             }
  80             break;
  81         case "language":
  82             // for LocaleNames
  83             // copy string
  84             pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type"));
  85             break;
  86         case "script":
  87             // for LocaleNames
  88             // copy string
  89             pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type"));
  90             break;
  91         case "territory":
  92             // for LocaleNames
  93             // copy string
  94             pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type"));
  95             break;
  96 
  97         //
  98         // Currency information
  99         //
 100         case "currency":
 101             // for CurrencyNames
 102             // stash away "type" value for nested <symbol>
 103             pushKeyContainer(qName, attributes, attributes.getValue("type"));
 104             break;
 105         case "symbol":
 106             // for CurrencyNames
 107             // need to get the key from the containing <currency> element
 108             pushStringEntry(qName, attributes, CLDRConverter.CURRENCY_SYMBOL_PREFIX
 109                                                + getContainerKey());
 110             break;
 111 
 112         // Calendar or currency
 113         case "displayName":
 114             {
 115                 if (currentCalendarType != null) {
 116                     pushStringEntry(qName, attributes,
 117                             currentCalendarType.keyElementName() + "field." + getContainerKey());
 118                 } else {
 119                     // for CurrencyNames
 120                     // need to get the key from the containing <currency> element
 121                     // ignore if is has "count" attribute
 122                     String containerKey = getContainerKey();
 123                     if (containerKey != null && attributes.getValue("count") == null) {
 124                         pushStringEntry(qName, attributes,
 125                                         CLDRConverter.CURRENCY_NAME_PREFIX
 126                                         + containerKey.toLowerCase(Locale.ROOT),
 127                                         attributes.getValue("type"));
 128                     } else {
 129                         pushIgnoredContainer(qName);
 130                     }
 131                 }
 132             }
 133             break;
 134 
 135         //
 136         // Calendar information
 137         //
 138         case "calendar":
 139             {
 140                 // mostly for FormatData (CalendarData items firstDay and minDays are also nested)
 141                 // use only if it's supported by java.util.Calendar.
 142                 String calendarName = attributes.getValue("type");
 143                 currentCalendarType = CalendarType.forName(calendarName);
 144                 if (currentCalendarType != null) {
 145                     pushContainer(qName, attributes);
 146                 } else {
 147                     pushIgnoredContainer(qName);
 148                 }
 149             }
 150             break;
 151         case "fields":
 152             if (currentCalendarType != null) {
 153                 pushContainer(qName, attributes);
 154             } else {
 155                 pushIgnoredContainer(qName);
 156             }
 157             break;
 158         case "field":
 159             {
 160                 String type = attributes.getValue("type");
 161                 switch (type) {
 162                 case "era":
 163                 case "year":
 164                 case "month":
 165                 case "week":
 166                 case "weekday":
 167                 case "dayperiod":
 168                 case "hour":
 169                 case "minute":
 170                 case "second":
 171                 case "zone":
 172                     pushKeyContainer(qName, attributes, type);
 173                     break;
 174                 default:
 175                     pushIgnoredContainer(qName);
 176                     break;
 177                 }
 178             }
 179             break;
 180         case "monthContext":
 181             {
 182                 // for FormatData
 183                 // need to keep stand-alone and format, to allow for inheritance in CLDR
 184                 String type = attributes.getValue("type");
 185                 if ("stand-alone".equals(type) || "format".equals(type)) {
 186                     pushKeyContainer(qName, attributes, type);
 187                 } else {
 188                     pushIgnoredContainer(qName);
 189                 }
 190             }
 191             break;
 192         case "monthWidth":
 193             {
 194                 // for FormatData
 195                 // create string array for the two types that the JRE knows
 196                 // keep info about the context type so we can sort out inheritance later
 197                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 198                 switch (attributes.getValue("type")) {
 199                 case "wide":
 200                     pushStringArrayEntry(qName, attributes, prefix + "MonthNames/" + getContainerKey(), 13);
 201                     break;
 202                 case "abbreviated":
 203                     pushStringArrayEntry(qName, attributes, prefix + "MonthAbbreviations/" + getContainerKey(), 13);
 204                     break;
 205                 case "narrow":
 206                     pushStringArrayEntry(qName, attributes, prefix + "MonthNarrows/" + getContainerKey(), 13);
 207                     break;
 208                 default:
 209                     pushIgnoredContainer(qName);
 210                     break;
 211                 }
 212             }
 213             break;
 214         case "month":
 215             // for FormatData
 216             // add to string array entry of monthWidth element
 217             pushStringArrayElement(qName, attributes, Integer.parseInt(attributes.getValue("type")) - 1);
 218             break;
 219         case "dayContext":
 220             {
 221                 // for FormatData
 222                 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR
 223                 String type = attributes.getValue("type");
 224                 if ("stand-alone".equals(type) || "format".equals(type)) {
 225                     pushKeyContainer(qName, attributes, type);
 226                 } else {
 227                     pushIgnoredContainer(qName);
 228                 }
 229             }
 230             break;
 231         case "dayWidth":
 232             {
 233                 // for FormatData
 234                 // create string array for the two types that the JRE knows
 235                 // keep info about the context type so we can sort out inheritance later
 236                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 237                 switch (attributes.getValue("type")) {
 238                 case "wide":
 239                     pushStringArrayEntry(qName, attributes, prefix + "DayNames/" + getContainerKey(), 7);
 240                     break;
 241                 case "abbreviated":
 242                     pushStringArrayEntry(qName, attributes, prefix + "DayAbbreviations/" + getContainerKey(), 7);
 243                     break;
 244                 case "narrow":
 245                     pushStringArrayEntry(qName, attributes, prefix + "DayNarrows/" + getContainerKey(), 7);
 246                     break;
 247                 default:
 248                     pushIgnoredContainer(qName);
 249                     break;
 250                 }
 251             }
 252             break;
 253         case "day":
 254             // for FormatData
 255             // add to string array entry of monthWidth element
 256             pushStringArrayElement(qName, attributes, Integer.parseInt(DAY_OF_WEEK_MAP.get(attributes.getValue("type"))) - 1);
 257             break;
 258         case "dayPeriodContext":
 259             // for FormatData
 260             // need to keep stand-alone and format, to allow for multiple inheritance in CLDR
 261             // for FormatData
 262             // need to keep stand-alone and format, to allow for multiple inheritance in CLDR
 263             {
 264                 String type = attributes.getValue("type");
 265                 if ("stand-alone".equals(type) || "format".equals(type)) {
 266                     pushKeyContainer(qName, attributes, type);
 267                 } else {
 268                     pushIgnoredContainer(qName);
 269                 }
 270             }
 271             break;
 272         case "dayPeriodWidth":
 273             // for FormatData
 274             // create string array entry for am/pm. only keeping wide
 275             switch (attributes.getValue("type")) {
 276             case "wide":
 277                 pushStringArrayEntry(qName, attributes, "AmPmMarkers/" + getContainerKey(), 2);
 278                 break;
 279             case "narrow":
 280                 pushStringArrayEntry(qName, attributes, "narrow.AmPmMarkers/" + getContainerKey(), 2);
 281                 break;
 282             default:
 283                 pushIgnoredContainer(qName);
 284                 break;
 285             }
 286             break;
 287         case "dayPeriod":
 288             // for FormatData
 289             // add to string array entry of AmPmMarkers element
 290             if (attributes.getValue("alt") == null) {
 291                 switch (attributes.getValue("type")) {
 292                 case "am":
 293                     pushStringArrayElement(qName, attributes, 0);
 294                     break;
 295                 case "pm":
 296                     pushStringArrayElement(qName, attributes, 1);
 297                     break;
 298                 default:
 299                     pushIgnoredContainer(qName);
 300                     break;
 301                 }
 302             } else {
 303                 // discard alt values
 304                 pushIgnoredContainer(qName);
 305             }
 306             break;
 307         case "eraNames":
 308             // CLDR era names are inconsistent in terms of their lengths. For example,
 309             // the full names of Japanese imperial eras are eraAbbr, while the full names
 310             // of the Julian eras are eraNames.
 311             if (currentCalendarType == null) {
 312                 assert currentContainer instanceof IgnoredContainer;
 313                 pushIgnoredContainer(qName);
 314             } else {
 315                 String key = currentCalendarType.keyElementName() + "long.Eras"; // for now
 316                 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName));
 317             }
 318             break;
 319         case "eraAbbr":
 320             // for FormatData
 321             // create string array entry
 322             if (currentCalendarType == null) {
 323                 assert currentContainer instanceof IgnoredContainer;
 324                 pushIgnoredContainer(qName);
 325             } else {
 326                 String key = currentCalendarType.keyElementName() + "Eras";
 327                 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName));
 328             }
 329             break;
 330         case "eraNarrow":
 331             // mainly used for the Japanese imperial calendar
 332             if (currentCalendarType == null) {
 333                 assert currentContainer instanceof IgnoredContainer;
 334                 pushIgnoredContainer(qName);
 335             } else {
 336                 String key = currentCalendarType.keyElementName() + "narrow.Eras";
 337                 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName));
 338             }
 339             break;
 340         case "era":
 341             // for FormatData
 342             // add to string array entry of eraAbbr element
 343             if (currentCalendarType == null) {
 344                 assert currentContainer instanceof IgnoredContainer;
 345                 pushIgnoredContainer(qName);
 346             } else {
 347                 int index = Integer.parseInt(attributes.getValue("type"));
 348                 index = currentCalendarType.normalizeEraIndex(index);
 349                 if (index >= 0) {
 350                     pushStringArrayElement(qName, attributes, index);
 351                 } else {
 352                     pushIgnoredContainer(qName);
 353                 }
 354                 if (currentContainer.getParent() == null) {
 355                     throw new InternalError("currentContainer: null parent");
 356                 }
 357             }
 358             break;
 359 
 360         //
 361         // Time zone names
 362         //
 363         case "timeZoneNames":
 364             pushContainer(qName, attributes);
 365             break;
 366         case "zone":
 367             {
 368                 String tzid = attributes.getValue("type"); // Olson tz id
 369                 zonePrefix = CLDRConverter.TIMEZONE_ID_PREFIX;
 370                 put(zonePrefix + tzid, new HashMap<String, String>());
 371                 pushKeyContainer(qName, attributes, tzid);
 372             }
 373             break;
 374         case "metazone":
 375             {
 376                 String zone = attributes.getValue("type"); // LDML meta zone id
 377                 zonePrefix = CLDRConverter.METAZONE_ID_PREFIX;
 378                 put(zonePrefix + zone, new HashMap<String, String>());
 379                 pushKeyContainer(qName, attributes, zone);
 380             }
 381             break;
 382         case "long":
 383             zoneNameStyle = "long";
 384             pushContainer(qName, attributes);
 385             break;
 386         case "short":
 387             zoneNameStyle = "short";
 388             pushContainer(qName, attributes);
 389             break;
 390         case "generic":  // generic name
 391         case "standard": // standard time name
 392         case "daylight": // daylight saving (summer) time name
 393             pushStringEntry(qName, attributes, CLDRConverter.ZONE_NAME_PREFIX + qName + "." + zoneNameStyle);
 394             break;
 395         case "exemplarCity":  // not used in JDK
 396             pushIgnoredContainer(qName);
 397             break;
 398 
 399         //
 400         // Number format information
 401         //
 402         case "decimalFormatLength":
 403             if (attributes.getValue("type") == null) {
 404                 // skipping type="short" data
 405                 // for FormatData
 406                 // copy string for later assembly into NumberPatterns
 407                 pushStringEntry(qName, attributes, "NumberPatterns/decimal");
 408             } else {
 409                 pushIgnoredContainer(qName);
 410             }
 411             break;
 412         case "currencyFormat":
 413             // for FormatData
 414             // copy string for later assembly into NumberPatterns
 415             pushStringEntry(qName, attributes, "NumberPatterns/currency");
 416             break;
 417         case "percentFormat":
 418             // for FormatData
 419             // copy string for later assembly into NumberPatterns
 420             pushStringEntry(qName, attributes, "NumberPatterns/percent");
 421             break;
 422         case "defaultNumberingSystem":
 423             // default numbering system if multiple numbering systems are used.
 424             pushStringEntry(qName, attributes, "DefaultNumberingSystem");
 425             break;
 426         case "symbols":
 427             // for FormatData
 428             // look up numberingSystems
 429             symbols: {
 430                 String script = attributes.getValue("numberSystem");
 431                 if (script == null) {
 432                     // Has no script. Just ignore.
 433                     pushIgnoredContainer(qName);
 434                     break;
 435                 }
 436 
 437                 // Use keys as <script>."NumberElements/<symbol>"
 438                 currentNumberingSystem = script + ".";
 439                 String digits = CLDRConverter.handlerNumbering.get(script);
 440                 if (digits == null) {
 441                     throw new InternalError("null digits for " + script);
 442                 }
 443                 if (Character.isSurrogate(digits.charAt(0))) {
 444                     // DecimalFormatSymbols doesn't support supplementary characters as digit zero.
 445                     pushIgnoredContainer(qName);
 446                     break;
 447                 }
 448                 // in case digits are in the reversed order, reverse back the order.
 449                 if (digits.charAt(0) > digits.charAt(digits.length() - 1)) {
 450                     StringBuilder sb = new StringBuilder(digits);
 451                     digits = sb.reverse().toString();
 452                 }
 453                 // Check if the order is sequential.
 454                 char c0 = digits.charAt(0);
 455                 for (int i = 1; i < digits.length(); i++) {
 456                     if (digits.charAt(i) != c0 + i) {
 457                         pushIgnoredContainer(qName);
 458                         break symbols;
 459                     }
 460                 }
 461                 @SuppressWarnings("unchecked")
 462                 List<String> numberingScripts = (List<String>) get("numberingScripts");
 463                 if (numberingScripts == null) {
 464                     numberingScripts = new ArrayList<>();
 465                     put("numberingScripts", numberingScripts);
 466                 }
 467                 numberingScripts.add(script);
 468                 put(currentNumberingSystem + "NumberElements/zero", digits.substring(0, 1));
 469                 pushContainer(qName, attributes);
 470             }
 471             break;
 472         case "decimal":
 473             // for FormatData
 474             // copy string for later assembly into NumberElements
 475             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/decimal");
 476             break;
 477         case "group":
 478             // for FormatData
 479             // copy string for later assembly into NumberElements
 480             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/group");
 481             break;
 482         case "list":
 483             // for FormatData
 484             // copy string for later assembly into NumberElements
 485             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/list");
 486             break;
 487         case "percentSign":
 488             // for FormatData
 489             // copy string for later assembly into NumberElements
 490             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/percent");
 491             break;
 492         case "nativeZeroDigit":
 493             // for FormatData
 494             // copy string for later assembly into NumberElements
 495             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/zero");
 496             break;
 497         case "patternDigit":
 498             // for FormatData
 499             // copy string for later assembly into NumberElements
 500             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/pattern");
 501             break;
 502         case "plusSign":
 503             // TODO: DecimalFormatSymbols doesn't support plusSign
 504             pushIgnoredContainer(qName);
 505             break;
 506         case "minusSign":
 507             // for FormatData
 508             // copy string for later assembly into NumberElements
 509             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/minus");
 510             break;
 511         case "exponential":
 512             // for FormatData
 513             // copy string for later assembly into NumberElements
 514             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/exponential");
 515             break;
 516         case "perMille":
 517             // for FormatData
 518             // copy string for later assembly into NumberElements
 519             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/permille");
 520             break;
 521         case "infinity":
 522             // for FormatData
 523             // copy string for later assembly into NumberElements
 524             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/infinity");
 525             break;
 526         case "nan":
 527             // for FormatData
 528             // copy string for later assembly into NumberElements
 529             pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/nan");
 530             break;
 531         case "timeFormatLength":
 532             {
 533                 // for FormatData
 534                 // copy string for later assembly into DateTimePatterns
 535                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 536                 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-time");
 537             }
 538             break;
 539         case "dateFormatLength":
 540             {
 541                 // for FormatData
 542                 // copy string for later assembly into DateTimePatterns
 543                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 544                 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-date");
 545             }
 546             break;
 547         case "dateTimeFormat":
 548             {
 549                 // for FormatData
 550                 // copy string for later assembly into DateTimePatterns
 551                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 552                 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/date-time");
 553             }
 554             break;
 555         case "localizedPatternChars":
 556             {
 557                 // for FormatData
 558                 // copy string for later adaptation to JRE use
 559                 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
 560                 pushStringEntry(qName, attributes, prefix + "DateTimePatternChars");
 561             }
 562             break;
 563 
 564         default:
 565             // treat anything else as a container
 566             pushContainer(qName, attributes);
 567             break;
 568         }
 569     }
 570 
 571     @Override
 572     public void endElement(String uri, String localName, String qName) throws SAXException {
 573         assert qName.equals(currentContainer.getqName()) : "current=" + currentContainer.getqName() + ", param=" + qName;
 574         switch (qName) {
 575         case "calendar":
 576             assert !(currentContainer instanceof Entry);
 577             currentCalendarType = null;
 578             break;
 579 
 580         case "defaultNumberingSystem":
 581             if (currentContainer instanceof StringEntry) {
 582                 defaultNumberingSystem = ((StringEntry) currentContainer).getValue();
 583                 assert defaultNumberingSystem != null;
 584                 put(((StringEntry) currentContainer).getKey(), defaultNumberingSystem);
 585             } else {
 586                 defaultNumberingSystem = null;
 587             }
 588             break;
 589 
 590         case "timeZoneNames":
 591             zonePrefix = null;
 592             break;
 593         case "generic":
 594         case "standard":
 595         case "daylight":
 596             if (zonePrefix != null && (currentContainer instanceof Entry)) {
 597                 @SuppressWarnings("unchecked")
 598                 Map<String, String> valmap = (Map<String, String>) get(zonePrefix + getContainerKey());
 599                 Entry<?> entry = (Entry<?>) currentContainer;
 600                 valmap.put(entry.getKey(), (String) entry.getValue());
 601             }
 602             break;
 603         default:
 604             if (currentContainer instanceof Entry) {
 605                 Entry<?> entry = (Entry<?>) currentContainer;
 606                 Object value = entry.getValue();
 607                 if (value != null) {
 608                     put(entry.getKey(), value);
 609                 }
 610             }
 611         }
 612         currentContainer = currentContainer.getParent();
 613     }
 614 }