/*
 * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.cldrconverter;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Handles parsing of files in Locale Data Markup Language and produces a map
 * that uses the keys and values of JRE locale data.
 *
 * <p>{@link #startElement} decides, per element name, which kind of container
 * or entry is pushed for the element; {@link #endElement} stores the value of
 * a finished entry and pops back to the parent container.
 */
class LDMLParseHandler extends AbstractLDMLHandler<Object> {
    // Value of the most recently closed <defaultNumberingSystem> element, or null.
    private String defaultNumberingSystem;
    // Key prefix "<numberSystem>." set by the most recent <symbols> element that
    // carried a numberSystem attribute; empty until then.
    private String currentNumberingSystem = "";
    // Calendar type of the enclosing <calendar> element; null outside of one
    // or when CalendarType.forName() does not know the calendar.
    private CalendarType currentCalendarType;
    private String zoneNameStyle; // "long" or "short" for time zone names
    // Key prefix of the enclosing <zone>/<metazone>; reset when <timeZoneNames> ends.
    private String zonePrefix;
    private final String id;

    LDMLParseHandler(String id) {
        this.id = id;
    }

    /**
     * Redirects requests for the LDML DTD to the local copy.
     *
     * @return an input source for {@code CLDRConverter.LOCAL_LDML_DTD} if
     *         {@code systemID} starts with {@code CLDRConverter.LDML_DTD_SYSTEM_ID};
     *         otherwise {@code null}, which under the SAX contract asks the
     *         parser to resolve the entity itself
     */
    @Override
    public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException {
        // avoid HTTP traffic to unicode.org
        if (systemID.startsWith(CLDRConverter.LDML_DTD_SYSTEM_ID)) {
            return new InputSource((new File(CLDRConverter.LOCAL_LDML_DTD)).toURI().toString());
        }
        return null;
    }

    /**
     * Pushes the container or entry that corresponds to the element
     * {@code qName}. Elements without a dedicated case are treated as plain
     * containers.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        switch (qName) {
        //
        // Generic information
        //
        case "identity":
            // ignore this element - it has language and territory elements that aren't locale data
            pushIgnoredContainer(qName);
            break;
        case "type":
            if ("calendar".equals(attributes.getValue("key"))) {
                pushStringEntry(qName, attributes, CLDRConverter.CALENDAR_NAME_PREFIX + attributes.getValue("type"));
            } else {
                pushIgnoredContainer(qName);
            }
            break;
        case "language":
        case "script":
        case "territory":
            // for LocaleNames
            // copy string
            pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type"));
            break;

        //
        // Currency information
        //
        case "currency":
            // for CurrencyNames
            // stash away "type" value for nested <symbol>
            pushKeyContainer(qName, attributes, attributes.getValue("type"));
            break;
        case "symbol":
            // for CurrencyNames
            // need to get the key from the containing <currency> element
            pushStringEntry(qName, attributes, CLDRConverter.CURRENCY_SYMBOL_PREFIX + getContainerKey());
            break;

        // Calendar or currency
        case "displayName":
            pushDisplayName(qName, attributes);
            break;

        //
        // Calendar information
        //
        case "calendar":
            // mostly for FormatData (CalendarData items firstDay and minDays are also nested)
            // use only if it's supported by java.util.Calendar.
            currentCalendarType = CalendarType.forName(attributes.getValue("type"));
            pushCalendarDependentContainer(qName, attributes);
            break;
        case "fields":
            pushCalendarDependentContainer(qName, attributes);
            break;
        case "field":
            {
                String type = attributes.getValue("type");
                switch (type) {
                case "era":
                case "year":
                case "month":
                case "week":
                case "weekday":
                case "dayperiod":
                case "hour":
                case "minute":
                case "second":
                case "zone":
                    pushKeyContainer(qName, attributes, type);
                    break;
                default:
                    pushIgnoredContainer(qName);
                    break;
                }
            }
            break;
        case "monthContext":
        case "dayContext":
        case "dayPeriodContext":
        case "quarterContext":
            {
                // for FormatData
                // need to keep stand-alone and format, to allow for inheritance in CLDR
                String type = attributes.getValue("type");
                if ("stand-alone".equals(type) || "format".equals(type)) {
                    pushKeyContainer(qName, attributes, type);
                } else {
                    pushIgnoredContainer(qName);
                }
            }
            break;
        case "monthWidth":
            pushWidthArrayEntry(qName, attributes, "Month", 13);
            break;
        case "dayWidth":
            pushWidthArrayEntry(qName, attributes, "Day", 7);
            break;
        case "quarterWidth":
            pushWidthArrayEntry(qName, attributes, "Quarter", 4);
            break;
        case "month":
        case "quarter":
            // for FormatData
            // add to string array entry of the enclosing monthWidth/quarterWidth
            // element; the "type" attribute is 1-based, the array index 0-based
            pushStringArrayElement(qName, attributes, Integer.parseInt(attributes.getValue("type")) - 1);
            break;
        case "day":
            // for FormatData
            // add to string array entry of dayWidth element
            pushStringArrayElement(qName, attributes,
                    Integer.parseInt(DAY_OF_WEEK_MAP.get(attributes.getValue("type"))) - 1);
            break;
        case "dayPeriodWidth":
            // for FormatData
            // create string array entry for am/pm. keeping wide and narrow
            switch (attributes.getValue("type")) {
            case "wide":
                pushStringArrayEntry(qName, attributes, "AmPmMarkers/" + getContainerKey(), 2);
                break;
            case "narrow":
                pushStringArrayEntry(qName, attributes, "narrow.AmPmMarkers/" + getContainerKey(), 2);
                break;
            default:
                pushIgnoredContainer(qName);
                break;
            }
            break;
        case "dayPeriod":
            // for FormatData
            // add to string array entry of AmPmMarkers element
            if (attributes.getValue("alt") == null) {
                switch (attributes.getValue("type")) {
                case "am":
                    pushStringArrayElement(qName, attributes, 0);
                    break;
                case "pm":
                    pushStringArrayElement(qName, attributes, 1);
                    break;
                default:
                    pushIgnoredContainer(qName);
                    break;
                }
            } else {
                // discard alt values
                pushIgnoredContainer(qName);
            }
            break;
        case "eraNames":
            // CLDR era names are inconsistent in terms of their lengths. For example,
            // the full names of Japanese imperial eras are eraAbbr, while the full names
            // of the Julian eras are eraNames.
            pushEraArrayEntry(qName, attributes, "long.Eras"); // for now
            break;
        case "eraAbbr":
            // for FormatData
            // create string array entry
            pushEraArrayEntry(qName, attributes, "Eras");
            break;
        case "eraNarrow":
            // mainly used for the Japanese imperial calendar
            pushEraArrayEntry(qName, attributes, "narrow.Eras");
            break;
        case "era":
            // for FormatData
            // add to string array entry of the enclosing era* element
            pushEraElement(qName, attributes);
            break;

        //
        // Time zone names
        //
        case "timeZoneNames":
            pushContainer(qName, attributes);
            break;
        case "zone":
            // "type" is the Olson tz id
            pushZoneContainer(qName, attributes, CLDRConverter.TIMEZONE_ID_PREFIX);
            break;
        case "metazone":
            // "type" is the LDML meta zone id
            pushZoneContainer(qName, attributes, CLDRConverter.METAZONE_ID_PREFIX);
            break;
        case "long":
        case "short":
            // remember the style for the nested generic/standard/daylight names
            zoneNameStyle = qName;
            pushContainer(qName, attributes);
            break;
        case "generic":     // generic name
        case "standard":    // standard time name
        case "daylight":    // daylight saving (summer) time name
            pushStringEntry(qName, attributes, CLDRConverter.ZONE_NAME_PREFIX + qName + "." + zoneNameStyle);
            break;
        case "exemplarCity":  // not used in JDK
            pushIgnoredContainer(qName);
            break;

        //
        // Number format information
        //
        case "decimalFormatLength":
            if (attributes.getValue("type") == null) {
                // skipping type="short" data
                // for FormatData
                // copy string for later assembly into NumberPatterns
                pushStringEntry(qName, attributes, "NumberPatterns/decimal");
            } else {
                pushIgnoredContainer(qName);
            }
            break;
        case "currencyFormat":
            // for FormatData
            // copy string for later assembly into NumberPatterns
            pushStringEntry(qName, attributes, "NumberPatterns/currency");
            break;
        case "percentFormat":
            // for FormatData
            // copy string for later assembly into NumberPatterns
            pushStringEntry(qName, attributes, "NumberPatterns/percent");
            break;
        case "defaultNumberingSystem":
            // default numbering system if multiple numbering systems are used.
            pushStringEntry(qName, attributes, "DefaultNumberingSystem");
            break;
        case "symbols":
            // for FormatData
            // look up numberingSystems
            pushSymbols(qName, attributes);
            break;
        case "decimal":
        case "group":
        case "list":
        case "exponential":
        case "infinity":
        case "nan":
            // for FormatData
            // copy string for later assembly into NumberElements;
            // the NumberElements name equals the element name
            pushNumberElement(qName, attributes, qName);
            break;
        case "percentSign":
            pushNumberElement(qName, attributes, "percent");
            break;
        case "nativeZeroDigit":
            pushNumberElement(qName, attributes, "zero");
            break;
        case "patternDigit":
            pushNumberElement(qName, attributes, "pattern");
            break;
        case "plusSign":
            // TODO: DecimalFormatSymbols doesn't support plusSign
            pushIgnoredContainer(qName);
            break;
        case "minusSign":
            pushNumberElement(qName, attributes, "minus");
            break;
        case "perMille":
            pushNumberElement(qName, attributes, "permille");
            break;
        case "timeFormatLength":
            // for FormatData
            // copy string for later assembly into DateTimePatterns
            pushStringEntry(qName, attributes,
                    calendarKeyPrefix() + "DateTimePatterns/" + attributes.getValue("type") + "-time");
            break;
        case "dateFormatLength":
            // for FormatData
            // copy string for later assembly into DateTimePatterns
            pushStringEntry(qName, attributes,
                    calendarKeyPrefix() + "DateTimePatterns/" + attributes.getValue("type") + "-date");
            break;
        case "dateTimeFormat":
            // for FormatData
            // copy string for later assembly into DateTimePatterns
            pushStringEntry(qName, attributes, calendarKeyPrefix() + "DateTimePatterns/date-time");
            break;
        case "localizedPatternChars":
            // for FormatData
            // copy string for later adaptation to JRE use
            pushStringEntry(qName, attributes, calendarKeyPrefix() + "DateTimePatternChars");
            break;

        default:
            // treat anything else as a container
            pushContainer(qName, attributes);
            break;
        }
    }

    /**
     * Returns the key prefix of the current calendar, or the empty string
     * when no supported calendar is being parsed.
     */
    private String calendarKeyPrefix() {
        return (currentCalendarType == null) ? "" : currentCalendarType.keyElementName();
    }

    /**
     * Pushes a regular container while a supported calendar is current and an
     * ignored container otherwise.
     */
    private void pushCalendarDependentContainer(String qName, Attributes attributes) throws SAXException {
        if (currentCalendarType != null) {
            pushContainer(qName, attributes);
        } else {
            pushIgnoredContainer(qName);
        }
    }

    /**
     * Handles {@code <displayName>}, which occurs both under calendar fields
     * and under {@code <currency>}.
     */
    private void pushDisplayName(String qName, Attributes attributes) throws SAXException {
        if (currentCalendarType != null) {
            pushStringEntry(qName, attributes,
                    currentCalendarType.keyElementName() + "field." + getContainerKey());
            return;
        }
        // for CurrencyNames
        // need to get the key from the containing <currency> element
        // ignore if it has a "count" attribute
        String containerKey = getContainerKey();
        if (containerKey != null && attributes.getValue("count") == null) {
            pushStringEntry(qName, attributes,
                    CLDRConverter.CURRENCY_NAME_PREFIX + containerKey.toLowerCase(Locale.ROOT),
                    attributes.getValue("type"));
        } else {
            pushIgnoredContainer(qName);
        }
    }

    /**
     * Handles {@code monthWidth}, {@code dayWidth} and {@code quarterWidth}:
     * creates a string array entry for the wide, abbreviated and narrow widths
     * and ignores every other width.
     *
     * @param unit   name stem of the key: "Month", "Day" or "Quarter"
     * @param length number of elements in the string array
     */
    private void pushWidthArrayEntry(String qName, Attributes attributes, String unit, int length)
            throws SAXException {
        // for FormatData
        // keep info about the context type so we can sort out inheritance later
        String prefix = calendarKeyPrefix();
        String kind;
        switch (attributes.getValue("type")) {
        case "wide":
            kind = "Names/";
            break;
        case "abbreviated":
            kind = "Abbreviations/";
            break;
        case "narrow":
            kind = "Narrows/";
            break;
        default:
            pushIgnoredContainer(qName);
            return;
        }
        pushStringArrayEntry(qName, attributes, prefix + unit + kind + getContainerKey(), length);
    }

    /**
     * Handles {@code eraNames}, {@code eraAbbr} and {@code eraNarrow}: creates
     * the era string array under the current calendar's key prefix followed by
     * {@code keySuffix}, or ignores the element when no supported calendar is
     * current.
     */
    private void pushEraArrayEntry(String qName, Attributes attributes, String keySuffix) throws SAXException {
        if (currentCalendarType == null) {
            assert currentContainer instanceof IgnoredContainer;
            pushIgnoredContainer(qName);
            return;
        }
        String key = currentCalendarType.keyElementName() + keySuffix;
        pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName));
    }

    /**
     * Handles {@code <era>}: stores the name at the index produced by
     * {@code CalendarType.normalizeEraIndex}, or ignores the element when that
     * index is negative or no supported calendar is current.
     */
    private void pushEraElement(String qName, Attributes attributes) throws SAXException {
        if (currentCalendarType == null) {
            assert currentContainer instanceof IgnoredContainer;
            pushIgnoredContainer(qName);
            return;
        }
        int index = Integer.parseInt(attributes.getValue("type"));
        index = currentCalendarType.normalizeEraIndex(index);
        if (index >= 0) {
            pushStringArrayElement(qName, attributes, index);
        } else {
            pushIgnoredContainer(qName);
        }
        if (currentContainer.getParent() == null) {
            throw new InternalError("currentContainer: null parent");
        }
    }

    /**
     * Handles {@code <zone>} and {@code <metazone>}: records {@code prefix} as
     * the current zone prefix, registers an empty name map under
     * {@code prefix + type} and pushes a key container keyed by the zone id.
     */
    private void pushZoneContainer(String qName, Attributes attributes, String prefix) throws SAXException {
        String zoneId = attributes.getValue("type");
        zonePrefix = prefix;
        put(zonePrefix + zoneId, new HashMap<String, String>());
        pushKeyContainer(qName, attributes, zoneId);
    }

    /**
     * Handles {@code <symbols>}. The element is accepted only if it names a
     * numbering system whose digits are BMP characters in sequential order;
     * otherwise it is ignored.
     *
     * @throws InternalError if {@code CLDRConverter.handlerNumbering} has no
     *         digits for the numbering system
     */
    private void pushSymbols(String qName, Attributes attributes) throws SAXException {
        String script = attributes.getValue("numberSystem");
        if (script == null) {
            // Has no script. Just ignore.
            pushIgnoredContainer(qName);
            return;
        }

        // Use keys as <script>."NumberElements/<symbol>"
        currentNumberingSystem = script + ".";
        String digits = CLDRConverter.handlerNumbering.get(script);
        if (digits == null) {
            throw new InternalError("null digits for " + script);
        }
        if (Character.isSurrogate(digits.charAt(0))) {
            // DecimalFormatSymbols doesn't support supplementary characters as digit zero.
            pushIgnoredContainer(qName);
            return;
        }
        // in case digits are in the reversed order, reverse back the order.
        if (digits.charAt(0) > digits.charAt(digits.length() - 1)) {
            digits = new StringBuilder(digits).reverse().toString();
        }
        // Check if the order is sequential.
        char c0 = digits.charAt(0);
        for (int i = 1; i < digits.length(); i++) {
            if (digits.charAt(i) != c0 + i) {
                pushIgnoredContainer(qName);
                return;
            }
        }
        @SuppressWarnings("unchecked")
        List<String> numberingScripts = (List<String>) get("numberingScripts");
        if (numberingScripts == null) {
            numberingScripts = new ArrayList<>();
            put("numberingScripts", numberingScripts);
        }
        numberingScripts.add(script);
        put(currentNumberingSystem + "NumberElements/zero", digits.substring(0, 1));
        pushContainer(qName, attributes);
    }

    /**
     * Pushes a string entry keyed
     * {@code <currentNumberingSystem>NumberElements/<name>}.
     */
    private void pushNumberElement(String qName, Attributes attributes, String name) throws SAXException {
        // for FormatData
        // copy string for later assembly into NumberElements
        pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/" + name);
    }

    /**
     * Finishes the element {@code qName}: performs the per-element
     * bookkeeping below and then makes the parent container current again.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        assert qName.equals(currentContainer.getqName()) : "current=" + currentContainer.getqName() + ", param=" + qName;
        switch (qName) {
        case "calendar":
            assert !(currentContainer instanceof Entry);
            currentCalendarType = null;
            break;

        case "defaultNumberingSystem":
            if (currentContainer instanceof StringEntry) {
                defaultNumberingSystem = ((StringEntry) currentContainer).getValue();
                assert defaultNumberingSystem != null;
                put(((StringEntry) currentContainer).getKey(), defaultNumberingSystem);
            } else {
                defaultNumberingSystem = null;
            }
            break;

        case "timeZoneNames":
            zonePrefix = null;
            break;
        case "generic":
        case "standard":
        case "daylight":
            // Zone names go into the per-zone map registered when the enclosing
            // <zone>/<metazone> started, not into the top-level map.
            if (zonePrefix != null && (currentContainer instanceof Entry)) {
                @SuppressWarnings("unchecked")
                Map<String, String> valmap = (Map<String, String>) get(zonePrefix + getContainerKey());
                Entry<?> entry = (Entry<?>) currentContainer;
                valmap.put(entry.getKey(), (String) entry.getValue());
            }
            break;
        default:
            // Any other finished entry is stored under its own key, unless it has no value.
            if (currentContainer instanceof Entry) {
                Entry<?> entry = (Entry<?>) currentContainer;
                Object value = entry.getValue();
                if (value != null) {
                    put(entry.getKey(), value);
                }
            }
        }
        currentContainer = currentContainer.getParent();
    }
}