1 /* 2 * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.cldrconverter; 27 28 import java.io.File; 29 import java.io.IOException; 30 import java.util.ArrayList; 31 import java.util.HashMap; 32 import java.util.List; 33 import java.util.Locale; 34 import java.util.Map; 35 import org.xml.sax.Attributes; 36 import org.xml.sax.InputSource; 37 import org.xml.sax.SAXException; 38 39 /** 40 * Handles parsing of files in Locale Data Markup Language and produces a map 41 * that uses the keys and values of JRE locale data. 42 */ 43 class LDMLParseHandler extends AbstractLDMLHandler<Object> { 44 private String defaultNumberingSystem; 45 private String currentNumberingSystem = ""; 46 private CalendarType currentCalendarType; 47 private String zoneNameStyle; // "long" or "short" for time zone names 48 private String zonePrefix; 49 private final String id; 50 51 LDMLParseHandler(String id) { 52 this.id = id; 53 } 54 55 @Override 56 public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException { 57 // avoid HTTP traffic to unicode.org 58 if (systemID.startsWith(CLDRConverter.LDML_DTD_SYSTEM_ID)) { 59 return new InputSource((new File(CLDRConverter.LOCAL_LDML_DTD)).toURI().toString()); 60 } 61 return null; 62 } 63 64 @Override 65 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { 66 switch (qName) { 67 // 68 // Generic information 69 // 70 case "identity": 71 // ignore this element - it has language and territory elements that aren't locale data 72 pushIgnoredContainer(qName); 73 break; 74 case "type": 75 if ("calendar".equals(attributes.getValue("key"))) { 76 pushStringEntry(qName, attributes, CLDRConverter.CALENDAR_NAME_PREFIX + attributes.getValue("type")); 77 } else { 78 pushIgnoredContainer(qName); 79 } 80 break; 81 case "language": 82 // for LocaleNames 83 // copy string 84 pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); 85 break; 86 case "script": 87 // for LocaleNames 88 // copy string 89 pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); 90 break; 91 case "territory": 92 // for LocaleNames 93 // copy string 94 pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); 95 break; 96 97 // 98 // Currency information 99 // 100 case "currency": 101 // for CurrencyNames 102 // stash away "type" value for nested <symbol> 103 pushKeyContainer(qName, attributes, attributes.getValue("type")); 104 break; 105 case "symbol": 106 // for CurrencyNames 107 // need to get the key from the containing <currency> element 108 pushStringEntry(qName, attributes, CLDRConverter.CURRENCY_SYMBOL_PREFIX 109 + getContainerKey()); 110 break; 111 112 // Calendar or currency 113 case "displayName": 114 { 115 if (currentCalendarType != null) { 116 pushStringEntry(qName, attributes, 117 currentCalendarType.keyElementName() + "field." + getContainerKey()); 118 } else { 119 // for CurrencyNames 120 // need to get the key from the containing <currency> element 121 // ignore if is has "count" attribute 122 String containerKey = getContainerKey(); 123 if (containerKey != null && attributes.getValue("count") == null) { 124 pushStringEntry(qName, attributes, 125 CLDRConverter.CURRENCY_NAME_PREFIX 126 + containerKey.toLowerCase(Locale.ROOT), 127 attributes.getValue("type")); 128 } else { 129 pushIgnoredContainer(qName); 130 } 131 } 132 } 133 break; 134 135 // 136 // Calendar information 137 // 138 case "calendar": 139 { 140 // mostly for FormatData (CalendarData items firstDay and minDays are also nested) 141 // use only if it's supported by java.util.Calendar. 142 String calendarName = attributes.getValue("type"); 143 currentCalendarType = CalendarType.forName(calendarName); 144 if (currentCalendarType != null) { 145 pushContainer(qName, attributes); 146 } else { 147 pushIgnoredContainer(qName); 148 } 149 } 150 break; 151 case "fields": 152 if (currentCalendarType != null) { 153 pushContainer(qName, attributes); 154 } else { 155 pushIgnoredContainer(qName); 156 } 157 break; 158 case "field": 159 { 160 String type = attributes.getValue("type"); 161 switch (type) { 162 case "era": 163 case "year": 164 case "month": 165 case "week": 166 case "weekday": 167 case "dayperiod": 168 case "hour": 169 case "minute": 170 case "second": 171 case "zone": 172 pushKeyContainer(qName, attributes, type); 173 break; 174 default: 175 pushIgnoredContainer(qName); 176 break; 177 } 178 } 179 break; 180 case "monthContext": 181 { 182 // for FormatData 183 // need to keep stand-alone and format, to allow for inheritance in CLDR 184 String type = attributes.getValue("type"); 185 if ("stand-alone".equals(type) || "format".equals(type)) { 186 pushKeyContainer(qName, attributes, type); 187 } else { 188 pushIgnoredContainer(qName); 189 } 190 } 191 break; 192 case "monthWidth": 193 { 194 // for FormatData 195 // create string array for the two types that the JRE knows 196 // keep info about the context type so we can sort out inheritance later 197 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 198 switch (attributes.getValue("type")) { 199 case "wide": 200 pushStringArrayEntry(qName, attributes, prefix + "MonthNames/" + getContainerKey(), 13); 201 break; 202 case "abbreviated": 203 pushStringArrayEntry(qName, attributes, prefix + "MonthAbbreviations/" + getContainerKey(), 13); 204 break; 205 case "narrow": 206 pushStringArrayEntry(qName, attributes, prefix + "MonthNarrows/" + getContainerKey(), 13); 207 break; 208 default: 209 pushIgnoredContainer(qName); 210 break; 211 } 212 } 213 break; 214 case "month": 215 // for FormatData 216 // add to string array entry of monthWidth element 217 pushStringArrayElement(qName, attributes, Integer.parseInt(attributes.getValue("type")) - 1); 218 break; 219 case "dayContext": 220 { 221 // for FormatData 222 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR 223 String type = attributes.getValue("type"); 224 if ("stand-alone".equals(type) || "format".equals(type)) { 225 pushKeyContainer(qName, attributes, type); 226 } else { 227 pushIgnoredContainer(qName); 228 } 229 } 230 break; 231 case "dayWidth": 232 { 233 // for FormatData 234 // create string array for the two types that the JRE knows 235 // keep info about the context type so we can sort out inheritance later 236 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 237 switch (attributes.getValue("type")) { 238 case "wide": 239 pushStringArrayEntry(qName, attributes, prefix + "DayNames/" + getContainerKey(), 7); 240 break; 241 case "abbreviated": 242 pushStringArrayEntry(qName, attributes, prefix + "DayAbbreviations/" + getContainerKey(), 7); 243 break; 244 case "narrow": 245 pushStringArrayEntry(qName, attributes, prefix + "DayNarrows/" + getContainerKey(), 7); 246 break; 247 default: 248 pushIgnoredContainer(qName); 249 break; 250 } 251 } 252 break; 253 case "day": 254 // for FormatData 255 // add to string array entry of monthWidth element 256 pushStringArrayElement(qName, attributes, Integer.parseInt(DAY_OF_WEEK_MAP.get(attributes.getValue("type"))) - 1); 257 break; 258 case "dayPeriodContext": 259 // for FormatData 260 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR 261 // for FormatData 262 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR 263 { 264 String type = attributes.getValue("type"); 265 if ("stand-alone".equals(type) || "format".equals(type)) { 266 pushKeyContainer(qName, attributes, type); 267 } else { 268 pushIgnoredContainer(qName); 269 } 270 } 271 break; 272 case "dayPeriodWidth": 273 // for FormatData 274 // create string array entry for am/pm. only keeping wide 275 switch (attributes.getValue("type")) { 276 case "wide": 277 pushStringArrayEntry(qName, attributes, "AmPmMarkers/" + getContainerKey(), 2); 278 break; 279 case "narrow": 280 pushStringArrayEntry(qName, attributes, "narrow.AmPmMarkers/" + getContainerKey(), 2); 281 break; 282 default: 283 pushIgnoredContainer(qName); 284 break; 285 } 286 break; 287 case "dayPeriod": 288 // for FormatData 289 // add to string array entry of AmPmMarkers element 290 if (attributes.getValue("alt") == null) { 291 switch (attributes.getValue("type")) { 292 case "am": 293 pushStringArrayElement(qName, attributes, 0); 294 break; 295 case "pm": 296 pushStringArrayElement(qName, attributes, 1); 297 break; 298 default: 299 pushIgnoredContainer(qName); 300 break; 301 } 302 } else { 303 // discard alt values 304 pushIgnoredContainer(qName); 305 } 306 break; 307 case "eraNames": 308 // CLDR era names are inconsistent in terms of their lengths. For example, 309 // the full names of Japanese imperial eras are eraAbbr, while the full names 310 // of the Julian eras are eraNames. 311 if (currentCalendarType == null) { 312 assert currentContainer instanceof IgnoredContainer; 313 pushIgnoredContainer(qName); 314 } else { 315 String key = currentCalendarType.keyElementName() + "long.Eras"; // for now 316 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); 317 } 318 break; 319 case "eraAbbr": 320 // for FormatData 321 // create string array entry 322 if (currentCalendarType == null) { 323 assert currentContainer instanceof IgnoredContainer; 324 pushIgnoredContainer(qName); 325 } else { 326 String key = currentCalendarType.keyElementName() + "Eras"; 327 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); 328 } 329 break; 330 case "eraNarrow": 331 // mainly used for the Japanese imperial calendar 332 if (currentCalendarType == null) { 333 assert currentContainer instanceof IgnoredContainer; 334 pushIgnoredContainer(qName); 335 } else { 336 String key = currentCalendarType.keyElementName() + "narrow.Eras"; 337 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); 338 } 339 break; 340 case "era": 341 // for FormatData 342 // add to string array entry of eraAbbr element 343 if (currentCalendarType == null) { 344 assert currentContainer instanceof IgnoredContainer; 345 pushIgnoredContainer(qName); 346 } else { 347 int index = Integer.parseInt(attributes.getValue("type")); 348 index = currentCalendarType.normalizeEraIndex(index); 349 if (index >= 0) { 350 pushStringArrayElement(qName, attributes, index); 351 } else { 352 pushIgnoredContainer(qName); 353 } 354 if (currentContainer.getParent() == null) { 355 throw new InternalError("currentContainer: null parent"); 356 } 357 } 358 break; 359 360 // 361 // Time zone names 362 // 363 case "timeZoneNames": 364 pushContainer(qName, attributes); 365 break; 366 case "zone": 367 { 368 String tzid = attributes.getValue("type"); // Olson tz id 369 zonePrefix = CLDRConverter.TIMEZONE_ID_PREFIX; 370 put(zonePrefix + tzid, new HashMap<String, String>()); 371 pushKeyContainer(qName, attributes, tzid); 372 } 373 break; 374 case "metazone": 375 { 376 String zone = attributes.getValue("type"); // LDML meta zone id 377 zonePrefix = CLDRConverter.METAZONE_ID_PREFIX; 378 put(zonePrefix + zone, new HashMap<String, String>()); 379 pushKeyContainer(qName, attributes, zone); 380 } 381 break; 382 case "long": 383 zoneNameStyle = "long"; 384 pushContainer(qName, attributes); 385 break; 386 case "short": 387 zoneNameStyle = "short"; 388 pushContainer(qName, attributes); 389 break; 390 case "generic": // generic name 391 case "standard": // standard time name 392 case "daylight": // daylight saving (summer) time name 393 pushStringEntry(qName, attributes, CLDRConverter.ZONE_NAME_PREFIX + qName + "." + zoneNameStyle); 394 break; 395 case "exemplarCity": // not used in JDK 396 pushIgnoredContainer(qName); 397 break; 398 399 // 400 // Number format information 401 // 402 case "decimalFormatLength": 403 if (attributes.getValue("type") == null) { 404 // skipping type="short" data 405 // for FormatData 406 // copy string for later assembly into NumberPatterns 407 pushStringEntry(qName, attributes, "NumberPatterns/decimal"); 408 } else { 409 pushIgnoredContainer(qName); 410 } 411 break; 412 case "currencyFormat": 413 // for FormatData 414 // copy string for later assembly into NumberPatterns 415 pushStringEntry(qName, attributes, "NumberPatterns/currency"); 416 break; 417 case "percentFormat": 418 // for FormatData 419 // copy string for later assembly into NumberPatterns 420 pushStringEntry(qName, attributes, "NumberPatterns/percent"); 421 break; 422 case "defaultNumberingSystem": 423 // default numbering system if multiple numbering systems are used. 424 pushStringEntry(qName, attributes, "DefaultNumberingSystem"); 425 break; 426 case "symbols": 427 // for FormatData 428 // look up numberingSystems 429 symbols: { 430 String script = attributes.getValue("numberSystem"); 431 if (script == null) { 432 // Has no script. Just ignore. 433 pushIgnoredContainer(qName); 434 break; 435 } 436 437 // Use keys as <script>."NumberElements/<symbol>" 438 currentNumberingSystem = script + "."; 439 String digits = CLDRConverter.handlerNumbering.get(script); 440 if (digits == null) { 441 throw new InternalError("null digits for " + script); 442 } 443 if (Character.isSurrogate(digits.charAt(0))) { 444 // DecimalFormatSymbols doesn't support supplementary characters as digit zero. 445 pushIgnoredContainer(qName); 446 break; 447 } 448 // in case digits are in the reversed order, reverse back the order. 449 if (digits.charAt(0) > digits.charAt(digits.length() - 1)) { 450 StringBuilder sb = new StringBuilder(digits); 451 digits = sb.reverse().toString(); 452 } 453 // Check if the order is sequential. 454 char c0 = digits.charAt(0); 455 for (int i = 1; i < digits.length(); i++) { 456 if (digits.charAt(i) != c0 + i) { 457 pushIgnoredContainer(qName); 458 break symbols; 459 } 460 } 461 @SuppressWarnings("unchecked") 462 List<String> numberingScripts = (List<String>) get("numberingScripts"); 463 if (numberingScripts == null) { 464 numberingScripts = new ArrayList<>(); 465 put("numberingScripts", numberingScripts); 466 } 467 numberingScripts.add(script); 468 put(currentNumberingSystem + "NumberElements/zero", digits.substring(0, 1)); 469 pushContainer(qName, attributes); 470 } 471 break; 472 case "decimal": 473 // for FormatData 474 // copy string for later assembly into NumberElements 475 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/decimal"); 476 break; 477 case "group": 478 // for FormatData 479 // copy string for later assembly into NumberElements 480 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/group"); 481 break; 482 case "list": 483 // for FormatData 484 // copy string for later assembly into NumberElements 485 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/list"); 486 break; 487 case "percentSign": 488 // for FormatData 489 // copy string for later assembly into NumberElements 490 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/percent"); 491 break; 492 case "nativeZeroDigit": 493 // for FormatData 494 // copy string for later assembly into NumberElements 495 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/zero"); 496 break; 497 case "patternDigit": 498 // for FormatData 499 // copy string for later assembly into NumberElements 500 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/pattern"); 501 break; 502 case "plusSign": 503 // TODO: DecimalFormatSymbols doesn't support plusSign 504 pushIgnoredContainer(qName); 505 break; 506 case "minusSign": 507 // for FormatData 508 // copy string for later assembly into NumberElements 509 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/minus"); 510 break; 511 case "exponential": 512 // for FormatData 513 // copy string for later assembly into NumberElements 514 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/exponential"); 515 break; 516 case "perMille": 517 // for FormatData 518 // copy string for later assembly into NumberElements 519 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/permille"); 520 break; 521 case "infinity": 522 // for FormatData 523 // copy string for later assembly into NumberElements 524 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/infinity"); 525 break; 526 case "nan": 527 // for FormatData 528 // copy string for later assembly into NumberElements 529 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/nan"); 530 break; 531 case "timeFormatLength": 532 { 533 // for FormatData 534 // copy string for later assembly into DateTimePatterns 535 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 536 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-time"); 537 } 538 break; 539 case "dateFormatLength": 540 { 541 // for FormatData 542 // copy string for later assembly into DateTimePatterns 543 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 544 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-date"); 545 } 546 break; 547 case "dateTimeFormat": 548 { 549 // for FormatData 550 // copy string for later assembly into DateTimePatterns 551 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 552 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/date-time"); 553 } 554 break; 555 case "localizedPatternChars": 556 { 557 // for FormatData 558 // copy string for later adaptation to JRE use 559 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 560 pushStringEntry(qName, attributes, prefix + "DateTimePatternChars"); 561 } 562 break; 563 564 default: 565 // treat anything else as a container 566 pushContainer(qName, attributes); 567 break; 568 } 569 } 570 571 @Override 572 public void endElement(String uri, String localName, String qName) throws SAXException { 573 assert qName.equals(currentContainer.getqName()) : "current=" + currentContainer.getqName() + ", param=" + qName; 574 switch (qName) { 575 case "calendar": 576 assert !(currentContainer instanceof Entry); 577 currentCalendarType = null; 578 break; 579 580 case "defaultNumberingSystem": 581 if (currentContainer instanceof StringEntry) { 582 defaultNumberingSystem = ((StringEntry) currentContainer).getValue(); 583 assert defaultNumberingSystem != null; 584 put(((StringEntry) currentContainer).getKey(), defaultNumberingSystem); 585 } else { 586 defaultNumberingSystem = null; 587 } 588 break; 589 590 case "timeZoneNames": 591 zonePrefix = null; 592 break; 593 case "generic": 594 case "standard": 595 case "daylight": 596 if (zonePrefix != null && (currentContainer instanceof Entry)) { 597 @SuppressWarnings("unchecked") 598 Map<String, String> valmap = (Map<String, String>) get(zonePrefix + getContainerKey()); 599 Entry<?> entry = (Entry<?>) currentContainer; 600 valmap.put(entry.getKey(), (String) entry.getValue()); 601 } 602 break; 603 default: 604 if (currentContainer instanceof Entry) { 605 Entry<?> entry = (Entry<?>) currentContainer; 606 Object value = entry.getValue(); 607 if (value != null) { 608 put(entry.getKey(), value); 609 } 610 } 611 } 612 currentContainer = currentContainer.getParent(); 613 } 614 }