1 /* 2 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.cldrconverter; 27 28 import java.io.File; 29 import java.io.IOException; 30 import java.util.ArrayList; 31 import java.util.HashMap; 32 import java.util.List; 33 import java.util.Locale; 34 import java.util.Map; 35 import org.xml.sax.Attributes; 36 import org.xml.sax.InputSource; 37 import org.xml.sax.SAXException; 38 39 /** 40 * Handles parsing of files in Locale Data Markup Language and produces a map 41 * that uses the keys and values of JRE locale data. 42 */ 43 class LDMLParseHandler extends AbstractLDMLHandler<Object> { 44 private String defaultNumberingSystem; 45 private String currentNumberingSystem = ""; 46 private CalendarType currentCalendarType; 47 private String zoneNameStyle; // "long" or "short" for time zone names 48 private String zonePrefix; 49 private final String id; 50 51 LDMLParseHandler(String id) { 52 this.id = id; 53 } 54 55 @Override 56 public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException { 57 // avoid HTTP traffic to unicode.org 58 if (systemID.startsWith(CLDRConverter.LDML_DTD_SYSTEM_ID)) { 59 return new InputSource((new File(CLDRConverter.LOCAL_LDML_DTD)).toURI().toString()); 60 } 61 return null; 62 } 63 64 @Override 65 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { 66 switch (qName) { 67 // 68 // Generic information 69 // 70 case "identity": 71 // ignore this element - it has language and territory elements that aren't locale data 72 pushIgnoredContainer(qName); 73 break; 74 case "language": 75 // for LocaleNames 76 // copy string 77 pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); 78 break; 79 case "script": 80 // for LocaleNames 81 // copy string 82 pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); 83 break; 84 case "territory": 85 // for LocaleNames 86 // copy string 87 pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); 88 break; 89 90 // 91 // Currency information 92 // 93 case "currency": 94 // for CurrencyNames 95 // stash away "type" value for nested <symbol> 96 pushKeyContainer(qName, attributes, attributes.getValue("type")); 97 break; 98 case "symbol": 99 // for CurrencyNames 100 // need to get the key from the containing <currency> element 101 pushStringEntry(qName, attributes, CLDRConverter.CURRENCY_SYMBOL_PREFIX + getContainerKey()); 102 break; 103 case "displayName": 104 // for CurrencyNames 105 // need to get the key from the containing <currency> element 106 // ignore if is has "count" attribute 107 String containerKey = getContainerKey(); 108 if (containerKey != null && attributes.getValue("count") == null) { 109 pushStringEntry(qName, attributes, 110 CLDRConverter.CURRENCY_NAME_PREFIX + containerKey.toLowerCase(Locale.ROOT), 111 attributes.getValue("type")); 112 } else { 113 pushIgnoredContainer(qName); 114 } 115 break; 116 117 // 118 // Calendar information 119 // 120 case "calendar": 121 { 122 // mostly for FormatData (CalendarData items firstDay and minDays are also nested) 123 // use only if it's supported by java.util.Calendar. 124 String calendarName = attributes.getValue("type"); 125 currentCalendarType = CalendarType.forName(calendarName); 126 if (currentCalendarType != null) { 127 pushContainer(qName, attributes); 128 } else { 129 pushIgnoredContainer(qName); 130 } 131 } 132 break; 133 case "monthContext": 134 { 135 // for FormatData 136 // need to keep stand-alone and format, to allow for inheritance in CLDR 137 String type = attributes.getValue("type"); 138 if ("stand-alone".equals(type) || "format".equals(type)) { 139 pushKeyContainer(qName, attributes, type); 140 } else { 141 pushIgnoredContainer(qName); 142 } 143 } 144 break; 145 case "monthWidth": 146 { 147 // for FormatData 148 // create string array for the two types that the JRE knows 149 // keep info about the context type so we can sort out inheritance later 150 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 151 switch (attributes.getValue("type")) { 152 case "wide": 153 pushStringArrayEntry(qName, attributes, prefix + "MonthNames/" + getContainerKey(), 13); 154 break; 155 case "abbreviated": 156 pushStringArrayEntry(qName, attributes, prefix + "MonthAbbreviations/" + getContainerKey(), 13); 157 break; 158 default: 159 pushIgnoredContainer(qName); 160 break; 161 } 162 } 163 break; 164 case "month": 165 // for FormatData 166 // add to string array entry of monthWidth element 167 pushStringArrayElement(qName, attributes, Integer.parseInt(attributes.getValue("type")) - 1); 168 break; 169 case "dayContext": 170 { 171 // for FormatData 172 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR 173 String type = attributes.getValue("type"); 174 if ("stand-alone".equals(type) || "format".equals(type)) { 175 pushKeyContainer(qName, attributes, type); 176 } else { 177 pushIgnoredContainer(qName); 178 } 179 } 180 break; 181 case "dayWidth": 182 { 183 // for FormatData 184 // create string array for the two types that the JRE knows 185 // keep info about the context type so we can sort out inheritance later 186 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 187 switch (attributes.getValue("type")) { 188 case "wide": 189 pushStringArrayEntry(qName, attributes, prefix + "DayNames/" + getContainerKey(), 7); 190 break; 191 case "abbreviated": 192 pushStringArrayEntry(qName, attributes, prefix + "DayAbbreviations/" + getContainerKey(), 7); 193 break; 194 default: 195 pushIgnoredContainer(qName); 196 break; 197 } 198 } 199 break; 200 case "day": 201 // for FormatData 202 // add to string array entry of monthWidth element 203 pushStringArrayElement(qName, attributes, Integer.parseInt(DAY_OF_WEEK_MAP.get(attributes.getValue("type"))) - 1); 204 break; 205 case "dayPeriodContext": 206 // for FormatData 207 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR 208 // for FormatData 209 // need to keep stand-alone and format, to allow for multiple inheritance in CLDR 210 { 211 String type = attributes.getValue("type"); 212 if ("stand-alone".equals(type) || "format".equals(type)) { 213 pushKeyContainer(qName, attributes, type); 214 } else { 215 pushIgnoredContainer(qName); 216 } 217 } 218 break; 219 case "dayPeriodWidth": 220 // for FormatData 221 // create string array entry for am/pm. only keeping wide 222 if ("wide".equals(attributes.getValue("type"))) { 223 pushStringArrayEntry(qName, attributes, "AmPmMarkers/" + getContainerKey(), 2); 224 } else { 225 pushIgnoredContainer(qName); 226 } 227 break; 228 case "dayPeriod": 229 // for FormatData 230 // add to string array entry of AmPmMarkers element 231 switch (attributes.getValue("type")) { 232 case "am": 233 pushStringArrayElement(qName, attributes, 0); 234 break; 235 case "pm": 236 pushStringArrayElement(qName, attributes, 1); 237 break; 238 default: 239 pushIgnoredContainer(qName); 240 break; 241 } 242 break; 243 case "eraNames": 244 // CLDR era names are inconsistent in terms of their lengths. For example, 245 // the full names of Japanese imperial eras are eraAbbr, while the full names 246 // of the Julian eras are eraNames. 247 if (currentCalendarType == null) { 248 assert currentContainer instanceof IgnoredContainer; 249 pushIgnoredContainer(qName); 250 } else { 251 String key = currentCalendarType.keyElementName() + "long.Eras"; // for now 252 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); 253 } 254 break; 255 case "eraAbbr": 256 // for FormatData 257 // create string array entry 258 if (currentCalendarType == null) { 259 assert currentContainer instanceof IgnoredContainer; 260 pushIgnoredContainer(qName); 261 } else { 262 String key = currentCalendarType.keyElementName() + "Eras"; 263 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); 264 } 265 break; 266 case "eraNarrow": 267 // mainly used for the Japanese imperial calendar 268 if (currentCalendarType == null) { 269 assert currentContainer instanceof IgnoredContainer; 270 pushIgnoredContainer(qName); 271 } else { 272 String key = currentCalendarType.keyElementName() + "short.Eras"; 273 pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); 274 } 275 break; 276 case "era": 277 // for FormatData 278 // add to string array entry of eraAbbr element 279 if (currentCalendarType == null) { 280 assert currentContainer instanceof IgnoredContainer; 281 pushIgnoredContainer(qName); 282 } else { 283 int index = Integer.parseInt(attributes.getValue("type")); 284 index = currentCalendarType.normalizeEraIndex(index); 285 if (index >= 0) { 286 pushStringArrayElement(qName, attributes, index); 287 } else { 288 pushIgnoredContainer(qName); 289 } 290 if (currentContainer.getParent() == null) { 291 throw new InternalError("currentContainer: null parent"); 292 } 293 } 294 break; 295 296 // 297 // Time zone names 298 // 299 case "timeZoneNames": 300 pushContainer(qName, attributes); 301 break; 302 case "zone": 303 { 304 String zone = attributes.getValue("type"); 305 zonePrefix = CLDRConverter.TIMEZONE_ID_PREFIX; 306 put(zonePrefix + zone, new HashMap<String, String>()); 307 pushKeyContainer(qName, attributes, zone); 308 } 309 break; 310 case "metazone": 311 { 312 String zone = attributes.getValue("type"); 313 zonePrefix = CLDRConverter.METAZONE_ID_PREFIX; 314 put(zonePrefix + zone, new HashMap<String, String>()); 315 pushKeyContainer(qName, attributes, zone); 316 } 317 break; 318 case "long": 319 zoneNameStyle = "long"; 320 pushContainer(qName, attributes); 321 break; 322 case "short": 323 zoneNameStyle = "short"; 324 pushContainer(qName, attributes); 325 break; 326 case "generic": // not used in JDK 327 pushIgnoredContainer(qName); 328 break; 329 case "standard": // standard time 330 pushStringEntry(qName, attributes, CLDRConverter.TIMEZONE_NAME_PREFIX + "standard." + zoneNameStyle); 331 break; 332 case "daylight": 333 pushStringEntry(qName, attributes, CLDRConverter.TIMEZONE_NAME_PREFIX + "daylight." + zoneNameStyle); 334 break; 335 case "exemplarCity": 336 pushIgnoredContainer(qName); 337 break; 338 339 // 340 // Number format information 341 // 342 case "decimalFormatLength": 343 if (attributes.getValue("type") == null) { 344 // skipping type="short" data 345 // for FormatData 346 // copy string for later assembly into NumberPatterns 347 pushStringEntry(qName, attributes, "NumberPatterns/decimal"); 348 } else { 349 pushIgnoredContainer(qName); 350 } 351 break; 352 case "currencyFormat": 353 // for FormatData 354 // copy string for later assembly into NumberPatterns 355 pushStringEntry(qName, attributes, "NumberPatterns/currency"); 356 break; 357 case "percentFormat": 358 // for FormatData 359 // copy string for later assembly into NumberPatterns 360 pushStringEntry(qName, attributes, "NumberPatterns/percent"); 361 break; 362 case "defaultNumberingSystem": 363 // default numbering system if multiple numbering systems are used. 364 pushStringEntry(qName, attributes, "DefaultNumberingSystem"); 365 break; 366 case "symbols": 367 // for FormatData 368 // look up numberingSystems 369 symbols: { 370 String script = attributes.getValue("numberSystem"); 371 if (script == null) { 372 // Has no script. Just ignore. 373 pushIgnoredContainer(qName); 374 break; 375 } 376 377 // Use keys as <script>."NumberElements/<symbol>" 378 currentNumberingSystem = script + "."; 379 String digits = CLDRConverter.handlerNumbering.get(script); 380 if (digits == null) { 381 throw new InternalError("null digits for " + script); 382 } 383 if (Character.isSurrogate(digits.charAt(0))) { 384 // DecimalFormatSymbols doesn't support supplementary characters as digit zero. 385 pushIgnoredContainer(qName); 386 break; 387 } 388 // in case digits are in the reversed order, reverse back the order. 389 if (digits.charAt(0) > digits.charAt(digits.length() - 1)) { 390 StringBuilder sb = new StringBuilder(digits); 391 digits = sb.reverse().toString(); 392 } 393 // Check if the order is sequential. 394 char c0 = digits.charAt(0); 395 for (int i = 1; i < digits.length(); i++) { 396 if (digits.charAt(i) != c0 + i) { 397 pushIgnoredContainer(qName); 398 break symbols; 399 } 400 } 401 @SuppressWarnings("unchecked") 402 List<String> numberingScripts = (List<String>) get("numberingScripts"); 403 if (numberingScripts == null) { 404 numberingScripts = new ArrayList<>(); 405 put("numberingScripts", numberingScripts); 406 } 407 numberingScripts.add(script); 408 put(currentNumberingSystem + "NumberElements/zero", digits.substring(0, 1)); 409 pushContainer(qName, attributes); 410 } 411 break; 412 case "decimal": 413 // for FormatData 414 // copy string for later assembly into NumberElements 415 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/decimal"); 416 break; 417 case "group": 418 // for FormatData 419 // copy string for later assembly into NumberElements 420 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/group"); 421 break; 422 case "list": 423 // for FormatData 424 // copy string for later assembly into NumberElements 425 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/list"); 426 break; 427 case "percentSign": 428 // for FormatData 429 // copy string for later assembly into NumberElements 430 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/percent"); 431 break; 432 case "nativeZeroDigit": 433 // for FormatData 434 // copy string for later assembly into NumberElements 435 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/zero"); 436 break; 437 case "patternDigit": 438 // for FormatData 439 // copy string for later assembly into NumberElements 440 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/pattern"); 441 break; 442 case "plusSign": 443 // TODO: DecimalFormatSymbols doesn't support plusSign 444 pushIgnoredContainer(qName); 445 break; 446 case "minusSign": 447 // for FormatData 448 // copy string for later assembly into NumberElements 449 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/minus"); 450 break; 451 case "exponential": 452 // for FormatData 453 // copy string for later assembly into NumberElements 454 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/exponential"); 455 break; 456 case "perMille": 457 // for FormatData 458 // copy string for later assembly into NumberElements 459 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/permille"); 460 break; 461 case "infinity": 462 // for FormatData 463 // copy string for later assembly into NumberElements 464 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/infinity"); 465 break; 466 case "nan": 467 // for FormatData 468 // copy string for later assembly into NumberElements 469 pushStringEntry(qName, attributes, currentNumberingSystem + "NumberElements/nan"); 470 break; 471 case "timeFormatLength": 472 { 473 // for FormatData 474 // copy string for later assembly into DateTimePatterns 475 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 476 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-time"); 477 } 478 break; 479 case "dateFormatLength": 480 { 481 // for FormatData 482 // copy string for later assembly into DateTimePatterns 483 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 484 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/" + attributes.getValue("type") + "-date"); 485 } 486 break; 487 case "dateTimeFormat": 488 { 489 // for FormatData 490 // copy string for later assembly into DateTimePatterns 491 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 492 pushStringEntry(qName, attributes, prefix + "DateTimePatterns/date-time"); 493 } 494 break; 495 case "localizedPatternChars": 496 { 497 // for FormatData 498 // copy string for later adaptation to JRE use 499 String prefix = (currentCalendarType == null) ? "" : currentCalendarType.keyElementName(); 500 pushStringEntry(qName, attributes, prefix + "DateTimePatternChars"); 501 } 502 break; 503 504 default: 505 // treat anything else as a container 506 pushContainer(qName, attributes); 507 break; 508 } 509 } 510 511 @Override 512 public void endElement(String uri, String localName, String qName) throws SAXException { 513 assert qName.equals(currentContainer.getqName()) : "current=" + currentContainer.getqName() + ", param=" + qName; 514 switch (qName) { 515 case "calendar": 516 assert !(currentContainer instanceof Entry); 517 currentCalendarType = null; 518 break; 519 520 case "defaultNumberingSystem": 521 if (currentContainer instanceof StringEntry) { 522 defaultNumberingSystem = ((StringEntry) currentContainer).getValue(); 523 assert defaultNumberingSystem != null; 524 put(((StringEntry) currentContainer).getKey(), defaultNumberingSystem); 525 } else { 526 defaultNumberingSystem = null; 527 } 528 break; 529 530 case "timeZoneNames": 531 zonePrefix = null; 532 break; 533 case "standard": 534 case "daylight": 535 if (zonePrefix != null && (currentContainer instanceof Entry)) { 536 @SuppressWarnings("unchecked") 537 Map<String, String> valmap = (Map<String, String>) get(zonePrefix + getContainerKey()); 538 Entry<?> entry = (Entry<?>) currentContainer; 539 valmap.put(entry.getKey(), (String) entry.getValue()); 540 } 541 break; 542 default: 543 if (currentContainer instanceof Entry) { 544 Entry<?> entry = (Entry<?>) currentContainer; 545 Object value = entry.getValue(); 546 if (value != null) { 547 put(entry.getKey(), value); 548 } 549 } 550 } 551 currentContainer = currentContainer.getParent(); 552 } 553 }