--- /dev/null Mon Aug 13 12:09:36 2012 +++ new/make/tools/src/build/tools/cldrconverter/LDMLParseHandler.java Mon Aug 13 12:09:32 2012 @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package build.tools.cldrconverter; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +/** + * Handles parsing of files in Locale Data Markup Language and produces a map + * that uses the keys and values of JRE locale data. 
+ */ +class LDMLParseHandler extends AbstractLDMLHandler { + private String defaultNumberingSystem; + private String currentNumberingSystem = ""; + private CalendarType currentCalendarType; + private String zoneNameStyle; // "long" or "short" for time zone names + private String zonePrefix; + private final String id; + + /** + * Creates a handler and stores the given id in the {@code id} field. + * + * @param id identifier kept by this handler (how it is used is not visible in this part of the file) + */ + LDMLParseHandler(String id) { + this.id = id; + } + + /** + * Redirects lookups of the LDML DTD to a local copy. When {@code systemID} starts with + * {@code CLDRConverter.LDML_DTD_SYSTEM_ID}, an {@code InputSource} built from the file URI of + * {@code CLDRConverter.LOCAL_LDML_DTD} is returned; for any other system ID this method returns + * {@code null}. NOTE(review): presumably this overrides the SAX entity-resolver callback inherited + * through AbstractLDMLHandler, where a null result means "use the parser's default resolution" - confirm. + * + * @param publicID public identifier of the entity; not used by this method + * @param systemID system identifier of the entity; dereferenced without a null check + * @return an input source for the local DTD, or {@code null} if the system ID does not match + */ + @Override + public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException { + // avoid HTTP traffic to unicode.org + if (systemID.startsWith(CLDRConverter.LDML_DTD_SYSTEM_ID)) { + return new InputSource((new File(CLDRConverter.LOCAL_LDML_DTD)).toURI().toString()); + } + return null; + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { + switch (qName) { + // + // Generic information + // + case "identity": + // ignore this element - it has language and territory elements that aren't locale data + pushIgnoredContainer(qName); + break; + case "language": + // for LocaleNames + // copy string + pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); + break; + case "script": + // for LocaleNames + // copy string + pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); + break; + case "territory": + // for LocaleNames + // copy string + pushStringEntry(qName, attributes, CLDRConverter.LOCALE_NAME_PREFIX + attributes.getValue("type")); + break; + + // + // Currency information + // + case "currency": + // for CurrencyNames + // stash away "type" value for nested + pushKeyContainer(qName, attributes, attributes.getValue("type")); + break; + case "symbol": + // for CurrencyNames + // need to get the key from the containing element + pushStringEntry(qName, attributes, CLDRConverter.CURRENCY_SYMBOL_PREFIX + getContainerKey()); + break; + case "displayName": + // for CurrencyNames + // need to get the 
key from the containing element + // ignore if is has "count" attribute + String containerKey = getContainerKey(); + if (containerKey != null && attributes.getValue("count") == null) { + pushStringEntry(qName, attributes, + CLDRConverter.CURRENCY_NAME_PREFIX + containerKey.toLowerCase(Locale.ROOT), + attributes.getValue("type")); + } else { + pushIgnoredContainer(qName); + } + break; + + // + // Calendar information + // + case "calendar": + { + // mostly for FormatData (CalendarData items firstDay and minDays are also nested) + // use only if it's supported by java.util.Calendar. + String calendarName = attributes.getValue("type"); + currentCalendarType = CalendarType.forName(calendarName); + if (currentCalendarType != null) { + pushContainer(qName, attributes); + } else { + pushIgnoredContainer(qName); + } + } + break; + case "monthContext": + { + // for FormatData + // need to keep stand-alone and format, to allow for inheritance in CLDR + String type = attributes.getValue("type"); + if ("stand-alone".equals(type) || "format".equals(type)) { + pushKeyContainer(qName, attributes, type); + } else { + pushIgnoredContainer(qName); + } + } + break; + case "monthWidth": + { + // for FormatData + // create string array for the two types that the JRE knows + // keep info about the context type so we can sort out inheritance later + String prefix = (currentCalendarType == null) ? 
"" : currentCalendarType.keyElementName(); + switch (attributes.getValue("type")) { + case "wide": + pushStringArrayEntry(qName, attributes, prefix + "MonthNames/" + getContainerKey(), 13); + break; + case "abbreviated": + pushStringArrayEntry(qName, attributes, prefix + "MonthAbbreviations/" + getContainerKey(), 13); + break; + default: + pushIgnoredContainer(qName); + break; + } + } + break; + case "month": + // for FormatData + // add to string array entry of monthWidth element + pushStringArrayElement(qName, attributes, Integer.parseInt(attributes.getValue("type")) - 1); + break; + case "dayContext": + { + // for FormatData + // need to keep stand-alone and format, to allow for multiple inheritance in CLDR + String type = attributes.getValue("type"); + if ("stand-alone".equals(type) || "format".equals(type)) { + pushKeyContainer(qName, attributes, type); + } else { + pushIgnoredContainer(qName); + } + } + break; + case "dayWidth": + { + // for FormatData + // create string array for the two types that the JRE knows + // keep info about the context type so we can sort out inheritance later + String prefix = (currentCalendarType == null) ? 
"" : currentCalendarType.keyElementName(); + switch (attributes.getValue("type")) { + case "wide": + pushStringArrayEntry(qName, attributes, prefix + "DayNames/" + getContainerKey(), 7); + break; + case "abbreviated": + pushStringArrayEntry(qName, attributes, prefix + "DayAbbreviations/" + getContainerKey(), 7); + break; + default: + pushIgnoredContainer(qName); + break; + } + } + break; + case "day": + // for FormatData + // add to string array entry of monthWidth element + pushStringArrayElement(qName, attributes, Integer.parseInt(DAY_OF_WEEK_MAP.get(attributes.getValue("type"))) - 1); + break; + case "dayPeriodContext": + // for FormatData + // need to keep stand-alone and format, to allow for multiple inheritance in CLDR + // for FormatData + // need to keep stand-alone and format, to allow for multiple inheritance in CLDR + { + String type = attributes.getValue("type"); + if ("stand-alone".equals(type) || "format".equals(type)) { + pushKeyContainer(qName, attributes, type); + } else { + pushIgnoredContainer(qName); + } + } + break; + case "dayPeriodWidth": + // for FormatData + // create string array entry for am/pm. only keeping wide + if ("wide".equals(attributes.getValue("type"))) { + pushStringArrayEntry(qName, attributes, "AmPmMarkers/" + getContainerKey(), 2); + } else { + pushIgnoredContainer(qName); + } + break; + case "dayPeriod": + // for FormatData + // add to string array entry of AmPmMarkers element + switch (attributes.getValue("type")) { + case "am": + pushStringArrayElement(qName, attributes, 0); + break; + case "pm": + pushStringArrayElement(qName, attributes, 1); + break; + default: + pushIgnoredContainer(qName); + break; + } + break; + case "eraNames": + // CLDR era names are inconsistent in terms of their lengths. For example, + // the full names of Japanese imperial eras are eraAbbr, while the full names + // of the Julian eras are eraNames. 
+ if (currentCalendarType == null) { + assert currentContainer instanceof IgnoredContainer; + pushIgnoredContainer(qName); + } else { + String key = currentCalendarType.keyElementName() + "long.Eras"; // for now + pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); + } + break; + case "eraAbbr": + // for FormatData + // create string array entry + if (currentCalendarType == null) { + assert currentContainer instanceof IgnoredContainer; + pushIgnoredContainer(qName); + } else { + String key = currentCalendarType.keyElementName() + "Eras"; + pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); + } + break; + case "eraNarrow": + // mainly used for the Japanese imperial calendar + if (currentCalendarType == null) { + assert currentContainer instanceof IgnoredContainer; + pushIgnoredContainer(qName); + } else { + String key = currentCalendarType.keyElementName() + "short.Eras"; + pushStringArrayEntry(qName, attributes, key, currentCalendarType.getEraLength(qName)); + } + break; + case "era": + // for FormatData + // add to string array entry of eraAbbr element + if (currentCalendarType == null) { + assert currentContainer instanceof IgnoredContainer; + pushIgnoredContainer(qName); + } else { + int index = Integer.parseInt(attributes.getValue("type")); + index = currentCalendarType.normalizeEraIndex(index); + if (index >= 0) { + pushStringArrayElement(qName, attributes, index); + } else { + pushIgnoredContainer(qName); + } + if (currentContainer.getParent() == null) { + throw new InternalError("currentContainer: null parent"); + } + } + break; + + // + // Time zone names + // + case "timeZoneNames": + pushContainer(qName, attributes); + break; + case "zone": + { + String zone = attributes.getValue("type"); + zonePrefix = CLDRConverter.TIMEZONE_ID_PREFIX; + put(zonePrefix + zone, new HashMap()); + pushKeyContainer(qName, attributes, zone); + } + break; + case "metazone": + { + String zone = 
attributes.getValue("type"); + zonePrefix = CLDRConverter.METAZONE_ID_PREFIX; + put(zonePrefix + zone, new HashMap()); + pushKeyContainer(qName, attributes, zone); + } + break; + case "long": + zoneNameStyle = "long"; + pushContainer(qName, attributes); + break; + case "short": + zoneNameStyle = "short"; + pushContainer(qName, attributes); + break; + case "generic": // not used in JDK + pushIgnoredContainer(qName); + break; + case "standard": // standard time + pushStringEntry(qName, attributes, CLDRConverter.TIMEZONE_NAME_PREFIX + "standard." + zoneNameStyle); + break; + case "daylight": + pushStringEntry(qName, attributes, CLDRConverter.TIMEZONE_NAME_PREFIX + "daylight." + zoneNameStyle); + break; + case "exemplarCity": + pushIgnoredContainer(qName); + break; + + // + // Number format information + // + case "decimalFormatLength": + if (attributes.getValue("type") == null) { + // skipping type="short" data + // for FormatData + // copy string for later assembly into NumberPatterns + pushStringEntry(qName, attributes, "NumberPatterns/decimal"); + } else { + pushIgnoredContainer(qName); + } + break; + case "currencyFormat": + // for FormatData + // copy string for later assembly into NumberPatterns + pushStringEntry(qName, attributes, "NumberPatterns/currency"); + break; + case "percentFormat": + // for FormatData + // copy string for later assembly into NumberPatterns + pushStringEntry(qName, attributes, "NumberPatterns/percent"); + break; + case "defaultNumberingSystem": + // default numbering system if multiple numbering systems are used. + pushStringEntry(qName, attributes, "DefaultNumberingSystem"); + break; + case "symbols": + // for FormatData + // look up numberingSystems + symbols: { + String script = attributes.getValue("numberSystem"); + if (script == null) { + // Has no script. Just ignore. + pushIgnoredContainer(qName); + break; + } + + // Use keys as