src/share/classes/sun/util/locale/provider/RuleBasedBreakIterator.java

Print this page
rev 5615 : 6336885: RFE: Locale Data Deployment Enhancements
4609153: Provide locale data for Indic locales
5104387: Support for gl_ES locale (galician language)
6337471: desktop/system locale preferences support
7056139: (cal) SPI support for locale-dependent Calendar parameters
7058206: Provide CalendarData SPI for week params and display field value names
7073852: Support multiple scripts for digits and decimal symbols per locale
7079560: [Fmt-Da] Context dependent month names support in SimpleDateFormat
7171324: getAvailableLocales() of locale sensitive services should return the actual availability of locales
7151414: (cal) Support calendar type identification
7168528: LocaleServiceProvider needs to be aware of Locale extensions
7171372: (cal) locale's default Calendar should be created if unknown calendar is specified
Summary: JEP 127: Improve Locale Data Packaging and Adopt Unicode CLDR Data (part 1 w/o Jigsaw. by Naoto Sato and Masayoshi Okutsu)

*** 1,7 **** /* ! * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this --- 1,7 ---- /* ! * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this
*** 36,59 **** * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. */ ! package java.text; import java.io.BufferedInputStream; import java.io.IOException; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; ! import java.util.Vector; ! import java.util.Stack; ! import java.util.Hashtable; ! import java.util.Enumeration; ! import java.util.MissingResourceException; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import sun.text.CompactByteArray; import sun.text.SupplementaryCharacterData; /** * <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p> --- 36,56 ---- * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. */ ! package sun.util.locale.provider; import java.io.BufferedInputStream; import java.io.IOException; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; ! import java.text.BreakIterator; import java.text.CharacterIterator; import java.text.StringCharacterIterator; + import java.util.MissingResourceException; import sun.text.CompactByteArray; import sun.text.SupplementaryCharacterData; /** * <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p>
*** 317,327 **** /** * Constructs a RuleBasedBreakIterator according to the datafile * provided. */ ! public RuleBasedBreakIterator(String datafile) throws IOException, MissingResourceException { readTables(datafile); } /** --- 314,324 ---- /** * Constructs a RuleBasedBreakIterator according to the datafile * provided. */ ! RuleBasedBreakIterator(String datafile) throws IOException, MissingResourceException { readTables(datafile); } /**
*** 370,405 **** * u4 nonBMPdata[numNonBMPdataLength]; * u1 additionalData[additionalDataLength]; * } * </pre> */ ! protected void readTables(String datafile) throws IOException, MissingResourceException { byte[] buffer = readFile(datafile); /* Read header_info. */ ! int stateTableLength = BreakIterator.getInt(buffer, 0); ! int backwardsStateTableLength = BreakIterator.getInt(buffer, 4); ! int endStatesLength = BreakIterator.getInt(buffer, 8); ! int lookaheadStatesLength = BreakIterator.getInt(buffer, 12); ! int BMPdataLength = BreakIterator.getInt(buffer, 16); ! int nonBMPdataLength = BreakIterator.getInt(buffer, 20); ! int additionalDataLength = BreakIterator.getInt(buffer, 24); ! checksum = BreakIterator.getLong(buffer, 28); /* Read stateTable[numCategories * numRows] */ stateTable = new short[stateTableLength]; int offset = HEADER_LENGTH; for (int i = 0; i < stateTableLength; i++, offset+=2) { ! stateTable[i] = BreakIterator.getShort(buffer, offset); } /* Read backwardsStateTable[numCategories * numRows] */ backwardsStateTable = new short[backwardsStateTableLength]; for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) { ! backwardsStateTable[i] = BreakIterator.getShort(buffer, offset); } /* Read endStates[numRows] */ endStates = new boolean[endStatesLength]; for (int i = 0; i < endStatesLength; i++, offset++) { --- 367,402 ---- * u4 nonBMPdata[numNonBMPdataLength]; * u1 additionalData[additionalDataLength]; * } * </pre> */ ! protected final void readTables(String datafile) throws IOException, MissingResourceException { byte[] buffer = readFile(datafile); /* Read header_info. */ ! int stateTableLength = getInt(buffer, 0); ! int backwardsStateTableLength = getInt(buffer, 4); ! int endStatesLength = getInt(buffer, 8); ! int lookaheadStatesLength = getInt(buffer, 12); ! int BMPdataLength = getInt(buffer, 16); ! int nonBMPdataLength = getInt(buffer, 20); ! int additionalDataLength = getInt(buffer, 24); ! checksum = getLong(buffer, 28); /* Read stateTable[numCategories * numRows] */ stateTable = new short[stateTableLength]; int offset = HEADER_LENGTH; for (int i = 0; i < stateTableLength; i++, offset+=2) { ! stateTable[i] = getShort(buffer, offset); } /* Read backwardsStateTable[numCategories * numRows] */ backwardsStateTable = new short[backwardsStateTableLength]; for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) { ! backwardsStateTable[i] = getShort(buffer, offset); } /* Read endStates[numRows] */ endStates = new boolean[endStatesLength]; for (int i = 0; i < endStatesLength; i++, offset++) {
*** 413,433 **** } /* Read a category table and indices for BMP characters. */ short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) { ! temp1[i] = BreakIterator.getShort(buffer, offset); } byte[] temp2 = new byte[BMPdataLength]; // BMPdata System.arraycopy(buffer, offset, temp2, 0, BMPdataLength); offset += BMPdataLength; charCategoryTable = new CompactByteArray(temp1, temp2); /* Read a category table for non-BMP characters. */ int[] temp3 = new int[nonBMPdataLength]; for (int i = 0; i < nonBMPdataLength; i++, offset+=4) { ! temp3[i] = BreakIterator.getInt(buffer, offset); } supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3); /* Read additional data */ if (additionalDataLength > 0) { --- 410,430 ---- } /* Read a category table and indices for BMP characters. */ short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) { ! temp1[i] = getShort(buffer, offset); } byte[] temp2 = new byte[BMPdataLength]; // BMPdata System.arraycopy(buffer, offset, temp2, 0, BMPdataLength); offset += BMPdataLength; charCategoryTable = new CompactByteArray(temp1, temp2); /* Read a category table for non-BMP characters. */ int[] temp3 = new int[nonBMPdataLength]; for (int i = 0; i < nonBMPdataLength; i++, offset+=4) { ! temp3[i] = getInt(buffer, offset); } supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3); /* Read additional data */ if (additionalDataLength > 0) {
*** 444,453 **** --- 441,451 ---- BufferedInputStream is; try { is = AccessController.doPrivileged( new PrivilegedExceptionAction<BufferedInputStream>() { + @Override public BufferedInputStream run() throws Exception { return new BufferedInputStream(getClass().getResourceAsStream("/sun/text/resources/" + datafile)); } } );
*** 479,489 **** throw new MissingResourceException("Unsupported version(" + buf[offset] + ")", datafile, ""); } /* Read data: totalDataSize + 8(for checksum) */ ! len = BreakIterator.getInt(buf, ++offset); buf = new byte[len]; if (is.read(buf) != len) { throw new MissingResourceException("Wrong data length", datafile, ""); } --- 477,487 ---- throw new MissingResourceException("Unsupported version(" + buf[offset] + ")", datafile, ""); } /* Read data: totalDataSize + 8(for checksum) */ ! len = getInt(buf, ++offset); buf = new byte[len]; if (is.read(buf) != len) { throw new MissingResourceException("Wrong data length", datafile, ""); }
*** 507,516 **** --- 505,515 ---- /** * Clones this iterator. * @return A newly-constructed RuleBasedBreakIterator with the same * behavior as this one. */ + @Override public Object clone() { RuleBasedBreakIterator result = (RuleBasedBreakIterator) super.clone(); if (text != null) { result.text = (CharacterIterator) text.clone(); }
*** 519,528 **** --- 518,528 ---- /** * Returns true if both BreakIterators are of the same class, have the same * rules, and iterate over the same text. */ + @Override public boolean equals(Object that) { try { if (that == null) { return false; }
*** 543,564 **** } /** * Returns text */ public String toString() { ! StringBuffer sb = new StringBuffer(); sb.append('['); ! sb.append("checksum=0x" + Long.toHexString(checksum)); sb.append(']'); return sb.toString(); } /** * Compute a hashcode for this BreakIterator * @return A hash code */ public int hashCode() { return (int)checksum; } //======================================================================= --- 543,567 ---- } /** * Returns text */ + @Override public String toString() { ! StringBuilder sb = new StringBuilder(); sb.append('['); ! sb.append("checksum=0x"); ! sb.append(Long.toHexString(checksum)); sb.append(']'); return sb.toString(); } /** * Compute a hashcode for this BreakIterator * @return A hash code */ + @Override public int hashCode() { return (int)checksum; } //=======================================================================
*** 568,577 **** --- 571,581 ---- /** * Sets the current iteration position to the beginning of the text. * (i.e., the CharacterIterator's starting offset). * @return The offset of the beginning of the text. */ + @Override public int first() { CharacterIterator t = getText(); t.first(); return t.getIndex();
*** 580,589 **** --- 584,594 ---- /** * Sets the current iteration position to the end of the text. * (i.e., the CharacterIterator's ending offset). * @return The text's past-the-end offset. */ + @Override public int last() { CharacterIterator t = getText(); // I'm not sure why, but t.last() returns the offset of the last character, // rather than the past-the-end offset
*** 598,607 **** --- 603,613 ---- * @param n The number of steps to move. The sign indicates the direction * (negative is backwards, and positive is forwards). * @return The character offset of the boundary position n boundaries away from * the current one. */ + @Override public int next(int n) { int result = current(); while (n > 0) { result = handleNext(); --n;
*** 615,624 **** --- 621,631 ---- /** * Advances the iterator to the next boundary position. * @return The position of the first boundary after this one. */ + @Override public int next() { return handleNext(); } private int cachedLastKnownBreak = BreakIterator.DONE;
*** 625,634 **** --- 632,642 ---- /** * Advances the iterator backwards, to the last boundary preceding this one. * @return The position of the last boundary position preceding this one. */ + @Override public int previous() { // if we're already sitting at the beginning of the text, return DONE CharacterIterator text = getText(); if (current() == text.getBeginIndex()) { return BreakIterator.DONE;
*** 721,731 **** */ int getNext() { int index = text.getIndex(); int endIndex = text.getEndIndex(); if (index == endIndex || ! (index = index + getCurrentCodePointCount()) >= endIndex) { return CharacterIterator.DONE; } text.setIndex(index); return getCurrent(); } --- 729,739 ---- */ int getNext() { int index = text.getIndex(); int endIndex = text.getEndIndex(); if (index == endIndex || ! (index += getCurrentCodePointCount()) >= endIndex) { return CharacterIterator.DONE; } text.setIndex(index); return getCurrent(); }
*** 756,765 **** --- 764,774 ---- * Sets the iterator to refer to the first boundary position following * the specified position. * @offset The position from which to begin searching for a break position. * @return The position of the first break after the current position. */ + @Override public int following(int offset) { CharacterIterator text = getText(); checkOffset(offset, text);
*** 799,808 **** --- 808,818 ---- * Sets the iterator to refer to the last boundary position before the * specified position. * @offset The position to begin searching for a break from. * @return The position of the last boundary before the starting position. */ + @Override public int preceding(int offset) { // if we start by updating the current iteration position to the // position specified by the caller, we can just use previous() // to carry out this operation CharacterIterator text = getText();
*** 816,825 **** --- 826,836 ---- * effect, leaves the iterator pointing to the first boundary position at * or after "offset". * @param offset the offset to check. * @return True if "offset" is a boundary position. */ + @Override public boolean isBoundary(int offset) { CharacterIterator text = getText(); checkOffset(offset, text); if (offset == text.getBeginIndex()) { return true;
*** 835,844 **** --- 846,856 ---- /** * Returns the current iteration position. * @return The current iteration position. */ + @Override public int current() { return getText().getIndex(); } /**
*** 846,855 **** --- 858,868 ---- * of this method returns the actual CharacterIterator we're using internally. * Changing the state of this iterator can have undefined consequences. If * you need to change it, clone it first. * @return An iterator over the text being analyzed. */ + @Override public CharacterIterator getText() { // The iterator is initialized pointing to no text at all, so if this // function is called while we're in that state, we have to fudge an // iterator to return. if (text == null) {
*** 861,870 **** --- 874,884 ---- /** * Set the iterator to analyze a new piece of text. This function resets * the current iteration position to the beginning of the text. * @param newText An iterator over the text to analyze. */ + @Override public void setText(CharacterIterator newText) { // Test iterator to see if we need to wrap it in a SafeCharIterator. // The correct behavior for CharacterIterators is to allow the // position to be set to the endpoint of the iterator. Many // CharacterIterators do not uphold this, so this is a workaround
*** 1042,1060 **** --- 1056,1097 ---- */ protected int lookupBackwardState(int state, int category) { return backwardsStateTable[state * numCategories + category]; } + static long getLong(byte[] buf, int offset) { + long num = buf[offset]&0xFF; + for (int i = 1; i < 8; i++) { + num = num<<8 | (buf[offset+i]&0xFF); + } + return num; + } + + static int getInt(byte[] buf, int offset) { + int num = buf[offset]&0xFF; + for (int i = 1; i < 4; i++) { + num = num<<8 | (buf[offset+i]&0xFF); + } + return num; + } + + static short getShort(byte[] buf, int offset) { + short num = (short)(buf[offset]&0xFF); + num = (short)(num<<8 | (buf[offset+1]&0xFF)); + return num; + } + /* * This class exists to work around a bug in incorrect implementations * of CharacterIterator, which incorrectly handle setIndex(endIndex). * This iterator relies only on base.setIndex(n) where n is less than * endIndex. * * One caveat: if the base iterator's begin and end indices change * the change will not be reflected by this wrapper. Does that matter? */ + // TODO: Review this class to see if it's still required. private static final class SafeCharIterator implements CharacterIterator, Cloneable { private CharacterIterator base; private int rangeStart;
*** 1066,1092 **** --- 1103,1133 ---- this.rangeStart = base.getBeginIndex(); this.rangeLimit = base.getEndIndex(); this.currentIndex = base.getIndex(); } + @Override public char first() { return setIndex(rangeStart); } + @Override public char last() { return setIndex(rangeLimit - 1); } + @Override public char current() { if (currentIndex < rangeStart || currentIndex >= rangeLimit) { return DONE; } else { return base.setIndex(currentIndex); } } + @Override public char next() { currentIndex++; if (currentIndex >= rangeLimit) { currentIndex = rangeLimit;
*** 1095,1104 **** --- 1136,1146 ---- else { return base.setIndex(currentIndex); } } + @Override public char previous() { currentIndex--; if (currentIndex < rangeStart) { currentIndex = rangeStart;
*** 1107,1137 **** --- 1149,1184 ---- else { return base.setIndex(currentIndex); } } + @Override public char setIndex(int i) { if (i < rangeStart || i > rangeLimit) { throw new IllegalArgumentException("Invalid position"); } currentIndex = i; return current(); } + @Override public int getBeginIndex() { return rangeStart; } + @Override public int getEndIndex() { return rangeLimit; } + @Override public int getIndex() { return currentIndex; } + @Override public Object clone() { SafeCharIterator copy = null; try { copy = (SafeCharIterator) super.clone();