src/share/classes/sun/util/locale/provider/RuleBasedBreakIterator.java
Print this page
rev 5615 : 6336885: RFE: Locale Data Deployment Enhancements
4609153: Provide locale data for Indic locales
5104387: Support for gl_ES locale (galician language)
6337471: desktop/system locale preferences support
7056139: (cal) SPI support for locale-dependent Calendar parameters
7058206: Provide CalendarData SPI for week params and display field value names
7073852: Support multiple scripts for digits and decimal symbols per locale
7079560: [Fmt-Da] Context dependent month names support in SimpleDateFormat
7171324: getAvailableLocales() of locale sensitive services should return the actual availability of locales
7151414: (cal) Support calendar type identification
7168528: LocaleServiceProvider needs to be aware of Locale extensions
7171372: (cal) locale's default Calendar should be created if unknown calendar is specified
Summary: JEP 127: Improve Locale Data Packaging and Adopt Unicode CLDR Data (part 1 w/o Jigsaw. by Naoto Sato and Masayoshi Okutsu)
*** 1,7 ****
/*
! * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 36,59 ****
*
* This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*/
! package java.text;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
! import java.util.Vector;
! import java.util.Stack;
! import java.util.Hashtable;
! import java.util.Enumeration;
! import java.util.MissingResourceException;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import sun.text.CompactByteArray;
import sun.text.SupplementaryCharacterData;
/**
* <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p>
--- 36,56 ----
*
* This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*/
! package sun.util.locale.provider;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
! import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
+ import java.util.MissingResourceException;
import sun.text.CompactByteArray;
import sun.text.SupplementaryCharacterData;
/**
* <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p>
*** 317,327 ****
/**
* Constructs a RuleBasedBreakIterator according to the datafile
* provided.
*/
! public RuleBasedBreakIterator(String datafile)
throws IOException, MissingResourceException {
readTables(datafile);
}
/**
--- 314,324 ----
/**
* Constructs a RuleBasedBreakIterator according to the datafile
* provided.
*/
! RuleBasedBreakIterator(String datafile)
throws IOException, MissingResourceException {
readTables(datafile);
}
/**
*** 370,405 ****
* u4 nonBMPdata[numNonBMPdataLength];
* u1 additionalData[additionalDataLength];
* }
* </pre>
*/
! protected void readTables(String datafile)
throws IOException, MissingResourceException {
byte[] buffer = readFile(datafile);
/* Read header_info. */
! int stateTableLength = BreakIterator.getInt(buffer, 0);
! int backwardsStateTableLength = BreakIterator.getInt(buffer, 4);
! int endStatesLength = BreakIterator.getInt(buffer, 8);
! int lookaheadStatesLength = BreakIterator.getInt(buffer, 12);
! int BMPdataLength = BreakIterator.getInt(buffer, 16);
! int nonBMPdataLength = BreakIterator.getInt(buffer, 20);
! int additionalDataLength = BreakIterator.getInt(buffer, 24);
! checksum = BreakIterator.getLong(buffer, 28);
/* Read stateTable[numCategories * numRows] */
stateTable = new short[stateTableLength];
int offset = HEADER_LENGTH;
for (int i = 0; i < stateTableLength; i++, offset+=2) {
! stateTable[i] = BreakIterator.getShort(buffer, offset);
}
/* Read backwardsStateTable[numCategories * numRows] */
backwardsStateTable = new short[backwardsStateTableLength];
for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
! backwardsStateTable[i] = BreakIterator.getShort(buffer, offset);
}
/* Read endStates[numRows] */
endStates = new boolean[endStatesLength];
for (int i = 0; i < endStatesLength; i++, offset++) {
--- 367,402 ----
* u4 nonBMPdata[numNonBMPdataLength];
* u1 additionalData[additionalDataLength];
* }
* </pre>
*/
! protected final void readTables(String datafile)
throws IOException, MissingResourceException {
byte[] buffer = readFile(datafile);
/* Read header_info. */
! int stateTableLength = getInt(buffer, 0);
! int backwardsStateTableLength = getInt(buffer, 4);
! int endStatesLength = getInt(buffer, 8);
! int lookaheadStatesLength = getInt(buffer, 12);
! int BMPdataLength = getInt(buffer, 16);
! int nonBMPdataLength = getInt(buffer, 20);
! int additionalDataLength = getInt(buffer, 24);
! checksum = getLong(buffer, 28);
/* Read stateTable[numCategories * numRows] */
stateTable = new short[stateTableLength];
int offset = HEADER_LENGTH;
for (int i = 0; i < stateTableLength; i++, offset+=2) {
! stateTable[i] = getShort(buffer, offset);
}
/* Read backwardsStateTable[numCategories * numRows] */
backwardsStateTable = new short[backwardsStateTableLength];
for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
! backwardsStateTable[i] = getShort(buffer, offset);
}
/* Read endStates[numRows] */
endStates = new boolean[endStatesLength];
for (int i = 0; i < endStatesLength; i++, offset++) {
*** 413,433 ****
}
/* Read a category table and indices for BMP characters. */
short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices
for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
! temp1[i] = BreakIterator.getShort(buffer, offset);
}
byte[] temp2 = new byte[BMPdataLength]; // BMPdata
System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
offset += BMPdataLength;
charCategoryTable = new CompactByteArray(temp1, temp2);
/* Read a category table for non-BMP characters. */
int[] temp3 = new int[nonBMPdataLength];
for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
! temp3[i] = BreakIterator.getInt(buffer, offset);
}
supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
/* Read additional data */
if (additionalDataLength > 0) {
--- 410,430 ----
}
/* Read a category table and indices for BMP characters. */
short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices
for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
! temp1[i] = getShort(buffer, offset);
}
byte[] temp2 = new byte[BMPdataLength]; // BMPdata
System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
offset += BMPdataLength;
charCategoryTable = new CompactByteArray(temp1, temp2);
/* Read a category table for non-BMP characters. */
int[] temp3 = new int[nonBMPdataLength];
for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
! temp3[i] = getInt(buffer, offset);
}
supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
/* Read additional data */
if (additionalDataLength > 0) {
*** 444,453 ****
--- 441,451 ----
BufferedInputStream is;
try {
is = AccessController.doPrivileged(
new PrivilegedExceptionAction<BufferedInputStream>() {
+ @Override
public BufferedInputStream run() throws Exception {
return new BufferedInputStream(getClass().getResourceAsStream("/sun/text/resources/" + datafile));
}
}
);
*** 479,489 ****
throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
datafile, "");
}
/* Read data: totalDataSize + 8(for checksum) */
! len = BreakIterator.getInt(buf, ++offset);
buf = new byte[len];
if (is.read(buf) != len) {
throw new MissingResourceException("Wrong data length",
datafile, "");
}
--- 477,487 ----
throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
datafile, "");
}
/* Read data: totalDataSize + 8(for checksum) */
! len = getInt(buf, ++offset);
buf = new byte[len];
if (is.read(buf) != len) {
throw new MissingResourceException("Wrong data length",
datafile, "");
}
*** 507,516 ****
--- 505,515 ----
/**
* Clones this iterator.
* @return A newly-constructed RuleBasedBreakIterator with the same
* behavior as this one.
*/
+ @Override
public Object clone() {
RuleBasedBreakIterator result = (RuleBasedBreakIterator) super.clone();
if (text != null) {
result.text = (CharacterIterator) text.clone();
}
*** 519,528 ****
--- 518,528 ----
/**
* Returns true if both BreakIterators are of the same class, have the same
* rules, and iterate over the same text.
*/
+ @Override
public boolean equals(Object that) {
try {
if (that == null) {
return false;
}
*** 543,564 ****
}
/**
* Returns text
*/
public String toString() {
! StringBuffer sb = new StringBuffer();
sb.append('[');
! sb.append("checksum=0x" + Long.toHexString(checksum));
sb.append(']');
return sb.toString();
}
/**
* Compute a hashcode for this BreakIterator
* @return A hash code
*/
public int hashCode() {
return (int)checksum;
}
//=======================================================================
--- 543,567 ----
}
/**
* Returns text
*/
+ @Override
public String toString() {
! StringBuilder sb = new StringBuilder();
sb.append('[');
! sb.append("checksum=0x");
! sb.append(Long.toHexString(checksum));
sb.append(']');
return sb.toString();
}
/**
* Compute a hashcode for this BreakIterator
* @return A hash code
*/
+ @Override
public int hashCode() {
return (int)checksum;
}
//=======================================================================
*** 568,577 ****
--- 571,581 ----
/**
* Sets the current iteration position to the beginning of the text.
* (i.e., the CharacterIterator's starting offset).
* @return The offset of the beginning of the text.
*/
+ @Override
public int first() {
CharacterIterator t = getText();
t.first();
return t.getIndex();
*** 580,589 ****
--- 584,594 ----
/**
* Sets the current iteration position to the end of the text.
* (i.e., the CharacterIterator's ending offset).
* @return The text's past-the-end offset.
*/
+ @Override
public int last() {
CharacterIterator t = getText();
// By specification, t.last() moves to the last character of the text
// (and returns that character), not to the past-the-end offset
*** 598,607 ****
--- 603,613 ----
* @param n The number of steps to move. The sign indicates the direction
* (negative is backwards, and positive is forwards).
* @return The character offset of the boundary position n boundaries away from
* the current one.
*/
+ @Override
public int next(int n) {
int result = current();
while (n > 0) {
result = handleNext();
--n;
*** 615,624 ****
--- 621,631 ----
/**
* Advances the iterator to the next boundary position.
* @return The position of the first boundary after this one.
*/
+ @Override
public int next() {
return handleNext();
}
private int cachedLastKnownBreak = BreakIterator.DONE;
*** 625,634 ****
--- 632,642 ----
/**
* Advances the iterator backwards, to the last boundary preceding this one.
* @return The position of the last boundary position preceding this one.
*/
+ @Override
public int previous() {
// if we're already sitting at the beginning of the text, return DONE
CharacterIterator text = getText();
if (current() == text.getBeginIndex()) {
return BreakIterator.DONE;
*** 721,731 ****
*/
int getNext() {
int index = text.getIndex();
int endIndex = text.getEndIndex();
if (index == endIndex ||
! (index = index + getCurrentCodePointCount()) >= endIndex) {
return CharacterIterator.DONE;
}
text.setIndex(index);
return getCurrent();
}
--- 729,739 ----
*/
int getNext() {
int index = text.getIndex();
int endIndex = text.getEndIndex();
if (index == endIndex ||
! (index += getCurrentCodePointCount()) >= endIndex) {
return CharacterIterator.DONE;
}
text.setIndex(index);
return getCurrent();
}
*** 756,765 ****
--- 764,774 ----
* Sets the iterator to refer to the first boundary position following
* the specified position.
* @param offset The position from which to begin searching for a break position.
* @return The position of the first break after the current position.
*/
+ @Override
public int following(int offset) {
CharacterIterator text = getText();
checkOffset(offset, text);
*** 799,808 ****
--- 808,818 ----
* Sets the iterator to refer to the last boundary position before the
* specified position.
* @param offset The position to begin searching for a break from.
* @return The position of the last boundary before the starting position.
*/
+ @Override
public int preceding(int offset) {
// if we start by updating the current iteration position to the
// position specified by the caller, we can just use previous()
// to carry out this operation
CharacterIterator text = getText();
*** 816,825 ****
--- 826,836 ----
* effect, leaves the iterator pointing to the first boundary position at
* or after "offset".
* @param offset the offset to check.
* @return True if "offset" is a boundary position.
*/
+ @Override
public boolean isBoundary(int offset) {
CharacterIterator text = getText();
checkOffset(offset, text);
if (offset == text.getBeginIndex()) {
return true;
*** 835,844 ****
--- 846,856 ----
/**
* Returns the current iteration position.
* @return The current iteration position.
*/
+ @Override
public int current() {
return getText().getIndex();
}
/**
*** 846,855 ****
--- 858,868 ----
* of this method returns the actual CharacterIterator we're using internally.
* Changing the state of this iterator can have undefined consequences. If
* you need to change it, clone it first.
* @return An iterator over the text being analyzed.
*/
+ @Override
public CharacterIterator getText() {
// The iterator is initialized pointing to no text at all, so if this
// function is called while we're in that state, we have to fudge an
// iterator to return.
if (text == null) {
*** 861,870 ****
--- 874,884 ----
/**
* Set the iterator to analyze a new piece of text. This function resets
* the current iteration position to the beginning of the text.
* @param newText An iterator over the text to analyze.
*/
+ @Override
public void setText(CharacterIterator newText) {
// Test iterator to see if we need to wrap it in a SafeCharIterator.
// The correct behavior for CharacterIterators is to allow the
// position to be set to the endpoint of the iterator. Many
// CharacterIterators do not uphold this, so this is a workaround
*** 1042,1060 ****
--- 1056,1097 ----
*/
protected int lookupBackwardState(int state, int category) {
return backwardsStateTable[state * numCategories + category];
}
+ static long getLong(byte[] buf, int offset) {
+ long num = buf[offset]&0xFF;
+ for (int i = 1; i < 8; i++) {
+ num = num<<8 | (buf[offset+i]&0xFF);
+ }
+ return num;
+ }
+
+ static int getInt(byte[] buf, int offset) {
+ int num = buf[offset]&0xFF;
+ for (int i = 1; i < 4; i++) {
+ num = num<<8 | (buf[offset+i]&0xFF);
+ }
+ return num;
+ }
+
+ static short getShort(byte[] buf, int offset) {
+ short num = (short)(buf[offset]&0xFF);
+ num = (short)(num<<8 | (buf[offset+1]&0xFF));
+ return num;
+ }
+
/*
* This class exists to work around faulty implementations of
* CharacterIterator that mishandle setIndex(endIndex).
* This iterator relies only on base.setIndex(n) where n is less than
* endIndex.
*
* One caveat: if the base iterator's begin and end indices change,
* the change will not be reflected by this wrapper. Does that matter?
*/
+ // TODO: Review this class to see if it's still required.
private static final class SafeCharIterator implements CharacterIterator,
Cloneable {
private CharacterIterator base;
private int rangeStart;
*** 1066,1092 ****
--- 1103,1133 ----
this.rangeStart = base.getBeginIndex();
this.rangeLimit = base.getEndIndex();
this.currentIndex = base.getIndex();
}
+ @Override
public char first() {
return setIndex(rangeStart);
}
+ @Override
public char last() {
return setIndex(rangeLimit - 1);
}
+ @Override
public char current() {
if (currentIndex < rangeStart || currentIndex >= rangeLimit) {
return DONE;
}
else {
return base.setIndex(currentIndex);
}
}
+ @Override
public char next() {
currentIndex++;
if (currentIndex >= rangeLimit) {
currentIndex = rangeLimit;
*** 1095,1104 ****
--- 1136,1146 ----
else {
return base.setIndex(currentIndex);
}
}
+ @Override
public char previous() {
currentIndex--;
if (currentIndex < rangeStart) {
currentIndex = rangeStart;
*** 1107,1137 ****
--- 1149,1184 ----
else {
return base.setIndex(currentIndex);
}
}
+ @Override
public char setIndex(int i) {
if (i < rangeStart || i > rangeLimit) {
throw new IllegalArgumentException("Invalid position");
}
currentIndex = i;
return current();
}
+ @Override
public int getBeginIndex() {
return rangeStart;
}
+ @Override
public int getEndIndex() {
return rangeLimit;
}
+ @Override
public int getIndex() {
return currentIndex;
}
+ @Override
public Object clone() {
SafeCharIterator copy = null;
try {
copy = (SafeCharIterator) super.clone();