< prev index next >
src/java.base/share/classes/sun/text/RuleBasedBreakIterator.java
Print this page
*** 1,7 ****
/*
! * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 36,54 ****
*
* This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*/
! package sun.util.locale.provider;
! import java.io.BufferedInputStream;
! import java.io.InputStream;
! import java.io.IOException;
! import java.lang.reflect.Module;
! import java.security.AccessController;
! import java.security.PrivilegedActionException;
! import java.security.PrivilegedExceptionAction;
import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.MissingResourceException;
import sun.text.CompactByteArray;
--- 36,49 ----
*
* This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*/
! package sun.text;
! import java.nio.BufferUnderflowException;
! import java.nio.ByteBuffer;
import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.MissingResourceException;
import sun.text.CompactByteArray;
*** 216,226 ****
* href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>.
* For examples, see the resource data (which is annotated).</p>
*
* @author Richard Gillam
*/
! class RuleBasedBreakIterator extends BreakIterator {
/**
* A token used as a character-category value to identify ignore characters
*/
protected static final byte IGNORE = -1;
--- 211,221 ----
* href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>.
* For examples, see the resource data (which is annotated).</p>
*
* @author Richard Gillam
*/
! public class RuleBasedBreakIterator extends BreakIterator {
/**
* A token used as a character-category value to identify ignore characters
*/
protected static final byte IGNORE = -1;
*** 248,262 ****
* Version number of the dictionary that was read in.
*/
static final byte supportedVersion = 1;
/**
- * Header size in byte count
- */
- private static final int HEADER_LENGTH = 36;
-
- /**
* An array length of indices for BMP characters
*/
private static final int BMP_INDICES_LENGTH = 512;
/**
--- 243,252 ----
*** 313,332 ****
//=======================================================================
// constructors
//=======================================================================
/**
! * Constructs a RuleBasedBreakIterator according to the module and the datafile
! * provided.
*/
! RuleBasedBreakIterator(Module module, String datafile)
! throws IOException, MissingResourceException {
! readTables(module, datafile);
}
/**
! * Read datafile. The datafile's format is as follows:
* <pre>
* BreakIteratorData {
* u1 magic[7];
* u1 version;
* u4 totalDataSize;
--- 303,332 ----
//=======================================================================
// constructors
//=======================================================================
/**
! * Constructs a RuleBasedBreakIterator using the given rule data.
! *
! * @throws MissingResourceException if the rule data is invalid or corrupted
*/
! public RuleBasedBreakIterator(String ruleFile, byte[] ruleData) {
! ByteBuffer bb = ByteBuffer.wrap(ruleData);
! try {
! validateRuleData(ruleFile, bb);
! setupTables(ruleFile, bb);
! } catch (BufferUnderflowException bue) {
! MissingResourceException e;
! e = new MissingResourceException("Corrupted rule data file", ruleFile, "");
! e.initCause(bue);
! throw e;
! }
}
/**
! * Initializes the fields with the given rule data.
! * The data format is as follows:
* <pre>
* BreakIteratorData {
* u1 magic[7];
* u1 version;
* u4 totalDataSize;
*** 368,504 ****
* u1 BMPdata[BMPdataLength];
* u4 nonBMPdata[numNonBMPdataLength];
* u1 additionalData[additionalDataLength];
* }
* </pre>
*/
! protected final void readTables(Module module, String datafile)
! throws IOException, MissingResourceException {
!
! byte[] buffer = readFile(module, datafile);
!
/* Read header_info. */
! int stateTableLength = getInt(buffer, 0);
! int backwardsStateTableLength = getInt(buffer, 4);
! int endStatesLength = getInt(buffer, 8);
! int lookaheadStatesLength = getInt(buffer, 12);
! int BMPdataLength = getInt(buffer, 16);
! int nonBMPdataLength = getInt(buffer, 20);
! int additionalDataLength = getInt(buffer, 24);
! checksum = getLong(buffer, 28);
/* Read stateTable[numCategories * numRows] */
stateTable = new short[stateTableLength];
! int offset = HEADER_LENGTH;
! for (int i = 0; i < stateTableLength; i++, offset+=2) {
! stateTable[i] = getShort(buffer, offset);
}
/* Read backwardsStateTable[numCategories * numRows] */
backwardsStateTable = new short[backwardsStateTableLength];
! for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
! backwardsStateTable[i] = getShort(buffer, offset);
}
/* Read endStates[numRows] */
endStates = new boolean[endStatesLength];
! for (int i = 0; i < endStatesLength; i++, offset++) {
! endStates[i] = buffer[offset] == 1;
}
/* Read lookaheadStates[numRows] */
lookaheadStates = new boolean[lookaheadStatesLength];
! for (int i = 0; i < lookaheadStatesLength; i++, offset++) {
! lookaheadStates[i] = buffer[offset] == 1;
}
/* Read a category table and indices for BMP characters. */
short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices
! for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
! temp1[i] = getShort(buffer, offset);
}
byte[] temp2 = new byte[BMPdataLength]; // BMPdata
! System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
! offset += BMPdataLength;
charCategoryTable = new CompactByteArray(temp1, temp2);
/* Read a category table for non-BMP characters. */
int[] temp3 = new int[nonBMPdataLength];
! for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
! temp3[i] = getInt(buffer, offset);
}
supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
/* Read additional data */
if (additionalDataLength > 0) {
additionalData = new byte[additionalDataLength];
! System.arraycopy(buffer, offset, additionalData, 0, additionalDataLength);
}
/* Set numCategories */
numCategories = stateTable.length / endStates.length;
}
! protected byte[] readFile(final Module module, final String datafile)
! throws IOException, MissingResourceException {
!
! BufferedInputStream is;
! try {
! PrivilegedExceptionAction<BufferedInputStream> pa = () -> {
! String pathName = "jdk.localedata".equals(module.getName()) ?
! "sun/text/resources/ext/" :
! "sun/text/resources/";
! InputStream in = module.getResourceAsStream(pathName + datafile);
! if (in == null) {
! // Try to load the file with "java.base" module instance. Assumption
! // here is that the fall back data files to be read should reside in
! // java.base.
! in = RuleBasedBreakIterator.class.getModule().getResourceAsStream("sun/text/resources/" + datafile);
! }
!
! return new BufferedInputStream(in);
! };
! is = AccessController.doPrivileged(pa);
! } catch (PrivilegedActionException e) {
! throw new InternalError(e.toString(), e);
! }
!
! int offset = 0;
!
! /* First, read magic, version, and header_info. */
! int len = LABEL_LENGTH + 5;
! byte[] buf = new byte[len];
! if (is.read(buf) != len) {
! throw new MissingResourceException("Wrong header length",
! datafile, "");
! }
!
! /* Validate the magic number. */
! for (int i = 0; i < LABEL_LENGTH; i++, offset++) {
! if (buf[offset] != LABEL[offset]) {
throw new MissingResourceException("Wrong magic number",
! datafile, "");
}
}
! /* Validate the version number. */
! if (buf[offset] != supportedVersion) {
! throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
! datafile, "");
}
! /* Read data: totalDataSize + 8(for checksum) */
! len = getInt(buf, ++offset);
! buf = new byte[len];
! if (is.read(buf) != len) {
throw new MissingResourceException("Wrong data length",
! datafile, "");
}
-
- is.close();
-
- return buf;
}
byte[] getAdditionalData() {
return additionalData;
}
--- 368,472 ----
* u1 BMPdata[BMPdataLength];
* u4 nonBMPdata[numNonBMPdataLength];
* u1 additionalData[additionalDataLength];
* }
* </pre>
+ *
+ * @throws BufferUnderflowException if the end-of-data is reached before
+ * setting up all the tables
*/
! private void setupTables(String ruleFile, ByteBuffer bb) {
/* Read header_info. */
! int stateTableLength = bb.getInt();
! int backwardsStateTableLength = bb.getInt();
! int endStatesLength = bb.getInt();
! int lookaheadStatesLength = bb.getInt();
! int BMPdataLength = bb.getInt();
! int nonBMPdataLength = bb.getInt();
! int additionalDataLength = bb.getInt();
! checksum = bb.getLong();
/* Read stateTable[numCategories * numRows] */
stateTable = new short[stateTableLength];
! for (int i = 0; i < stateTableLength; i++) {
! stateTable[i] = bb.getShort();
}
/* Read backwardsStateTable[numCategories * numRows] */
backwardsStateTable = new short[backwardsStateTableLength];
! for (int i = 0; i < backwardsStateTableLength; i++) {
! backwardsStateTable[i] = bb.getShort();
}
/* Read endStates[numRows] */
endStates = new boolean[endStatesLength];
! for (int i = 0; i < endStatesLength; i++) {
! endStates[i] = bb.get() == 1;
}
/* Read lookaheadStates[numRows] */
lookaheadStates = new boolean[lookaheadStatesLength];
! for (int i = 0; i < lookaheadStatesLength; i++) {
! lookaheadStates[i] = bb.get() == 1;
}
/* Read a category table and indices for BMP characters. */
short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices
! for (int i = 0; i < BMP_INDICES_LENGTH; i++) {
! temp1[i] = bb.getShort();
}
byte[] temp2 = new byte[BMPdataLength]; // BMPdata
! bb.get(temp2);
charCategoryTable = new CompactByteArray(temp1, temp2);
/* Read a category table for non-BMP characters. */
int[] temp3 = new int[nonBMPdataLength];
! for (int i = 0; i < nonBMPdataLength; i++) {
! temp3[i] = bb.getInt();
}
supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
/* Read additional data */
if (additionalDataLength > 0) {
additionalData = new byte[additionalDataLength];
! bb.get(additionalData);
}
+ assert bb.position() == bb.limit();
/* Set numCategories */
numCategories = stateTable.length / endStates.length;
}
! /**
! * Validates the magic number, version, and the length of the given data.
! *
! * @throws BufferUnderflowException if the end-of-data is reached while
! * validating data
! * @throws MissingResourceException if validation failed
! */
! void validateRuleData(String ruleFile, ByteBuffer bb) {
! /* Verify the magic number. */
! for (int i = 0; i < LABEL_LENGTH; i++) {
! if (bb.get() != LABEL[i]) {
throw new MissingResourceException("Wrong magic number",
! ruleFile, "");
}
}
! /* Verify the version number. */
! byte version = bb.get();
! if (version != supportedVersion) {
! throw new MissingResourceException("Unsupported version(" + version + ")",
! ruleFile, "");
}
! // Check the length of the rest of the data
! int len = bb.getInt();
! if (bb.position() + len != bb.limit()) {
throw new MissingResourceException("Wrong data length",
! ruleFile, "");
}
}
byte[] getAdditionalData() {
return additionalData;
}
*** 1059,1090 ****
*/
protected int lookupBackwardState(int state, int category) {
return backwardsStateTable[state * numCategories + category];
}
- static long getLong(byte[] buf, int offset) {
- long num = buf[offset]&0xFF;
- for (int i = 1; i < 8; i++) {
- num = num<<8 | (buf[offset+i]&0xFF);
- }
- return num;
- }
-
- static int getInt(byte[] buf, int offset) {
- int num = buf[offset]&0xFF;
- for (int i = 1; i < 4; i++) {
- num = num<<8 | (buf[offset+i]&0xFF);
- }
- return num;
- }
-
- static short getShort(byte[] buf, int offset) {
- short num = (short)(buf[offset]&0xFF);
- num = (short)(num<<8 | (buf[offset+1]&0xFF));
- return num;
- }
-
/*
* This class exists to work around a bug in incorrect implementations
* of CharacterIterator, which incorrectly handle setIndex(endIndex).
* This iterator relies only on base.setIndex(n) where n is less than
* endIndex.
--- 1027,1036 ----
< prev index next >