< prev index next >

src/java.base/share/classes/sun/text/RuleBasedBreakIterator.java

Print this page

        

*** 1,7 **** /* ! * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this --- 1,7 ---- /* ! * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this
*** 36,54 **** * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. */ ! package sun.util.locale.provider; ! import java.io.BufferedInputStream; ! import java.io.InputStream; ! import java.io.IOException; ! import java.lang.reflect.Module; ! import java.security.AccessController; ! import java.security.PrivilegedActionException; ! import java.security.PrivilegedExceptionAction; import java.text.BreakIterator; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.MissingResourceException; import sun.text.CompactByteArray; --- 36,49 ---- * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. */ ! package sun.text; ! import java.nio.BufferUnderflowException; ! import java.nio.ByteBuffer; import java.text.BreakIterator; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.MissingResourceException; import sun.text.CompactByteArray;
*** 216,226 **** * href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>. * &nbsp; For examples, see the resource data (which is annotated).</p> * * @author Richard Gillam */ ! class RuleBasedBreakIterator extends BreakIterator { /** * A token used as a character-category value to identify ignore characters */ protected static final byte IGNORE = -1; --- 211,221 ---- * href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>. * &nbsp; For examples, see the resource data (which is annotated).</p> * * @author Richard Gillam */ ! public class RuleBasedBreakIterator extends BreakIterator { /** * A token used as a character-category value to identify ignore characters */ protected static final byte IGNORE = -1;
*** 248,262 **** * Version number of the dictionary that was read in. */ static final byte supportedVersion = 1; /** - * Header size in byte count - */ - private static final int HEADER_LENGTH = 36; - - /** * An array length of indices for BMP characters */ private static final int BMP_INDICES_LENGTH = 512; /** --- 243,252 ----
*** 313,332 **** //======================================================================= // constructors //======================================================================= /** ! * Constructs a RuleBasedBreakIterator according to the module and the datafile ! * provided. */ ! RuleBasedBreakIterator(Module module, String datafile) ! throws IOException, MissingResourceException { ! readTables(module, datafile); } /** ! * Read datafile. The datafile's format is as follows: * <pre> * BreakIteratorData { * u1 magic[7]; * u1 version; * u4 totalDataSize; --- 303,332 ---- //======================================================================= // constructors //======================================================================= /** ! * Constructs a RuleBasedBreakIterator using the given rule data. ! * ! * @throws MissingResourceException if the rule data is invalid or corrupted */ ! public RuleBasedBreakIterator(String ruleFile, byte[] ruleData) { ! ByteBuffer bb = ByteBuffer.wrap(ruleData); ! try { ! validateRuleData(ruleFile, bb); ! setupTables(ruleFile, bb); ! } catch (BufferUnderflowException bue) { ! MissingResourceException e; ! e = new MissingResourceException("Corrupted rule data file", ruleFile, ""); ! e.initCause(bue); ! throw e; ! } } /** ! * Initializes the fields with the given rule data. ! * The data format is as follows: * <pre> * BreakIteratorData { * u1 magic[7]; * u1 version; * u4 totalDataSize;
*** 368,504 **** * u1 BMPdata[BMPdataLength]; * u4 nonBMPdata[numNonBMPdataLength]; * u1 additionalData[additionalDataLength]; * } * </pre> */ ! protected final void readTables(Module module, String datafile) ! throws IOException, MissingResourceException { ! ! byte[] buffer = readFile(module, datafile); ! /* Read header_info. */ ! int stateTableLength = getInt(buffer, 0); ! int backwardsStateTableLength = getInt(buffer, 4); ! int endStatesLength = getInt(buffer, 8); ! int lookaheadStatesLength = getInt(buffer, 12); ! int BMPdataLength = getInt(buffer, 16); ! int nonBMPdataLength = getInt(buffer, 20); ! int additionalDataLength = getInt(buffer, 24); ! checksum = getLong(buffer, 28); /* Read stateTable[numCategories * numRows] */ stateTable = new short[stateTableLength]; ! int offset = HEADER_LENGTH; ! for (int i = 0; i < stateTableLength; i++, offset+=2) { ! stateTable[i] = getShort(buffer, offset); } /* Read backwardsStateTable[numCategories * numRows] */ backwardsStateTable = new short[backwardsStateTableLength]; ! for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) { ! backwardsStateTable[i] = getShort(buffer, offset); } /* Read endStates[numRows] */ endStates = new boolean[endStatesLength]; ! for (int i = 0; i < endStatesLength; i++, offset++) { ! endStates[i] = buffer[offset] == 1; } /* Read lookaheadStates[numRows] */ lookaheadStates = new boolean[lookaheadStatesLength]; ! for (int i = 0; i < lookaheadStatesLength; i++, offset++) { ! lookaheadStates[i] = buffer[offset] == 1; } /* Read a category table and indices for BMP characters. */ short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices ! for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) { ! temp1[i] = getShort(buffer, offset); } byte[] temp2 = new byte[BMPdataLength]; // BMPdata ! System.arraycopy(buffer, offset, temp2, 0, BMPdataLength); ! offset += BMPdataLength; charCategoryTable = new CompactByteArray(temp1, temp2); /* Read a category table for non-BMP characters. */ int[] temp3 = new int[nonBMPdataLength]; ! for (int i = 0; i < nonBMPdataLength; i++, offset+=4) { ! temp3[i] = getInt(buffer, offset); } supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3); /* Read additional data */ if (additionalDataLength > 0) { additionalData = new byte[additionalDataLength]; ! System.arraycopy(buffer, offset, additionalData, 0, additionalDataLength); } /* Set numCategories */ numCategories = stateTable.length / endStates.length; } ! protected byte[] readFile(final Module module, final String datafile) ! throws IOException, MissingResourceException { ! ! BufferedInputStream is; ! try { ! PrivilegedExceptionAction<BufferedInputStream> pa = () -> { ! String pathName = "jdk.localedata".equals(module.getName()) ? ! "sun/text/resources/ext/" : ! "sun/text/resources/"; ! InputStream in = module.getResourceAsStream(pathName + datafile); ! if (in == null) { ! // Try to load the file with "java.base" module instance. Assumption ! // here is that the fall back data files to be read should reside in ! // java.base. ! in = RuleBasedBreakIterator.class.getModule().getResourceAsStream("sun/text/resources/" + datafile); ! } ! ! return new BufferedInputStream(in); ! }; ! is = AccessController.doPrivileged(pa); ! } catch (PrivilegedActionException e) { ! throw new InternalError(e.toString(), e); ! } ! ! int offset = 0; ! ! /* First, read magic, version, and header_info. */ ! int len = LABEL_LENGTH + 5; ! byte[] buf = new byte[len]; ! if (is.read(buf) != len) { ! throw new MissingResourceException("Wrong header length", ! datafile, ""); ! } ! ! /* Validate the magic number. */ ! for (int i = 0; i < LABEL_LENGTH; i++, offset++) { ! if (buf[offset] != LABEL[offset]) { throw new MissingResourceException("Wrong magic number", ! datafile, ""); } } ! /* Validate the version number. */ ! if (buf[offset] != supportedVersion) { ! throw new MissingResourceException("Unsupported version(" + buf[offset] + ")", ! datafile, ""); } ! /* Read data: totalDataSize + 8(for checksum) */ ! len = getInt(buf, ++offset); ! buf = new byte[len]; ! if (is.read(buf) != len) { throw new MissingResourceException("Wrong data length", ! datafile, ""); } - - is.close(); - - return buf; } byte[] getAdditionalData() { return additionalData; } --- 368,472 ---- * u1 BMPdata[BMPdataLength]; * u4 nonBMPdata[numNonBMPdataLength]; * u1 additionalData[additionalDataLength]; * } * </pre> + * + * @throws BufferUnderflowException if the end-of-data is reached before + * setting up all the tables */ ! private void setupTables(String ruleFile, ByteBuffer bb) { /* Read header_info. */ ! int stateTableLength = bb.getInt(); ! int backwardsStateTableLength = bb.getInt(); ! int endStatesLength = bb.getInt(); ! int lookaheadStatesLength = bb.getInt(); ! int BMPdataLength = bb.getInt(); ! int nonBMPdataLength = bb.getInt(); ! int additionalDataLength = bb.getInt(); ! checksum = bb.getLong(); /* Read stateTable[numCategories * numRows] */ stateTable = new short[stateTableLength]; ! for (int i = 0; i < stateTableLength; i++) { ! stateTable[i] = bb.getShort(); } /* Read backwardsStateTable[numCategories * numRows] */ backwardsStateTable = new short[backwardsStateTableLength]; ! for (int i = 0; i < backwardsStateTableLength; i++) { ! backwardsStateTable[i] = bb.getShort(); } /* Read endStates[numRows] */ endStates = new boolean[endStatesLength]; ! for (int i = 0; i < endStatesLength; i++) { ! endStates[i] = bb.get() == 1; } /* Read lookaheadStates[numRows] */ lookaheadStates = new boolean[lookaheadStatesLength]; ! for (int i = 0; i < lookaheadStatesLength; i++) { ! lookaheadStates[i] = bb.get() == 1; } /* Read a category table and indices for BMP characters. */ short[] temp1 = new short[BMP_INDICES_LENGTH]; // BMPindices ! for (int i = 0; i < BMP_INDICES_LENGTH; i++) { ! temp1[i] = bb.getShort(); } byte[] temp2 = new byte[BMPdataLength]; // BMPdata ! bb.get(temp2); charCategoryTable = new CompactByteArray(temp1, temp2); /* Read a category table for non-BMP characters. */ int[] temp3 = new int[nonBMPdataLength]; ! for (int i = 0; i < nonBMPdataLength; i++) { ! temp3[i] = bb.getInt(); } supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3); /* Read additional data */ if (additionalDataLength > 0) { additionalData = new byte[additionalDataLength]; ! bb.get(additionalData); } + assert bb.position() == bb.limit(); /* Set numCategories */ numCategories = stateTable.length / endStates.length; } ! /** ! * Validates the magic number, version, and the length of the given data. ! * ! * @throws BufferUnderflowException if the end-of-data is reached while ! * validating data ! * @throws MissingResourceException if valification failed ! */ ! void validateRuleData(String ruleFile, ByteBuffer bb) { ! /* Verify the magic number. */ ! for (int i = 0; i < LABEL_LENGTH; i++) { ! if (bb.get() != LABEL[i]) { throw new MissingResourceException("Wrong magic number", ! ruleFile, ""); } } ! /* Verify the version number. */ ! byte version = bb.get(); ! if (version != supportedVersion) { ! throw new MissingResourceException("Unsupported version(" + version + ")", ! ruleFile, ""); } ! // Check the length of the rest of data ! int len = bb.getInt(); ! if (bb.position() + len != bb.limit()) { throw new MissingResourceException("Wrong data length", ! ruleFile, ""); } } byte[] getAdditionalData() { return additionalData; }
*** 1059,1090 **** */ protected int lookupBackwardState(int state, int category) { return backwardsStateTable[state * numCategories + category]; } - static long getLong(byte[] buf, int offset) { - long num = buf[offset]&0xFF; - for (int i = 1; i < 8; i++) { - num = num<<8 | (buf[offset+i]&0xFF); - } - return num; - } - - static int getInt(byte[] buf, int offset) { - int num = buf[offset]&0xFF; - for (int i = 1; i < 4; i++) { - num = num<<8 | (buf[offset+i]&0xFF); - } - return num; - } - - static short getShort(byte[] buf, int offset) { - short num = (short)(buf[offset]&0xFF); - num = (short)(num<<8 | (buf[offset+1]&0xFF)); - return num; - } - /* * This class exists to work around a bug in incorrect implementations * of CharacterIterator, which incorrectly handle setIndex(endIndex). * This iterator relies only on base.setIndex(n) where n is less than * endIndex. --- 1027,1036 ----
< prev index next >