< prev index next >

src/java.base/share/classes/sun/text/RuleBasedBreakIterator.java

Print this page

        

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -36,19 +36,14 @@
  *
  * This notice and attribution to Taligent may not be removed.
  * Taligent is a registered trademark of Taligent, Inc.
  */
 
-package sun.util.locale.provider;
+package sun.text;
 
-import java.io.BufferedInputStream;
-import java.io.InputStream;
-import java.io.IOException;
-import java.lang.reflect.Module;
-import java.security.AccessController;
-import java.security.PrivilegedActionException;
-import java.security.PrivilegedExceptionAction;
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
 import java.text.BreakIterator;
 import java.text.CharacterIterator;
 import java.text.StringCharacterIterator;
 import java.util.MissingResourceException;
 import sun.text.CompactByteArray;

@@ -216,11 +211,11 @@
  * href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>.
  * &nbsp; For examples, see the resource data (which is annotated).</p>
  *
  * @author Richard Gillam
  */
-class RuleBasedBreakIterator extends BreakIterator {
+public class RuleBasedBreakIterator extends BreakIterator {
 
     /**
      * A token used as a character-category value to identify ignore characters
      */
     protected static final byte IGNORE = -1;

@@ -248,15 +243,10 @@
      * Version number of the dictionary that was read in.
      */
     static final byte supportedVersion = 1;
 
     /**
-     * Header size in byte count
-     */
-    private static final int HEADER_LENGTH = 36;
-
-    /**
      * An array length of indices for BMP characters
      */
     private static final int BMP_INDICES_LENGTH = 512;
 
     /**

@@ -313,20 +303,30 @@
     //=======================================================================
     // constructors
     //=======================================================================
 
     /**
-     * Constructs a RuleBasedBreakIterator according to the module and the datafile
-     * provided.
+     * Constructs a RuleBasedBreakIterator using the given rule data.
+     *
+     * @throws MissingResourceException if the rule data is invalid or corrupted
      */
-    RuleBasedBreakIterator(Module module, String datafile)
-        throws IOException, MissingResourceException {
-        readTables(module, datafile);
+    public RuleBasedBreakIterator(String ruleFile, byte[] ruleData) {
+        ByteBuffer bb = ByteBuffer.wrap(ruleData);
+        try {
+            validateRuleData(ruleFile, bb);
+            setupTables(ruleFile, bb);
+        } catch (BufferUnderflowException bue) {
+            MissingResourceException e;
+            e = new MissingResourceException("Corrupted rule data file", ruleFile, "");
+            e.initCause(bue);
+            throw e;
+        }
     }
 
     /**
-     * Read datafile. The datafile's format is as follows:
+     * Initializes the fields with the given rule data.
+     * The data format is as follows:
      * <pre>
      *   BreakIteratorData {
      *       u1           magic[7];
      *       u1           version;
      *       u4           totalDataSize;

@@ -368,137 +368,105 @@
      *       u1           BMPdata[BMPdataLength];
      *       u4           nonBMPdata[numNonBMPdataLength];
      *       u1           additionalData[additionalDataLength];
      *   }
      * </pre>
+     *
+     * @throws BufferUnderflowException if the end-of-data is reached before
+     *                                  setting up all the tables
      */
-    protected final void readTables(Module module, String datafile)
-        throws IOException, MissingResourceException {
-
-        byte[] buffer = readFile(module, datafile);
-
+    private void setupTables(String ruleFile, ByteBuffer bb) {
         /* Read header_info. */
-        int stateTableLength = getInt(buffer, 0);
-        int backwardsStateTableLength = getInt(buffer, 4);
-        int endStatesLength = getInt(buffer, 8);
-        int lookaheadStatesLength = getInt(buffer, 12);
-        int BMPdataLength = getInt(buffer, 16);
-        int nonBMPdataLength = getInt(buffer, 20);
-        int additionalDataLength = getInt(buffer, 24);
-        checksum = getLong(buffer, 28);
+        int stateTableLength = bb.getInt();
+        int backwardsStateTableLength = bb.getInt();
+        int endStatesLength = bb.getInt();
+        int lookaheadStatesLength = bb.getInt();
+        int BMPdataLength = bb.getInt();
+        int nonBMPdataLength = bb.getInt();
+        int additionalDataLength = bb.getInt();
+        checksum = bb.getLong();
 
         /* Read stateTable[numCategories * numRows] */
         stateTable = new short[stateTableLength];
-        int offset = HEADER_LENGTH;
-        for (int i = 0; i < stateTableLength; i++, offset+=2) {
-           stateTable[i] = getShort(buffer, offset);
+        for (int i = 0; i < stateTableLength; i++) {
+            stateTable[i] = bb.getShort();
         }
 
         /* Read backwardsStateTable[numCategories * numRows] */
         backwardsStateTable = new short[backwardsStateTableLength];
-        for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
-           backwardsStateTable[i] = getShort(buffer, offset);
+        for (int i = 0; i < backwardsStateTableLength; i++) {
+            backwardsStateTable[i] = bb.getShort();
         }
 
         /* Read endStates[numRows] */
         endStates = new boolean[endStatesLength];
-        for (int i = 0; i < endStatesLength; i++, offset++) {
-           endStates[i] = buffer[offset] == 1;
+        for (int i = 0; i < endStatesLength; i++) {
+            endStates[i] = bb.get() == 1;
         }
 
         /* Read lookaheadStates[numRows] */
         lookaheadStates = new boolean[lookaheadStatesLength];
-        for (int i = 0; i < lookaheadStatesLength; i++, offset++) {
-           lookaheadStates[i] = buffer[offset] == 1;
+        for (int i = 0; i < lookaheadStatesLength; i++) {
+            lookaheadStates[i] = bb.get() == 1;
         }
 
         /* Read a category table and indices for BMP characters. */
         short[] temp1 = new short[BMP_INDICES_LENGTH];  // BMPindices
-        for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
-            temp1[i] = getShort(buffer, offset);
+        for (int i = 0; i < BMP_INDICES_LENGTH; i++) {
+            temp1[i] = bb.getShort();
         }
         byte[] temp2 = new byte[BMPdataLength];  // BMPdata
-        System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
-        offset += BMPdataLength;
+        bb.get(temp2);
         charCategoryTable = new CompactByteArray(temp1, temp2);
 
         /* Read a category table for non-BMP characters. */
         int[] temp3 = new int[nonBMPdataLength];
-        for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
-            temp3[i] = getInt(buffer, offset);
+        for (int i = 0; i < nonBMPdataLength; i++) {
+            temp3[i] = bb.getInt();
         }
         supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
 
         /* Read additional data */
         if (additionalDataLength > 0) {
             additionalData = new byte[additionalDataLength];
-            System.arraycopy(buffer, offset, additionalData, 0, additionalDataLength);
+            bb.get(additionalData);
         }
+        assert bb.position() == bb.limit();
 
         /* Set numCategories */
         numCategories = stateTable.length / endStates.length;
     }
 
-    protected byte[] readFile(final Module module, final String datafile)
-        throws IOException, MissingResourceException {
-
-        BufferedInputStream is;
-        try {
-            PrivilegedExceptionAction<BufferedInputStream> pa = () -> {
-                String pathName = "jdk.localedata".equals(module.getName()) ?
-                     "sun/text/resources/ext/" :
-                     "sun/text/resources/";
-                InputStream in = module.getResourceAsStream(pathName + datafile);
-                if (in == null) {
-                    // Try to load the file with "java.base" module instance. Assumption
-                    // here is that the fall back data files to be read should reside in
-                    // java.base.
-                    in = RuleBasedBreakIterator.class.getModule().getResourceAsStream("sun/text/resources/" + datafile);
-                }
-
-                return new BufferedInputStream(in);
-            };
-            is = AccessController.doPrivileged(pa);
-        } catch (PrivilegedActionException e) {
-            throw new InternalError(e.toString(), e);
-        }
-
-        int offset = 0;
-
-        /* First, read magic, version, and header_info. */
-        int len = LABEL_LENGTH + 5;
-        byte[] buf = new byte[len];
-        if (is.read(buf) != len) {
-            throw new MissingResourceException("Wrong header length",
-                                               datafile, "");
-        }
-
-        /* Validate the magic number. */
-        for (int i = 0; i < LABEL_LENGTH; i++, offset++) {
-            if (buf[offset] != LABEL[offset]) {
+    /**
+     * Validates the magic number, version, and the length of the given data.
+     *
+     * @throws BufferUnderflowException if the end-of-data is reached while
+     *                                  validating data
+     * @throws MissingResourceException if valification failed
+     */
+    void validateRuleData(String ruleFile, ByteBuffer bb) {
+        /* Verify the magic number. */
+        for (int i = 0; i < LABEL_LENGTH; i++) {
+            if (bb.get() != LABEL[i]) {
                 throw new MissingResourceException("Wrong magic number",
-                                                   datafile, "");
+                                                   ruleFile, "");
             }
         }
 
-        /* Validate the version number. */
-        if (buf[offset] != supportedVersion) {
-            throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
-                                               datafile, "");
+        /* Verify the version number. */
+        byte version = bb.get();
+        if (version != supportedVersion) {
+            throw new MissingResourceException("Unsupported version(" + version + ")",
+                                               ruleFile, "");
         }
 
-        /* Read data: totalDataSize + 8(for checksum) */
-        len = getInt(buf, ++offset);
-        buf = new byte[len];
-        if (is.read(buf) != len) {
+        // Check the length of the rest of data
+        int len = bb.getInt();
+        if (bb.position() + len != bb.limit()) {
             throw new MissingResourceException("Wrong data length",
-                                               datafile, "");
+                                               ruleFile, "");
         }
-
-        is.close();
-
-        return buf;
     }
 
     byte[] getAdditionalData() {
         return additionalData;
     }

@@ -1059,32 +1027,10 @@
      */
     protected int lookupBackwardState(int state, int category) {
         return backwardsStateTable[state * numCategories + category];
     }
 
-    static long getLong(byte[] buf, int offset) {
-        long num = buf[offset]&0xFF;
-        for (int i = 1; i < 8; i++) {
-            num = num<<8 | (buf[offset+i]&0xFF);
-        }
-        return num;
-    }
-
-    static int getInt(byte[] buf, int offset) {
-        int num = buf[offset]&0xFF;
-        for (int i = 1; i < 4; i++) {
-            num = num<<8 | (buf[offset+i]&0xFF);
-        }
-        return num;
-    }
-
-    static short getShort(byte[] buf, int offset) {
-        short num = (short)(buf[offset]&0xFF);
-        num = (short)(num<<8 | (buf[offset+1]&0xFF));
-        return num;
-    }
-
     /*
      * This class exists to work around a bug in incorrect implementations
      * of CharacterIterator, which incorrectly handle setIndex(endIndex).
      * This iterator relies only on base.setIndex(n) where n is less than
      * endIndex.
< prev index next >