Old src/java.base/share/tools/org/openjdk/buildtools/generatecharacter/Utility.java

   1 /*
   2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.generatecharacter;
  27 
  28 import java.text.*;
  29 import java.util.*;
  30 
  31 public class Utility {
  32     static byte peekByte(String s, int index) {
  33         char c = s.charAt(index/2);
  34         return ((index&1)==0)?(byte)(c>>8):(byte)c;
  35     }
  36 
  37     static short peekShort(String s, int index) {
  38         return (short)s.charAt(index);
  39     }
  40 
  41     static int peekInt(String s, int index) {
  42         index *= 2;
  43         return (((int)s.charAt(index)) << 16) | s.charAt(index+1);
  44     }
  45 
  46     static void poke(String s, int index, byte value) {
  47         int mask = 0xFF00;
  48         int ivalue = value;
  49         if ((index&1)==0) {
  50             ivalue <<= 8;
  51             mask = 0x00FF;
  52         }
  53         index /= 2;
  54         if (index == s.length()) {
  55             s = s + (char)ivalue;
  56         }
  57         else if (index == 0) {
  58             s = (char)(ivalue|(s.charAt(0)&mask)) + s.substring(1);
  59         }
  60         else {
  61             s = s.substring(0, index) + (char)(ivalue|(s.charAt(index)&mask))
  62                 + s.substring(index+1);
  63         }
  64     }
  65 
  66     static void poke(String s, int index, short value) {
  67         if (index == s.length()) {
  68             s = s + (char)value;
  69         }
  70         else if (index == 0) {
  71             s = (char)value + s.substring(1);
  72         }
  73         else {
  74             s = s.substring(0, index) + (char)value + s.substring(index+1);
  75         }
  76     }
  77 
  78     static void poke(String s, int index, int value) {
  79         index *= 2;
  80         char hi = (char)(value >> 16);
  81         if (index == s.length()) {
  82             s = s + hi + (char)value;
  83         }
  84         else if (index == 0) {
  85             s = hi + (char)value + s.substring(2);
  86         }
  87         else {
  88             s = s.substring(0, index) + hi + (char)value + s.substring(index+2);
  89         }
  90     }
  91 
  92     /**
  93      * The ESCAPE character is used during run-length encoding.  It signals
  94      * a run of identical chars.
  95      */
  96     static final char ESCAPE = '\uA5A5';
  97 
  98     /**
  99      * The ESCAPE_BYTE character is used during run-length encoding.  It signals
 100      * a run of identical bytes.
 101      */
 102     static final byte ESCAPE_BYTE = (byte)0xA5;
 103 
 104     /**
 105      * Construct a string representing a short array.  Use run-length encoding.
 106      * A character represents itself, unless it is the ESCAPE character.  Then
 107      * the following notations are possible:
 108      *   ESCAPE ESCAPE   ESCAPE literal
 109      *   ESCAPE n c      n instances of character c
 110      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
 111      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
 112      * If we encounter a run where n == ESCAPE, we represent this as:
 113      *   c ESCAPE n-1 c
 114      * The ESCAPE value is chosen so as not to collide with commonly
 115      * seen values.
 116      */
 117     static final String arrayToRLEString(short[] a) {
 118         StringBuffer buffer = new StringBuffer();
 119         // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
 120         buffer.append((char) (a.length >> 16));
 121         buffer.append((char) a.length);
 122         short runValue = a[0];
 123         int runLength = 1;
 124         for (int i=1; i<a.length; ++i) {
 125             short s = a[i];
 126             if (s == runValue && runLength < 0xFFFF) ++runLength;
 127             else {
 128                 encodeRun(buffer, runValue, runLength);
 129                 runValue = s;
 130                 runLength = 1;
 131             }
 132         }
 133         encodeRun(buffer, runValue, runLength);
 134         return buffer.toString();
 135     }
 136 
 137     /**
 138      * Construct a string representing a byte array.  Use run-length encoding.
 139      * Two bytes are packed into a single char, with a single extra zero byte at
 140      * the end if needed.  A byte represents itself, unless it is the
 141      * ESCAPE_BYTE.  Then the following notations are possible:
 142      *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
 143      *   ESCAPE_BYTE n b           n instances of byte b
 144      * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
 145      * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
 146      * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
 147      *   b ESCAPE_BYTE n-1 b
 148      * The ESCAPE_BYTE value is chosen so as not to collide with commonly
 149      * seen values.
 150      */
 151     static final String arrayToRLEString(byte[] a) {
 152         StringBuffer buffer = new StringBuffer();
 153         buffer.append((char) (a.length >> 16));
 154         buffer.append((char) a.length);
 155         byte runValue = a[0];
 156         int runLength = 1;
 157         byte[] state = new byte[2];
 158         for (int i=1; i<a.length; ++i) {
 159             byte b = a[i];
 160             if (b == runValue && runLength < 0xFF) ++runLength;
 161             else {
 162                 encodeRun(buffer, runValue, runLength, state);
 163                 runValue = b;
 164                 runLength = 1;
 165             }
 166         }
 167         encodeRun(buffer, runValue, runLength, state);
 168 
 169         // We must save the final byte, if there is one, by padding
 170         // an extra zero.
 171         if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
 172 
 173         return buffer.toString();
 174     }
 175 
 176     /**
 177      * Encode a run, possibly a degenerate run (of < 4 values).
 178      * @param length The length of the run; must be > 0 && <= 0xFFFF.
 179      */
 180     private static final void encodeRun(StringBuffer buffer, short value, int length) {
 181         if (length < 4) {
 182             for (int j=0; j<length; ++j) {
 183                 if (value == (int) ESCAPE) buffer.append(ESCAPE);
 184                 buffer.append((char) value);
 185             }
 186         }
 187         else {
 188             if (length == (int) ESCAPE) {
 189                 if (value == (int) ESCAPE) buffer.append(ESCAPE);
 190                 buffer.append((char) value);
 191                 --length;
 192             }
 193             buffer.append(ESCAPE);
 194             buffer.append((char) length);
 195             buffer.append((char) value); // Don't need to escape this value
 196         }
 197     }
 198 
 199     /**
 200      * Encode a run, possibly a degenerate run (of < 4 values).
 201      * @param length The length of the run; must be > 0 && <= 0xFF.
 202      */
 203     private static final void encodeRun(StringBuffer buffer, byte value, int length,
 204                                         byte[] state) {
 205         if (length < 4) {
 206             for (int j=0; j<length; ++j) {
 207                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
 208                 appendEncodedByte(buffer, value, state);
 209             }
 210         }
 211         else {
 212             if (length == ESCAPE_BYTE) {
 213                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
 214                 appendEncodedByte(buffer, value, state);
 215                 --length;
 216             }
 217             appendEncodedByte(buffer, ESCAPE_BYTE, state);
 218             appendEncodedByte(buffer, (byte)length, state);
 219             appendEncodedByte(buffer, value, state); // Don't need to escape this value
 220         }
 221     }
 222 
 223     /**
 224      * Append a byte to the given StringBuffer, packing two bytes into each
 225      * character.  The state parameter maintains intermediary data between
 226      * calls.
 227      * @param state A two-element array, with state[0] == 0 if this is the
 228      * first byte of a pair, or state[0] != 0 if this is the second byte
 229      * of a pair, in which case state[1] is the first byte.
 230      */
 231     private static final void appendEncodedByte(StringBuffer buffer, byte value,
 232                                                 byte[] state) {
 233         if (state[0] != 0) {
 234             char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));
 235             buffer.append(c);
 236             state[0] = 0;
 237         }
 238         else {
 239             state[0] = 1;
 240             state[1] = value;
 241         }
 242     }
 243 
 244     /**
 245      * Construct an array of shorts from a run-length encoded string.
 246      */
 247     static final short[] RLEStringToShortArray(String s) {
 248         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
 249         short[] array = new short[length];
 250         int ai = 0;
 251         for (int i=2; i<s.length(); ++i) {
 252             char c = s.charAt(i);
 253             if (c == ESCAPE) {
 254                 c = s.charAt(++i);
 255                 if (c == ESCAPE) array[ai++] = (short) c;
 256                 else {
 257                     int runLength = (int) c;
 258                     short runValue = (short) s.charAt(++i);
 259                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
 260                 }
 261             }
 262             else {
 263                 array[ai++] = (short) c;
 264             }
 265         }
 266 
 267         if (ai != length)
 268             throw new InternalError("Bad run-length encoded short array");
 269 
 270         return array;
 271     }
 272 
 273     /**
 274      * Construct an array of bytes from a run-length encoded string.
 275      */
 276     static final byte[] RLEStringToByteArray(String s) {
 277         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
 278         byte[] array = new byte[length];
 279         boolean nextChar = true;
 280         char c = 0;
 281         int node = 0;
 282         int runLength = 0;
 283         int i = 2;
 284         for (int ai=0; ai<length; ) {
 285             // This part of the loop places the next byte into the local
 286             // variable 'b' each time through the loop.  It keeps the
 287             // current character in 'c' and uses the boolean 'nextChar'
 288             // to see if we've taken both bytes out of 'c' yet.
 289             byte b;
 290             if (nextChar) {
 291                 c = s.charAt(i++);
 292                 b = (byte) (c >> 8);
 293                 nextChar = false;
 294             }
 295             else {
 296                 b = (byte) (c & 0xFF);
 297                 nextChar = true;
 298             }
 299 
 300             // This part of the loop is a tiny state machine which handles
 301             // the parsing of the run-length encoding.  This would be simpler
 302             // if we could look ahead, but we can't, so we use 'node' to
 303             // move between three nodes in the state machine.
 304             switch (node) {
 305             case 0:
 306                 // Normal idle node
 307                 if (b == ESCAPE_BYTE) {
 308                     node = 1;
 309                 }
 310                 else {
 311                     array[ai++] = b;
 312                 }
 313                 break;
 314             case 1:
 315                 // We have seen one ESCAPE_BYTE; we expect either a second
 316                 // one, or a run length and value.
 317                 if (b == ESCAPE_BYTE) {
 318                     array[ai++] = ESCAPE_BYTE;
 319                     node = 0;
 320                 }
 321                 else {
 322                     runLength = b;
 323                     // Interpret signed byte as unsigned
 324                     if (runLength < 0) runLength += 0x100;
 325                     node = 2;
 326                 }
 327                 break;
 328             case 2:
 329                 // We have seen an ESCAPE_BYTE and length byte.  We interpret
 330                 // the next byte as the value to be repeated.
 331                 for (int j=0; j<runLength; ++j) array[ai++] = b;
 332                 node = 0;
 333                 break;
 334             }
 335         }
 336 
 337         if (node != 0)
 338             throw new InternalError("Bad run-length encoded byte array");
 339 
 340         if (i != s.length())
 341             throw new InternalError("Excess data in RLE byte array string");
 342 
 343         return array;
 344     }
 345 
 346     /**
 347      * Format a String for representation in a source file.  This includes
 348      * breaking it into lines escaping characters using octal notation
 349      * when necessary (control characters and double quotes).
 350      */
 351     static final String formatForSource(String s) {
 352         return formatForSource(s, "        ");
 353     }
 354 
 355     /**
 356      * Format a String for representation in a source file.  This includes
 357      * breaking it into lines escaping characters using octal notation
 358      * when necessary (control characters and double quotes).
 359      */
 360     static final String formatForSource(String s, String indent) {
 361         StringBuffer buffer = new StringBuffer();
 362         for (int i=0; i<s.length();) {
 363             if (i > 0) buffer.append("+\n");
 364             int limit = buffer.length() + 78; // Leave 2 for trailing <"+>
 365             buffer.append(indent + '"');
 366             while (i<s.length() && buffer.length()<limit) {
 367                 char c = s.charAt(i++);
 368                 /* This works too but it's kind of unnecessary; might as
 369                    well keep things simple.
 370                 if (c == '\\' || c == '"') {
 371                     // Escape backslash and double-quote.  Don't need to
 372                     // escape single-quote.
 373                     buffer.append("\\" + c);
 374                 }
 375                 else if (c >= '\u0020' && c <= '\u007E') {
 376                     // Printable ASCII ranges from ' ' to '~'
 377                     buffer.append(c);
 378                 }
 379                 else
 380                 */
 381                 if (c <= '\377') {
 382                     // Represent control characters
 383                     // using octal notation; otherwise the string we form
 384                     // won't compile, since Unicode escape sequences are
 385                     // processed before tokenization.
 386                     buffer.append('\\');
 387                     buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
 388                     buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
 389                     buffer.append(HEX_DIGIT[(c & 0007)]);
 390                 }
 391                 else {
 392                     // Handle the rest with Unicode
 393                     buffer.append("\\u");
 394                     buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
 395                     buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
 396                     buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
 397                     buffer.append(HEX_DIGIT[(c & 0x000F)]);
 398                 }
 399             }
 400             buffer.append('"');
 401         }
 402         return buffer.toString();
 403     }
 404 
 405     static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
 406                                      '8','9','A','B','C','D','E','F'};
 407 }