1 /*
   2  * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.util;
  27 
  28 /** Utility class for static conversion methods between numbers
  29  *  and strings in various formats.
  30  *
  31  *  <p>Note regarding UTF-8.
  32  *  The JVMS defines its own version of the UTF-8 format so that it
  33  *  contains no zero bytes (modified UTF-8). This is not actually the same
  34  *  as Charset.forName("UTF-8").
  35  *
  36  *  <p>
  37  *  See also:
  38  *  <ul>
  39  *  <li><a href="http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.4.7">
  40  *    JVMS 4.4.7 </a></li>
  41  *  <li><a href="http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8">
 *    java.io.DataInput: Modified UTF-8 </a></li>
 *  <li><a href="https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8">
 *    Modified UTF-8 (Wikipedia) </a></li>
  45  *  </ul>
  46  *
  47  *  The methods here support modified UTF-8.
  48  *
  49  *  <p><b>This is NOT part of any supported API.
  50  *  If you write code that depends on this, you do so at your own risk.
  51  *  This code and its internal interfaces are subject to change or
  52  *  deletion without notice.</b>
  53  */
  54 public class Convert {
  55 
  56     /** Convert string to integer.
  57      */
  58     public static int string2int(String s, int radix)
  59         throws NumberFormatException {
  60         if (radix == 10) {
  61             return Integer.parseInt(s, radix);
  62         } else {
  63             char[] cs = s.toCharArray();
  64             int limit = Integer.MAX_VALUE / (radix/2);
  65             int n = 0;
  66             for (char c : cs) {
  67                 int d = Character.digit(c, radix);
  68                 if (n < 0 ||
  69                     n > limit ||
  70                     n * radix > Integer.MAX_VALUE - d)
  71                     throw new NumberFormatException();
  72                 n = n * radix + d;
  73             }
  74             return n;
  75         }
  76     }
  77 
  78     /** Convert string to long integer.
  79      */
  80     public static long string2long(String s, int radix)
  81         throws NumberFormatException {
  82         if (radix == 10) {
  83             return Long.parseLong(s, radix);
  84         } else {
  85             char[] cs = s.toCharArray();
  86             long limit = Long.MAX_VALUE / (radix/2);
  87             long n = 0;
  88             for (char c : cs) {
  89                 int d = Character.digit(c, radix);
  90                 if (n < 0 ||
  91                     n > limit ||
  92                     n * radix > Long.MAX_VALUE - d)
  93                     throw new NumberFormatException();
  94                 n = n * radix + d;
  95             }
  96             return n;
  97         }
  98     }
  99 
 100 /* Conversion routines between names, strings, and byte arrays in Utf8 format
 101  */
 102 
 103     /** Convert `len' bytes from utf8 to characters.
 104      *  Parameters are as in System.arraycopy
 105      *  Return first index in `dst' past the last copied char.
 106      *  @param src        The array holding the bytes to convert.
 107      *  @param sindex     The start index from which bytes are converted.
 108      *  @param dst        The array holding the converted characters..
 109      *  @param dindex     The start index from which converted characters
 110      *                    are written.
 111      *  @param len        The maximum number of bytes to convert.
 112      */
 113     public static int utf2chars(byte[] src, int sindex,
 114                                 char[] dst, int dindex,
 115                                 int len) {
 116         int i = sindex;
 117         int j = dindex;
 118         int limit = sindex + len;
 119         while (i < limit) {
 120             int b = src[i++] & 0xFF;
 121             if (b >= 0xE0) {
 122                 b = (b & 0x0F) << 12;
 123                 b = b | (src[i++] & 0x3F) << 6;
 124                 b = b | (src[i++] & 0x3F);
 125             } else if (b >= 0xC0) {
 126                 b = (b & 0x1F) << 6;
 127                 b = b | (src[i++] & 0x3F);
 128             }
 129             dst[j++] = (char)b;
 130         }
 131         return j;
 132     }
 133 
 134     /** Return bytes in Utf8 representation as an array of characters.
 135      *  @param src        The array holding the bytes.
 136      *  @param sindex     The start index from which bytes are converted.
 137      *  @param len        The maximum number of bytes to convert.
 138      */
 139     public static char[] utf2chars(byte[] src, int sindex, int len) {
 140         char[] dst = new char[len];
 141         int len1 = utf2chars(src, sindex, dst, 0, len);
 142         char[] result = new char[len1];
 143         System.arraycopy(dst, 0, result, 0, len1);
 144         return result;
 145     }
 146 
 147     /** Return all bytes of a given array in Utf8 representation
 148      *  as an array of characters.
 149      *  @param src        The array holding the bytes.
 150      */
 151     public static char[] utf2chars(byte[] src) {
 152         return utf2chars(src, 0, src.length);
 153     }
 154 
 155     /** Return bytes in Utf8 representation as a string.
 156      *  @param src        The array holding the bytes.
 157      *  @param sindex     The start index from which bytes are converted.
 158      *  @param len        The maximum number of bytes to convert.
 159      */
 160     public static String utf2string(byte[] src, int sindex, int len) {
 161         char dst[] = new char[len];
 162         int len1 = utf2chars(src, sindex, dst, 0, len);
 163         return new String(dst, 0, len1);
 164     }
 165 
 166     /** Return all bytes of a given array in Utf8 representation
 167      *  as a string.
 168      *  @param src        The array holding the bytes.
 169      */
 170     public static String utf2string(byte[] src) {
 171         return utf2string(src, 0, src.length);
 172     }
 173 
 174     /** Copy characters in source array to bytes in target array,
 175      *  converting them to Utf8 representation.
 176      *  The target array must be large enough to hold the result.
 177      *  returns first index in `dst' past the last copied byte.
 178      *  @param src        The array holding the characters to convert.
 179      *  @param sindex     The start index from which characters are converted.
 180      *  @param dst        The array holding the converted characters..
 181      *  @param dindex     The start index from which converted bytes
 182      *                    are written.
 183      *  @param len        The maximum number of characters to convert.
 184      */
 185     public static int chars2utf(char[] src, int sindex,
 186                                 byte[] dst, int dindex,
 187                                 int len) {
 188         int j = dindex;
 189         int limit = sindex + len;
 190         for (int i = sindex; i < limit; i++) {
 191             char ch = src[i];
 192             if (1 <= ch && ch <= 0x7F) {
 193                 dst[j++] = (byte)ch;
 194             } else if (ch <= 0x7FF) {
 195                 dst[j++] = (byte)(0xC0 | (ch >> 6));
 196                 dst[j++] = (byte)(0x80 | (ch & 0x3F));
 197             } else {
 198                 dst[j++] = (byte)(0xE0 | (ch >> 12));
 199                 dst[j++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
 200                 dst[j++] = (byte)(0x80 | (ch & 0x3F));
 201             }
 202         }
 203         return j;
 204     }
 205 
 206     /** Return characters as an array of bytes in Utf8 representation.
 207      *  @param src        The array holding the characters.
 208      *  @param sindex     The start index from which characters are converted.
 209      *  @param len        The maximum number of characters to convert.
 210      */
 211     public static byte[] chars2utf(char[] src, int sindex, int len) {
 212         byte[] dst = new byte[len * 3];
 213         int len1 = chars2utf(src, sindex, dst, 0, len);
 214         byte[] result = new byte[len1];
 215         System.arraycopy(dst, 0, result, 0, len1);
 216         return result;
 217     }
 218 
 219     /** Return all characters in given array as an array of bytes
 220      *  in Utf8 representation.
 221      *  @param src        The array holding the characters.
 222      */
 223     public static byte[] chars2utf(char[] src) {
 224         return chars2utf(src, 0, src.length);
 225     }
 226 
 227     /** Return string as an array of bytes in in Utf8 representation.
 228      */
 229     public static byte[] string2utf(String s) {
 230         return chars2utf(s.toCharArray());
 231     }
 232 
 233     /**
 234      * Escapes each character in a string that has an escape sequence or
 235      * is non-printable ASCII.  Leaves non-ASCII characters alone.
 236      */
 237     public static String quote(String s) {
 238         StringBuilder buf = new StringBuilder();
 239         for (int i = 0; i < s.length(); i++) {
 240             buf.append(quote(s.charAt(i)));
 241         }
 242         return buf.toString();
 243     }
 244 
 245     /**
 246      * Escapes a character if it has an escape sequence or is
 247      * non-printable ASCII.  Leaves non-ASCII characters alone.
 248      */
 249     public static String quote(char ch) {
 250         switch (ch) {
 251         case '\b':  return "\\b";
 252         case '\f':  return "\\f";
 253         case '\n':  return "\\n";
 254         case '\r':  return "\\r";
 255         case '\t':  return "\\t";
 256         case '\'':  return "\\'";
 257         case '\"':  return "\\\"";
 258         case '\\':  return "\\\\";
 259         default:
 260             return (isPrintableAscii(ch))
 261                 ? String.valueOf(ch)
 262                 : String.format("\\u%04x", (int) ch);
 263         }
 264     }
 265 
 266     /**
 267      * Is a character printable ASCII?
 268      */
 269     private static boolean isPrintableAscii(char ch) {
 270         return ch >= ' ' && ch <= '~';
 271     }
 272 
 273     /** Escape all unicode characters in string.
 274      */
 275     public static String escapeUnicode(String s) {
 276         int len = s.length();
 277         int i = 0;
 278         while (i < len) {
 279             char ch = s.charAt(i);
 280             if (ch > 255) {
 281                 StringBuilder buf = new StringBuilder();
 282                 buf.append(s.substring(0, i));
 283                 while (i < len) {
 284                     ch = s.charAt(i);
 285                     if (ch > 255) {
 286                         buf.append("\\u");
 287                         buf.append(Character.forDigit((ch >> 12) % 16, 16));
 288                         buf.append(Character.forDigit((ch >>  8) % 16, 16));
 289                         buf.append(Character.forDigit((ch >>  4) % 16, 16));
 290                         buf.append(Character.forDigit((ch      ) % 16, 16));
 291                     } else {
 292                         buf.append(ch);
 293                     }
 294                     i++;
 295                 }
 296                 s = buf.toString();
 297             } else {
 298                 i++;
 299             }
 300         }
 301         return s;
 302     }
 303 
 304 /* Conversion routines for qualified name splitting
 305  */
 306     /** Return the last part of a qualified name.
 307      *  @param name the qualified name
 308      *  @return the last part of the qualified name
 309      */
 310     public static Name shortName(Name name) {
 311         int start = name.lastIndexOf((byte)'.') + 1;
 312         int end = name.getByteLength();
 313         if (start == 0 && end == name.length()) {
 314             return name;
 315         }
 316         return name.subName(
 317             name.lastIndexOf((byte)'.') + 1, name.getByteLength());
 318     }
 319 
 320     /** Return the last part of a qualified name from its string representation
 321      *  @param name the string representation of the qualified name
 322      *  @return the last part of the qualified name
 323      */
 324     public static String shortName(String name) {
 325         return name.substring(name.lastIndexOf('.') + 1);
 326     }
 327 
 328     /** Return the package name of a class name, excluding the trailing '.',
 329      *  "" if not existent.
 330      */
 331     public static Name packagePart(Name classname) {
 332         return classname.subName(0, classname.lastIndexOf((byte)'.'));
 333     }
 334 
 335     public static String packagePart(String classname) {
 336         int lastDot = classname.lastIndexOf('.');
 337         return (lastDot < 0 ? "" : classname.substring(0, lastDot));
 338     }
 339 
 340     public static List<Name> enclosingCandidates(Name name) {
 341         List<Name> names = List.nil();
 342         int index;
 343         while ((index = name.lastIndexOf((byte)'$')) > 0) {
 344             name = name.subName(0, index);
 345             names = names.prepend(name);
 346         }
 347         return names;
 348     }
 349 
 350     public static List<Name> classCandidates(Name name) {
 351         List<Name> names = List.nil();
 352         String nameStr = name.toString();
 353         int index = -1;
 354         while ((index = nameStr.indexOf('.', index + 1)) > 0) {
 355             String pack = nameStr.substring(0, index + 1);
 356             String clz = nameStr.substring(index + 1).replace('.', '$');
 357             names = names.prepend(name.table.names.fromString(pack + clz));
 358         }
 359         return names.reverse();
 360     }
 361 }