1 /*
   2  * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.invoke.util;
  27 
  28 /**
  29  * Utility routines for dealing with bytecode-level names.
  30  * Includes universal mangling rules for the JVM.
  31  *
  32  * <h3>Avoiding Dangerous Characters </h3>
  33  *
  34  * <p>
  35  * The JVM defines a very small set of characters which are illegal
  36  * in name spellings.  We will slightly extend and regularize this set
  37  * into a group of <cite>dangerous characters</cite>.
  38  * These characters will then be replaced, in mangled names, by escape sequences.
  39  * In addition, accidental escape sequences must be further escaped.
  40  * Finally, a special prefix will be applied if and only if
  41  * the mangling would otherwise fail to begin with the escape character.
  42  * This happens to cover the corner case of the null string,
  43  * and also clearly marks symbols which need demangling.
  44  * </p>
  45  * <p>
  46  * Dangerous characters are the union of all characters forbidden
  47  * or otherwise restricted by the JVM specification,
  48  * plus their mates, if they are brackets
  49  * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
  50  * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
  51  * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
  52  * There is no distinction between type, method, and field names.
  53  * This makes it easier to convert between mangled names of different
  54  * types, since they do not need to be decoded (demangled).
  55  * </p>
  56  * <p>
  57  * The escape character is backslash <code><big><b>\</b></big></code>
  58  * (also known as reverse solidus).
  59  * This character is, until now, unheard of in bytecode names,
  60  * but traditional in the proposed role.
  61  *
  62  * </p>
  63  * <h3> Replacement Characters </h3>
  64  *
  65  *
  66  * <p>
  67  * Every escape sequence is two characters
  68  * (in fact, two UTF8 bytes) beginning with
  69  * the escape character and followed by a
  70  * <cite>replacement character</cite>.
  71  * (Since the replacement character is never a backslash,
  72  * iterated manglings do not double in size.)
  73  * </p>
  74  * <p>
  75  * Each dangerous character has some rough visual similarity
  76  * to its corresponding replacement character.
  77  * This makes mangled symbols easier to recognize by sight.
  78  * </p>
  79  * <p>
  80  * The dangerous characters are
  81  * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
  82  * <code><big><b>.</b></big></code> (dot, also a package delimiter),
  83  * <code><big><b>;</b></big></code> (semicolon, used in signatures),
  84  * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
  85  * <code><big><b>&lt;</b></big></code> (left angle),
  86  * <code><big><b>&gt;</b></big></code> (right angle),
  87  * <code><big><b>[</b></big></code> (left square bracket, used in array types),
  88  * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
  89  * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
  90  * Their replacements are, respectively,
  91  * <code><big><b>|</b></big></code> (vertical bar),
  92  * <code><big><b>,</b></big></code> (comma),
  93  * <code><big><b>?</b></big></code> (question mark),
  94  * <code><big><b>%</b></big></code> (percent),
  95  * <code><big><b>^</b></big></code> (caret),
 * <code><big><b>_</b></big></code> (underscore),
 * <code><big><b>{</b></big></code> (left curly bracket),
 * <code><big><b>}</b></big></code> (right curly bracket), and
 * <code><big><b>!</b></big></code> (exclamation mark).
 100  * In addition, the replacement character for the escape character itself is
 101  * <code><big><b>-</b></big></code> (hyphen),
 102  * and the replacement character for the null prefix is
 103  * <code><big><b>=</b></big></code> (equal sign).
 104  * </p>
 105  * <p>
 106  * An escape character <code><big><b>\</b></big></code>
 107  * followed by any of these replacement characters
 108  * is an escape sequence, and there are no other escape sequences.
 109  * An equal sign is only part of an escape sequence
 110  * if it is the second character in the whole string, following a backslash.
 111  * Two consecutive backslashes do <em>not</em> form an escape sequence.
 112  * </p>
 113  * <p>
 114  * Each escape sequence replaces a so-called <cite>original character</cite>
 115  * which is either one of the dangerous characters or the escape character.
 116  * A null prefix replaces an initial null string, not a character.
 117  * </p>
 118  * <p>
 119  * All this implies that escape sequences cannot overlap and may be
 120  * determined all at once for a whole string.  Note that a spelling
 121  * string can contain <cite>accidental escapes</cite>, apparent escape
 122  * sequences which must not be interpreted as manglings.
 123  * These are disabled by replacing their leading backslash with an
 124  * escape sequence (<code><big><b>\-</b></big></code>).  To mangle a string, three logical steps
 125  * are required, though they may be carried out in one pass:
 126  * </p>
 127  * <ol>
 128  *   <li>In each accidental escape, replace the backslash with an escape sequence
 129  * (<code><big><b>\-</b></big></code>).</li>
 130  *   <li>Replace each dangerous character with an escape sequence
 131  * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
 132  *   <li>If the first two steps introduced any change, <em>and</em>
 133  * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
 134  * </ol>
 135  *
 136  * To demangle a mangled string that begins with an escape,
 137  * remove any null prefix, and then replace (in parallel)
 138  * each escape sequence by its original character.
 139  * <p>Spelling strings which contain accidental
 140  * escapes <em>must</em> have them replaced, even if those
 141  * strings do not contain dangerous characters.
 142  * This restriction means that mangling a string always
 143  * requires a scan of the string for escapes.
 144  * But then, a scan would be required anyway,
 145  * to check for dangerous characters.
 146  *
 147  * </p>
 148  * <h3> Nice Properties </h3>
 149  *
 150  * <p>
 151  * If a bytecode name does not contain any escape sequence,
 152  * demangling is a no-op:  The string demangles to itself.
 153  * Such a string is called <cite>self-mangling</cite>.
 154  * Almost all strings are self-mangling.
 155  * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
 156  * simply verify that it does not begin with a backslash.
 157  * </p>
 158  * <p>
 159  * Mangling is a one-to-one function, while demangling
 160  * is a many-to-one function.
 161  * A mangled string is defined as <cite>validly mangled</cite> if
 162  * it is in fact the unique mangling of its spelling string.
 163  * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
 164  * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
 165  * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
 166  * If a language back-end or runtime is using mangled names,
 167  * it should never present an invalidly mangled bytecode
 168  * name to the JVM.  If the runtime encounters one,
 169  * it should also report an error, since such an occurrence
 170  * probably indicates a bug in name encoding which
 171  * will lead to errors in linkage.
 172  * However, this note does not propose that the JVM verifier
 173  * detect invalidly mangled names.
 174  * </p>
 175  * <p>
 * As a result of these rules, it is a simple matter to
 * compute validly mangled substrings and concatenations
 * of validly mangled strings, and (with a little care)
 * these correspond to the analogous operations on their
 * spelling strings.
 181  * </p>
 182  * <ul>
 183  *   <li>Any prefix of a validly mangled string is also validly mangled,
 184  * although a null prefix may need to be removed.</li>
 185  *   <li>Any suffix of a validly mangled string is also validly mangled,
 186  * although a null prefix may need to be added.</li>
 187  *   <li>Two validly mangled strings, when concatenated,
 188  * are also validly mangled, although any null prefix
 189  * must be removed from the second string,
 190  * and a trailing backslash on the first string may need escaping,
 191  * if it would participate in an accidental escape when followed
 192  * by the first character of the second string.</li>
 193  * </ul>
 194  * <p>If languages that include non-Java symbol spellings use this
 195  * mangling convention, they will enjoy the following advantages:
 196  * </p>
 197  * <ul>
 198  *   <li>They can interoperate via symbols they share in common.</li>
 199  *   <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
 200  *   <li>Future JVM and language extensions can safely use the dangerous characters
 201  * for structuring symbols, but will never interfere with valid spellings.</li>
 202  *   <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
 203  *   <li>Occasional transliterations and name composition will be simple and regular,
 204  * for classes, methods, and fields.</li>
 205  *   <li>Bytecode names will continue to be compact.
 206  * When mangled, spellings will at most double in length, either in
 207  * UTF8 or UTF16 format, and most will not change at all.</li>
 208  * </ul>
 209  *
 210  *
 211  * <h3> Suggestions for Human Readable Presentations </h3>
 212  *
 213  *
 214  * <p>
 215  * For human readable displays of symbols,
 216  * it will be better to present a string-like quoted
 217  * representation of the spelling, because JVM users
 218  * are generally familiar with such tokens.
 219  * We suggest using single or double quotes before and after
 220  * mangled symbols which are not valid Java identifiers,
 221  * with quotes, backslashes, and non-printing characters
 222  * escaped as if for literals in the Java language.
 223  * </p>
 224  * <p>
 225  * For example, an HTML-like spelling
 226  * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
 227  * <code><big><b>\^pre\_</b></big></code> and could
 228  * display more cleanly as
 229  * <code><big><b>'&lt;pre&gt;'</b></big></code>,
 230  * with the quotes included.
 231  * Such string-like conventions are <em>not</em> suitable
 232  * for mangled bytecode names, in part because
 233  * dangerous characters must be eliminated, rather
 234  * than just quoted.  Otherwise internally structured
 235  * strings like package prefixes and method signatures
 236  * could not be reliably parsed.
 237  * </p>
 238  * <p>
 239  * In such human-readable displays, invalidly mangled
 240  * names should <em>not</em> be demangled and quoted,
 241  * for this would be misleading.  Likewise, JVM symbols
 242  * which contain dangerous characters (like dots in field
 243  * names or brackets in method names) should not be
 244  * simply quoted.  The bytecode names
 245  * <code><big><b>\=phase\,1</b></big></code> and
 246  * <code><big><b>phase.1</b></big></code> are distinct,
 247  * and in demangled displays they should be presented as
 248  * <code><big><b>'phase.1'</b></big></code> and something like
 249  * <code><big><b>'phase'.1</b></big></code>, respectively.
 250  * </p>
 251  *
 252  * @author John Rose
 253  * @version 1.2, 02/06/2008
 * @see <a href="http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm">Symbolic Freedom in the VM</a>
 255  */
 256 public class BytecodeName {
 257     private BytecodeName() { }  // static only class
 258 
 259     /** Given a source name, produce the corresponding bytecode name.
 260      * The source name should not be qualified, because any syntactic
 261      * markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
 262      * @param s the source name
 263      * @return a valid bytecode name which represents the source name
 264      */
 265     public static String toBytecodeName(String s) {
 266         String bn = mangle(s);
 267         assert((Object)bn == s || looksMangled(bn)) : bn;
 268         assert(s.equals(toSourceName(bn))) : s;
 269         return bn;
 270     }
 271 
 272     /** Given an unqualified bytecode name, produce the corresponding source name.
 273      * The bytecode name must not contain dangerous characters.
 274      * In particular, it must not be qualified or segmented by colon {@code ':'}.
 275      * @param s the bytecode name
 276      * @return the source name, which may possibly have unsafe characters
 277      * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
 278      * @see #isSafeBytecodeName(java.lang.String)
 279      */
 280     public static String toSourceName(String s) {
 281         checkSafeBytecodeName(s);
 282         String sn = s;
 283         if (looksMangled(s)) {
 284             sn = demangle(s);
 285             assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
 286         }
 287         return sn;
 288     }
 289 
 290     /**
 291      * Given a bytecode name from a classfile, separate it into
 292      * components delimited by dangerous characters.
 293      * Each resulting array element will be either a dangerous character,
 294      * or else a safe bytecode name.
 295      * (The safe name might possibly be mangled to hide further dangerous characters.)
 296      * For example, the qualified class name {@code java/lang/String}
 297      * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
 298      * The name {@code <init>} will be parsed into {@code {'<', "init", '>'}}.
 299      * The name {@code foo/bar$:baz} will be parsed into
 300      * {@code {"foo", '/', "bar", '$', ':', "baz"}}.
 301      * The name {@code ::\=:foo:\=bar\!baz} will be parsed into
 302      * {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}.
 303      */
 304     public static Object[] parseBytecodeName(String s) {
 305         int slen = s.length();
 306         Object[] res = null;
 307         for (int pass = 0; pass <= 1; pass++) {
 308             int fillp = 0;
 309             int lasti = 0;
 310             for (int i = 0; i <= slen; i++) {
 311                 int whichDC = -1;
 312                 if (i < slen) {
 313                     whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
 314                     if (whichDC < DANGEROUS_CHAR_FIRST_INDEX)  continue;
 315                 }
 316                 // got to end of string or next dangerous char
 317                 if (lasti < i) {
 318                     // normal component
 319                     if (pass != 0)
 320                         res[fillp] = toSourceName(s.substring(lasti, i));
 321                     fillp++;
 322                     lasti = i+1;
 323                 }
 324                 if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
 325                     if (pass != 0)
 326                         res[fillp] = DANGEROUS_CHARS_CA[whichDC];
 327                     fillp++;
 328                     lasti = i+1;
 329                 }
 330             }
 331             if (pass != 0)  break;
 332             // between passes, build the result array
 333             res = new Object[fillp];
 334             if (fillp <= 1 && lasti == 0) {
 335                 if (fillp != 0)  res[0] = toSourceName(s);
 336                 break;
 337             }
 338         }
 339         return res;
 340     }
 341 
 342     /**
 343      * Given a series of components, create a bytecode name for a classfile.
 344      * This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
 345      * Each component must either be an interned one-character string of
 346      * a dangerous character, or else a safe bytecode name.
 347      * @param components a series of name components
 348      * @return the concatenation of all components
 349      * @throws IllegalArgumentException if any component contains an unsafe
 350      *          character, and is not an interned one-character string
 351      * @throws NullPointerException if any component is null
 352      */
 353     public static String unparseBytecodeName(Object[] components) {
 354         Object[] components0 = components;
 355         for (int i = 0; i < components.length; i++) {
 356             Object c = components[i];
 357             if (c instanceof String) {
 358                 String mc = toBytecodeName((String) c);
 359                 if (i == 0 && components.length == 1)
 360                     return mc;  // usual case
 361                 if ((Object)mc != c) {
 362                     if (components == components0)
 363                         components = components.clone();
 364                     components[i] = c = mc;
 365                 }
 366             }
 367         }
 368         return appendAll(components);
 369     }
 370     private static String appendAll(Object[] components) {
 371         if (components.length <= 1) {
 372             if (components.length == 1) {
 373                 return String.valueOf(components[0]);
 374             }
 375             return "";
 376         }
 377         int slen = 0;
 378         for (Object c : components) {
 379             if (c instanceof String)
 380                 slen += String.valueOf(c).length();
 381             else
 382                 slen += 1;
 383         }
 384         StringBuilder sb = new StringBuilder(slen);
 385         for (Object c : components) {
 386             sb.append(c);
 387         }
 388         return sb.toString();
 389     }
 390 
 391     /**
 392      * Given a bytecode name, produce the corresponding display name.
 393      * This is the source name, plus quotes if needed.
 394      * If the bytecode name contains dangerous characters,
 395      * assume that they are being used as punctuation,
 396      * and pass them through unchanged.
 397      * Non-empty runs of non-dangerous characters are demangled
 398      * if necessary, and the resulting names are quoted if
 399      * they are not already valid Java identifiers, or if
 400      * they contain a dangerous character (i.e., dollar sign "$").
 401      * Single quotes are used when quoting.
 402      * Within quoted names, embedded single quotes and backslashes
 403      * are further escaped by prepended backslashes.
 404      *
 405      * @param s the original bytecode name (which may be qualified)
 406      * @return a human-readable presentation
 407      */
 408     public static String toDisplayName(String s) {
 409         Object[] components = parseBytecodeName(s);
 410         for (int i = 0; i < components.length; i++) {
 411             if (!(components[i] instanceof String))
 412                 continue;
 413             String sn = (String) components[i];
 414             // note that the name is already demangled!
 415             //sn = toSourceName(sn);
 416             if (!isJavaIdent(sn) || sn.indexOf('$') >=0 ) {
 417                 components[i] = quoteDisplay(sn);
 418             }
 419         }
 420         return appendAll(components);
 421     }
 422     private static boolean isJavaIdent(String s) {
 423         int slen = s.length();
 424         if (slen == 0)  return false;
 425         if (!Character.isJavaIdentifierStart(s.charAt(0)))
 426             return false;
 427         for (int i = 1; i < slen; i++) {
 428             if (!Character.isJavaIdentifierPart(s.charAt(i)))
 429                 return false;
 430         }
 431         return true;
 432     }
 433     private static String quoteDisplay(String s) {
 434         // TO DO:  Replace wierd characters in s by C-style escapes.
 435         return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'";
 436     }
 437 
 438     private static void checkSafeBytecodeName(String s)
 439             throws IllegalArgumentException {
 440         if (!isSafeBytecodeName(s)) {
 441             throw new IllegalArgumentException(s);
 442         }
 443     }
 444 
 445     /**
 446      * Report whether a simple name is safe as a bytecode name.
 447      * Such names are acceptable in class files as class, method, and field names.
 448      * Additionally, they are free of "dangerous" characters, even if those
 449      * characters are legal in some (or all) names in class files.
 450      * @param s the proposed bytecode name
 451      * @return true if the name is non-empty and all of its characters are safe
 452      */
 453     public static boolean isSafeBytecodeName(String s) {
 454         if (s.isEmpty())  return false;
 455         // check occurrences of each DANGEROUS char
 456         for (char xc : DANGEROUS_CHARS_A) {
 457             if (xc == ESCAPE_C)  continue;  // not really that dangerous
 458             if (s.indexOf(xc) >= 0)  return false;
 459         }
 460         return true;
 461     }
 462 
 463     /**
 464      * Report whether a character is safe in a bytecode name.
 465      * This is true of any unicode character except the following
 466      * <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
 467      * @param c the proposed character
 468      * @return true if the character is safe to use in classfiles
 469      */
 470     public static boolean isSafeBytecodeChar(char c) {
 471         return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
 472     }
 473 
 474     private static boolean looksMangled(String s) {
 475         return s.charAt(0) == ESCAPE_C;
 476     }
 477 
 478     private static String mangle(String s) {
 479         if (s.isEmpty())
 480             return NULL_ESCAPE;
 481 
 482         // build this lazily, when we first need an escape:
 483         StringBuilder sb = null;
 484 
 485         for (int i = 0, slen = s.length(); i < slen; i++) {
 486             char c = s.charAt(i);
 487 
 488             boolean needEscape = false;
 489             if (c == ESCAPE_C) {
 490                 if (i+1 < slen) {
 491                     char c1 = s.charAt(i+1);
 492                     if ((i == 0 && c1 == NULL_ESCAPE_C)
 493                         || c1 != originalOfReplacement(c1)) {
 494                         // an accidental escape
 495                         needEscape = true;
 496                     }
 497                 }
 498             } else {
 499                 needEscape = isDangerous(c);
 500             }
 501 
 502             if (!needEscape) {
 503                 if (sb != null)  sb.append(c);
 504                 continue;
 505             }
 506 
 507             // build sb if this is the first escape
 508             if (sb == null) {
 509                 sb = new StringBuilder(s.length()+10);
 510                 // mangled names must begin with a backslash:
 511                 if (s.charAt(0) != ESCAPE_C && i > 0)
 512                     sb.append(NULL_ESCAPE);
 513                 // append the string so far, which is unremarkable:
 514                 sb.append(s, 0, i);
 515             }
 516 
 517             // rewrite \ to \-, / to \|, etc.
 518             sb.append(ESCAPE_C);
 519             sb.append(replacementOf(c));
 520         }
 521 
 522         if (sb != null)   return sb.toString();
 523 
 524         return s;
 525     }
 526 
 527     private static String demangle(String s) {
 528         // build this lazily, when we first meet an escape:
 529         StringBuilder sb = null;
 530 
 531         int stringStart = 0;
 532         if (s.startsWith(NULL_ESCAPE))
 533             stringStart = 2;
 534 
 535         for (int i = stringStart, slen = s.length(); i < slen; i++) {
 536             char c = s.charAt(i);
 537 
 538             if (c == ESCAPE_C && i+1 < slen) {
 539                 // might be an escape sequence
 540                 char rc = s.charAt(i+1);
 541                 char oc = originalOfReplacement(rc);
 542                 if (oc != rc) {
 543                     // build sb if this is the first escape
 544                     if (sb == null) {
 545                         sb = new StringBuilder(s.length());
 546                         // append the string so far, which is unremarkable:
 547                         sb.append(s, stringStart, i);
 548                     }
 549                     ++i;  // skip both characters
 550                     c = oc;
 551                 }
 552             }
 553 
 554             if (sb != null)
 555                 sb.append(c);
 556         }
 557 
 558         if (sb != null)   return sb.toString();
 559 
 560         return s.substring(stringStart);
 561     }
 562 
 563     static char ESCAPE_C = '\\';
 564     // empty escape sequence to avoid a null name or illegal prefix
 565     static char NULL_ESCAPE_C = '=';
 566     static String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;
 567 
 568     static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
 569     static final String REPLACEMENT_CHARS =  "-|,?!%{}^_";
 570     static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
 571     static char[] DANGEROUS_CHARS_A   = DANGEROUS_CHARS.toCharArray();
 572     static char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
 573     static final Character[] DANGEROUS_CHARS_CA;
 574     static {
 575         Character[] dcca = new Character[DANGEROUS_CHARS.length()];
 576         for (int i = 0; i < dcca.length; i++)
 577             dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
 578         DANGEROUS_CHARS_CA = dcca;
 579     }
 580 
 581     static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
 582     static {
 583         String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS;
 584         //System.out.println("SPECIAL = "+SPECIAL);
 585         for (char c : SPECIAL.toCharArray()) {
 586             SPECIAL_BITMAP[c >>> 6] |= 1L << c;
 587         }
 588     }
 589     static boolean isSpecial(char c) {
 590         if ((c >>> 6) < SPECIAL_BITMAP.length)
 591             return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
 592         else
 593             return false;
 594     }
 595     static char replacementOf(char c) {
 596         if (!isSpecial(c))  return c;
 597         int i = DANGEROUS_CHARS.indexOf(c);
 598         if (i < 0)  return c;
 599         return REPLACEMENT_CHARS.charAt(i);
 600     }
 601     static char originalOfReplacement(char c) {
 602         if (!isSpecial(c))  return c;
 603         int i = REPLACEMENT_CHARS.indexOf(c);
 604         if (i < 0)  return c;
 605         return DANGEROUS_CHARS.charAt(i);
 606     }
 607     static boolean isDangerous(char c) {
 608         if (!isSpecial(c))  return false;
 609         return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
 610     }
 611     static int indexOfDangerousChar(String s, int from) {
 612         for (int i = from, slen = s.length(); i < slen; i++) {
 613             if (isDangerous(s.charAt(i)))
 614                 return i;
 615         }
 616         return -1;
 617     }
 618     static int lastIndexOfDangerousChar(String s, int from) {
 619         for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
 620             if (isDangerous(s.charAt(i)))
 621                 return i;
 622         }
 623         return -1;
 624     }
 625 
 626 
 627 }