--- old/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/linker/NameCodec.java 2020-04-15 18:51:07.000000000 +0530 +++ /dev/null 2020-04-15 18:51:07.000000000 +0530 @@ -1,432 +0,0 @@ -/* - * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package jdk.nashorn.internal.runtime.linker; - -/** - *

- * Implements the name mangling and demangling as specified by John Rose's - * "Symbolic Freedom in the VM" article. Normally, you would - * mangle the names in the call sites as you're generating bytecode, and then - * demangle them when you receive them in bootstrap methods. - *

- *

- * This code is derived from sun.invoke.util.BytecodeName. Apart from subsetting that - * class, we don't want to create dependency between non-exported package from java.base - * to nashorn module. - *

- * - *

Comment from BytecodeName class reproduced here:

- * - * Includes universal mangling rules for the JVM. - * - *

Avoiding Dangerous Characters

- * - *

- * The JVM defines a very small set of characters which are illegal - * in name spellings. We will slightly extend and regularize this set - * into a group of dangerous characters. - * These characters will then be replaced, in mangled names, by escape sequences. - * In addition, accidental escape sequences must be further escaped. - * Finally, a special prefix will be applied if and only if - * the mangling would otherwise fail to begin with the escape character. - * This happens to cover the corner case of the null string, - * and also clearly marks symbols which need demangling. - *

- *

- * Dangerous characters are the union of all characters forbidden - * or otherwise restricted by the JVM specification, - * plus their mates, if they are brackets - * ([ and ], - * < and >), - * plus, arbitrarily, the colon character :. - * There is no distinction between type, method, and field names. - * This makes it easier to convert between mangled names of different - * types, since they do not need to be decoded (demangled). - *

- *

- * The escape character is backslash \ - * (also known as reverse solidus). - * This character is, until now, unheard of in bytecode names, - * but traditional in the proposed role. - * - *

- *

Replacement Characters

- * - * - *

- * Every escape sequence is two characters - * (in fact, two UTF8 bytes) beginning with - * the escape character and followed by a - * replacement character. - * (Since the replacement character is never a backslash, - * iterated manglings do not double in size.) - *

- *

- * Each dangerous character has some rough visual similarity - * to its corresponding replacement character. - * This makes mangled symbols easier to recognize by sight. - *

- *

- * The dangerous characters are - * / (forward slash, used to delimit package components), - * . (dot, also a package delimiter), - * ; (semicolon, used in signatures), - * $ (dollar, used in inner classes and synthetic members), - * < (left angle), - * > (right angle), - * [ (left square bracket, used in array types), - * ] (right square bracket, reserved in this scheme for language use), - * and : (colon, reserved in this scheme for language use). - * Their replacements are, respectively, - * | (vertical bar), - * , (comma), - * ? (question mark), - * % (percent), - * ^ (caret), - * _ (underscore), - * { (left curly bracket), - * } (right curly bracket), and - * ! (exclamation mark). - * In addition, the replacement character for the escape character itself is - * - (hyphen), - * and the replacement character for the null prefix is - * = (equal sign). - *

- *

- * An escape character \ - * followed by any of these replacement characters - * is an escape sequence, and there are no other escape sequences. - * An equal sign is only part of an escape sequence - * if it is the second character in the whole string, following a backslash. - * Two consecutive backslashes do not form an escape sequence. - *

- *

- * Each escape sequence replaces a so-called original character - * which is either one of the dangerous characters or the escape character. - * A null prefix replaces an initial null string, not a character. - *

- *

- * All this implies that escape sequences cannot overlap and may be - * determined all at once for a whole string. Note that a spelling - * string can contain accidental escapes, apparent escape - * sequences which must not be interpreted as manglings. - * These are disabled by replacing their leading backslash with an - * escape sequence (\-). To mangle a string, three logical steps - * are required, though they may be carried out in one pass: - *

- *
    - *
  1. In each accidental escape, replace the backslash with an escape sequence - * (\-).
  2. Replace each dangerous character with an escape sequence - * (\| for /, etc.).
  3. If the first two steps introduced any change, and - * if the string does not already begin with a backslash, prepend a null prefix (\=).
- * - * To demangle a mangled string that begins with an escape, - * remove any null prefix, and then replace (in parallel) - * each escape sequence by its original character. - *

Spelling strings which contain accidental - * escapes must have them replaced, even if those - * strings do not contain dangerous characters. - * This restriction means that mangling a string always - * requires a scan of the string for escapes. - * But then, a scan would be required anyway, - * to check for dangerous characters. - * - *

- *

Nice Properties

- * - *

- * If a bytecode name does not contain any escape sequence, - * demangling is a no-op: The string demangles to itself. - * Such a string is called self-mangling. - * Almost all strings are self-mangling. - * In practice, to demangle almost any name “found in nature”, - * simply verify that it does not begin with a backslash. - *

- *

- * Mangling is a one-to-one function, while demangling - * is a many-to-one function. - * A mangled string is defined as validly mangled if - * it is in fact the unique mangling of its spelling string. - * Three examples of invalidly mangled strings are \=foo, - * \-bar, and baz\!, which demangle to foo, \bar, and - * baz\!, but then remangle to foo, \bar, and \=baz\-!. - * If a language back-end or runtime is using mangled names, - * it should never present an invalidly mangled bytecode - * name to the JVM. If the runtime encounters one, - * it should also report an error, since such an occurrence - * probably indicates a bug in name encoding which - * will lead to errors in linkage. - * However, this note does not propose that the JVM verifier - * detect invalidly mangled names. - *

- *

- * As a result of these rules, it is a simple matter to - * compute validly mangled substrings and concatenations - * of validly mangled strings, and (with a little care) - * these correspond to corresponding operations on their - * spelling strings. - *

- * - *

If languages that include non-Java symbol spellings use this - * mangling convention, they will enjoy the following advantages: - *

- * - * - * - *

Suggestions for Human Readable Presentations

- * - * - *

- * For human readable displays of symbols, - * it will be better to present a string-like quoted - * representation of the spelling, because JVM users - * are generally familiar with such tokens. - * We suggest using single or double quotes before and after - * mangled symbols which are not valid Java identifiers, - * with quotes, backslashes, and non-printing characters - * escaped as if for literals in the Java language. - *

- *

- * For example, an HTML-like spelling - * <pre> mangles to - * \^pre\_ and could - * display more cleanly as - * '<pre>', - * with the quotes included. - * Such string-like conventions are not suitable - * for mangled bytecode names, in part because - * dangerous characters must be eliminated, rather - * than just quoted. Otherwise internally structured - * strings like package prefixes and method signatures - * could not be reliably parsed. - *

- *

- * In such human-readable displays, invalidly mangled - * names should not be demangled and quoted, - * for this would be misleading. Likewise, JVM symbols - * which contain dangerous characters (like dots in field - * names or brackets in method names) should not be - * simply quoted. The bytecode names - * \=phase\,1 and - * phase.1 are distinct, - * and in demangled displays they should be presented as - * 'phase.1' and something like - * 'phase'.1, respectively. - *

/**
 * Static utility that mangles symbolic names into JVM-safe bytecode names and
 * demangles them again. Dangerous characters and accidental escape sequences
 * are rewritten as two-character escapes introduced by a backslash; a name
 * that had to be changed but would not otherwise start with a backslash gets
 * the null prefix {@code \=} prepended.
 */
public final class NameCodec {
    /** Character that introduces every escape sequence. */
    private static final char ESCAPE_C = '\\';
    /** Second character of the null prefix; keeps a mangled name from being empty or badly prefixed. */
    private static final char NULL_ESCAPE_C = '=';
    /** The null prefix itself: escape character followed by the equal sign. */
    private static final String NULL_ESCAPE = ESCAPE_C + "" + NULL_ESCAPE_C;

    /**
     * Canonical encoding for the empty name.
     */
    public static final String EMPTY_NAME = new String(new char[] { ESCAPE_C, NULL_ESCAPE_C });

    /** Characters that get escaped; the escape character itself must stay at index 0. */
    private static final String DANGEROUS_CHARS = "\\/.;:$[]<>";
    /** Replacement for the character at the same index of {@link #DANGEROUS_CHARS}. */
    private static final String REPLACEMENT_CHARS = "-|,?!%{}^_";
    /** First index of {@link #DANGEROUS_CHARS} that is a dangerous character rather than the escape. */
    private static final int DANGEROUS_CHAR_FIRST_INDEX = 1;

    /** 128-bit membership set holding every dangerous and every replacement character. */
    private static final long[] SPECIAL_BITMAP = new long[2];
    static {
        final String everySpecial = DANGEROUS_CHARS + REPLACEMENT_CHARS;
        for (int k = 0; k < everySpecial.length(); k++) {
            final char ch = everySpecial.charAt(k);
            // the shift distance is taken modulo 64, so this sets bit (ch % 64) of word (ch / 64)
            SPECIAL_BITMAP[ch >>> 6] |= 1L << ch;
        }
    }

    private NameCodec() {
    }

    /**
     * Encodes ("mangles") an unencoded symbolic name.
     * @param name the symbolic name to mangle
     * @return the mangled form of the symbolic name.
     */
    public static String encode(final String name) {
        final String mangled = mangle(name);
        // either nothing changed (same object) or the result is visibly mangled
        assert ((Object) mangled == name || looksMangled(mangled)) : mangled;
        // mangling must be reversible
        assert name.equals(decode(mangled)) : name;
        return mangled;
    }

    /**
     * Decodes ("demangles") an encoded symbolic name.
     * @param name the symbolic name to demangle
     * @return the demangled form of the symbolic name.
     */
    public static String decode(final String name) {
        if (name.isEmpty() || !looksMangled(name)) {
            // names that do not start with the escape character decode to themselves
            return name;
        }
        final String spelling = demangle(name);
        // a validly mangled name is exactly the mangling of its spelling
        assert name.equals(mangle(spelling)) : name + " => " + spelling + " => " + mangle(spelling);
        return spelling;
    }

    /** Tells whether the (non-empty) string starts with the escape character. */
    private static boolean looksMangled(final String s) {
        return s.charAt(0) == ESCAPE_C;
    }

    /**
     * Produces the mangled form of {@code s}; returns {@code s} itself when no
     * character needed escaping.
     */
    private static String mangle(final String s) {
        final int len = s.length();
        if (len == 0) {
            return NULL_ESCAPE;
        }

        // allocated only once the first escape is required
        StringBuilder out = null;

        for (int pos = 0; pos < len; pos++) {
            final char ch = s.charAt(pos);
            final boolean mustEscape = (ch == ESCAPE_C)
                    ? startsAccidentalEscape(s, pos)
                    : isDangerous(ch);

            if (mustEscape) {
                if (out == null) {
                    out = new StringBuilder(len + 10);
                    // a mangled name has to start with a backslash
                    if (pos > 0 && s.charAt(0) != ESCAPE_C) {
                        out.append(NULL_ESCAPE);
                    }
                    // everything before this position needed no change
                    out.append(s, 0, pos);
                }
                // \ becomes \-, / becomes \|, and so on
                out.append(ESCAPE_C).append(replacementOf(ch));
            } else if (out != null) {
                out.append(ch);
            }
        }

        return out == null ? s : out.toString();
    }

    /**
     * Tells whether the backslash at {@code pos} together with the following
     * character would be read as an escape sequence (or as the null prefix
     * when at the very start) and therefore has to be escaped itself.
     */
    private static boolean startsAccidentalEscape(final String s, final int pos) {
        final int next = pos + 1;
        if (next >= s.length()) {
            return false;
        }
        final char follower = s.charAt(next);
        return (pos == 0 && follower == NULL_ESCAPE_C)
                || follower != originalOfReplacement(follower);
    }

    /**
     * Reverses {@link #mangle(String)}: drops a leading null prefix and turns
     * every escape sequence back into its original character.
     */
    private static String demangle(final String s) {
        final int start = s.startsWith(NULL_ESCAPE) ? 2 : 0;
        final int len = s.length();

        // allocated only once the first escape sequence is met
        StringBuilder out = null;

        int pos = start;
        while (pos < len) {
            char ch = s.charAt(pos);

            if (ch == ESCAPE_C && pos + 1 < len) {
                // candidate escape sequence
                final char replacement = s.charAt(pos + 1);
                final char original = originalOfReplacement(replacement);
                if (original != replacement) {
                    if (out == null) {
                        out = new StringBuilder(len);
                        // everything before this position needed no change
                        out.append(s, start, pos);
                    }
                    pos++; // consume the replacement character as well
                    ch = original;
                }
            }

            if (out != null) {
                out.append(ch);
            }
            pos++;
        }

        return out != null ? out.toString() : s.substring(start);
    }

    /** Tells whether {@code c} is one of the dangerous or replacement characters. */
    private static boolean isSpecial(final char c) {
        final int word = c >>> 6;
        return word < SPECIAL_BITMAP.length
                && ((SPECIAL_BITMAP[word] >> c) & 1) != 0;
    }

    /** Maps a dangerous character (or the escape) to its replacement; other characters map to themselves. */
    private static char replacementOf(final char c) {
        final int idx = isSpecial(c) ? DANGEROUS_CHARS.indexOf(c) : -1;
        return idx < 0 ? c : REPLACEMENT_CHARS.charAt(idx);
    }

    /** Maps a replacement character back to its original; other characters map to themselves. */
    private static char originalOfReplacement(final char c) {
        final int idx = isSpecial(c) ? REPLACEMENT_CHARS.indexOf(c) : -1;
        return idx < 0 ? c : DANGEROUS_CHARS.charAt(idx);
    }

    /** Tells whether {@code c} is a dangerous character, the escape character excluded. */
    private static boolean isDangerous(final char c) {
        return isSpecial(c) && DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX;
    }
}