# HG changeset patch # User jlaskey # Date 1537450919 10800 # Thu Sep 20 10:41:59 2018 -0300 # Node ID 4c7d7445b56b4701ac1c01bf82c662ccd7ed128e # Parent ccea318862aef53cb145933cd11a78845b2da353 8202442: String::unescape Reviewed-by: smarks, rriggs, sherman diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -2972,6 +2972,194 @@ } /** + * Translates all Unicode escapes and escape sequences in this string into + * characters represented by those escapes specified in sections 3.3 and + * 3.10.6 of The Java™ Language Specification. + *

+ * Each Unicode escape in the form &#92;unnnn is translated to a + * 16-bit 'char' value. + *

+ * Backslash escape sequences are translated as follows; + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Escape sequences
Escape | Name | Unicode
{@code \b} | backspace | U+0008
{@code \t} | horizontal tab | U+0009
{@code \n} | line feed | U+000A
{@code \f} | form feed | U+000C
{@code \r} | carriage return | U+000D
{@code \"} | double quote | U+0022
{@code \'} | single quote | U+0027
{@code \\} | backslash | U+005C
+ *

+     * Octal escapes {@code \0 - \377} are translated to their code
+     * point equivalents. As in JLS 3.10.6, an octal escape is one or two
+     * octal digits, or three octal digits when the first digit is in the
+     * range {@code 0 - 3}; for example {@code \477} is {@code \47} followed
+     * by the character {@code 7}.
+     * <p>
+     * As in JLS 3.3, a backslash begins a Unicode escape only when it is
+     * preceded by an even number of contiguous backslashes, so
+     * {@code \\u0041} translates to a backslash followed by {@code u0041}.
+     * Only ASCII digits and letters are accepted as octal and hexadecimal
+     * digits.
+     *
+     * @return String with all escapes translated.
+     *
+     * @throws IllegalArgumentException when the escape sequence does
+     *         not conform to JLS 3.3 or 3.10.6, including a backslash
+     *         that is the last character of the string.
+     *
+     * @jls 3.3 Unicode escapes
+     * @jls 3.10.6 Escape sequences
+     *
+     * @since 12
+     *
+     * @deprecated Preview feature associated with Raw String Literals.
+     */
+    @Deprecated(forRemoval=true, since="12")
+    public String unescape() {
+        UnicodeReader reader = new UnicodeReader();
+        // An escape is never shorter than the character it denotes, so the
+        // input length is an upper bound on the output length.
+        char[] chars = new char[reader.length()];
+        int to = 0;
+        while (reader.hasNext()) {
+            char ch = reader.next();
+            if (ch == '\\') {
+                if (!reader.hasNext()) {
+                    reader.error("truncated escape sequence");
+                }
+                ch = reader.next();
+                switch (ch) {
+                case 'b':
+                    ch = '\b';
+                    break;
+                case 'f':
+                    ch = '\f';
+                    break;
+                case 'n':
+                    ch = '\n';
+                    break;
+                case 'r':
+                    ch = '\r';
+                    break;
+                case 't':
+                    ch = '\t';
+                    break;
+                case '\\':
+                    ch = '\\';
+                    break;
+                case '\'':
+                    ch = '\'';
+                    break;
+                case '\"':
+                    ch = '\"';
+                    break;
+                case '0': case '1': case '2': case '3':
+                case '4': case '5': case '6': case '7':
+                    int code = ch - '0';
+                    // JLS 3.10.6: a third digit is only part of the escape
+                    // when the first digit is 0-3, which also keeps the
+                    // value within 0377 without a separate range check.
+                    int more = ch <= '3' ? 2 : 1;
+                    for (int i = 0; i < more && reader.hasNext(); i++) {
+                        char digit = reader.next();
+                        // ASCII octal digits only; anything else belongs to
+                        // whatever follows the escape, so un-read it.
+                        if (digit < '0' || '7' < digit) {
+                            reader.pushBack();
+                            break;
+                        }
+                        code = code << 3 | (digit - '0');
+                    }
+                    ch = (char)code;
+                    break;
+                default:
+                    reader.error("unrecognized escape sequence");
+                }
+            }
+            chars[to++] = ch;
+        }
+        return new String(chars, 0, to);
+    }
+
+    /**
+     * Cursor over the characters of the enclosing string that translates
+     * Unicode escapes (JLS 3.3) on the fly and supports un-reading the most
+     * recently returned character.
+     */
+    private class UnicodeReader {
+        /** Length of the enclosing string. */
+        final int length;
+        /** Index of the next raw character to read. */
+        int from;
+        /** Value of {@code from} before the most recent {@code next()}. */
+        int prev;
+        /**
+         * True when the character just returned was a raw backslash preceded
+         * by an even number of contiguous raw backslashes; a raw backslash
+         * that immediately follows it cannot begin a Unicode escape.
+         */
+        boolean oddBackslash;
+        /** Value of {@code oddBackslash} before the most recent {@code next()}. */
+        boolean prevOddBackslash;
+
+        UnicodeReader() {
+            this.length = String.this.length();
+            this.from = 0;
+            this.prev = 0;
+            this.oddBackslash = false;
+            this.prevOddBackslash = false;
+        }
+
+        /** Returns the length of the enclosing string. */
+        int length() {
+            return length;
+        }
+
+        /** Returns true while raw characters remain to be read. */
+        boolean hasNext() {
+            return from < length;
+        }
+
+        /**
+         * Returns the next character, translating a Unicode escape (a
+         * backslash, one or more 'u' characters and four hexadecimal digits)
+         * into the char it denotes. Callers must check {@code hasNext()}
+         * first.
+         *
+         * @throws IllegalArgumentException if a Unicode escape is truncated
+         *         or contains a non-hexadecimal digit
+         */
+        char next() {
+            prev = from;
+            prevOddBackslash = oddBackslash;
+            char next = charAt(from++);
+            if (next != '\\') {
+                oddBackslash = false;
+                return next;
+            }
+            // A raw backslash is returned as-is when it is not eligible to
+            // begin a Unicode escape, when it is the last character (so the
+            // caller can report the truncation), or when no 'u' follows.
+            if (oddBackslash || !hasNext() || charAt(from) != 'u') {
+                oddBackslash = !oddBackslash;
+                return next;
+            }
+            // Skip the run of 'u' characters; on exit next holds the first
+            // character after them, or 'u' if the string ended first.
+            do {
+                next = charAt(from++);
+            } while (next == 'u' && hasNext());
+            // Three more characters must follow the first hex digit.
+            if (length <= from + 2) {
+                error("unicode escape sequence truncated at end of string");
+            }
+            // An invalid digit yields -1, which makes the OR-ed value negative.
+            int code = (hexDigit(next)           << 12) |
+                       (hexDigit(charAt(from++)) <<  8) |
+                       (hexDigit(charAt(from++)) <<  4) |
+                        hexDigit(charAt(from++));
+            if (code < 0) {
+                error("unicode escape sequence contains non hexadecimal digits");
+            }
+            return (char)code;
+        }
+
+        /**
+         * Returns the value of an ASCII hexadecimal digit, or -1 for any
+         * other character, including non-ASCII digits and fullwidth letters
+         * that {@code Character.digit} would otherwise accept.
+         */
+        int hexDigit(char ch) {
+            return ch < 0x80 ? Character.digit(ch, 16) : -1;
+        }
+
+        /** Un-reads the character returned by the most recent {@code next()}. */
+        void pushBack() {
+            from = prev;
+            oddBackslash = prevOddBackslash;
+        }
+
+        /**
+         * Throws an IllegalArgumentException whose message is the given text
+         * followed by the index of the last raw character consumed.
+         */
+        void error(String message) {
+            throw new IllegalArgumentException(message + ", pos = " + (from - 1));
+        }
+    }
+
+    /**
      * This object (which is already a string!) is itself returned.
      *
      * @return the string itself.
diff --git a/test/jdk/java/lang/String/Unescape.java b/test/jdk/java/lang/String/Unescape.java
new file mode 100644
--- /dev/null
+++ b/test/jdk/java/lang/String/Unescape.java
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Unit tests for String#unescape
+ * @compile --enable-preview -source 12 -encoding utf8 Unescape.java
+ * @run main/othervm --enable-preview Unescape
+ */
+
+@SuppressWarnings( "deprecation" )
+public class Unescape {
+    public static void main(String[] args) {
+        test1();
+        test2();
+    }
+
+    /*
+     * Test unescaping functionality: raw string literals keep their
+     * backslashes, and unescape() translates them.
+     */
+    static void test1() {
+        equal(`a\tb\u0063`, "a\\tb\\u0063");
+        equal(`a\tb\u0063`.unescape(), "a\tbc");
+        equal(`a\tb\u2022`, "a\\tb\\u2022");
+        equal(`a\tb\u2022`.unescape(), "a\tb\u2022");
+        equal(`\0\12\012`.unescape(), "\0\12\012");
+        equal(`•\0\12\012`.unescape(), "\u2022\0\12\012");
+
+        equal(`\b`.unescape(), "\b");
+        equal(`\f`.unescape(), "\f");
+        equal(`\n`.unescape(), "\n");
+        equal(`\r`.unescape(), "\r");
+        equal(`\t`.unescape(), "\t");
+        equal(`\'`.unescape(), "\'");
+        equal(`\"`.unescape(), "\"");
+        equal(`\0`.unescape(), "\0");
+        equal(`\7`.unescape(), "\7");
+        equal(`\12`.unescape(), "\12");
+        equal(`\012`.unescape(), "\012");
+        equal(`\377`.unescape(), "\377");
+        equal(`\u0000`.unescape(), "\u0000");
+        equal(`\u2022`.unescape(), "\u2022");
+        equal(`•\b`.unescape(), "•\b");
+        equal(`•\f`.unescape(), "•\f");
+        equal(`•\n`.unescape(), "•\n");
+        equal(`•\r`.unescape(), "•\r");
+        equal(`•\t`.unescape(), "•\t");
+        equal(`•\0`.unescape(), "•\0");
+        equal(`•\7`.unescape(), "•\7");
+        equal(`•\12`.unescape(), "•\12");
+        equal(`•\177`.unescape(), "•\177");
+        equal(`•\u0000`.unescape(), "•\u0000");
+        equal(`•\u2022`.unescape(), "•\u2022");
+
+        // A backslash produced by a Unicode escape still starts an escape
+        // sequence.
+        equal(`\u005c\u0072`.unescape(), "\r");
+    }
+
+    /*
+     * Test for IllegalArgumentException.
+     */
+    static void test2() {
+        wellFormed(`\b`);
+        wellFormed(`\f`);
+        wellFormed(`\n`);
+        wellFormed(`\r`);
+        wellFormed(`\t`);
+        wellFormed(`\0`);
+        wellFormed(`\7`);
+        wellFormed(`\12`);
+        wellFormed(`\012`);
+        wellFormed(`\u0000`);
+        wellFormed(`\u2022`);
+        wellFormed(`•\b`);
+        wellFormed(`•\f`);
+        wellFormed(`•\n`);
+        wellFormed(`•\r`);
+        wellFormed(`•\t`);
+        wellFormed(`•\0`);
+        wellFormed(`•\7`);
+        wellFormed(`•\12`);
+        wellFormed(`•\012`);
+        wellFormed(`•\u0000`);
+        wellFormed(`•\u2022`);
+
+        malformed(`\x`);
+        malformed(`\+`);
+        malformed(`\u`);
+        malformed(`\uuuuu`);
+        malformed(`\u2`);
+        malformed(`\u20`);
+        malformed(`\u202`);
+        malformed(`\u2 `);
+        malformed(`\u20 `);
+        malformed(`\u202 `);
+        malformed(`\uuuuu2`);
+        malformed(`\uuuuu20`);
+        malformed(`\uuuuu202`);
+        malformed(`\uuuuu2 `);
+        malformed(`\uuuuu20 `);
+        malformed(`\uuuuu202 `);
+        malformed(`\uG`);
+        malformed(`\u2G`);
+        malformed(`\u20G`);
+        malformed(`\uG `);
+        malformed(`\u2G `);
+        malformed(`\u20G `);
+        malformed(`\uuuuuG`);
+        malformed(`\uuuuu2G`);
+        malformed(`\uuuuu20G`);
+        malformed(`\uuuuuG `);
+        malformed(`\uuuuu2G `);
+        malformed(`\uuuuu20G `);
+
+        malformed(`•\x`);
+        malformed(`•\+`);
+        malformed(`•\u`);
+        malformed(`•\uuuuu`);
+        malformed(`•\u2`);
+        malformed(`•\u20`);
+        malformed(`•\u202`);
+        malformed(`•\u2 `);
+        malformed(`•\u20 `);
+        malformed(`•\u202 `);
+        malformed(`•\uuuuu2`);
+        malformed(`•\uuuuu20`);
+        malformed(`•\uuuuu202`);
+        malformed(`•\uuuuu2 `);
+        malformed(`•\uuuuu20 `);
+        malformed(`•\uuuuu202 `);
+        malformed(`•\uG`);
+        malformed(`•\u2G`);
+        malformed(`•\u20G`);
+        malformed(`•\uG `);
+        malformed(`•\u2G `);
+        malformed(`•\u20G `);
+        malformed(`•\uuuuuG`);
+        malformed(`•\uuuuu2G`);
+        malformed(`•\uuuuu20G`);
+        malformed(`•\uuuuuG `);
+        malformed(`•\uuuuu2G `);
+        malformed(`•\uuuuu20G `);
+    }
+
+    /*
+     * Report difference in result. Spaces are printed as '.' so that
+     * trailing whitespace is visible; a null value is printed as "null".
+     * Always throws RuntimeException carrying the message.
+     */
+    static void report(String message, String inputTag, String input,
+                       String outputTag, String output) {
+        System.err.println(message);
+        System.err.println();
+        System.err.println(inputTag);
+        // String.valueOf keeps a null argument from masking the real failure
+        // with a NullPointerException.
+        System.err.println(String.valueOf(input).replace(' ', '.'));
+        System.err.println();
+        System.err.println(outputTag);
+        System.err.println(String.valueOf(output).replace(' ', '.'));
+        throw new RuntimeException(message);
+    }
+
+    /*
+     * Raise an exception if the two inputs are not equivalent.
+     */
+    static void equal(String input, String expected) {
+        if (input == null || expected == null || !expected.equals(input)) {
+            report("Failed equal", "Input:", input, "Expected:", expected);
+        }
+    }
+
+    /*
+     * Raise an exception if the string contains a malformed escape.
+     */
+    static void wellFormed(String rawString) {
+        try {
+            rawString.unescape();
+        } catch (IllegalArgumentException ex) {
+            System.err.println("Failed wellFormed");
+            System.err.println(rawString);
+            // Keep the original exception so the offending position is
+            // visible in the test log.
+            throw new RuntimeException("Failed wellFormed: " + rawString, ex);
+        }
+    }
+
+    /*
+     * Raise an exception if the string does not contain a malformed escape.
+     */
+    static void malformed(String rawString) {
+        try {
+            rawString.unescape();
+        } catch (IllegalArgumentException expected) {
+            // incorrectly formed escapes are required to be rejected
+            return;
+        }
+        System.err.println("Failed malformed");
+        System.err.println(rawString);
+        throw new RuntimeException("Failed malformed: " + rawString);
+    }
+}