New test/java/lang/String/ToLowerCase.java

   1 /*
   2  * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25     @test
  26     @bug 4217441 4533872 4900935 8020037 8032012 8041791 8042589 8054307
  27     @summary toLowerCase should lower-case Greek Sigma correctly depending
  28              on the context (final/non-final).  Also it should handle
  29              Locale specific (lt, tr, and az) lowercasings and supplementary
  30              characters correctly.
  31 */
  32 
  33 import java.util.Locale;
  34 
  35 public class ToLowerCase {
  36 
  37     public static void main(String[] args) {
  38         Locale turkish = new Locale("tr", "TR");
  39         Locale lt = new Locale("lt"); // Lithanian
  40         Locale az = new Locale("az"); // Azeri
  41 
  42         // Greek Sigma final/non-final tests
  43         test("\u03A3", Locale.US, "\u03C3");
  44         test("LAST\u03A3", Locale.US, "last\u03C2");
  45         test("MID\u03A3DLE", Locale.US, "mid\u03C3dle");
  46         test("WORD1 \u03A3 WORD3", Locale.US, "word1 \u03C3 word3");
  47         test("WORD1 LAST\u03A3 WORD3", Locale.US, "word1 last\u03C2 word3");
  48         test("WORD1 MID\u03A3DLE WORD3", Locale.US, "word1 mid\u03C3dle word3");
  49         test("\u0399\u0395\u03a3\u03a5\u03a3 \u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3", Locale.US,
  50              "\u03b9\u03b5\u03c3\u03c5\u03c2 \u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2"); // "IESUS XRISTOS"
  51 
  52         // Explicit dot above for I's and J's whenever there are more accents above (Lithanian)
  53         test("I", lt, "i");
  54         test("I\u0300", lt, "i\u0307\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
  55         test("I\u0316", lt, "i\u0316"); // "I" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
  56         test("J", lt, "j");
  57         test("J\u0300", lt, "j\u0307\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
  58         test("J\u0316", lt, "j\u0316"); // "J" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
  59         test("\u012E", lt, "\u012F");
  60         test("\u012E\u0300", lt, "\u012F\u0307\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
  61         test("\u012E\u0316", lt, "\u012F\u0316"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
  62         test("\u00CC", lt, "i\u0307\u0300");
  63         test("\u00CD", lt, "i\u0307\u0301");
  64         test("\u0128", lt, "i\u0307\u0303");
  65         test("I\u0300", Locale.US, "i\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
  66         test("J\u0300", Locale.US, "j\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
  67         test("\u012E\u0300", Locale.US, "\u012F\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
  68         test("\u00CC", Locale.US, "\u00EC");
  69         test("\u00CD", Locale.US, "\u00ED");
  70         test("\u0128", Locale.US, "\u0129");
  71 
  72         // I-dot tests
  73         test("\u0130", turkish, "i");
  74         test("\u0130", az, "i");
  75         test("\u0130", lt, "\u0069\u0307");
  76         test("\u0130", Locale.US, "\u0069\u0307");
  77         test("\u0130", Locale.JAPAN, "\u0069\u0307");
  78         test("\u0130", Locale.ROOT, "\u0069\u0307");
  79 
  80         // Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
  81         test("I\u0307", turkish, "i");
  82         test("I\u0307", az, "i");
  83         test("J\u0307", turkish, "j\u0307");
  84         test("J\u0307", az, "j\u0307");
  85 
  86         // Unless an I is before a dot_above, it turns into a dotless i (Turkish and Azeri)
  87         test("I", turkish, "\u0131");
  88         test("I", az, "\u0131");
  89         test("I", Locale.US, "i");
  90         test("IABC", turkish, "\u0131abc");
  91         test("IABC", az, "\u0131abc");
  92         test("IABC", Locale.US, "iabc");
  93 
  94         // Supplementary character tests
  95         //
  96         // U+10400 ("\uD801\uDC00"): DESERET CAPITAL LETTER LONG I
  97         // U+10401 ("\uD801\uDC01"): DESERET CAPITAL LETTER LONG E
  98         // U+10402 ("\uD801\uDC02"): DESERET CAPITAL LETTER LONG A
  99         // U+10428 ("\uD801\uDC28"): DESERET SMALL LETTER LONG I
 100         // U+10429 ("\uD801\uDC29"): DESERET SMALL LETTER LONG E
 101         // U+1042A ("\uD801\uDC2A"): DESERET SMALL LETTER LONG A
 102         //
 103         // valid code point tests:
 104         test("\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
 105         test("\uD801\uDC00A\uD801\uDC01B\uD801\uDC02C", Locale.US, "\uD801\uDC28a\uD801\uDC29b\uD801\uDC2Ac");
 106         // invalid code point tests:
 107         test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
 108 
 109         // lower/uppercase + surrogates
 110         test("a\uD801\uDC1c", Locale.ROOT, "a\uD801\uDC44");
 111         test("A\uD801\uDC1c", Locale.ROOT, "a\uD801\uDC44");
 112         test("a\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "a\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
 113         test("A\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "a\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
 114 
 115         // test bmp + supp1
 116         StringBuilder src = new StringBuilder(0x20000);
 117         StringBuilder exp = new StringBuilder(0x20000);
 118         for (int cp = 0; cp < 0x20000; cp++) {
 119             if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
 120                 continue;
 121             }
 122             if (cp == 0x0130) {
 123                 // Although UnicodeData.txt has the lower case char as \u0069, it should be
 124                 // handled with the rules in SpecialCasing.txt, i.e., \u0069\u0307 in
 125                 // non Turkic locales.
 126                 continue;
 127             }
 128             int lowerCase = Character.toLowerCase(cp);
 129             if (lowerCase == -1) {    //Character.ERROR
 130                 continue;
 131             }
 132             src.appendCodePoint(cp);
 133             exp.appendCodePoint(lowerCase);
 134         }
 135         test(src.toString(), Locale.US, exp.toString());
 136 
 137         // test latin1
 138         src = new StringBuilder(0x100);
 139         exp = new StringBuilder(0x100);
 140         for (int cp = 0; cp < 0x100; cp++) {
 141             int lowerCase = Character.toLowerCase(cp);
 142             if (lowerCase == -1) {    //Character.ERROR
 143                 continue;
 144             }
 145             src.appendCodePoint(cp);
 146             exp.appendCodePoint(lowerCase);
 147         }
 148         test(src.toString(), Locale.US, exp.toString());
 149 
 150         // test non-latin1 -> latin1
 151         src = new StringBuilder(0x100).append("abc");
 152         exp = new StringBuilder(0x100).append("abc");
 153         for (int cp = 0x100; cp < 0x10000; cp++) {
 154             int lowerCase  = Character.toLowerCase(cp);
 155             if (lowerCase < 0x100 && cp != '\u0130') {
 156                 src.appendCodePoint(cp);
 157                 exp.appendCodePoint(lowerCase);
 158             }
 159         }
 160         test(src.toString(), Locale.US, exp.toString());
 161     }
 162 
 163     static void test(String in, Locale locale, String expected) {
 164         test0(in, locale,expected);
 165         for (String[] ss :  new String[][] {
 166                                 new String[] {"abc",      "abc"},
 167                                 new String[] {"aBc",      "abc"},
 168                                 new String[] {"ABC",      "abc"},
 169                                 new String[] {"ab\u4e00", "ab\u4e00"},
 170                                 new String[] {"aB\u4e00", "ab\u4e00"},
 171                                 new String[] {"AB\u4e00", "ab\u4e00"},
 172                                 new String[] {"ab\uD800\uDC00", "ab\uD800\uDC00"},
 173                                 new String[] {"aB\uD800\uDC00", "ab\uD800\uDC00"},
 174                                 new String[] {"AB\uD800\uDC00", "ab\uD800\uDC00"},
 175                                 new String[] {"ab\uD801\uDC1C", "ab\uD801\uDC44"},
 176                                 new String[] {"aB\uD801\uDC1C", "ab\uD801\uDC44"},
 177                                 new String[] {"AB\uD801\uDC1C", "ab\uD801\uDC44"},
 178 
 179                             }) {
 180             test0(ss[0] + " " + in, locale, ss[1] + " " + expected);
 181             test0(in + " " + ss[0], locale, expected + " " + ss[1]);
 182         }
 183     }
 184 
 185     static void test0(String in, Locale locale, String expected) {
 186         String result = in.toLowerCase(locale);
 187         if (!result.equals(expected)) {
 188             System.err.println("input: " + in + ", locale: " + locale +
 189                     ", expected: " + expected + ", actual: " + result);
 190             throw new RuntimeException();
 191         }
 192     }
 193 }