New test/java/lang/String/ToLowerCase.java

   1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25     @test
  26     @bug 4217441 4533872 4900935 8020037 8032012
  27     @summary toLowerCase should lower-case Greek Sigma correctly depending
  28              on the context (final/non-final).  Also it should handle
  29              Locale specific (lt, tr, and az) lowercasings and supplementary
  30              characters correctly.
  31 */
  32 
  33 import java.util.Locale;
  34 
  35 public class ToLowerCase {
  36 
  37     public static void main(String[] args) {
  38         Locale turkish = new Locale("tr", "TR");
  39         Locale lt = new Locale("lt"); // Lithanian
  40         Locale az = new Locale("az"); // Azeri
  41 
  42         // Greek Sigma final/non-final tests
  43         test("\u03A3", Locale.US, "\u03C3");
  44         test("LAST\u03A3", Locale.US, "last\u03C2");
  45         test("MID\u03A3DLE", Locale.US, "mid\u03C3dle");
  46         test("WORD1 \u03A3 WORD3", Locale.US, "word1 \u03C3 word3");
  47         test("WORD1 LAST\u03A3 WORD3", Locale.US, "word1 last\u03C2 word3");
  48         test("WORD1 MID\u03A3DLE WORD3", Locale.US, "word1 mid\u03C3dle word3");
  49         test("\u0399\u0395\u03a3\u03a5\u03a3 \u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3", Locale.US,
  50              "\u03b9\u03b5\u03c3\u03c5\u03c2 \u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2"); // "IESUS XRISTOS"
  51 
  52         // Explicit dot above for I's and J's whenever there are more accents above (Lithanian)
  53         test("I", lt, "i");
  54         test("I\u0300", lt, "i\u0307\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
  55         test("I\u0316", lt, "i\u0316"); // "I" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
  56         test("J", lt, "j");
  57         test("J\u0300", lt, "j\u0307\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
  58         test("J\u0316", lt, "j\u0316"); // "J" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
  59         test("\u012E", lt, "\u012F");
  60         test("\u012E\u0300", lt, "\u012F\u0307\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
  61         test("\u012E\u0316", lt, "\u012F\u0316"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
  62         test("\u00CC", lt, "i\u0307\u0300");
  63         test("\u00CD", lt, "i\u0307\u0301");
  64         test("\u0128", lt, "i\u0307\u0303");
  65         test("I\u0300", Locale.US, "i\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
  66         test("J\u0300", Locale.US, "j\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
  67         test("\u012E\u0300", Locale.US, "\u012F\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
  68         test("\u00CC", Locale.US, "\u00EC");
  69         test("\u00CD", Locale.US, "\u00ED");
  70         test("\u0128", Locale.US, "\u0129");
  71 
  72         // I-dot tests
  73         test("\u0130", turkish, "i");
  74         test("\u0130", az, "i");
  75         test("\u0130", lt, "i");
  76         test("\u0130", Locale.US, "i");
  77 
  78         // Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
  79         test("I\u0307", turkish, "i");
  80         test("I\u0307", az, "i");
  81         test("J\u0307", turkish, "j\u0307");
  82         test("J\u0307", az, "j\u0307");
  83 
  84         // Unless an I is before a dot_above, it turns into a dotless i (Turkish and Azeri)
  85         test("I", turkish, "\u0131");
  86         test("I", az, "\u0131");
  87         test("I", Locale.US, "i");
  88         test("IABC", turkish, "\u0131abc");
  89         test("IABC", az, "\u0131abc");
  90         test("IABC", Locale.US, "iabc");
  91 
  92         // Supplementary character tests
  93         //
  94         // U+10400 ("\uD801\uDC00"): DESERET CAPITAL LETTER LONG I
  95         // U+10401 ("\uD801\uDC01"): DESERET CAPITAL LETTER LONG E
  96         // U+10402 ("\uD801\uDC02"): DESERET CAPITAL LETTER LONG A
  97         // U+10428 ("\uD801\uDC28"): DESERET SMALL LETTER LONG I
  98         // U+10429 ("\uD801\uDC29"): DESERET SMALL LETTER LONG E
  99         // U+1042A ("\uD801\uDC2A"): DESERET SMALL LETTER LONG A
 100         //
 101         // valid code point tests:
 102         test("\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
 103         test("\uD801\uDC00A\uD801\uDC01B\uD801\uDC02C", Locale.US, "\uD801\uDC28a\uD801\uDC29b\uD801\uDC2Ac");
 104         // invalid code point tests:
 105         test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
 106 
 107         // test bmp + supp1
 108         StringBuilder src = new StringBuilder(0x20000);
 109         StringBuilder exp = new StringBuilder(0x20000);
 110         for (int cp = 0; cp < 0x20000; cp++) {
 111             if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
 112                 continue;
 113             }
 114             int lowerCase = Character.toLowerCase(cp);
 115             if (lowerCase == -1) {    //Character.ERROR
 116                 continue;
 117             }
 118             src.appendCodePoint(cp);
 119             exp.appendCodePoint(lowerCase);
 120         }
 121         test(src.toString(), Locale.US, exp.toString());
 122 
 123     }
 124 
 125     static void test(String in, Locale locale, String expected) {
 126         String result = in.toLowerCase(locale);
 127         if (!result.equals(expected)) {
 128             System.err.println("input: " + in + ", locale: " + locale +
 129                     ", expected: " + expected + ", actual: " + result);
 130             throw new RuntimeException();
 131         }
 132    }
 133 }