/*
 * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
    @test
    @bug 4217441 4533872 4900935 8020037 8032012 8041791 8042589 8054307
    @summary toLowerCase should lower-case Greek Sigma correctly depending
             on the context (final/non-final).  Also it should handle
             Locale specific (lt, tr, and az) lowercasings and supplementary
             characters correctly.
*/

import java.util.Locale;

public class ToLowerCase {

    /**
     * Runs every lower-casing check in sequence. The first mismatch aborts
     * the run with a {@link RuntimeException} thrown by
     * {@link #test0(String, Locale, String)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Locale turkish = new Locale("tr", "TR");
        Locale lt = new Locale("lt"); // Lithuanian
        Locale az = new Locale("az"); // Azeri

        // Greek Sigma final/non-final tests
        test("\u03A3", Locale.US, "\u03C3");
        test("LAST\u03A3", Locale.US, "last\u03C2");
        test("MID\u03A3DLE", Locale.US, "mid\u03C3dle");
        test("WORD1 \u03A3 WORD3", Locale.US, "word1 \u03C3 word3");
        test("WORD1 LAST\u03A3 WORD3", Locale.US, "word1 last\u03C2 word3");
        test("WORD1 MID\u03A3DLE WORD3", Locale.US, "word1 mid\u03C3dle word3");
        test("\u0399\u0395\u03a3\u03a5\u03a3 \u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3", Locale.US,
             "\u03b9\u03b5\u03c3\u03c5\u03c2 \u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2"); // "IESUS XRISTOS"

        // Explicit dot above for I's and J's whenever there are more accents above (Lithuanian)
        test("I", lt, "i");
        test("I\u0300", lt, "i\u0307\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
        test("I\u0316", lt, "i\u0316"); // "I" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
        test("J", lt, "j");
        test("J\u0300", lt, "j\u0307\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
        test("J\u0316", lt, "j\u0316"); // "J" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
        test("\u012E", lt, "\u012F");
        test("\u012E\u0300", lt, "\u012F\u0307\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
        test("\u012E\u0316", lt, "\u012F\u0316"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
        test("\u00CC", lt, "i\u0307\u0300");
        test("\u00CD", lt, "i\u0307\u0301");
        test("\u0128", lt, "i\u0307\u0303");
        test("I\u0300", Locale.US, "i\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
        test("J\u0300", Locale.US, "j\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
        test("\u012E\u0300", Locale.US, "\u012F\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
        test("\u00CC", Locale.US, "\u00EC");
        test("\u00CD", Locale.US, "\u00ED");
        test("\u0128", Locale.US, "\u0129");

        // I-dot tests
        test("\u0130", turkish, "i");
        test("\u0130", az, "i");
        test("\u0130", lt, "\u0069\u0307");
        test("\u0130", Locale.US, "\u0069\u0307");
        test("\u0130", Locale.JAPAN, "\u0069\u0307");
        test("\u0130", Locale.ROOT, "\u0069\u0307");

        // Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
        test("I\u0307", turkish, "i");
        test("I\u0307", az, "i");
        test("J\u0307", turkish, "j\u0307");
        test("J\u0307", az, "j\u0307");

        // Unless an I is before a dot_above, it turns into a dotless i (Turkish and Azeri)
        test("I", turkish, "\u0131");
        test("I", az, "\u0131");
        test("I", Locale.US, "i");
        test("IABC", turkish, "\u0131abc");
        test("IABC", az, "\u0131abc");
        test("IABC", Locale.US, "iabc");

        // Supplementary character tests
        //
        //   U+10400 ("\uD801\uDC00"): DESERET CAPITAL LETTER LONG I
        //   U+10401 ("\uD801\uDC01"): DESERET CAPITAL LETTER LONG E
        //   U+10402 ("\uD801\uDC02"): DESERET CAPITAL LETTER LONG A
        //   U+10428 ("\uD801\uDC28"): DESERET SMALL LETTER LONG I
        //   U+10429 ("\uD801\uDC29"): DESERET SMALL LETTER LONG E
        //   U+1042A ("\uD801\uDC2A"): DESERET SMALL LETTER LONG A
        //
        // valid code point tests:
        test("\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
        test("\uD801\uDC00A\uD801\uDC01B\uD801\uDC02C", Locale.US, "\uD801\uDC28a\uD801\uDC29b\uD801\uDC2Ac");
        // invalid code point tests:
        test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");

        // lower/uppercase + surrogates
        test("a\uD801\uDC1c", Locale.ROOT, "a\uD801\uDC44");
        test("A\uD801\uDC1c", Locale.ROOT, "a\uD801\uDC44");
        test("a\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "a\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
        test("A\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "a\uD801\uDC28\uD801\uDC29\uD801\uDC2A");

        // test bmp + supp1
        StringBuilder src = new StringBuilder(0x20000);
        StringBuilder exp = new StringBuilder(0x20000);
        for (int cp = 0; cp < 0x20000; cp++) {
            // High surrogates are left out of the generated input.
            if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
                continue;
            }
            if (cp == 0x0130) {
                // Although UnicodeData.txt has the lower case char as \u0069, it should be
                // handled with the rules in SpecialCasing.txt, i.e., \u0069\u0307 in
                // non Turkic locales.
                continue;
            }
            int lowerCase = Character.toLowerCase(cp);
            if (lowerCase == -1) {    //Character.ERROR
                continue;
            }
            src.appendCodePoint(cp);
            exp.appendCodePoint(lowerCase);
        }
        test(src.toString(), Locale.US, exp.toString());

        // test latin1
        src = new StringBuilder(0x100);
        exp = new StringBuilder(0x100);
        for (int cp = 0; cp < 0x100; cp++) {
            int lowerCase = Character.toLowerCase(cp);
            if (lowerCase == -1) {    //Character.ERROR
                continue;
            }
            src.appendCodePoint(cp);
            exp.appendCodePoint(lowerCase);
        }
        test(src.toString(), Locale.US, exp.toString());

        // test non-latin1 -> latin1
        src = new StringBuilder(0x100).append("abc");
        exp = new StringBuilder(0x100).append("abc");
        for (int cp = 0x100; cp < 0x10000; cp++) {
            int lowerCase = Character.toLowerCase(cp);
            // Keep only characters whose lower case form is below U+0100;
            // U+0130 is excluded here as in the bmp + supp1 loop above.
            if (lowerCase < 0x100 && cp != '\u0130') {
                src.appendCodePoint(cp);
                exp.appendCodePoint(lowerCase);
            }
        }
        test(src.toString(), Locale.US, exp.toString());
    }

    /**
     * Checks {@code in.toLowerCase(locale)} against {@code expected}, first
     * for the input alone and then with each of a fixed set of companion
     * strings placed before and after it, separated by a single space.
     * The companions are ASCII only, ASCII plus U+4E00, ASCII plus U+10000,
     * and ASCII plus U+1041C, each in lower, mixed and upper case.
     *
     * @param in       the string to lower-case
     * @param locale   the locale passed to {@code String.toLowerCase}
     * @param expected the expected lower-cased form of {@code in}
     * @throws RuntimeException if any of the checked combinations mismatches
     */
    static void test(String in, Locale locale, String expected) {
        test0(in, locale, expected);
        // Each entry is { companion input, expected lower-cased companion }.
        for (String[] ss : new String[][] {
                                    new String[] {"abc",      "abc"},
                                    new String[] {"aBc",      "abc"},
                                    new String[] {"ABC",      "abc"},
                                    new String[] {"ab\u4e00", "ab\u4e00"},
                                    new String[] {"aB\u4e00", "ab\u4e00"},
                                    new String[] {"AB\u4e00", "ab\u4e00"},
                                    new String[] {"ab\uD800\uDC00", "ab\uD800\uDC00"},
                                    new String[] {"aB\uD800\uDC00", "ab\uD800\uDC00"},
                                    new String[] {"AB\uD800\uDC00", "ab\uD800\uDC00"},
                                    new String[] {"ab\uD801\uDC1C", "ab\uD801\uDC44"},
                                    new String[] {"aB\uD801\uDC1C", "ab\uD801\uDC44"},
                                    new String[] {"AB\uD801\uDC1C", "ab\uD801\uDC44"},

                                }) {
            test0(ss[0] + " " + in, locale, ss[1] + " " + expected);
            test0(in + " " + ss[0], locale, expected + " " + ss[1]);
        }
    }

    /**
     * Performs a single comparison of {@code in.toLowerCase(locale)} with
     * {@code expected}.
     *
     * @param in       the string to lower-case
     * @param locale   the locale passed to {@code String.toLowerCase}
     * @param expected the exact string the conversion must produce
     * @throws RuntimeException if the result differs from {@code expected};
     *         the exception message repeats the diagnostic written to
     *         {@code System.err}
     */
    static void test0(String in, Locale locale, String expected) {
        String result = in.toLowerCase(locale);
        if (!result.equals(expected)) {
            String message = "input: " + in + ", locale: " + locale +
                    ", expected: " + expected + ", actual: " + result;
            System.err.println(message);
            // Carry the diagnostic in the exception too, so it survives
            // when only the stack trace is reported.
            throw new RuntimeException(message);
        }
    }
}