1 /* 2 * Copyright (c) 2002, 2006, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 /** The CharacterData class encapsulates the large tables found in 29 Java.lang.Character. */ 30 31 class CharacterDataLatin1 extends CharacterData { 32 33 /* The character properties are currently encoded into 32 bits in the following manner: 34 1 bit mirrored property 35 4 bits directionality property 36 9 bits signed offset used for converting case 37 1 bit if 1, adding the signed offset converts the character to lowercase 38 1 bit if 1, subtracting the signed offset converts the character to uppercase 39 1 bit if 1, this character has a titlecase equivalent (possibly itself) 40 3 bits 0 may not be part of an identifier 41 1 ignorable control; may continue a Unicode identifier or Java identifier 42 2 may continue a Java identifier but not a Unicode identifier (unused) 43 3 may continue a Unicode identifier or Java identifier 44 4 is a Java whitespace character 45 5 may start or continue a Java identifier; 46 may continue but not start a Unicode identifier (underscores) 47 6 may start or continue a Java identifier but not a Unicode identifier ($) 48 7 may start or continue a Unicode identifier or Java identifier 49 Thus: 50 5, 6, 7 may start a Java identifier 51 1, 2, 3, 5, 6, 7 may continue a Java identifier 52 7 may start a Unicode identifier 53 1, 3, 5, 7 may continue a Unicode identifier 54 1 is ignorable within an identifier 55 4 is Java whitespace 56 2 bits 0 this character has no numeric property 57 1 adding the digit offset to the character code and then 58 masking with 0x1F will produce the desired numeric value 59 2 this character has a "strange" numeric value 60 3 a Java supradecimal digit: adding the digit offset to the 61 character code, then masking with 0x1F, then adding 10 62 will produce the desired numeric value 63 5 bits digit offset 64 5 bits character type 65 66 The encoding of character properties is subject to change at any time. 67 */ 68 69 int getProperties(int ch) { 70 char offset = (char)ch; 71 int props = $$Lookup(offset); 72 return props; 73 } 74 75 int getType(int ch) { 76 int props = getProperties(ch); 77 return (props & $$maskType); 78 } 79 80 boolean isJavaIdentifierStart(int ch) { 81 int props = getProperties(ch); 82 return ((props & $$maskIdentifierInfo) >= $$lowJavaStart); 83 } 84 85 boolean isJavaIdentifierPart(int ch) { 86 int props = getProperties(ch); 87 return ((props & $$nonzeroJavaPart) != 0); 88 } 89 90 boolean isUnicodeIdentifierStart(int ch) { 91 int props = getProperties(ch); 92 return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart); 93 } 94 95 boolean isUnicodeIdentifierPart(int ch) { 96 int props = getProperties(ch); 97 return ((props & $$maskUnicodePart) != 0); 98 } 99 100 boolean isIdentifierIgnorable(int ch) { 101 int props = getProperties(ch); 102 return ((props & $$maskIdentifierInfo) == $$valueIgnorable); 103 } 104 105 int toLowerCase(int ch) { 106 int mapChar = ch; 107 int val = getProperties(ch); 108 109 if (((val & $$maskLowerCase) != 0) && 110 ((val & $$maskCaseOffset) != $$maskCaseOffset)) { 111 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); 112 mapChar = ch + offset; 113 } 114 return mapChar; 115 } 116 117 int toUpperCase(int ch) { 118 int mapChar = ch; 119 int val = getProperties(ch); 120 121 if ((val & $$maskUpperCase) != 0) { 122 if ((val & $$maskCaseOffset) != $$maskCaseOffset) { 123 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); 124 mapChar = ch - offset; 125 } else if (ch == 0x00B5) { 126 mapChar = 0x039C; 127 } 128 } 129 return mapChar; 130 } 131 132 int toTitleCase(int ch) { 133 return toUpperCase(ch); 134 } 135 136 int digit(int ch, int radix) { 137 int value = -1; 138 if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) { 139 int val = getProperties(ch); 140 int kind = val & $$maskType; 141 if (kind == Character.DECIMAL_DIGIT_NUMBER) { 142 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit; 143 } 144 else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) { 145 // Java supradecimal digit 146 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10; 147 } 148 } 149 return (value < radix) ? value : -1; 150 } 151 152 int getNumericValue(int ch) { 153 int val = getProperties(ch); 154 int retval = -1; 155 156 switch (val & $$maskNumericType) { 157 default: // cannot occur 158 case ($$valueNotNumeric): // not numeric 159 retval = -1; 160 break; 161 case ($$valueDigit): // simple numeric 162 retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit; 163 break; 164 case ($$valueStrangeNumeric) : // "strange" numeric 165 retval = -2; 166 break; 167 case ($$valueJavaSupradecimal): // Java supradecimal 168 retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10; 169 break; 170 } 171 return retval; 172 } 173 174 boolean isWhitespace(int ch) { 175 int props = getProperties(ch); 176 return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace); 177 } 178 179 byte getDirectionality(int ch) { 180 int val = getProperties(ch); 181 byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi); 182 183 if (directionality == 0xF ) { 184 directionality = -1; 185 } 186 return directionality; 187 } 188 189 boolean isMirrored(int ch) { 190 int props = getProperties(ch); 191 return ((props & $$maskMirrored) != 0); 192 } 193 194 int toUpperCaseEx(int ch) { 195 int mapChar = ch; 196 int val = getProperties(ch); 197 198 if ((val & $$maskUpperCase) != 0) { 199 if ((val & $$maskCaseOffset) != $$maskCaseOffset) { 200 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); 201 mapChar = ch - offset; 202 } 203 else { 204 switch(ch) { 205 // map overflow characters 206 case 0x00B5 : mapChar = 0x039C; break; 207 default : mapChar = Character.ERROR; break; 208 } 209 } 210 } 211 return mapChar; 212 } 213 214 static char[] sharpsMap = new char[] {'S', 'S'}; 215 216 char[] toUpperCaseCharArray(int ch) { 217 char[] upperMap = {(char)ch}; 218 if (ch == 0x00DF) { 219 upperMap = sharpsMap; 220 } 221 return upperMap; 222 } 223 224 static final CharacterDataLatin1 instance = new CharacterDataLatin1(); 225 private CharacterDataLatin1() {}; 226 227 $$Tables 228 229 static { 230 $$Initializers 231 } 232 } 233