/*
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

import java.util.HashMap;
import java.util.Locale;

/**
 * Static predicates that classify a Unicode code point into POSIX-style
 * character classes (alpha, lower, upper, space, cntrl, punct, ...), built on
 * top of {@link Character}'s general-category and property queries.
 *
 * <p>Every method takes an {@code int} code point and returns a boolean; the
 * class holds no state.
 */
public final class POSIX_Unicode {

    // General-category bit sets. Bit N is set when the category whose
    // Character.getType() value is N belongs to the set.

    /** Zs, Zl and Zp. */
    private static final int SEPARATOR_MASK =
            (1 << Character.SPACE_SEPARATOR)
          | (1 << Character.LINE_SEPARATOR)
          | (1 << Character.PARAGRAPH_SEPARATOR);

    /** Zl and Zp only. */
    private static final int LINE_BREAK_SEPARATOR_MASK =
            (1 << Character.LINE_SEPARATOR)
          | (1 << Character.PARAGRAPH_SEPARATOR);

    /** Pc, Pd, Ps, Pe, Po, Pi and Pf. */
    private static final int PUNCTUATION_MASK =
            (1 << Character.CONNECTOR_PUNCTUATION)
          | (1 << Character.DASH_PUNCTUATION)
          | (1 << Character.START_PUNCTUATION)
          | (1 << Character.END_PUNCTUATION)
          | (1 << Character.OTHER_PUNCTUATION)
          | (1 << Character.INITIAL_QUOTE_PUNCTUATION)
          | (1 << Character.FINAL_QUOTE_PUNCTUATION);

    /** Mn, Me, Mc and Pc. */
    private static final int MARK_OR_CONNECTOR_MASK =
            (1 << Character.NON_SPACING_MARK)
          | (1 << Character.ENCLOSING_MARK)
          | (1 << Character.COMBINING_SPACING_MARK)
          | (1 << Character.CONNECTOR_PUNCTUATION);

    /**
     * Tests whether the general category of {@code ch} is one of the
     * categories encoded in {@code categoryMask}.
     *
     * @param categoryMask bit set indexed by {@link Character#getType(int)} values
     * @param ch           the code point to classify
     * @return {@code true} if the bit for {@code ch}'s category is set
     */
    private static boolean isTypeIn(int categoryMask, int ch) {
        return ((categoryMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Tests for an alphabetic code point.
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isAlphabetic(int)}
     */
    public static boolean isAlpha(int ch) {
        return Character.isAlphabetic(ch);
    }

    /**
     * Tests for a lowercase code point.
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isLowerCase(int)}
     */
    public static boolean isLower(int ch) {
        return Character.isLowerCase(ch);
    }

    /**
     * Tests for an uppercase code point.
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isUpperCase(int)}
     */
    public static boolean isUpper(int ch) {
        return Character.isUpperCase(ch);
    }

    /**
     * Tests for whitespace, i.e. {@code \p{Whitespace}}: a code point whose
     * general category is a space, line or paragraph separator, or one of
     * U+0009..U+000D, or U+0085.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is whitespace in the sense above
     */
    public static boolean isSpace(int ch) {
        return isTypeIn(SEPARATOR_MASK, ch)
            || (ch >= 0x9 && ch <= 0xd)
            || ch == 0x85;
    }

    /**
     * Tests for a control code point, i.e. {@code \p{gc=Control}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch}'s general category is
     *         {@link Character#CONTROL}
     */
    public static boolean isCntrl(int ch) {
        return Character.getType(ch) == Character.CONTROL;
    }

    /**
     * Tests for punctuation, i.e. {@code \p{gc=Punctuation}}: connector, dash,
     * start, end, other, initial-quote or final-quote punctuation.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch}'s general category is one of the
     *         punctuation categories
     */
    public static boolean isPunct(int ch) {
        return isTypeIn(PUNCTUATION_MASK, ch);
    }

    /**
     * Tests for a hexadecimal digit: {@code \p{gc=Decimal_Number}} together
     * with {@code \p{Hex_Digit}} (PropList.txt: Hex_Digit).
     *
     * @param ch the code point to test
     * @return {@code true} if {@link Character#isDigit(int)} accepts
     *         {@code ch}, or {@code ch} lies in one of the ranges
     *         U+0030..U+0039, U+0041..U+0046, U+0061..U+0066,
     *         U+FF10..U+FF19, U+FF21..U+FF26 or U+FF41..U+FF46
     */
    public static boolean isHexDigit(int ch) {
        return Character.isDigit(ch)
            || (ch >= 0x0030 && ch <= 0x0039)
            || (ch >= 0x0041 && ch <= 0x0046)
            || (ch >= 0x0061 && ch <= 0x0066)
            || (ch >= 0xFF10 && ch <= 0xFF19)
            || (ch >= 0xFF21 && ch <= 0xFF26)
            || (ch >= 0xFF41 && ch <= 0xFF46);
    }

    /**
     * Tests for a decimal digit, i.e. {@code \p{gc=Decimal_Number}}.
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isDigit(int)}
     */
    public static boolean isDigit(int ch) {
        return Character.isDigit(ch);
    }

    /**
     * Tests for an alphanumeric code point: {@code \p{alpha}} or
     * {@code \p{digit}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is alphabetic or a digit
     */
    public static boolean isAlnum(int ch) {
        return Character.isAlphabetic(ch) || Character.isDigit(ch);
    }

    /**
     * Tests for a blank: {@code \p{Whitespace}} minus LF, VT, FF, CR, NEL
     * (U+000A, U+000B, U+000C, U+000D, U+0085),
     * {@code \p{gc=Line_Separator}} and {@code \p{gc=Paragraph_Separator}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is whitespace but none of the
     *         excluded line-breaking code points or categories
     */
    public static boolean isBlank(int ch) {
        if (!isSpace(ch)) {
            return false;
        }
        // LF, VT, FF, CR form the contiguous range 0xa..0xd; NEL is 0x85.
        if ((ch >= 0xa && ch <= 0xd) || ch == 0x85) {
            return false;
        }
        return !isTypeIn(LINE_BREAK_SEPARATOR_MASK, ch);
    }

    /**
     * Tests for a graphic code point: anything that is not
     * {@code \p{space}}, {@code \p{gc=Control}}, {@code \p{gc=Surrogate}} or
     * {@code \p{gc=Unassigned}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} falls in none of the excluded sets
     */
    public static boolean isGraph(int ch) {
        if (isSpace(ch)) {
            return false;
        }
        int type = Character.getType(ch);
        return type != Character.CONTROL
            && type != Character.SURROGATE
            && type != Character.UNASSIGNED;
    }

    /**
     * Tests for a printable code point: {@code \p{graph}} or
     * {@code \p{blank}}, minus {@code \p{cntrl}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is graphic or blank and is not a
     *         control code point
     */
    public static boolean isPrint(int ch) {
        return (isGraph(ch) || isBlank(ch)) && !isCntrl(ch);
    }

    /**
     * Tests for a noncharacter code point (PropList.txt:
     * Noncharacter_Code_Point): U+FDD0..U+FDEF, or a code point whose low
     * 16 bits are {@code FFFE} or {@code FFFF}.
     *
     * <p>Values that are not valid code points (negative, or above
     * {@link Character#MAX_CODE_POINT}) are rejected, since the low-16-bit
     * test alone would otherwise accept inputs such as {@code -1}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a valid code point that is a
     *         noncharacter
     */
    public static boolean isNoncharacterCodePoint(int ch) {
        return Character.isValidCodePoint(ch)
            && ((ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef));
    }

    /**
     * Tests for a word code point: {@code \p{alpha}}, {@code \p{gc=Mark}},
     * {@code \p{digit}} or {@code \p{gc=Connector_Punctuation}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is alphabetic, a digit, a
     *         non-spacing / enclosing / combining-spacing mark, or connector
     *         punctuation
     */
    public static boolean isWord(int ch) {
        return isAlpha(ch)
            || isTypeIn(MARK_OR_CONNECTOR_MASK, ch)
            || isDigit(ch);
    }
}