/*
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
  23 
  24 import java.util.HashMap;
  25 import java.util.Locale;
  26 
  27 final public class POSIX_Unicode {
  28 
  29     public static boolean isAlpha(int ch) {
  30         return Character.isAlphabetic(ch);
  31     }
  32 
  33     public static boolean isLower(int ch) {
  34         return Character.isLowerCase(ch);
  35     }
  36 
  37     public static boolean isUpper(int ch) {
  38         return Character.isUpperCase(ch);
  39     }
  40 
  41     // \p{Whitespace}
  42     public static boolean isSpace(int ch) {
  43         return ((((1 << Character.SPACE_SEPARATOR) |
  44                   (1 << Character.LINE_SEPARATOR) |
  45                   (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
  46                    != 0 ||
  47                (ch >= 0x9 && ch <= 0xd) ||
  48                (ch == 0x85);
  49     }
  50 
  51     // \p{gc=Control}
  52     public static boolean isCntrl(int ch) {
  53         return Character.getType(ch) == Character.CONTROL;
  54     }
  55 
  56     // \p{gc=Punctuation}
  57     public static boolean isPunct(int ch) {    
  58         return ((((1 << Character.CONNECTOR_PUNCTUATION) |
  59                   (1 << Character.DASH_PUNCTUATION) |
  60                   (1 << Character.START_PUNCTUATION) |
  61                   (1 << Character.END_PUNCTUATION) |
  62                   (1 << Character.OTHER_PUNCTUATION) |
  63                   (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
  64                   (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
  65               != 0;
  66     }
  67 
  68     // \p{gc=Decimal_Number}
  69     // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
  70     public static boolean isHexDigit(int ch) {
  71         return Character.isDigit(ch) ||
  72                (ch >= 0x0030 && ch <= 0x0039) ||
  73                (ch >= 0x0041 && ch <= 0x0046) ||
  74                (ch >= 0x0061 && ch <= 0x0066) ||
  75                (ch >= 0xFF10 && ch <= 0xFF19) ||
  76                (ch >= 0xFF21 && ch <= 0xFF26) ||
  77                (ch >= 0xFF41 && ch <= 0xFF46);
  78     }
  79 
  80     // \p{gc=Decimal_Number}
  81     public static boolean isDigit(int ch) {
  82         return Character.isDigit(ch);
  83     };
  84 
  85     // \p{alpha}
  86     // \p{digit}
  87     public static boolean isAlnum(int ch) {
  88         return Character.isAlphabetic(ch) || Character.isDigit(ch);
  89     }
  90 
  91     // \p{Whitespace} --
  92     // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
  93     //  \p{gc=Line_Separator}
  94     //  \p{gc=Paragraph_Separator}]
  95     public static boolean isBlank(int ch) {
  96         int type = Character.getType(ch);
  97         return isSpace(ch) &&
  98                ch != 0xa & ch != 0xb && ch !=0xc && ch != 0xd && ch != 0x85 &&
  99                type != Character.LINE_SEPARATOR &&
 100                type != Character.PARAGRAPH_SEPARATOR;
 101     }
 102 
 103     // [^
 104     //  \p{space}
 105     //  \p{gc=Control}
 106     //  \p{gc=Surrogate}
 107     //  \p{gc=Unassigned}]
 108     public static boolean isGraph(int ch) {
 109         int type = Character.getType(ch);
 110         return !(isSpace(ch) ||
 111                  Character.CONTROL == type ||
 112                  Character.SURROGATE == type ||
 113                  Character.UNASSIGNED == type);
 114     }
 115 
 116     // \p{graph}
 117     // \p{blank}
 118     // -- \p{cntrl}
 119     public static boolean isPrint(int ch) {
 120         return (isGraph(ch) || isBlank(ch)) && !isCntrl(ch);
 121     }
 122 
 123     // PropList.txt:Noncharacter_Code_Point
 124     public static boolean isNoncharacterCodePoint(int ch) {    
 125         return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
 126     }
 127 
 128     //  \p{alpha}
 129     //  \p{gc=Mark}
 130     //  \p{digit}
 131     //  \p{gc=Connector_Punctuation}
 132     public static boolean isWord(int ch) {
 133         return isAlpha(ch) ||
 134                ((((1 << Character.NON_SPACING_MARK) |
 135                   (1 << Character.ENCLOSING_MARK) |
 136                   (1 << Character.COMBINING_SPACING_MARK) |
 137                   (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
 138                != 0 ||
 139                isDigit(ch);
 140     }
 141 }