1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 /*
   6  * Copyright 1999-2004 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.util;
  22 
  23 import java.util.Arrays;
  24 
  25 /**
  26  * This class defines the basic properties of characters in XML 1.1. The data
  27  * in this class can be used to verify that a character is a valid
  28  * XML 1.1 character or if the character is a space, name start, or name
  29  * character.
  30  * <p>
  31  * A series of convenience methods are supplied to ease the burden
  32  * of the developer.  Using the character as an index into the <code>XML11CHARS</code>
  33  * array and applying the appropriate mask flag (e.g.
  34  * <code>MASK_VALID</code>), yields the same results as calling the
  35  * convenience methods. There is one exception: check the comments
  36  * for the <code>isValid</code> method for details.
  37  *
  38  * @author Glenn Marcy, IBM
  39  * @author Andy Clark, IBM
  40  * @author Arnaud  Le Hors, IBM
  41  * @author Neil Graham, IBM
  42  * @author Michael Glavassevich, IBM
  43  *
  44  * @version $Id: XML11Char.java,v 1.7 2010-11-01 04:40:15 joehw Exp $
  45  */
  46 public class XML11Char {
  47 
  48     //
  49     // Constants
  50     //
  51 
  52     /** Character flags for XML 1.1. */
  53     private static final byte XML11CHARS [] = new byte [1 << 16];
  54 
  55     /** XML 1.1 Valid character mask. */
  56     public static final int MASK_XML11_VALID = 0x01;
  57 
  58     /** XML 1.1 Space character mask. */
  59     public static final int MASK_XML11_SPACE = 0x02;
  60 
  61     /** XML 1.1 Name start character mask. */
  62     public static final int MASK_XML11_NAME_START = 0x04;
  63 
  64     /** XML 1.1 Name character mask. */
  65     public static final int MASK_XML11_NAME = 0x08;
  66 
  67     /** XML 1.1 control character mask */
  68     public static final int MASK_XML11_CONTROL = 0x10;
  69 
  70     /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
  71     public static final int MASK_XML11_CONTENT = 0x20;
  72 
  73     /** XML namespaces 1.1 NCNameStart */
  74     public static final int MASK_XML11_NCNAME_START = 0x40;
  75 
  76     /** XML namespaces 1.1 NCName */
  77     public static final int MASK_XML11_NCNAME = 0x80;
  78 
  79     /** XML 1.1 content for internal entities (valid - "special" chars) */
  80     public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;
  81 
  82     //
  83     // Static initialization
  84     //
  85 
  86     static {
  87 
  88         // Initializing the Character Flag Array
  89         // Code generated by: XML11CharGenerator.
  90 
  91         Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17
  92         XML11CHARS[9] = 35;
  93         XML11CHARS[10] = 3;
  94         Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17
  95         XML11CHARS[13] = 3;
  96         Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17
  97         XML11CHARS[32] = 35;
  98         Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33
  99         XML11CHARS[38] = 1;
 100         Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33
 101         Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87
 102         XML11CHARS[47] = 33;
 103         Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87
 104         XML11CHARS[58] = 45;
 105         XML11CHARS[59] = 33;
 106         XML11CHARS[60] = 1;
 107         Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33
 108         Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19
 109         Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33
 110         XML11CHARS[93] = 1;
 111         XML11CHARS[94] = 33;
 112         XML11CHARS[95] = -19;
 113         XML11CHARS[96] = 33;
 114         Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19
 115         Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33
 116         Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17
 117         XML11CHARS[133] = 35;
 118         Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17
 119         Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33
 120         XML11CHARS[183] = -87;
 121         Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33
 122         Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19
 123         XML11CHARS[215] = 33;
 124         Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19
 125         XML11CHARS[247] = 33;
 126         Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19
 127         Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87
 128         Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19
 129         XML11CHARS[894] = 33;
 130         Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19
 131         Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33
 132         Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19
 133         Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33
 134         XML11CHARS[8232] = 35;
 135         Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33
 136         Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87
 137         Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33
 138         Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19
 139         Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33
 140         Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19
 141         Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33
 142         Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19
 143         Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33
 144         Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19
 145         Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33
 146         Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19
 147 
 148     } // <clinit>()
 149 
 150     //
 151     // Public static methods
 152     //
 153 
 154     /**
 155      * Returns true if the specified character is a space character
 156      * as amdended in the XML 1.1 specification.
 157      *
 158      * @param c The character to check.
 159      */
 160     public static boolean isXML11Space(int c) {
 161         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_SPACE) != 0);
 162     } // isXML11Space(int):boolean
 163 
 164     /**
 165      * Returns true if the specified character is valid. This method
 166      * also checks the surrogate character range from 0x10000 to 0x10FFFF.
 167      * <p>
 168      * If the program chooses to apply the mask directly to the
 169      * <code>XML11CHARS</code> array, then they are responsible for checking
 170      * the surrogate character range.
 171      *
 172      * @param c The character to check.
 173      */
 174     public static boolean isXML11Valid(int c) {
 175         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_VALID) != 0)
 176                 || (0x10000 <= c && c <= 0x10FFFF);
 177     } // isXML11Valid(int):boolean
 178 
 179     /**
 180      * Returns true if the specified character is invalid.
 181      *
 182      * @param c The character to check.
 183      */
 184     public static boolean isXML11Invalid(int c) {
 185         return !isXML11Valid(c);
 186     } // isXML11Invalid(int):boolean
 187 
 188     /**
 189      * Returns true if the specified character is valid and permitted outside
 190      * of a character reference.
 191      * That is, this method will return false for the same set as
 192      * isXML11Valid, except it also reports false for "control characters".
 193      *
 194      * @param c The character to check.
 195      */
 196     public static boolean isXML11ValidLiteral(int c) {
 197         return ((c < 0x10000 && ((XML11CHARS[c] & MASK_XML11_VALID) != 0 && (XML11CHARS[c] & MASK_XML11_CONTROL) == 0))
 198             || (0x10000 <= c && c <= 0x10FFFF));
 199     } // isXML11ValidLiteral(int):boolean
 200 
 201     /**
 202      * Returns true if the specified character can be considered
 203      * content in an external parsed entity.
 204      *
 205      * @param c The character to check.
 206      */
 207     public static boolean isXML11Content(int c) {
 208         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT) != 0) ||
 209                (0x10000 <= c && c <= 0x10FFFF);
 210     } // isXML11Content(int):boolean
 211 
 212     /**
 213      * Returns true if the specified character can be considered
 214      * content in an internal parsed entity.
 215      *
 216      * @param c The character to check.
 217      */
 218     public static boolean isXML11InternalEntityContent(int c) {
 219         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT_INTERNAL) != 0) ||
 220                (0x10000 <= c && c <= 0x10FFFF);
 221     } // isXML11InternalEntityContent(int):boolean
 222 
 223     /**
 224      * Returns true if the specified character is a valid name start
 225      * character as defined by production [4] in the XML 1.1
 226      * specification.
 227      *
 228      * @param c The character to check.
 229      */
 230     public static boolean isXML11NameStart(int c) {
 231         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME_START) != 0)
 232             || (0x10000 <= c && c < 0xF0000);
 233     } // isXML11NameStart(int):boolean
 234 
 235     /**
 236      * Returns true if the specified character is a valid name
 237      * character as defined by production [4a] in the XML 1.1
 238      * specification.
 239      *
 240      * @param c The character to check.
 241      */
 242     public static boolean isXML11Name(int c) {
 243         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME) != 0)
 244             || (c >= 0x10000 && c < 0xF0000);
 245     } // isXML11Name(int):boolean
 246 
 247     /**
 248      * Returns true if the specified character is a valid NCName start
 249      * character as defined by production [4] in Namespaces in XML
 250      * 1.1 recommendation.
 251      *
 252      * @param c The character to check.
 253      */
 254     public static boolean isXML11NCNameStart(int c) {
 255         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME_START) != 0)
 256             || (0x10000 <= c && c < 0xF0000);
 257     } // isXML11NCNameStart(int):boolean
 258 
 259     /**
 260      * Returns true if the specified character is a valid NCName
 261      * character as defined by production [5] in Namespaces in XML
 262      * 1.1 recommendation.
 263      *
 264      * @param c The character to check.
 265      */
 266     public static boolean isXML11NCName(int c) {
 267         return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME) != 0)
 268             || (0x10000 <= c && c < 0xF0000);
 269     } // isXML11NCName(int):boolean
 270 
 271     /**
 272      * Returns whether the given character is a valid
 273      * high surrogate for a name character. This includes
 274      * all high surrogates for characters [0x10000-0xEFFFF].
 275      * In other words everything excluding planes 15 and 16.
 276      *
 277      * @param c The character to check.
 278      */
 279     public static boolean isXML11NameHighSurrogate(int c) {
 280         return (0xD800 <= c && c <= 0xDB7F);
 281     }
 282 
 283     /*
 284      * [5] Name ::= NameStartChar NameChar*
 285      */
 286     /**
 287      * Check to see if a string is a valid Name according to [5]
 288      * in the XML 1.1 Recommendation
 289      *
 290      * @param name string to check
 291      * @return true if name is a valid Name
 292      */
 293     public static boolean isXML11ValidName(String name) {
 294         final int length = name.length();
 295         if (length == 0) {
 296             return false;
 297         }
 298         int i = 1;
 299         char ch = name.charAt(0);
 300         if (!isXML11NameStart(ch)) {
 301             if (length > 1 && isXML11NameHighSurrogate(ch)) {
 302                 char ch2 = name.charAt(1);
 303                 if (!XMLChar.isLowSurrogate(ch2) ||
 304                     !isXML11NameStart(XMLChar.supplemental(ch, ch2))) {
 305                     return false;
 306                 }
 307                 i = 2;
 308             }
 309             else {
 310                 return false;
 311             }
 312         }
 313         while (i < length) {
 314             ch = name.charAt(i);
 315             if (!isXML11Name(ch)) {
 316                 if (++i < length && isXML11NameHighSurrogate(ch)) {
 317                     char ch2 = name.charAt(i);
 318                     if (!XMLChar.isLowSurrogate(ch2) ||
 319                         !isXML11Name(XMLChar.supplemental(ch, ch2))) {
 320                         return false;
 321                     }
 322                 }
 323                 else {
 324                     return false;
 325                 }
 326             }
 327             ++i;
 328         }
 329         return true;
 330     } // isXML11ValidName(String):boolean
 331 
 332     /*
 333      * from the namespace 1.1 rec
 334      * [4] NCName ::= NCNameStartChar NCNameChar*
 335      */
 336     /**
 337      * Check to see if a string is a valid NCName according to [4]
 338      * from the XML Namespaces 1.1 Recommendation
 339      *
 340      * @param ncName string to check
 341      * @return true if name is a valid NCName
 342      */
 343     public static boolean isXML11ValidNCName(String ncName) {
 344         final int length = ncName.length();
 345         if (length == 0) {
 346             return false;
 347         }
 348         int i = 1;
 349         char ch = ncName.charAt(0);
 350         if (!isXML11NCNameStart(ch)) {
 351             if (length > 1 && isXML11NameHighSurrogate(ch)) {
 352                 char ch2 = ncName.charAt(1);
 353                 if (!XMLChar.isLowSurrogate(ch2) ||
 354                     !isXML11NCNameStart(XMLChar.supplemental(ch, ch2))) {
 355                     return false;
 356                 }
 357                 i = 2;
 358             }
 359             else {
 360                 return false;
 361             }
 362         }
 363         while (i < length) {
 364             ch = ncName.charAt(i);
 365             if (!isXML11NCName(ch)) {
 366                 if (++i < length && isXML11NameHighSurrogate(ch)) {
 367                     char ch2 = ncName.charAt(i);
 368                     if (!XMLChar.isLowSurrogate(ch2) ||
 369                         !isXML11NCName(XMLChar.supplemental(ch, ch2))) {
 370                         return false;
 371                     }
 372                 }
 373                 else {
 374                     return false;
 375                 }
 376             }
 377             ++i;
 378         }
 379         return true;
 380     } // isXML11ValidNCName(String):boolean
 381 
 382     /*
 383      * [7] Nmtoken ::= (NameChar)+
 384      */
 385     /**
 386      * Check to see if a string is a valid Nmtoken according to [7]
 387      * in the XML 1.1 Recommendation
 388      *
 389      * @param nmtoken string to check
 390      * @return true if nmtoken is a valid Nmtoken
 391      */
 392     public static boolean isXML11ValidNmtoken(String nmtoken) {
 393         final int length = nmtoken.length();
 394         if (length == 0) {
 395             return false;
 396         }
 397         for (int i = 0; i < length; ++i) {
 398             char ch = nmtoken.charAt(i);
 399             if (!isXML11Name(ch)) {
 400                 if (++i < length && isXML11NameHighSurrogate(ch)) {
 401                     char ch2 = nmtoken.charAt(i);
 402                     if (!XMLChar.isLowSurrogate(ch2) ||
 403                         !isXML11Name(XMLChar.supplemental(ch, ch2))) {
 404                         return false;
 405                     }
 406                 }
 407                 else {
 408                     return false;
 409                 }
 410             }
 411         }
 412         return true;
 413     } // isXML11ValidName(String):boolean
 414 
 415 } // class XML11Char