< prev index next >

src/jdk.xml.bind/share/classes/com/sun/xml/internal/dtdparser/XmlChars.java

Print this page


   1 /*
   2  * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  30  * Methods in this class are used to determine whether characters may
  31  * appear in certain roles in XML documents.  Such methods are used
  32  * both to parse and to create such documents.
  33  *
  34  * @author David Brownell
  35  * @version 1.1, 00/08/05
  36  */
  37 public class XmlChars {
  38     // private constructor: instances cannot be created from outside this class
  39     private XmlChars() {
  40     }
  41 
  42     /**
  43      * Returns true if the argument, a UCS-4 character code, is valid in
  44      * XML documents.  Unicode characters fit into the low sixteen
  45      * bits of a UCS-4 character, and pairs of Unicode <em>surrogate
  46      * characters</em> can be combined to encode UCS-4 characters in
  47      * documents containing only Unicode.  (The <code>char</code> datatype
  48      * in the Java Programming Language represents Unicode characters,
  49      * including unpaired surrogates.)
  50      * <p>In XML, UCS-4 characters can also be encoded by the use of
  51      * <em>character references</em> such as <b>&amp;#x12345678;</b>, which
  52      * happens to refer to a character that is disallowed in XML documents.
  53      * UCS-4 characters allowed in XML documents can be expressed with
  54      * one or two Unicode characters.
  55      *
  56      * @param ucs4char The 32-bit UCS-4 character being tested.
  57      * @return true if the value lies in one of the ranges of production [2] Char.
  58      */
  59     static public boolean isChar(int ucs4char) {
  60         // [2] Char ::= #x0009 | #x000A | #x000D
  61         //            | [#x0020-#xD7FF]
  62         //    ... surrogates (#xD800-#xDFFF) excluded!
  63         //            | [#xE000-#xFFFD]
  64         //            | [#x10000-#x10ffff]
  65         return ((ucs4char >= 0x0020 && ucs4char <= 0xD7FF)
  66                 || ucs4char == 0x000A || ucs4char == 0x0009
  67                 || ucs4char == 0x000D
  68                 || (ucs4char >= 0xE000 && ucs4char <= 0xFFFD)
  69                 || (ucs4char >= 0x10000 && ucs4char <= 0x10ffff));
  70     }


 347         case Character.LETTER_NUMBER:            // Nl
 348             // ... and these are name characters 'other
 349             // than name start characters'
 350         case Character.COMBINING_SPACING_MARK:    // Mc
 351         case Character.ENCLOSING_MARK:        // Me
 352         case Character.NON_SPACING_MARK:        // Mn
 353         case Character.MODIFIER_LETTER:        // Lm
 354         case Character.DECIMAL_DIGIT_NUMBER:        // Nd
 355 
 356             // OK, here we just have some exceptions to check...
 357             return !isCompatibilityChar(c)
 358                     // per "5.14 of Unicode", rule out some combiners
 359                     && !(c >= 0x20dd && c <= 0x20e0);
 360 
 361         default:
 362             // added a character ...
 363             return c == 0x0387;
 364         }
 365     }
 366 
 367     private static boolean isDigit(char c) {
 368         // [88] Digit ::= ...
 369         //
 370         // Accepts whatever java.lang.Character.isDigit accepts, minus the
 371         // "fullwidth" digits 0xff10..0xff19; per the original author's note,
 372         // that is the only place isDigit differs from the XML point of view.
 373         //
 374         return Character.isDigit(c)
 375                 && !((c >= 0xff10) && (c <= 0xff19));
 376     }
 377 
 378     private static boolean isExtender(char c) {
 379         // [89] Extender ::= ...  (true iff c equals one of the values or falls in one of the ranges below)
 380         return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387
 381                 || c == 0x0640 || c == 0x0e46 || c == 0x0ec6
 382                 || c == 0x3005 || (c >= 0x3031 && c <= 0x3035)
 383                 || (c >= 0x309d && c <= 0x309e)
 384                 || (c >= 0x30fc && c <= 0x30fe)
 385                 ;
 386     }
 387 }
   1 /*
   2  * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  30  * Methods in this class are used to determine whether characters may
  31  * appear in certain roles in XML documents.  Such methods are used
  32  * both to parse and to create such documents.
  33  *
  34  * @author David Brownell
  35  * @version 1.1, 00/08/05
  36  */
  37 public class XmlChars {
  38     // private constructor: instances cannot be created from outside this class
  39     private XmlChars() {
  40     }
  41 
  42     /**
  43      * Returns true if the argument, a UCS-4 character code, is valid in
  44      * XML documents.  Unicode characters fit into the low sixteen
  45      * bits of a UCS-4 character, and pairs of Unicode <em>surrogate
  46      * characters</em> can be combined to encode UCS-4 characters in
  47      * documents containing only Unicode.  (The <code>char</code> datatype
  48      * in the Java Programming Language represents Unicode characters,
  49      * including unpaired surrogates.)
  50      * <p>In XML, UCS-4 characters can also be encoded by the use of
  51      * <em>character references</em> such as <b>&amp;#x12345678;</b>, which
  52      * happens to refer to a character that is disallowed in XML documents.
  53      * UCS-4 characters allowed in XML documents can be expressed with
  54      * one or two Unicode characters.
  55      *
  56      * @param ucs4char The 32-bit UCS-4 character being tested.
  57      * @return true if the value lies in one of the ranges of production [2] Char.
  58      */
  59     static public boolean isChar(int ucs4char) {
  60         // [2] Char ::= #x0009 | #x000A | #x000D
  61         //            | [#x0020-#xD7FF]
  62         //    ... surrogates (#xD800-#xDFFF) excluded!
  63         //            | [#xE000-#xFFFD]
  64         //            | [#x10000-#x10ffff]
  65         return ((ucs4char >= 0x0020 && ucs4char <= 0xD7FF)
  66                 || ucs4char == 0x000A || ucs4char == 0x0009
  67                 || ucs4char == 0x000D
  68                 || (ucs4char >= 0xE000 && ucs4char <= 0xFFFD)
  69                 || (ucs4char >= 0x10000 && ucs4char <= 0x10ffff));
  70     }


 347         case Character.LETTER_NUMBER:            // Nl
 348             // ... and these are name characters 'other
 349             // than name start characters'
 350         case Character.COMBINING_SPACING_MARK:    // Mc
 351         case Character.ENCLOSING_MARK:        // Me
 352         case Character.NON_SPACING_MARK:        // Mn
 353         case Character.MODIFIER_LETTER:        // Lm
 354         case Character.DECIMAL_DIGIT_NUMBER:        // Nd
 355 
 356             // OK, here we just have some exceptions to check...
 357             return !isCompatibilityChar(c)
 358                     // per "5.14 of Unicode", rule out some combiners
 359                     && !(c >= 0x20dd && c <= 0x20e0);
 360 
 361         default:
 362             // added a character ...
 363             return c == 0x0387;
 364         }
 365     }
 366 











 367     private static boolean isExtender(char c) {
 368         // [89] Extender ::= ...  (true iff c equals one of the values or falls in one of the ranges below)
 369         return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387
 370                 || c == 0x0640 || c == 0x0e46 || c == 0x0ec6
 371                 || c == 0x3005 || (c >= 0x3031 && c <= 0x3035)
 372                 || (c >= 0x309d && c <= 0x309e)
 373                 || (c >= 0x30fc && c <= 0x30fe)
 374                 ;
 375     }
 376 }
< prev index next >