1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl.xpath.regex;
  22 
  23 import java.util.HashMap;
  24 import java.util.Locale;
  25 import java.util.Map;
  26 
  27 /**
  28  * A regular expression parser for the XML Schema.
  29  *
  30  * @xerces.internal
  31  *
  32  * @author TAMURA Kent <kent@trl.ibm.co.jp>
  33  * @version $Id: ParserForXMLSchema.java,v 1.9 2010-11-12 18:09:45 joehw Exp $
  34  */
  35 class ParserForXMLSchema extends RegexParser {
  36 
  37     public ParserForXMLSchema() {
  38         //this.setLocale(Locale.getDefault());
  39     }
  40     public ParserForXMLSchema(Locale locale) {
  41         super(locale);
  42     }
  43 
  44     Token processCaret() throws ParseException {
  45         this.next();
  46         return Token.createChar('^');
  47     }
  48     Token processDollar() throws ParseException {
  49         this.next();
  50         return Token.createChar('$');
  51      }
  52     Token processLookahead() throws ParseException {
  53         throw ex("parser.process.1", this.offset);
  54     }
  55     Token processNegativelookahead() throws ParseException {
  56         throw ex("parser.process.1", this.offset);
  57     }
  58     Token processLookbehind() throws ParseException {
  59         throw ex("parser.process.1", this.offset);
  60     }
  61     Token processNegativelookbehind() throws ParseException {
  62         throw ex("parser.process.1", this.offset);
  63     }
  64     Token processBacksolidus_A() throws ParseException {
  65         throw ex("parser.process.1", this.offset);
  66     }
  67     Token processBacksolidus_Z() throws ParseException {
  68         throw ex("parser.process.1", this.offset);
  69     }
  70     Token processBacksolidus_z() throws ParseException {
  71         throw ex("parser.process.1", this.offset);
  72     }
  73     Token processBacksolidus_b() throws ParseException {
  74         throw ex("parser.process.1", this.offset);
  75     }
  76     Token processBacksolidus_B() throws ParseException {
  77         throw ex("parser.process.1", this.offset);
  78     }
  79     Token processBacksolidus_lt() throws ParseException {
  80         throw ex("parser.process.1", this.offset);
  81     }
  82     Token processBacksolidus_gt() throws ParseException {
  83         throw ex("parser.process.1", this.offset);
  84     }
  85     Token processStar(Token tok) throws ParseException {
  86         this.next();
  87         return Token.createClosure(tok);
  88     }
  89     Token processPlus(Token tok) throws ParseException {
  90         // X+ -> XX*
  91         this.next();
  92         return Token.createConcat(tok, Token.createClosure(tok));
  93     }
  94     Token processQuestion(Token tok) throws ParseException {
  95         // X? -> X|
  96         this.next();
  97         Token par = Token.createUnion();
  98         par.addChild(tok);
  99         par.addChild(Token.createEmpty());
 100         return par;
 101     }
 102     boolean checkQuestion(int off) {
 103         return false;
 104     }
 105     Token processParen() throws ParseException {
 106         this.next();
 107         Token tok = Token.createParen(this.parseRegex(), 0);
 108         if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);
 109         this.next();                            // Skips ')'
 110         return tok;
 111     }
 112     Token processParen2() throws ParseException {
 113         throw ex("parser.process.1", this.offset);
 114     }
 115     Token processCondition() throws ParseException {
 116         throw ex("parser.process.1", this.offset);
 117     }
 118     Token processModifiers() throws ParseException {
 119         throw ex("parser.process.1", this.offset);
 120     }
 121     Token processIndependent() throws ParseException {
 122         throw ex("parser.process.1", this.offset);
 123     }
 124     Token processBacksolidus_c() throws ParseException {
 125         this.next();
 126         return this.getTokenForShorthand('c');
 127     }
 128     Token processBacksolidus_C() throws ParseException {
 129         this.next();
 130         return this.getTokenForShorthand('C');
 131     }
 132     Token processBacksolidus_i() throws ParseException {
 133         this.next();
 134         return this.getTokenForShorthand('i');
 135     }
 136     Token processBacksolidus_I() throws ParseException {
 137         this.next();
 138         return this.getTokenForShorthand('I');
 139     }
 140     Token processBacksolidus_g() throws ParseException {
 141         throw this.ex("parser.process.1", this.offset-2);
 142     }
 143     Token processBacksolidus_X() throws ParseException {
 144         throw ex("parser.process.1", this.offset-2);
 145     }
 146     Token processBackreference() throws ParseException {
 147         throw ex("parser.process.1", this.offset-4);
 148     }
 149 
 150     int processCIinCharacterClass(RangeToken tok, int c) {
 151         tok.mergeRanges(this.getTokenForShorthand(c));
 152         return -1;
 153     }
 154 
 155 
 156     /**
 157      * Parses a character-class-expression, not a character-class-escape.
 158      *
 159      * c-c-expression   ::= '[' c-group ']'
 160      * c-group          ::= positive-c-group | negative-c-group | c-c-subtraction
 161      * positive-c-group ::= (c-range | c-c-escape)+
 162      * negative-c-group ::= '^' positive-c-group
 163      * c-c-subtraction  ::= (positive-c-group | negative-c-group) subtraction
 164      * subtraction      ::= '-' c-c-expression
 165      * c-range          ::= single-range | from-to-range
 166      * single-range     ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char>
 167      * cc-normal-c      ::= <any character except [, ], \>
 168      * from-to-range    ::= cc-normal-c '-' cc-normal-c
 169      *
 170      * @param useNrage Ignored.
 171      * @return This returns no NrageToken.
 172      */
 173     protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
 174         this.setContext(S_INBRACKETS);
 175         this.next();                            // '['
 176         boolean nrange = false;
 177         boolean wasDecoded = false;                     // used to detect if the last - was escaped.
 178         RangeToken base = null;
 179         RangeToken tok;
 180         if (this.read() == T_CHAR && this.chardata == '^') {
 181             nrange = true;
 182             this.next();                        // '^'
 183             base = Token.createRange();
 184             base.addRange(0, Token.UTF16_MAX);
 185             tok = Token.createRange();
 186         } else {
 187             tok = Token.createRange();
 188         }
 189         int type;
 190         boolean firstloop = true;
 191         while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
 192 
 193                 wasDecoded = false;
 194             // single-range | from-to-range | subtraction
 195             if (type == T_CHAR && this.chardata == ']' && !firstloop) {
 196                 if (nrange) {
 197                     base.subtractRanges(tok);
 198                     tok = base;
 199                 }
 200                 break;
 201             }
 202             int c = this.chardata;
 203             boolean end = false;
 204             if (type == T_BACKSOLIDUS) {
 205                 switch (c) {
 206                   case 'd':  case 'D':
 207                   case 'w':  case 'W':
 208                   case 's':  case 'S':
 209                     tok.mergeRanges(this.getTokenForShorthand(c));
 210                     end = true;
 211                     break;
 212 
 213                   case 'i':  case 'I':
 214                   case 'c':  case 'C':
 215                     c = this.processCIinCharacterClass(tok, c);
 216                     if (c < 0)  end = true;
 217                     break;
 218 
 219                   case 'p':
 220                   case 'P':
 221                     int pstart = this.offset;
 222                     RangeToken tok2 = this.processBacksolidus_pP(c);
 223                     if (tok2 == null)  throw this.ex("parser.atom.5", pstart);
 224                     tok.mergeRanges(tok2);
 225                     end = true;
 226                     break;
 227 
 228                  case '-':
 229                         c = this.decodeEscaped();
 230                         wasDecoded = true;
 231                         break;
 232 
 233                   default:
 234                     c = this.decodeEscaped();
 235                 } // \ + c
 236             } // backsolidus
 237             else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
 238                                                 // Subraction
 239                 if (nrange) {
 240                     base.subtractRanges(tok);
 241                     tok = base;
 242                 }
 243                 RangeToken range2 = this.parseCharacterClass(false);
 244                 tok.subtractRanges(range2);
 245                 if (this.read() != T_CHAR || this.chardata != ']')
 246                     throw this.ex("parser.cc.5", this.offset);
 247                 break;                          // Exit this loop
 248             }
 249             this.next();
 250             if (!end) {                         // if not shorthands...
 251                 if (type == T_CHAR) {
 252                     if (c == '[')  throw this.ex("parser.cc.6", this.offset-2);
 253                     if (c == ']')  throw this.ex("parser.cc.7", this.offset-2);
 254                     if (c == '-' && this.chardata != ']' && !firstloop)  throw this.ex("parser.cc.8", this.offset-2);   // if regex = '[-]' then invalid
 255                 }
 256                 if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && firstloop) { // Here is no '-'.
 257                     if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
 258                         tok.addRange(c, c);
 259                     }
 260                     else {
 261                         addCaseInsensitiveChar(tok, c);
 262                     }
 263                 } else {                        // Found '-'
 264                                                 // Is this '-' is a from-to token??
 265                     this.next(); // Skips '-'
 266                     if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);
 267                                                 // c '-' ']' -> '-' is a single-range.
 268                     if(type == T_CHAR && this.chardata == ']') {                                // if - is at the last position of the group
 269                         if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
 270                             tok.addRange(c, c);
 271                         }
 272                         else {
 273                             addCaseInsensitiveChar(tok, c);
 274                         }
 275                         tok.addRange('-', '-');
 276                     }
 277                     else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
 278                         throw this.ex("parser.cc.8", this.offset-1);
 279                     } else {
 280 
 281                         int rangeend = this.chardata;
 282                         if (type == T_CHAR) {
 283                             if (rangeend == '[')  throw this.ex("parser.cc.6", this.offset-1);
 284                             if (rangeend == ']')  throw this.ex("parser.cc.7", this.offset-1);
 285                             if (rangeend == '-')  throw this.ex("parser.cc.8", this.offset-2);
 286                         }
 287                         else if (type == T_BACKSOLIDUS)
 288                             rangeend = this.decodeEscaped();
 289                         this.next();
 290 
 291                         if (c > rangeend)  throw this.ex("parser.ope.3", this.offset-1);
 292                         if (!this.isSet(RegularExpression.IGNORE_CASE) ||
 293                                 (c > 0xffff && rangeend > 0xffff)) {
 294                             tok.addRange(c, rangeend);
 295                         }
 296                         else {
 297                             addCaseInsensitiveCharRange(tok, c, rangeend);
 298                         }
 299                     }
 300                 }
 301             }
 302             firstloop = false;
 303         }
 304         if (this.read() == T_EOF)
 305             throw this.ex("parser.cc.2", this.offset);
 306         tok.sortRanges();
 307         tok.compactRanges();
 308         //tok.dumpRanges();
 309         this.setContext(S_NORMAL);
 310         this.next();                    // Skips ']'
 311 
 312         return tok;
 313     }
 314 
 315     protected RangeToken parseSetOperations() throws ParseException {
 316         throw this.ex("parser.process.1", this.offset);
 317     }
 318 
 319     Token getTokenForShorthand(int ch) {
 320         switch (ch) {
 321           case 'd':
 322             return ParserForXMLSchema.getRange("xml:isDigit", true);
 323           case 'D':
 324             return ParserForXMLSchema.getRange("xml:isDigit", false);
 325           case 'w':
 326             return ParserForXMLSchema.getRange("xml:isWord", true);
 327           case 'W':
 328             return ParserForXMLSchema.getRange("xml:isWord", false);
 329           case 's':
 330             return ParserForXMLSchema.getRange("xml:isSpace", true);
 331           case 'S':
 332             return ParserForXMLSchema.getRange("xml:isSpace", false);
 333           case 'c':
 334             return ParserForXMLSchema.getRange("xml:isNameChar", true);
 335           case 'C':
 336             return ParserForXMLSchema.getRange("xml:isNameChar", false);
 337           case 'i':
 338             return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
 339           case 'I':
 340             return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
 341           default:
 342             throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
 343         }
 344     }
 345     int decodeEscaped() throws ParseException {
 346         if (this.read() != T_BACKSOLIDUS)  throw ex("parser.next.1", this.offset-1);
 347         int c = this.chardata;
 348         switch (c) {
 349           case 'n':  c = '\n';  break; // LINE FEED U+000A
 350           case 'r':  c = '\r';  break; // CRRIAGE RETURN U+000D
 351           case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
 352           case '\\':
 353           case '|':
 354           case '.':
 355           case '^':
 356           case '-':
 357           case '?':
 358           case '*':
 359           case '+':
 360           case '{':
 361           case '}':
 362           case '(':
 363           case ')':
 364           case '[':
 365           case ']':
 366             break; // return actucal char
 367           default:
 368             throw ex("parser.process.1", this.offset-2);
 369         }
 370         return c;
 371     }
 372 
 373     static private Map<String, Token> ranges = null;
 374     static private Map<String, Token> ranges2 = null;
 375     static synchronized protected RangeToken getRange(String name, boolean positive) {
 376         if (ranges == null) {
 377             ranges = new HashMap<>();
 378             ranges2 = new HashMap<>();
 379 
 380             Token tok = Token.createRange();
 381             setupRange(tok, SPACES);
 382             ranges.put("xml:isSpace", tok);
 383             ranges2.put("xml:isSpace", Token.complementRanges(tok));
 384 
 385             tok = Token.createRange();
 386             setupRange(tok, DIGITS);
 387             setupRange(tok, DIGITS_INT);
 388             ranges.put("xml:isDigit", tok);
 389             ranges2.put("xml:isDigit", Token.complementRanges(tok));
 390 
 391             tok = Token.createRange();
 392             setupRange(tok, LETTERS);
 393             setupRange(tok, LETTERS_INT);
 394             tok.mergeRanges(ranges.get("xml:isDigit"));
 395             ranges.put("xml:isWord", tok);
 396             ranges2.put("xml:isWord", Token.complementRanges(tok));
 397 
 398             tok = Token.createRange();
 399             setupRange(tok, NAMECHARS);
 400             ranges.put("xml:isNameChar", tok);
 401             ranges2.put("xml:isNameChar", Token.complementRanges(tok));
 402 
 403             tok = Token.createRange();
 404             setupRange(tok, LETTERS);
 405             tok.addRange('_', '_');
 406             tok.addRange(':', ':');
 407             ranges.put("xml:isInitialNameChar", tok);
 408             ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
 409         }
 410         RangeToken tok = positive ? (RangeToken)ranges.get(name)
 411             : (RangeToken)ranges2.get(name);
 412         return tok;
 413     }
 414 
 415     static void setupRange(Token range, String src) {
 416         int len = src.length();
 417         for (int i = 0;  i < len;  i += 2)
 418             range.addRange(src.charAt(i), src.charAt(i+1));
 419     }
 420 
 421     static void setupRange(Token range, int[] src) {
 422         int len = src.length;
 423         for (int i = 0;  i < len;  i += 2)
 424             range.addRange(src[i], src[i+1]);
 425     }
 426 
 427     private static final String SPACES = "\t\n\r\r  ";
 428     private static final String NAMECHARS =
 429         "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
 430         +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
 431         +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
 432         +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
 433         +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
 434         +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
 435         +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
 436         +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
 437         +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
 438         +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
 439         +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
 440         +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
 441         +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
 442         +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
 443         +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
 444         +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
 445         +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
 446         +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
 447         +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
 448         +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
 449         +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
 450         +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
 451         +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
 452         +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
 453         +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
 454         +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
 455         +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
 456         +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
 457         +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
 458         +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
 459         +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
 460         +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
 461         +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
 462         +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
 463         +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
 464         +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
 465         +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
 466         +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
 467         +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
 468         +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
 469         +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
 470         +"";
 471     private static final String LETTERS =
 472         "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
 473         +"\u014a\u017e\u0180\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
 474         +"\u02b0\u02d1"
 475         +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
 476         +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
 477         +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
 478         +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
 479         +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
 480         +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
 481         +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
 482         +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
 483         +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
 484         +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
 485         +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
 486         +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
 487         +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
 488         +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
 489         +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
 490         +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
 491         +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
 492         +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
 493         +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
 494         +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
 495         +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
 496         +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
 497         +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
 498         +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
 499         +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
 500         +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
 501         +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
 502         +"\uac00\ud7a3\uff66\uff9f";
 503 
 504     private static final int[] LETTERS_INT = {0x1d790, 0x1d7a8, 0x1d7aa, 0x1d7c9, 0x2fa1b, 0x2fa1d};
 505 
 506     private static final String DIGITS =
 507         "\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
 508         +"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
 509         +"\u0F20\u0F29\u1040\u1049\u1369\u1371\u17E0\u17E9\u1810\u1819\uFF10\uFF19";
 510 
 511     private static final int[] DIGITS_INT = {0x1D7CE, 0x1D7FF};
 512 
 513 }