New src/java.base/share/tools/org/openjdk/buildtools/generatebreakiteratordata/CharSet.java

   1 /*
   2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  28  * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
  29  *
  30  * The original version of this source code and documentation
  31  * is copyrighted and owned by Taligent, Inc., a wholly-owned
  32  * subsidiary of IBM. These materials are provided under terms
  33  * of a License Agreement between Taligent and Sun. This technology
  34  * is protected by multiple US and International patents.
  35  *
  36  * This notice and attribution to Taligent may not be removed.
  37  * Taligent is a registered trademark of Taligent, Inc.
  38  */
  39 
  40 package org.openjdk.buildtools.generatebreakiteratordata;
  41 
  42 import java.util.Arrays;
  43 import java.util.Hashtable;
  44 
  45 /**
  46  * An object representing a set of characters.  (This is a "set" in the
  47  * mathematical sense: an unduplicated list of characters on which set
  48  * operations such as union and intersection can be performed.)  The
  49  * set information is stored in compressed, optimized form: The object
  50  * contains an integer array with an even number of characters.  Each
  51  * pair of characters represents a range of characters contained in the set
  52  * (a pair of the same character represents a single character).  The
  53  * characters are sorted in increasing order.
  54  */
  55 class CharSet {
  56     /**
  57      * The structure containing the set information.  The characters
  58      * in this array are organized into pairs, each pair representing
  59      * a range of characters contained in the set
  60      */
  61     private int[] chars;
  62 
  63     //==========================================================================
  64     // parseString() and associated routines
  65     //==========================================================================
  66     /**
  67      * A cache which is used to speed up parseString() whenever it is
  68      * used to parse a description that has been parsed before
  69      */
  70     private static Hashtable<String, CharSet> expressionCache = null;
  71 
  72     /**
  73      * Builds a CharSet based on a textual description.  For the syntax of
  74      * the description, see the documentation of RuleBasedBreakIterator.
  75      * @see java.text.RuleBasedBreakIterator
  76      */
  77     public static CharSet parseString(String s) {
  78         CharSet result = null;
  79 
  80         // if "s" is in the expression cache, pull the result out
  81         // of the expresison cache
  82         if (expressionCache != null) {
  83             result = expressionCache.get(s);
  84         }
  85 
  86         // otherwise, use doParseString() to actually parse the string,
  87         // and then add a corresponding entry to the expression cache
  88         if (result == null) {
  89             result = doParseString(s);
  90             if (expressionCache == null) {
  91                 expressionCache = new Hashtable<>();
  92             }
  93             expressionCache.put(s, result);
  94         }
  95         result = (CharSet)(result.clone());
  96         return result;
  97     }
  98 
  99     /**
 100      * This function is used by parseString() to actually parse the string
 101      */
 102     private static CharSet doParseString(String s) {
 103         CharSet result = new CharSet();
 104         int p = 0;
 105 
 106         boolean haveDash = false;
 107         boolean haveTilde = false;
 108         boolean wIsReal = false;
 109         int w = 0x0000;
 110 
 111         // for each character in the description...
 112         while (p < s.length()) {
 113             int c = s.codePointAt(p);
 114 
 115             // if it's an opening bracket...
 116             if (c == '[') {
 117                 // flush the single-character cache
 118                 if (wIsReal) {
 119                     result.internalUnion(new CharSet(w));
 120                 }
 121 
 122                 // locate the matching closing bracket
 123                 int bracketLevel = 1;
 124                 int q = p + 1;
 125                 while (bracketLevel != 0) {
 126                     // if no matching bracket by end of string then...
 127                     if (q >= s.length()) {
 128                         throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
 129                     }
 130                     int ch = s.codePointAt(q);
 131                     switch (ch) {
 132                     case '\\': // need to step over next character
 133                         ch = s.codePointAt(++q);
 134                         break;
 135                     case '[':
 136                         ++bracketLevel;
 137                         break;
 138                     case ']':
 139                         --bracketLevel;
 140                         break;
 141                     }
 142                     q += Character.charCount(ch);
 143                 }
 144                 --q;
 145 
 146                 // call parseString() recursively to parse the text inside
 147                 // the brackets, then either add or subtract the result from
 148                 // our running result depending on whether or not the []
 149                 // expresison was preceded by a ^
 150                 if (!haveTilde) {
 151                     result.internalUnion(CharSet.parseString(s.substring(p + 1, q)));
 152                 }
 153                 else {
 154                     result.internalDifference(CharSet.parseString(s.substring(p + 1, q)));
 155                 }
 156                 haveTilde = false;
 157                 haveDash = false;
 158                 wIsReal = false;
 159                 p = q + 1;
 160             }
 161 
 162             // if the character is a colon...
 163             else if (c == ':') {
 164                 // flush the single-character cache
 165                 if (wIsReal) {
 166                     result.internalUnion(new CharSet(w));
 167                 }
 168 
 169                 // locate the matching colon (and throw an error if there
 170                 // isn't one)
 171                 int q = s.indexOf(':', p + 1);
 172                 if (q == -1) {
 173                     throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
 174                 }
 175 
 176                 // use charSetForCategory() to parse the text in the colons,
 177                 // and either add or substract the result from our running
 178                 // result depending on whether the :: expression was
 179                 // preceded by a ^
 180                 if (!haveTilde) {
 181                     result.internalUnion(charSetForCategory(s.substring(p + 1, q)));
 182                 }
 183                 else {
 184                     result.internalDifference(charSetForCategory(s.substring(p + 1, q)));
 185                 }
 186 
 187                 // reset everything and advance to the next character
 188                 haveTilde = false;
 189                 haveDash = false;
 190                 wIsReal = false;
 191                 p = q + 1;
 192             }
 193 
 194             // if the character is a dash, set an appropriate flag
 195             else if (c == '-') {
 196                 if (wIsReal) {
 197                     haveDash = true;
 198                 }
 199                 ++p;
 200             }
 201 
 202             // if the character is a caret, flush the single-character
 203             // cache and set an appropriate flag.  If the set is empty
 204             // (i.e., if the expression begins with ^), invert the set
 205             // (i.e., set it to include everything).  The idea here is
 206             // that a set that includes nothing but ^ expressions
 207             // means "everything but these things".
 208             else if (c == '^') {
 209                 if (wIsReal) {
 210                     result.internalUnion(new CharSet(w));
 211                     wIsReal = false;
 212                 }
 213                 haveTilde = true;
 214                 ++p;
 215                 if (result.empty()) {
 216                     result.internalComplement();
 217                 }
 218             }
 219 
 220             // throw an exception on an illegal character
 221             else if (c >= ' ' && c < '\u007f' && !Character.isLetter((char)c)
 222                      && !Character.isDigit((char)c) && c != '\\') {
 223                 throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
 224             }
 225 
 226             // otherwise, we end up here...
 227             else {
 228                 // on a backslash, advance to the next character
 229                 if (c == '\\') {
 230                     ++p;
 231                 }
 232 
 233                 // if the preceding character was a dash, this character
 234                 // defines the end of a range.  Add or subtract that range
 235                 // from the running result depending on whether or not it
 236                 // was preceded by a ^
 237                 if (haveDash) {
 238                     if (s.codePointAt(p) < w) {
 239                         throw new IllegalArgumentException("U+" +
 240                             Integer.toHexString(s.codePointAt(p))
 241                             + " is less than U+" + Integer.toHexString(w) + ".  Dash expressions "
 242                             + "can't have their endpoints in reverse order.");
 243                     }
 244 
 245                     int ch = s.codePointAt(p);
 246                     if (!haveTilde) {
 247                         result.internalUnion(new CharSet(w, ch));
 248                     }
 249                     else {
 250                         result.internalDifference(new CharSet(w, ch));
 251                     }
 252                     p += Character.charCount(ch);
 253                     haveDash = false;
 254                     haveTilde = false;
 255                     wIsReal = false;
 256                 }
 257 
 258                 // if the preceding character was a caret, remove this character
 259                 // from the running result
 260                 else if (haveTilde) {
 261                     w = s.codePointAt(p);
 262                     result.internalDifference(new CharSet(w));
 263                     p += Character.charCount(w);
 264                     haveTilde = false;
 265                     wIsReal = false;
 266                 }
 267 
 268                 // otherwise, flush the single-character cache and then
 269                 // put this character into the cache
 270                 else if (wIsReal) {
 271                     result.internalUnion(new CharSet(w));
 272                     w = s.codePointAt(p);
 273                     p += Character.charCount(w);
 274                     wIsReal = true;
 275                 } else {
 276                     w = s.codePointAt(p);
 277                     p += Character.charCount(w);
 278                     wIsReal = true;
 279                 }
 280             }
 281         }
 282 
 283         // finally, flush the single-character cache one last time
 284         if (wIsReal) {
 285             result.internalUnion(new CharSet(w));
 286         }
 287 
 288         return result;
 289     }
 290 
 291     /**
 292      * Creates a CharSet containing all the characters in a particular
 293      * Unicode category.  The text is either a two-character code from
 294      * the Unicode database or a single character that begins one or more
 295      * two-character codes.
 296      */
 297     private static CharSet charSetForCategory(String category) {
 298         // throw an exception if we have anything other than one or two
 299         // characters inside the colons
 300         if (category.length() == 0 || category.length() >= 3) {
 301             throw new IllegalArgumentException("Invalid character category: " + category);
 302         }
 303 
 304         // if we have two characters, search the category map for that code
 305         // and either construct and return a CharSet from the data in the
 306         // category map or throw an exception
 307         if (category.length() == 2) {
 308             for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
 309                 if (CharacterCategory.categoryNames[i].equals(category)) {
 310                     return new CharSet(CharacterCategory.getCategoryMap(i));
 311                 }
 312             }
 313             throw new IllegalArgumentException("Invalid character category: " + category);
 314         }
 315 
 316         // if we have one character, search the category map for codes beginning
 317         // with that letter, and union together all of the matching sets that
 318         // we find (or throw an exception if there are no matches)
 319         else if (category.length() == 1) {
 320             CharSet result = new CharSet();
 321             for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
 322                 if (CharacterCategory.categoryNames[i].startsWith(category)) {
 323                     result = result.union(new CharSet(CharacterCategory.getCategoryMap(i)));
 324                 }
 325             }
 326             if (result.empty()) {
 327                 throw new IllegalArgumentException("Invalid character category: " + category);
 328             }
 329             else {
 330                 return result;
 331             }
 332         }
 333         return new CharSet(); // should never get here, but to make the compiler happy...
 334     }
 335 
 336     /**
 337      * Returns a copy of CharSet's expression cache and sets CharSet's
 338      * expression cache to empty.
 339      */
 340     public static Hashtable<String, CharSet> releaseExpressionCache() {
 341         Hashtable<String, CharSet> result = expressionCache;
 342         expressionCache = null;
 343         return result;
 344     }
 345 
 346     //==========================================================================
 347     // CharSet manipulation
 348     //==========================================================================
 349     /**
 350      * Creates an empty CharSet.
 351      */
 352     public CharSet() {
 353         chars = new int[0];
 354     }
 355 
 356     /**
 357      * Creates a CharSet containing a single character.
 358      * @param c The character to put into the CharSet
 359      */
 360     public CharSet(int c) {
 361         chars = new int[2];
 362         chars[0] = c;
 363         chars[1] = c;
 364     }
 365 
 366     /**
 367      * Creates a CharSet containing a range of characters.
 368      * @param lo The lowest-numbered character to include in the range
 369      * @param hi The highest-numbered character to include in the range
 370      */
 371     public CharSet(int lo, int hi) {
 372         chars = new int[2];
 373         if (lo <= hi) {
 374             chars[0] = lo;
 375             chars[1] = hi;
 376         }
 377         else {
 378             chars[0] = hi;
 379             chars[1] = lo;
 380         }
 381     }
 382 
 383     /**
 384      * Creates a CharSet, initializing it from the internal storage
 385      * of another CharSet (this function performs no error checking
 386      * on "chars", so if it's malformed, undefined behavior will result)
 387      */
 388     private CharSet(int[] chars) {
 389         this.chars = chars;
 390     }
 391 
 392     /**
 393      * Returns a CharSet representing the union of two CharSets.
 394      */
 395     public CharSet union(CharSet that) {
 396         return new CharSet(doUnion(that.chars));
 397     }
 398 
 399     /**
 400      * Adds the characters in "that" to this CharSet
 401      */
 402     private void internalUnion(CharSet that) {
 403         chars = doUnion(that.chars);
 404     }
 405 
 406     /**
 407      * The actual implementation of the union functions
 408      */
 409     private int[] doUnion(int[] c2) {
 410         int[] result = new int[chars.length+c2.length];
 411 
 412         int i = 0;
 413         int j = 0;
 414         int index = 0;
 415 
 416         // consider all the characters in both strings
 417         while (i < chars.length && j < c2.length) {
 418             int ub;
 419 
 420             // the first character in the result is the lower of the
 421             // starting characters of the two strings, and "ub" gets
 422             // set to the upper bound of that range
 423             if (chars[i] < c2[j]) {
 424                 result[index++] = chars[i];
 425                 ub = chars[++i];
 426             }
 427             else {
 428                 result[index++] = c2[j];
 429                 ub = c2[++j];
 430             }
 431 
 432             // for as long as one of our two pointers is pointing to a range's
 433             // end point, or i is pointing to a character that is less than
 434             // "ub" plus one (the "plus one" stitches touching ranges together)...
 435             while (i % 2 == 1 ||
 436                    j % 2 == 1 ||
 437                    (i < chars.length && chars[i] <= ub + 1)) {
 438 
 439                 // advance i to the first character that is greater than
 440                 // "ub" plus one
 441                 while (i < chars.length && chars[i] <= ub + 1) {
 442                     ++i;
 443                 }
 444 
 445                 // if i points to the endpoint of a range, update "ub"
 446                 // to that character, or if i points to the start of
 447                 // a range and the endpoint of the preceding range is
 448                 // greater than "ub", update "up" to _that_ character
 449                 if (i % 2 == 1) {
 450                     ub = chars[i];
 451                 }
 452                 else if (i > 0 && chars[i - 1] > ub) {
 453                     ub = chars[i - 1];
 454                 }
 455 
 456                 // now advance j to the first character that is greater
 457                 // that "ub" plus one
 458                 while (j < c2.length && c2[j] <= ub + 1) {
 459                     ++j;
 460                 }
 461 
 462                 // if j points to the endpoint of a range, update "ub"
 463                 // to that character, or if j points to the start of
 464                 // a range and the endpoint of the preceding range is
 465                 // greater than "ub", update "up" to _that_ character
 466                 if (j % 2 == 1) {
 467                     ub = c2[j];
 468                 }
 469                 else if (j > 0 && c2[j - 1] > ub) {
 470                     ub = c2[j - 1];
 471                 }
 472             }
 473             // when we finally fall out of this loop, we will have stitched
 474             // together a series of ranges that overlap or touch, i and j
 475             // will both point to starting points of ranges, and "ub" will
 476             // be the endpoint of the range we're working on.  Write "ub"
 477             // to the result
 478             result[index++] = ub;
 479 
 480         // loop back around to create the next range in the result
 481         }
 482 
 483         // we fall out to here when we've exhausted all the characters in
 484         // one of the operands.  We can append all of the remaining characters
 485         // in the other operand without doing any extra work.
 486         if (i < chars.length) {
 487             for (int k = i; k < chars.length; k++) {
 488                 result[index++] = chars[k];
 489             }
 490         }
 491         if (j < c2.length) {
 492             for (int k = j; k < c2.length; k++) {
 493                 result[index++] = c2[k];
 494             }
 495         }
 496 
 497         if (result.length > index) {
 498             int[] tmpbuf = new int[index];
 499             System.arraycopy(result, 0, tmpbuf, 0, index);
 500             return tmpbuf;
 501         }
 502 
 503         return result;
 504     }
 505 
 506     /**
 507      * Returns the intersection of two CharSets.
 508      */
 509     public CharSet intersection(CharSet that) {
 510         return new CharSet(doIntersection(that.chars));
 511     }
 512 
 513     /**
 514      * Removes from this CharSet any characters that aren't also in "that"
 515      */
 516     private void internalIntersection(CharSet that) {
 517         chars = doIntersection(that.chars);
 518     }
 519 
 520     /**
 521      * The internal implementation of the two intersection functions
 522      */
 523     private int[] doIntersection(int[] c2) {
 524         int[] result = new int[chars.length+c2.length];
 525 
 526         int i = 0;
 527         int j = 0;
 528         int oldI;
 529         int oldJ;
 530         int index = 0;
 531 
 532         // iterate until we've exhausted one of the operands
 533         while (i < chars.length && j < c2.length) {
 534 
 535             // advance j until it points to a character that is larger than
 536             // the one i points to.  If this is the beginning of a one-
 537             // character range, advance j to point to the end
 538             if (i < chars.length && i % 2 == 0) {
 539                 while (j < c2.length && c2[j] < chars[i]) {
 540                     ++j;
 541                 }
 542                 if (j < c2.length && j % 2 == 0 && c2[j] == chars[i]) {
 543                     ++j;
 544                 }
 545             }
 546 
 547             // if j points to the endpoint of a range, save the current
 548             // value of i, then advance i until it reaches a character
 549             // which is larger than the character pointed at
 550             // by j.  All of the characters we've advanced over (except
 551             // the one currently pointed to by i) are added to the result
 552             oldI = i;
 553             while (j % 2 == 1 && i < chars.length && chars[i] <= c2[j]) {
 554                 ++i;
 555             }
 556             for (int k = oldI; k < i; k++) {
 557                 result[index++] = chars[k];
 558             }
 559 
 560             // if i points to the endpoint of a range, save the current
 561             // value of j, then advance j until it reaches a character
 562             // which is larger than the character pointed at
 563             // by i.  All of the characters we've advanced over (except
 564             // the one currently pointed to by i) are added to the result
 565             oldJ = j;
 566             while (i % 2 == 1 && j < c2.length && c2[j] <= chars[i]) {
 567                 ++j;
 568             }
 569             for (int k = oldJ; k < j; k++) {
 570                 result[index++] = c2[k];
 571             }
 572 
 573             // advance i until it points to a character larger than j
 574             // If it points at the beginning of a one-character range,
 575             // advance it to the end of that range
 576             if (j < c2.length && j % 2 == 0) {
 577                 while (i < chars.length && chars[i] < c2[j]) {
 578                     ++i;
 579                 }
 580                 if (i < chars.length && i % 2 == 0 && c2[j] == chars[i]) {
 581                     ++i;
 582                 }
 583             }
 584         }
 585 
 586         if (result.length > index) {
 587             int[] tmpbuf = new int[index];
 588             System.arraycopy(result, 0, tmpbuf, 0, index);
 589             return tmpbuf;
 590         }
 591 
 592         return result;
 593     }
 594 
 595     /**
 596      * Returns a CharSet containing all the characters in "this" that
 597      * aren't also in "that"
 598      */
 599     public CharSet difference(CharSet that) {
 600         return new CharSet(doIntersection(that.doComplement()));
 601     }
 602 
 603     /**
 604      * Removes from "this" all the characters that are also in "that"
 605      */
 606     private void internalDifference(CharSet that) {
 607         chars = doIntersection(that.doComplement());
 608     }
 609 
 610     /**
 611      * Returns a CharSet containing all the characters which are not
 612      * in "this"
 613      */
 614     public CharSet complement() {
 615         return new CharSet(doComplement());
 616     }
 617 
 618     /**
 619      * Complements "this".  All the characters it contains are removed,
 620      * and all the characters it doesn't contain are added.
 621      */
 622     private void internalComplement() {
 623         chars = doComplement();
 624     }
 625 
 626     /**
 627      * The internal implementation function for the complement routines
 628      */
 629     private int[] doComplement() {
 630         // the complement of an empty CharSet is one containing everything
 631         if (empty()) {
 632             int[] result = new int[2];
 633             result[0] = 0x0000;
 634             result[1] = 0x10FFFF;
 635             return result;
 636         }
 637 
 638         int[] result = new int[chars.length+2];
 639 
 640         int i = 0;
 641         int index = 0;
 642 
 643         // the result begins with \u0000 unless the original CharSet does
 644         if (chars[0] != 0x0000) {
 645             result[index++] = 0x0000;
 646         }
 647 
 648         // walk through the characters in this CharSet.  Append a pair of
 649         // characters the first of which is one less than the first
 650         // character we see and the second of which is one plus the second
 651         // character we see (don't write the first character if it's \u0000,
 652         // and don't write the second character if it's \uffff.
 653         while (i < chars.length) {
 654             if (chars[i] != 0x0000) {
 655                 result[index++] = chars[i] - 1;
 656             }
 657             if (chars[i + 1] != 0x10FFFF) {
 658                 result[index++] = chars[i + 1] + 1;
 659             }
 660             i += 2;
 661         }
 662 
 663         // add 0x10ffff to the end of the result, unless it was in
 664         // the original set
 665         if (chars[i-1] != 0x10FFFF) {
 666             result[index++] = 0x10FFFF;
 667         }
 668 
 669         if (result.length > index) {
 670             int[] tmpbuf = new int[index];
 671             System.arraycopy(result, 0, tmpbuf, 0, index);
 672             return tmpbuf;
 673         }
 674 
 675         return result;
 676     }
 677 
 678     /**
 679      * Returns true if this CharSet contains the specified character
 680      * @param c The character we're testing for set membership
 681      */
 682     public boolean contains(int c) {
 683         // search for the first range endpoint that is greater than or
 684         // equal to c
 685         int i = 1;
 686         while (i < chars.length && chars[i] < c) {
 687             i += 2;
 688         }
 689 
 690         // if we've walked off the end, we don't contain c
 691         if (i == chars.length) {
 692             return false;
 693         }
 694 
 695         // otherwise, we contain c if the beginning of the range is less
 696         // than or equal to c
 697         return chars[i - 1] <= c;
 698     }
 699 
 700     /**
 701      * Returns true if "that" is another instance of CharSet containing
 702      * the exact same characters as this one
 703      */
 704     public boolean equals(Object that) {
 705         return (that instanceof CharSet) && Arrays.equals(chars, ((CharSet)that).chars);
 706     }
 707 
 708     /**
 709      * Returns the hash code for this set of characters
 710      */
 711     public int hashCode() {
 712        return Arrays.hashCode(chars);
 713     }
 714 
 715     /**
 716      * Creates a new CharSet that is equal to this one
 717      */
 718     public Object clone() {
 719         return new CharSet(chars);
 720     }
 721 
 722     /**
 723      * Returns true if this CharSet contains no characters
 724      */
 725     public boolean empty() {
 726         return chars.length == 0;
 727     }
 728 
 729     /**
 730      * Returns a textual representation of this CharSet.  If the result
 731      * of calling this function is passed to CharSet.parseString(), it
 732      * will produce another CharSet that is equal to this one.
 733      */
 734     public String toString() {
 735         StringBuffer result = new StringBuffer();
 736 
 737         // the result begins with an opening bracket
 738         result.append('[');
 739 
 740         // iterate through the ranges in the CharSet
 741         for (int i = 0; i < chars.length; i += 2) {
 742             // for a range with the same beginning and ending point,
 743             // output that character
 744             if (chars[i] == chars[i + 1]) {
 745                 result.append("0x");
 746                 result.append(Integer.toHexString(chars[i]));
 747             }
 748 
 749             // otherwise, output the start and end points of the range
 750             // separated by a dash
 751             else {
 752                 result.append("0x");
 753                 result.append(Integer.toHexString(chars[i]));
 754                 result.append("-0x");
 755                 result.append(Integer.toHexString(chars[i + 1]));
 756             }
 757         }
 758 
 759         // the result ends with a closing bracket
 760         result.append(']');
 761         return result.toString();
 762     }
 763 
 764     /**
 765      * Returns an integer array representing the contents of this CharSet
 766      * in the same form in which they're stored internally: as pairs
 767      * of characters representing the start and end points of ranges
 768      */
 769     public int[] getRanges() {
 770         return chars;
 771     }
 772 
 773     /**
 774      * Returns an Enumeration that will return the ranges of characters
 775      * contained in this CharSet one at a time
 776      */
 777     public Enumeration getChars() {
 778         return new Enumeration(this);
 779     }
 780 
 781     //==========================================================================
 782     // CharSet.Enumeration
 783     //==========================================================================
 784 
 785     /**
 786      * An Enumeration that can be used to extract the character ranges
 787      * from a CharSet one at a time
 788      */
 789     public class Enumeration implements java.util.Enumeration<int[]> {
 790         /**
 791          * Initializes a CharSet.Enumeration
 792          */
 793         Enumeration(CharSet cs) {
 794             this.chars = cs.chars;
 795             p = 0;
 796         }
 797 
 798         /**
 799          * Returns true if the enumeration hasn't yet returned
 800          * all the ranges in the CharSet
 801          */
 802         public boolean hasMoreElements() {
 803             return p < chars.length;
 804         }
 805 
 806         /**
 807          * Returns the next range in the CarSet
 808          */
 809         public int[] nextElement() {
 810             int[] result = new int[2];
 811             result[0] = chars[p++];
 812             result[1] = chars[p++];
 813             return result;
 814         }
 815 
 816         int p;
 817         int[] chars;
 818     }
 819 }