1 /*
   2  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  29  *
  30  *   The original version of this source code and documentation is copyrighted
  31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  32  * materials are provided under terms of a License Agreement between Taligent
  33  * and Sun. This technology is protected by multiple US and International
  34  * patents. This notice and attribution to Taligent may not be removed.
  35  *   Taligent is a registered trademark of Taligent, Inc.
  36  *
  37  */
  38 
  39 package java.text;
  40 
  41 import java.lang.Character;
  42 import java.util.Vector;
  43 import sun.text.CollatorUtilities;
  44 import sun.text.normalizer.NormalizerBase;
  45 
  46 /**
  47  * The <code>CollationElementIterator</code> class is used as an iterator
  48  * to walk through each character of an international string. Use the iterator
  49  * to return the ordering priority of the positioned character. The ordering
  50  * priority of a character, which we refer to as a key, defines how a character
  51  * is collated in the given collation object.
  52  *
  53  * <p>
  54  * For example, consider the following in Spanish:
  55  * <blockquote>
  56  * <pre>
  57  * "ca" -> the first key is key('c') and second key is key('a').
  58  * "cha" -> the first key is key('ch') and second key is key('a').
  59  * </pre>
  60  * </blockquote>
  61  * And in German,
  62  * <blockquote>
  63  * <pre>
  64  * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and
  65  * the third key is key('b').
  66  * </pre>
  67  * </blockquote>
  68  * The key of a character is an integer composed of primary order(short),
  69  * secondary order(byte), and tertiary order(byte). Java strictly defines
  70  * the size and signedness of its primitive data types. Therefore, the static
  71  * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
  72  * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
  73  * and <code>short</code> respectively to ensure the correctness of the key
  74  * value.
  75  *
  76  * <p>
  77  * Example of the iterator usage,
  78  * <blockquote>
  79  * <pre>
  80  *
  81  *  String testString = "This is a test";
  82  *  Collator col = Collator.getInstance();
  83  *  if (col instanceof RuleBasedCollator) {
  84  *      RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
  85  *      CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
  86  *      int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
  87  *          :
  88  *  }
  89  * </pre>
  90  * </blockquote>
  91  *
  92  * <p>
  93  * <code>CollationElementIterator.next</code> returns the collation order
  94  * of the next character. A collation order consists of primary order,
  95  * secondary order and tertiary order. The data type of the collation
  96  * order is <strong>int</strong>. The first 16 bits of a collation order
  97  * is its primary order; the next 8 bits is the secondary order and the
  98  * last 8 bits is the tertiary order.
  99  *
 100  * <p><b>Note:</b> <code>CollationElementIterator</code> is a part of
 101  * <code>RuleBasedCollator</code> implementation. It is only usable
 102  * with <code>RuleBasedCollator</code> instances.
 103  *
 104  * @see                Collator
 105  * @see                RuleBasedCollator
 106  * @author             Helena Shih, Laura Werner, Richard Gillam
 107  */
 108 public final class CollationElementIterator
 109 {
 110     /**
 111      * Null order which indicates the end of string is reached by the
 112      * cursor.
 113      */
 114     public final static int NULLORDER = 0xffffffff;
 115 
 116     /**
 117      * CollationElementIterator constructor.  This takes the source string and
 118      * the collation object.  The cursor will walk thru the source string based
 119      * on the predefined collation rules.  If the source string is empty,
 120      * NULLORDER will be returned on the calls to next().
 121      * @param sourceText the source string.
 122      * @param order the collation object.
 123      */
 124     CollationElementIterator(String sourceText, RuleBasedCollator owner) {
 125         this.owner = owner;
 126         ordering = owner.getTables();
 127         if ( sourceText.length() != 0 ) {
 128             NormalizerBase.Mode mode =
 129                 CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 130             text = new NormalizerBase(sourceText, mode);
 131         }
 132     }
 133 
 134     /**
 135      * CollationElementIterator constructor.  This takes the source string and
 136      * the collation object.  The cursor will walk thru the source string based
 137      * on the predefined collation rules.  If the source string is empty,
 138      * NULLORDER will be returned on the calls to next().
 139      * @param sourceText the source string.
 140      * @param order the collation object.
 141      */
 142     CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
 143         this.owner = owner;
 144         ordering = owner.getTables();
 145         NormalizerBase.Mode mode =
 146             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 147         text = new NormalizerBase(sourceText, mode);
 148     }
 149 
 150     /**
 151      * Resets the cursor to the beginning of the string.  The next call
 152      * to next() will return the first collation element in the string.
 153      */
 154     public void reset()
 155     {
 156         if (text != null) {
 157             text.reset();
 158             NormalizerBase.Mode mode =
 159                 CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 160             text.setMode(mode);
 161         }
 162         buffer = null;
 163         expIndex = 0;
 164         swapOrder = 0;
 165     }
 166 
 167     /**
 168      * Get the next collation element in the string.  <p>This iterator iterates
 169      * over a sequence of collation elements that were built from the string.
 170      * Because there isn't necessarily a one-to-one mapping from characters to
 171      * collation elements, this doesn't mean the same thing as "return the
 172      * collation element [or ordering priority] of the next character in the
 173      * string".</p>
 174      * <p>This function returns the collation element that the iterator is currently
 175      * pointing to and then updates the internal pointer to point to the next element.
 176      * previous() updates the pointer first and then returns the element.  This
 177      * means that when you change direction while iterating (i.e., call next() and
 178      * then call previous(), or call previous() and then call next()), you'll get
 179      * back the same element twice.</p>
 180      */
 181     public int next()
 182     {
 183         if (text == null) {
 184             return NULLORDER;
 185         }
 186         NormalizerBase.Mode textMode = text.getMode();
 187         // convert the owner's mode to something the Normalizer understands
 188         NormalizerBase.Mode ownerMode =
 189             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 190         if (textMode != ownerMode) {
 191             text.setMode(ownerMode);
 192         }
 193 
 194         // if buffer contains any decomposed char values
 195         // return their strength orders before continuing in
 196         // the Normalizer's CharacterIterator.
 197         if (buffer != null) {
 198             if (expIndex < buffer.length) {
 199                 return strengthOrder(buffer[expIndex++]);
 200             } else {
 201                 buffer = null;
 202                 expIndex = 0;
 203             }
 204         } else if (swapOrder != 0) {
 205             if (Character.isSupplementaryCodePoint(swapOrder)) {
 206                 char[] chars = Character.toChars(swapOrder);
 207                 swapOrder = chars[1];
 208                 return chars[0] << 16;
 209             }
 210             int order = swapOrder << 16;
 211             swapOrder = 0;
 212             return order;
 213         }
 214         int ch  = text.next();
 215 
 216         // are we at the end of Normalizer's text?
 217         if (ch == NormalizerBase.DONE) {
 218             return NULLORDER;
 219         }
 220 
 221         int value = ordering.getUnicodeOrder(ch);
 222         if (value == RuleBasedCollator.UNMAPPED) {
 223             swapOrder = ch;
 224             return UNMAPPEDCHARVALUE;
 225         }
 226         else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
 227             value = nextContractChar(ch);
 228         }
 229         if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
 230             buffer = ordering.getExpandValueList(value);
 231             expIndex = 0;
 232             value = buffer[expIndex++];
 233         }
 234 
 235         if (ordering.isSEAsianSwapping()) {
 236             int consonant;
 237             if (isThaiPreVowel(ch)) {
 238                 consonant = text.next();
 239                 if (isThaiBaseConsonant(consonant)) {
 240                     buffer = makeReorderedBuffer(consonant, value, buffer, true);
 241                     value = buffer[0];
 242                     expIndex = 1;
 243                 } else if (consonant != NormalizerBase.DONE) {
 244                     text.previous();
 245                 }
 246             }
 247             if (isLaoPreVowel(ch)) {
 248                 consonant = text.next();
 249                 if (isLaoBaseConsonant(consonant)) {
 250                     buffer = makeReorderedBuffer(consonant, value, buffer, true);
 251                     value = buffer[0];
 252                     expIndex = 1;
 253                 } else if (consonant != NormalizerBase.DONE) {
 254                     text.previous();
 255                 }
 256             }
 257         }
 258 
 259         return strengthOrder(value);
 260     }
 261 
 262     /**
 263      * Get the previous collation element in the string.  <p>This iterator iterates
 264      * over a sequence of collation elements that were built from the string.
 265      * Because there isn't necessarily a one-to-one mapping from characters to
 266      * collation elements, this doesn't mean the same thing as "return the
 267      * collation element [or ordering priority] of the previous character in the
 268      * string".</p>
 269      * <p>This function updates the iterator's internal pointer to point to the
 270      * collation element preceding the one it's currently pointing to and then
 271      * returns that element, while next() returns the current element and then
 272      * updates the pointer.  This means that when you change direction while
 273      * iterating (i.e., call next() and then call previous(), or call previous()
 274      * and then call next()), you'll get back the same element twice.</p>
 275      * @since 1.2
 276      */
 277     public int previous()
 278     {
 279         if (text == null) {
 280             return NULLORDER;
 281         }
 282         NormalizerBase.Mode textMode = text.getMode();
 283         // convert the owner's mode to something the Normalizer understands
 284         NormalizerBase.Mode ownerMode =
 285             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 286         if (textMode != ownerMode) {
 287             text.setMode(ownerMode);
 288         }
 289         if (buffer != null) {
 290             if (expIndex > 0) {
 291                 return strengthOrder(buffer[--expIndex]);
 292             } else {
 293                 buffer = null;
 294                 expIndex = 0;
 295             }
 296         } else if (swapOrder != 0) {
 297             if (Character.isSupplementaryCodePoint(swapOrder)) {
 298                 char[] chars = Character.toChars(swapOrder);
 299                 swapOrder = chars[1];
 300                 return chars[0] << 16;
 301             }
 302             int order = swapOrder << 16;
 303             swapOrder = 0;
 304             return order;
 305         }
 306         int ch = text.previous();
 307         if (ch == NormalizerBase.DONE) {
 308             return NULLORDER;
 309         }
 310 
 311         int value = ordering.getUnicodeOrder(ch);
 312 
 313         if (value == RuleBasedCollator.UNMAPPED) {
 314             swapOrder = UNMAPPEDCHARVALUE;
 315             return ch;
 316         } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
 317             value = prevContractChar(ch);
 318         }
 319         if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
 320             buffer = ordering.getExpandValueList(value);
 321             expIndex = buffer.length;
 322             value = buffer[--expIndex];
 323         }
 324 
 325         if (ordering.isSEAsianSwapping()) {
 326             int vowel;
 327             if (isThaiBaseConsonant(ch)) {
 328                 vowel = text.previous();
 329                 if (isThaiPreVowel(vowel)) {
 330                     buffer = makeReorderedBuffer(vowel, value, buffer, false);
 331                     expIndex = buffer.length - 1;
 332                     value = buffer[expIndex];
 333                 } else {
 334                     text.next();
 335                 }
 336             }
 337             if (isLaoBaseConsonant(ch)) {
 338                 vowel = text.previous();
 339                 if (isLaoPreVowel(vowel)) {
 340                     buffer = makeReorderedBuffer(vowel, value, buffer, false);
 341                     expIndex = buffer.length - 1;
 342                     value = buffer[expIndex];
 343                 } else {
 344                     text.next();
 345                 }
 346             }
 347         }
 348 
 349         return strengthOrder(value);
 350     }
 351 
 352     /**
 353      * Return the primary component of a collation element.
 354      * @param order the collation element
 355      * @return the element's primary component
 356      */
 357     public final static int primaryOrder(int order)
 358     {
 359         order &= RBCollationTables.PRIMARYORDERMASK;
 360         return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
 361     }
 362     /**
 363      * Return the secondary component of a collation element.
 364      * @param order the collation element
 365      * @return the element's secondary component
 366      */
 367     public final static short secondaryOrder(int order)
 368     {
 369         order = order & RBCollationTables.SECONDARYORDERMASK;
 370         return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
 371     }
 372     /**
 373      * Return the tertiary component of a collation element.
 374      * @param order the collation element
 375      * @return the element's tertiary component
 376      */
 377     public final static short tertiaryOrder(int order)
 378     {
 379         return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
 380     }
 381 
 382     /**
 383      *  Get the comparison order in the desired strength.  Ignore the other
 384      *  differences.
 385      *  @param order The order value
 386      */
 387     final int strengthOrder(int order)
 388     {
 389         int s = owner.getStrength();
 390         if (s == Collator.PRIMARY)
 391         {
 392             order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
 393         } else if (s == Collator.SECONDARY)
 394         {
 395             order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
 396         }
 397         return order;
 398     }
 399 
 400     /**
 401      * Sets the iterator to point to the collation element corresponding to
 402      * the specified character (the parameter is a CHARACTER offset in the
 403      * original string, not an offset into its corresponding sequence of
 404      * collation elements).  The value returned by the next call to next()
 405      * will be the collation element corresponding to the specified position
 406      * in the text.  If that position is in the middle of a contracting
 407      * character sequence, the result of the next call to next() is the
 408      * collation element for that sequence.  This means that getOffset()
 409      * is not guaranteed to return the same value as was passed to a preceding
 410      * call to setOffset().
 411      *
 412      * @param newOffset The new character offset into the original text.
 413      * @since 1.2
 414      */
 415     @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
 416     public void setOffset(int newOffset)
 417     {
 418         if (text != null) {
 419             if (newOffset < text.getBeginIndex()
 420                 || newOffset >= text.getEndIndex()) {
 421                     text.setIndexOnly(newOffset);
 422             } else {
 423                 int c = text.setIndex(newOffset);
 424 
 425                 // if the desired character isn't used in a contracting character
 426                 // sequence, bypass all the backing-up logic-- we're sitting on
 427                 // the right character already
 428                 if (ordering.usedInContractSeq(c)) {
 429                     // walk backwards through the string until we see a character
 430                     // that DOESN'T participate in a contracting character sequence
 431                     while (ordering.usedInContractSeq(c)) {
 432                         c = text.previous();
 433                     }
 434                     // now walk forward using this object's next() method until
 435                     // we pass the starting point and set our current position
 436                     // to the beginning of the last "character" before or at
 437                     // our starting position
 438                     int last = text.getIndex();
 439                     while (text.getIndex() <= newOffset) {
 440                         last = text.getIndex();
 441                         next();
 442                     }
 443                     text.setIndexOnly(last);
 444                     // we don't need this, since last is the last index
 445                     // that is the starting of the contraction which encompass
 446                     // newOffset
 447                     // text.previous();
 448                 }
 449             }
 450         }
 451         buffer = null;
 452         expIndex = 0;
 453         swapOrder = 0;
 454     }
 455 
 456     /**
 457      * Returns the character offset in the original text corresponding to the next
 458      * collation element.  (That is, getOffset() returns the position in the text
 459      * corresponding to the collation element that will be returned by the next
 460      * call to next().)  This value will always be the index of the FIRST character
 461      * corresponding to the collation element (a contracting character sequence is
 462      * when two or more characters all correspond to the same collation element).
 463      * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
 464      * won't necessarily return x.
 465      *
 466      * @return The character offset in the original text corresponding to the collation
 467      * element that will be returned by the next call to next().
 468      * @since 1.2
 469      */
 470     public int getOffset()
 471     {
 472         return (text != null) ? text.getIndex() : 0;
 473     }
 474 
 475 
 476     /**
 477      * Return the maximum length of any expansion sequences that end
 478      * with the specified comparison order.
 479      * @param order a collation order returned by previous or next.
 480      * @return the maximum length of any expansion sequences ending
 481      *         with the specified order.
 482      * @since 1.2
 483      */
 484     public int getMaxExpansion(int order)
 485     {
 486         return ordering.getMaxExpansion(order);
 487     }
 488 
 489     /**
 490      * Set a new string over which to iterate.
 491      *
 492      * @param source  the new source text
 493      * @since 1.2
 494      */
 495     public void setText(String source)
 496     {
 497         buffer = null;
 498         swapOrder = 0;
 499         expIndex = 0;
 500         NormalizerBase.Mode mode =
 501             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 502         if (text == null) {
 503             text = new NormalizerBase(source, mode);
 504         } else {
 505             text.setMode(mode);
 506             text.setText(source);
 507         }
 508     }
 509 
 510     /**
 511      * Set a new string over which to iterate.
 512      *
 513      * @param source  the new source text.
 514      * @since 1.2
 515      */
 516     public void setText(CharacterIterator source)
 517     {
 518         buffer = null;
 519         swapOrder = 0;
 520         expIndex = 0;
 521         NormalizerBase.Mode mode =
 522             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 523         if (text == null) {
 524             text = new NormalizerBase(source, mode);
 525         } else {
 526             text.setMode(mode);
 527             text.setText(source);
 528         }
 529     }
 530 
 531     //============================================================
 532     // privates
 533     //============================================================
 534 
 535     /**
 536      * Determine if a character is a Thai vowel (which sorts after
 537      * its base consonant).
 538      */
 539     private final static boolean isThaiPreVowel(int ch) {
 540         return (ch >= 0x0e40) && (ch <= 0x0e44);
 541     }
 542 
 543     /**
 544      * Determine if a character is a Thai base consonant
 545      */
 546     private final static boolean isThaiBaseConsonant(int ch) {
 547         return (ch >= 0x0e01) && (ch <= 0x0e2e);
 548     }
 549 
 550     /**
 551      * Determine if a character is a Lao vowel (which sorts after
 552      * its base consonant).
 553      */
 554     private final static boolean isLaoPreVowel(int ch) {
 555         return (ch >= 0x0ec0) && (ch <= 0x0ec4);
 556     }
 557 
 558     /**
 559      * Determine if a character is a Lao base consonant
 560      */
 561     private final static boolean isLaoBaseConsonant(int ch) {
 562         return (ch >= 0x0e81) && (ch <= 0x0eae);
 563     }
 564 
 565     /**
 566      * This method produces a buffer which contains the collation
 567      * elements for the two characters, with colFirst's values preceding
 568      * another character's.  Presumably, the other character precedes colFirst
 569      * in logical order (otherwise you wouldn't need this method would you?).
 570      * The assumption is that the other char's value(s) have already been
 571      * computed.  If this char has a single element it is passed to this
 572      * method as lastValue, and lastExpansion is null.  If it has an
 573      * expansion it is passed in lastExpansion, and colLastValue is ignored.
 574      */
 575     private int[] makeReorderedBuffer(int colFirst,
 576                                       int lastValue,
 577                                       int[] lastExpansion,
 578                                       boolean forward) {
 579 
 580         int[] result;
 581 
 582         int firstValue = ordering.getUnicodeOrder(colFirst);
 583         if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
 584             firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
 585         }
 586 
 587         int[] firstExpansion = null;
 588         if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
 589             firstExpansion = ordering.getExpandValueList(firstValue);
 590         }
 591 
 592         if (!forward) {
 593             int temp1 = firstValue;
 594             firstValue = lastValue;
 595             lastValue = temp1;
 596             int[] temp2 = firstExpansion;
 597             firstExpansion = lastExpansion;
 598             lastExpansion = temp2;
 599         }
 600 
 601         if (firstExpansion == null && lastExpansion == null) {
 602             result = new int [2];
 603             result[0] = firstValue;
 604             result[1] = lastValue;
 605         }
 606         else {
 607             int firstLength = firstExpansion==null? 1 : firstExpansion.length;
 608             int lastLength = lastExpansion==null? 1 : lastExpansion.length;
 609             result = new int[firstLength + lastLength];
 610 
 611             if (firstExpansion == null) {
 612                 result[0] = firstValue;
 613             }
 614             else {
 615                 System.arraycopy(firstExpansion, 0, result, 0, firstLength);
 616             }
 617 
 618             if (lastExpansion == null) {
 619                 result[firstLength] = lastValue;
 620             }
 621             else {
 622                 System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
 623             }
 624         }
 625 
 626         return result;
 627     }
 628 
 629     /**
 630      *  Check if a comparison order is ignorable.
 631      *  @return true if a character is ignorable, false otherwise.
 632      */
 633     final static boolean isIgnorable(int order)
 634     {
 635         return ((primaryOrder(order) == 0) ? true : false);
 636     }
 637 
 638     /**
 639      * Get the ordering priority of the next contracting character in the
 640      * string.
 641      * @param ch the starting character of a contracting character token
 642      * @return the next contracting character's ordering.  Returns NULLORDER
 643      * if the end of string is reached.
 644      */
 645     private int nextContractChar(int ch)
 646     {
 647         // First get the ordering of this single character,
 648         // which is always the first element in the list
 649         Vector<EntryPair> list = ordering.getContractValues(ch);
 650         EntryPair pair = list.firstElement();
 651         int order = pair.value;
 652 
 653         // find out the length of the longest contracting character sequence in the list.
 654         // There's logic in the builder code to make sure the longest sequence is always
 655         // the last.
 656         pair = list.lastElement();
 657         int maxLength = pair.entryName.length();
 658 
 659         // (the Normalizer is cloned here so that the seeking we do in the next loop
 660         // won't affect our real position in the text)
 661         NormalizerBase tempText = (NormalizerBase)text.clone();
 662 
 663         // extract the next maxLength characters in the string (we have to do this using the
 664         // Normalizer to ensure that our offsets correspond to those the rest of the
 665         // iterator is using) and store it in "fragment".
 666         tempText.previous();
 667         key.setLength(0);
 668         int c = tempText.next();
 669         while (maxLength > 0 && c != NormalizerBase.DONE) {
 670             if (Character.isSupplementaryCodePoint(c)) {
 671                 key.append(Character.toChars(c));
 672                 maxLength -= 2;
 673             } else {
 674                 key.append((char)c);
 675                 --maxLength;
 676             }
 677             c = tempText.next();
 678         }
 679         String fragment = key.toString();
 680         // now that we have that fragment, iterate through this list looking for the
 681         // longest sequence that matches the characters in the actual text.  (maxLength
 682         // is used here to keep track of the length of the longest sequence)
 683         // Upon exit from this loop, maxLength will contain the length of the matching
 684         // sequence and order will contain the collation-element value corresponding
 685         // to this sequence
 686         maxLength = 1;
 687         for (int i = list.size() - 1; i > 0; i--) {
 688             pair = list.elementAt(i);
 689             if (!pair.fwd)
 690                 continue;
 691 
 692             if (fragment.startsWith(pair.entryName) && pair.entryName.length()
 693                     > maxLength) {
 694                 maxLength = pair.entryName.length();
 695                 order = pair.value;
 696             }
 697         }
 698 
 699         // seek our current iteration position to the end of the matching sequence
 700         // and return the appropriate collation-element value (if there was no matching
 701         // sequence, we're already seeked to the right position and order already contains
 702         // the correct collation-element value for the single character)
 703         while (maxLength > 1) {
 704             c = text.next();
 705             maxLength -= Character.charCount(c);
 706         }
 707         return order;
 708     }
 709 
 710     /**
 711      * Get the ordering priority of the previous contracting character in the
 712      * string.
 713      * @param ch the starting character of a contracting character token
 714      * @return the next contracting character's ordering.  Returns NULLORDER
 715      * if the end of string is reached.
 716      */
 717     private int prevContractChar(int ch)
 718     {
 719         // This function is identical to nextContractChar(), except that we've
 720         // switched things so that the next() and previous() calls on the Normalizer
 721         // are switched and so that we skip entry pairs with the fwd flag turned on
 722         // rather than off.  Notice that we still use append() and startsWith() when
 723         // working on the fragment.  This is because the entry pairs that are used
 724         // in reverse iteration have their names reversed already.
 725         Vector<EntryPair> list = ordering.getContractValues(ch);
 726         EntryPair pair = list.firstElement();
 727         int order = pair.value;
 728 
 729         pair = list.lastElement();
 730         int maxLength = pair.entryName.length();
 731 
 732         NormalizerBase tempText = (NormalizerBase)text.clone();
 733 
 734         tempText.next();
 735         key.setLength(0);
 736         int c = tempText.previous();
 737         while (maxLength > 0 && c != NormalizerBase.DONE) {
 738             if (Character.isSupplementaryCodePoint(c)) {
 739                 key.append(Character.toChars(c));
 740                 maxLength -= 2;
 741             } else {
 742                 key.append((char)c);
 743                 --maxLength;
 744             }
 745             c = tempText.previous();
 746         }
 747         String fragment = key.toString();
 748 
 749         maxLength = 1;
 750         for (int i = list.size() - 1; i > 0; i--) {
 751             pair = list.elementAt(i);
 752             if (pair.fwd)
 753                 continue;
 754 
 755             if (fragment.startsWith(pair.entryName) && pair.entryName.length()
 756                     > maxLength) {
 757                 maxLength = pair.entryName.length();
 758                 order = pair.value;
 759             }
 760         }
 761 
 762         while (maxLength > 1) {
 763             c = text.previous();
 764             maxLength -= Character.charCount(c);
 765         }
 766         return order;
 767     }
 768 
 769     final static int UNMAPPEDCHARVALUE = 0x7FFF0000;
 770 
 771     private NormalizerBase text = null;
 772     private int[] buffer = null;
 773     private int expIndex = 0;
 774     private StringBuffer key = new StringBuffer(5);
 775     private int swapOrder = 0;
 776     private RBCollationTables ordering;
 777     private RuleBasedCollator owner;
 778 }
--- EOF ---