New src/java.base/share/classes/java/text/CollationElementIterator.java

   1 /*
   2  * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  29  *
  30  *   The original version of this source code and documentation is copyrighted
  31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  32  * materials are provided under terms of a License Agreement between Taligent
  33  * and Sun. This technology is protected by multiple US and International
  34  * patents. This notice and attribution to Taligent may not be removed.
  35  *   Taligent is a registered trademark of Taligent, Inc.
  36  *
  37  */
  38 
  39 package java.text;
  40 
  41 import java.lang.Character;
  42 import java.util.Vector;
  43 import sun.text.CollatorUtilities;
  44 import sun.text.normalizer.NormalizerBase;
  45 
  46 /**
  47  * The <code>CollationElementIterator</code> class is used as an iterator
  48  * to walk through each character of an international string. Use the iterator
  49  * to return the ordering priority of the positioned character. The ordering
  50  * priority of a character, which we refer to as a key, defines how a character
  51  * is collated in the given collation object.
  52  *
  53  * <p>
  54  * For example, consider the following in Spanish:
  55  * <blockquote>
  56  * <pre>
  57  * "ca" &rarr; the first key is key('c') and second key is key('a').
  58  * "cha" &rarr; the first key is key('ch') and second key is key('a').
  59  * </pre>
  60  * </blockquote>
  61  * And in German,
  62  * <blockquote>
  63  * <pre>
  64  * "\u00e4b" &rarr; the first key is key('a'), the second key is key('e'), and
  65  * the third key is key('b').
  66  * </pre>
  67  * </blockquote>
  68  * The key of a character is an integer composed of primary order(short),
  69  * secondary order(byte), and tertiary order(byte). Java strictly defines
  70  * the size and signedness of its primitive data types. Therefore, the static
  71  * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
  72  * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
  73  * and <code>short</code> respectively to ensure the correctness of the key
  74  * value.
  75  *
  76  * <p>
  77  * Example of the iterator usage,
  78  * <blockquote>
  79  * <pre>
  80  *
  81  *  String testString = "This is a test";
  82  *  Collator col = Collator.getInstance();
  83  *  if (col instanceof RuleBasedCollator) {
  84  *      RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
  85  *      CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
  86  *      int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
  87  *          :
  88  *  }
  89  * </pre>
  90  * </blockquote>
  91  *
  92  * <p>
  93  * <code>CollationElementIterator.next</code> returns the collation order
  94  * of the next character. A collation order consists of primary order,
  95  * secondary order and tertiary order. The data type of the collation
  96  * order is <strong>int</strong>. The first 16 bits of a collation order
  97  * is its primary order; the next 8 bits is the secondary order and the
  98  * last 8 bits is the tertiary order.
  99  *
 100  * <p><b>Note:</b> <code>CollationElementIterator</code> is a part of
 101  * <code>RuleBasedCollator</code> implementation. It is only usable
 102  * with <code>RuleBasedCollator</code> instances.
 103  *
 104  * @see                Collator
 105  * @see                RuleBasedCollator
 106  * @author             Helena Shih, Laura Werner, Richard Gillam
 107  * @since 1.1
 108  */
 109 public final class CollationElementIterator
 110 {
 111     /**
 112      * Null order which indicates the end of string is reached by the
 113      * cursor.
 114      */
 115     public static final int NULLORDER = 0xffffffff;
 116 
 117     /**
 118      * CollationElementIterator constructor.  This takes the source string and
 119      * the collation object.  The cursor will walk thru the source string based
 120      * on the predefined collation rules.  If the source string is empty,
 121      * NULLORDER will be returned on the calls to next().
 122      * @param sourceText the source string.
 123      * @param owner the collation object.
 124      */
 125     CollationElementIterator(String sourceText, RuleBasedCollator owner) {
 126         this.owner = owner;
 127         ordering = owner.getTables();
 128         if (!sourceText.isEmpty()) {
 129             NormalizerBase.Mode mode =
 130                 CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 131             text = new NormalizerBase(sourceText, mode);
 132         }
 133     }
 134 
 135     /**
 136      * CollationElementIterator constructor.  This takes the source string and
 137      * the collation object.  The cursor will walk thru the source string based
 138      * on the predefined collation rules.  If the source string is empty,
 139      * NULLORDER will be returned on the calls to next().
 140      * @param sourceText the source string.
 141      * @param owner the collation object.
 142      */
 143     CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
 144         this.owner = owner;
 145         ordering = owner.getTables();
 146         NormalizerBase.Mode mode =
 147             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 148         text = new NormalizerBase(sourceText, mode);
 149     }
 150 
 151     /**
 152      * Resets the cursor to the beginning of the string.  The next call
 153      * to next() will return the first collation element in the string.
 154      */
 155     public void reset()
 156     {
 157         if (text != null) {
 158             text.reset();
 159             NormalizerBase.Mode mode =
 160                 CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 161             text.setMode(mode);
 162         }
 163         buffer = null;
 164         expIndex = 0;
 165         swapOrder = 0;
 166     }
 167 
 168     /**
 169      * Get the next collation element in the string.  <p>This iterator iterates
 170      * over a sequence of collation elements that were built from the string.
 171      * Because there isn't necessarily a one-to-one mapping from characters to
 172      * collation elements, this doesn't mean the same thing as "return the
 173      * collation element [or ordering priority] of the next character in the
 174      * string".</p>
 175      * <p>This function returns the collation element that the iterator is currently
 176      * pointing to and then updates the internal pointer to point to the next element.
 177      * previous() updates the pointer first and then returns the element.  This
 178      * means that when you change direction while iterating (i.e., call next() and
 179      * then call previous(), or call previous() and then call next()), you'll get
 180      * back the same element twice.</p>
 181      *
 182      * @return the next collation element
 183      */
 184     public int next()
 185     {
 186         if (text == null) {
 187             return NULLORDER;
 188         }
 189         NormalizerBase.Mode textMode = text.getMode();
 190         // convert the owner's mode to something the Normalizer understands
 191         NormalizerBase.Mode ownerMode =
 192             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 193         if (textMode != ownerMode) {
 194             text.setMode(ownerMode);
 195         }
 196 
 197         // if buffer contains any decomposed char values
 198         // return their strength orders before continuing in
 199         // the Normalizer's CharacterIterator.
 200         if (buffer != null) {
 201             if (expIndex < buffer.length) {
 202                 return strengthOrder(buffer[expIndex++]);
 203             } else {
 204                 buffer = null;
 205                 expIndex = 0;
 206             }
 207         } else if (swapOrder != 0) {
 208             if (Character.isSupplementaryCodePoint(swapOrder)) {
 209                 char[] chars = Character.toChars(swapOrder);
 210                 swapOrder = chars[1];
 211                 return chars[0] << 16;
 212             }
 213             int order = swapOrder << 16;
 214             swapOrder = 0;
 215             return order;
 216         }
 217         int ch  = text.next();
 218 
 219         // are we at the end of Normalizer's text?
 220         if (ch == NormalizerBase.DONE) {
 221             return NULLORDER;
 222         }
 223 
 224         int value = ordering.getUnicodeOrder(ch);
 225         if (value == RuleBasedCollator.UNMAPPED) {
 226             swapOrder = ch;
 227             return UNMAPPEDCHARVALUE;
 228         }
 229         else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
 230             value = nextContractChar(ch);
 231         }
 232         if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
 233             buffer = ordering.getExpandValueList(value);
 234             expIndex = 0;
 235             value = buffer[expIndex++];
 236         }
 237 
 238         if (ordering.isSEAsianSwapping()) {
 239             int consonant;
 240             if (isThaiPreVowel(ch)) {
 241                 consonant = text.next();
 242                 if (isThaiBaseConsonant(consonant)) {
 243                     buffer = makeReorderedBuffer(consonant, value, buffer, true);
 244                     value = buffer[0];
 245                     expIndex = 1;
 246                 } else if (consonant != NormalizerBase.DONE) {
 247                     text.previous();
 248                 }
 249             }
 250             if (isLaoPreVowel(ch)) {
 251                 consonant = text.next();
 252                 if (isLaoBaseConsonant(consonant)) {
 253                     buffer = makeReorderedBuffer(consonant, value, buffer, true);
 254                     value = buffer[0];
 255                     expIndex = 1;
 256                 } else if (consonant != NormalizerBase.DONE) {
 257                     text.previous();
 258                 }
 259             }
 260         }
 261 
 262         return strengthOrder(value);
 263     }
 264 
 265     /**
 266      * Get the previous collation element in the string.  <p>This iterator iterates
 267      * over a sequence of collation elements that were built from the string.
 268      * Because there isn't necessarily a one-to-one mapping from characters to
 269      * collation elements, this doesn't mean the same thing as "return the
 270      * collation element [or ordering priority] of the previous character in the
 271      * string".</p>
 272      * <p>This function updates the iterator's internal pointer to point to the
 273      * collation element preceding the one it's currently pointing to and then
 274      * returns that element, while next() returns the current element and then
 275      * updates the pointer.  This means that when you change direction while
 276      * iterating (i.e., call next() and then call previous(), or call previous()
 277      * and then call next()), you'll get back the same element twice.</p>
 278      *
 279      * @return the previous collation element
 280      * @since 1.2
 281      */
 282     public int previous()
 283     {
 284         if (text == null) {
 285             return NULLORDER;
 286         }
 287         NormalizerBase.Mode textMode = text.getMode();
 288         // convert the owner's mode to something the Normalizer understands
 289         NormalizerBase.Mode ownerMode =
 290             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 291         if (textMode != ownerMode) {
 292             text.setMode(ownerMode);
 293         }
 294         if (buffer != null) {
 295             if (expIndex > 0) {
 296                 return strengthOrder(buffer[--expIndex]);
 297             } else {
 298                 buffer = null;
 299                 expIndex = 0;
 300             }
 301         } else if (swapOrder != 0) {
 302             if (Character.isSupplementaryCodePoint(swapOrder)) {
 303                 char[] chars = Character.toChars(swapOrder);
 304                 swapOrder = chars[1];
 305                 return chars[0] << 16;
 306             }
 307             int order = swapOrder << 16;
 308             swapOrder = 0;
 309             return order;
 310         }
 311         int ch = text.previous();
 312         if (ch == NormalizerBase.DONE) {
 313             return NULLORDER;
 314         }
 315 
 316         int value = ordering.getUnicodeOrder(ch);
 317 
 318         if (value == RuleBasedCollator.UNMAPPED) {
 319             swapOrder = UNMAPPEDCHARVALUE;
 320             return ch;
 321         } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
 322             value = prevContractChar(ch);
 323         }
 324         if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
 325             buffer = ordering.getExpandValueList(value);
 326             expIndex = buffer.length;
 327             value = buffer[--expIndex];
 328         }
 329 
 330         if (ordering.isSEAsianSwapping()) {
 331             int vowel;
 332             if (isThaiBaseConsonant(ch)) {
 333                 vowel = text.previous();
 334                 if (isThaiPreVowel(vowel)) {
 335                     buffer = makeReorderedBuffer(vowel, value, buffer, false);
 336                     expIndex = buffer.length - 1;
 337                     value = buffer[expIndex];
 338                 } else {
 339                     text.next();
 340                 }
 341             }
 342             if (isLaoBaseConsonant(ch)) {
 343                 vowel = text.previous();
 344                 if (isLaoPreVowel(vowel)) {
 345                     buffer = makeReorderedBuffer(vowel, value, buffer, false);
 346                     expIndex = buffer.length - 1;
 347                     value = buffer[expIndex];
 348                 } else {
 349                     text.next();
 350                 }
 351             }
 352         }
 353 
 354         return strengthOrder(value);
 355     }
 356 
 357     /**
 358      * Return the primary component of a collation element.
 359      * @param order the collation element
 360      * @return the element's primary component
 361      */
 362     public static final int primaryOrder(int order)
 363     {
 364         order &= RBCollationTables.PRIMARYORDERMASK;
 365         return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
 366     }
 367     /**
 368      * Return the secondary component of a collation element.
 369      * @param order the collation element
 370      * @return the element's secondary component
 371      */
 372     public static final short secondaryOrder(int order)
 373     {
 374         order = order & RBCollationTables.SECONDARYORDERMASK;
 375         return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
 376     }
 377     /**
 378      * Return the tertiary component of a collation element.
 379      * @param order the collation element
 380      * @return the element's tertiary component
 381      */
 382     public static final short tertiaryOrder(int order)
 383     {
 384         return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
 385     }
 386 
 387     /**
 388      *  Get the comparison order in the desired strength.  Ignore the other
 389      *  differences.
 390      *  @param order The order value
 391      */
 392     final int strengthOrder(int order)
 393     {
 394         int s = owner.getStrength();
 395         if (s == Collator.PRIMARY)
 396         {
 397             order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
 398         } else if (s == Collator.SECONDARY)
 399         {
 400             order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
 401         }
 402         return order;
 403     }
 404 
 405     /**
 406      * Sets the iterator to point to the collation element corresponding to
 407      * the specified character (the parameter is a CHARACTER offset in the
 408      * original string, not an offset into its corresponding sequence of
 409      * collation elements).  The value returned by the next call to next()
 410      * will be the collation element corresponding to the specified position
 411      * in the text.  If that position is in the middle of a contracting
 412      * character sequence, the result of the next call to next() is the
 413      * collation element for that sequence.  This means that getOffset()
 414      * is not guaranteed to return the same value as was passed to a preceding
 415      * call to setOffset().
 416      *
 417      * @param newOffset The new character offset into the original text.
 418      * @since 1.2
 419      */
 420     @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
 421     public void setOffset(int newOffset)
 422     {
 423         if (text != null) {
 424             if (newOffset < text.getBeginIndex()
 425                 || newOffset >= text.getEndIndex()) {
 426                     text.setIndexOnly(newOffset);
 427             } else {
 428                 int c = text.setIndex(newOffset);
 429 
 430                 // if the desired character isn't used in a contracting character
 431                 // sequence, bypass all the backing-up logic-- we're sitting on
 432                 // the right character already
 433                 if (ordering.usedInContractSeq(c)) {
 434                     // walk backwards through the string until we see a character
 435                     // that DOESN'T participate in a contracting character sequence
 436                     while (ordering.usedInContractSeq(c)) {
 437                         c = text.previous();
 438                     }
 439                     // now walk forward using this object's next() method until
 440                     // we pass the starting point and set our current position
 441                     // to the beginning of the last "character" before or at
 442                     // our starting position
 443                     int last = text.getIndex();
 444                     while (text.getIndex() <= newOffset) {
 445                         last = text.getIndex();
 446                         next();
 447                     }
 448                     text.setIndexOnly(last);
 449                     // we don't need this, since last is the last index
 450                     // that is the starting of the contraction which encompass
 451                     // newOffset
 452                     // text.previous();
 453                 }
 454             }
 455         }
 456         buffer = null;
 457         expIndex = 0;
 458         swapOrder = 0;
 459     }
 460 
 461     /**
 462      * Returns the character offset in the original text corresponding to the next
 463      * collation element.  (That is, getOffset() returns the position in the text
 464      * corresponding to the collation element that will be returned by the next
 465      * call to next().)  This value will always be the index of the FIRST character
 466      * corresponding to the collation element (a contracting character sequence is
 467      * when two or more characters all correspond to the same collation element).
 468      * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
 469      * won't necessarily return x.
 470      *
 471      * @return The character offset in the original text corresponding to the collation
 472      * element that will be returned by the next call to next().
 473      * @since 1.2
 474      */
 475     public int getOffset()
 476     {
 477         return (text != null) ? text.getIndex() : 0;
 478     }
 479 
 480 
 481     /**
 482      * Return the maximum length of any expansion sequences that end
 483      * with the specified comparison order.
 484      * @param order a collation order returned by previous or next.
 485      * @return the maximum length of any expansion sequences ending
 486      *         with the specified order.
 487      * @since 1.2
 488      */
 489     public int getMaxExpansion(int order)
 490     {
 491         return ordering.getMaxExpansion(order);
 492     }
 493 
 494     /**
 495      * Set a new string over which to iterate.
 496      *
 497      * @param source  the new source text
 498      * @since 1.2
 499      */
 500     public void setText(String source)
 501     {
 502         buffer = null;
 503         swapOrder = 0;
 504         expIndex = 0;
 505         NormalizerBase.Mode mode =
 506             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 507         if (text == null) {
 508             text = new NormalizerBase(source, mode);
 509         } else {
 510             text.setMode(mode);
 511             text.setText(source);
 512         }
 513     }
 514 
 515     /**
 516      * Set a new string over which to iterate.
 517      *
 518      * @param source  the new source text.
 519      * @since 1.2
 520      */
 521     public void setText(CharacterIterator source)
 522     {
 523         buffer = null;
 524         swapOrder = 0;
 525         expIndex = 0;
 526         NormalizerBase.Mode mode =
 527             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
 528         if (text == null) {
 529             text = new NormalizerBase(source, mode);
 530         } else {
 531             text.setMode(mode);
 532             text.setText(source);
 533         }
 534     }
 535 
 536     //============================================================
 537     // privates
 538     //============================================================
 539 
 540     /**
 541      * Determine if a character is a Thai vowel (which sorts after
 542      * its base consonant).
 543      */
 544     private static final boolean isThaiPreVowel(int ch) {
 545         return (ch >= 0x0e40) && (ch <= 0x0e44);
 546     }
 547 
 548     /**
 549      * Determine if a character is a Thai base consonant
 550      */
 551     private static final boolean isThaiBaseConsonant(int ch) {
 552         return (ch >= 0x0e01) && (ch <= 0x0e2e);
 553     }
 554 
 555     /**
 556      * Determine if a character is a Lao vowel (which sorts after
 557      * its base consonant).
 558      */
 559     private static final boolean isLaoPreVowel(int ch) {
 560         return (ch >= 0x0ec0) && (ch <= 0x0ec4);
 561     }
 562 
 563     /**
 564      * Determine if a character is a Lao base consonant
 565      */
 566     private static final boolean isLaoBaseConsonant(int ch) {
 567         return (ch >= 0x0e81) && (ch <= 0x0eae);
 568     }
 569 
 570     /**
 571      * This method produces a buffer which contains the collation
 572      * elements for the two characters, with colFirst's values preceding
 573      * another character's.  Presumably, the other character precedes colFirst
 574      * in logical order (otherwise you wouldn't need this method would you?).
 575      * The assumption is that the other char's value(s) have already been
 576      * computed.  If this char has a single element it is passed to this
 577      * method as lastValue, and lastExpansion is null.  If it has an
 578      * expansion it is passed in lastExpansion, and colLastValue is ignored.
 579      */
 580     private int[] makeReorderedBuffer(int colFirst,
 581                                       int lastValue,
 582                                       int[] lastExpansion,
 583                                       boolean forward) {
 584 
 585         int[] result;
 586 
 587         int firstValue = ordering.getUnicodeOrder(colFirst);
 588         if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
 589             firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
 590         }
 591 
 592         int[] firstExpansion = null;
 593         if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
 594             firstExpansion = ordering.getExpandValueList(firstValue);
 595         }
 596 
 597         if (!forward) {
 598             int temp1 = firstValue;
 599             firstValue = lastValue;
 600             lastValue = temp1;
 601             int[] temp2 = firstExpansion;
 602             firstExpansion = lastExpansion;
 603             lastExpansion = temp2;
 604         }
 605 
 606         if (firstExpansion == null && lastExpansion == null) {
 607             result = new int [2];
 608             result[0] = firstValue;
 609             result[1] = lastValue;
 610         }
 611         else {
 612             int firstLength = firstExpansion==null? 1 : firstExpansion.length;
 613             int lastLength = lastExpansion==null? 1 : lastExpansion.length;
 614             result = new int[firstLength + lastLength];
 615 
 616             if (firstExpansion == null) {
 617                 result[0] = firstValue;
 618             }
 619             else {
 620                 System.arraycopy(firstExpansion, 0, result, 0, firstLength);
 621             }
 622 
 623             if (lastExpansion == null) {
 624                 result[firstLength] = lastValue;
 625             }
 626             else {
 627                 System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
 628             }
 629         }
 630 
 631         return result;
 632     }
 633 
 634     /**
 635      *  Check if a comparison order is ignorable.
 636      *  @return true if a character is ignorable, false otherwise.
 637      */
 638     static final boolean isIgnorable(int order)
 639     {
 640         return ((primaryOrder(order) == 0) ? true : false);
 641     }
 642 
 643     /**
 644      * Get the ordering priority of the next contracting character in the
 645      * string.
 646      * @param ch the starting character of a contracting character token
 647      * @return the next contracting character's ordering.  Returns NULLORDER
 648      * if the end of string is reached.
 649      */
 650     private int nextContractChar(int ch)
 651     {
 652         // First get the ordering of this single character,
 653         // which is always the first element in the list
 654         Vector<EntryPair> list = ordering.getContractValues(ch);
 655         EntryPair pair = list.firstElement();
 656         int order = pair.value;
 657 
 658         // find out the length of the longest contracting character sequence in the list.
 659         // There's logic in the builder code to make sure the longest sequence is always
 660         // the last.
 661         pair = list.lastElement();
 662         int maxLength = pair.entryName.length();
 663 
 664         // (the Normalizer is cloned here so that the seeking we do in the next loop
 665         // won't affect our real position in the text)
 666         NormalizerBase tempText = (NormalizerBase)text.clone();
 667 
 668         // extract the next maxLength characters in the string (we have to do this using the
 669         // Normalizer to ensure that our offsets correspond to those the rest of the
 670         // iterator is using) and store it in "fragment".
 671         tempText.previous();
 672         key.setLength(0);
 673         int c = tempText.next();
 674         while (maxLength > 0 && c != NormalizerBase.DONE) {
 675             if (Character.isSupplementaryCodePoint(c)) {
 676                 key.append(Character.toChars(c));
 677                 maxLength -= 2;
 678             } else {
 679                 key.append((char)c);
 680                 --maxLength;
 681             }
 682             c = tempText.next();
 683         }
 684         String fragment = key.toString();
 685         // now that we have that fragment, iterate through this list looking for the
 686         // longest sequence that matches the characters in the actual text.  (maxLength
 687         // is used here to keep track of the length of the longest sequence)
 688         // Upon exit from this loop, maxLength will contain the length of the matching
 689         // sequence and order will contain the collation-element value corresponding
 690         // to this sequence
 691         maxLength = 1;
 692         for (int i = list.size() - 1; i > 0; i--) {
 693             pair = list.elementAt(i);
 694             if (!pair.fwd)
 695                 continue;
 696 
 697             if (fragment.startsWith(pair.entryName) && pair.entryName.length()
 698                     > maxLength) {
 699                 maxLength = pair.entryName.length();
 700                 order = pair.value;
 701             }
 702         }
 703 
 704         // seek our current iteration position to the end of the matching sequence
 705         // and return the appropriate collation-element value (if there was no matching
 706         // sequence, we're already seeked to the right position and order already contains
 707         // the correct collation-element value for the single character)
 708         while (maxLength > 1) {
 709             c = text.next();
 710             maxLength -= Character.charCount(c);
 711         }
 712         return order;
 713     }
 714 
 715     /**
 716      * Get the ordering priority of the previous contracting character in the
 717      * string.
 718      * @param ch the starting character of a contracting character token
 719      * @return the next contracting character's ordering.  Returns NULLORDER
 720      * if the end of string is reached.
 721      */
 722     private int prevContractChar(int ch)
 723     {
 724         // This function is identical to nextContractChar(), except that we've
 725         // switched things so that the next() and previous() calls on the Normalizer
 726         // are switched and so that we skip entry pairs with the fwd flag turned on
 727         // rather than off.  Notice that we still use append() and startsWith() when
 728         // working on the fragment.  This is because the entry pairs that are used
 729         // in reverse iteration have their names reversed already.
 730         Vector<EntryPair> list = ordering.getContractValues(ch);
 731         EntryPair pair = list.firstElement();
 732         int order = pair.value;
 733 
 734         pair = list.lastElement();
 735         int maxLength = pair.entryName.length();
 736 
 737         NormalizerBase tempText = (NormalizerBase)text.clone();
 738 
 739         tempText.next();
 740         key.setLength(0);
 741         int c = tempText.previous();
 742         while (maxLength > 0 && c != NormalizerBase.DONE) {
 743             if (Character.isSupplementaryCodePoint(c)) {
 744                 key.append(Character.toChars(c));
 745                 maxLength -= 2;
 746             } else {
 747                 key.append((char)c);
 748                 --maxLength;
 749             }
 750             c = tempText.previous();
 751         }
 752         String fragment = key.toString();
 753 
 754         maxLength = 1;
 755         for (int i = list.size() - 1; i > 0; i--) {
 756             pair = list.elementAt(i);
 757             if (pair.fwd)
 758                 continue;
 759 
 760             if (fragment.startsWith(pair.entryName) && pair.entryName.length()
 761                     > maxLength) {
 762                 maxLength = pair.entryName.length();
 763                 order = pair.value;
 764             }
 765         }
 766 
 767         while (maxLength > 1) {
 768             c = text.previous();
 769             maxLength -= Character.charCount(c);
 770         }
 771         return order;
 772     }
 773 
 774     static final int UNMAPPEDCHARVALUE = 0x7FFF0000;
 775 
 776     private NormalizerBase text = null;
 777     private int[] buffer = null;
 778     private int expIndex = 0;
 779     private StringBuffer key = new StringBuffer(5);
 780     private int swapOrder = 0;
 781     private RBCollationTables ordering;
 782     private RuleBasedCollator owner;
 783 }