1. /*
  2. * @(#)CollationElementIterator.java 1.48 03/12/19
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation is copyrighted
  12. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  13. * materials are provided under terms of a License Agreement between Taligent
  14. * and Sun. This technology is protected by multiple US and International
  15. * patents. This notice and attribution to Taligent may not be removed.
  16. * Taligent is a registered trademark of Taligent, Inc.
  17. *
  18. */
  19. package java.text;
  20. import java.lang.Character;
  21. import java.util.Vector;
  22. import sun.text.Normalizer;
  23. import sun.text.NormalizerUtilities;
  24. /**
  25. * The <code>CollationElementIterator</code> class is used as an iterator
  26. * to walk through each character of an international string. Use the iterator
  27. * to return the ordering priority of the positioned character. The ordering
  28. * priority of a character, which we refer to as a key, defines how a character
  29. * is collated in the given collation object.
  30. *
  31. * <p>
  32. * For example, consider the following in Spanish:
  33. * <blockquote>
  34. * <pre>
  35. * "ca" -> the first key is key('c') and second key is key('a').
  36. * "cha" -> the first key is key('ch') and second key is key('a').
  37. * </pre>
  38. * </blockquote>
  39. * And in German,
  40. * <blockquote>
  41. * <pre>
  42. * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and
  43. * the third key is key('b').
  44. * </pre>
  45. * </blockquote>
  46. * The key of a character is an integer composed of primary order(short),
  47. * secondary order(byte), and tertiary order(byte). Java strictly defines
  48. * the size and signedness of its primitive data types. Therefore, the static
  49. * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
  50. * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
  51. * and <code>short</code> respectively to ensure the correctness of the key
  52. * value.
  53. *
  54. * <p>
  55. * Example of the iterator usage,
  56. * <blockquote>
  57. * <pre>
  58. *
  59. * String testString = "This is a test";
  60. * RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)Collator.getInstance();
  61. * CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
  62. * int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
  63. * </pre>
  64. * </blockquote>
  65. *
  66. * <p>
  67. * <code>CollationElementIterator.next</code> returns the collation order
  68. * of the next character. A collation order consists of primary order,
  69. * secondary order and tertiary order. The data type of the collation
  70. * order is <strong>int</strong>. The first 16 bits of a collation order
  71. * are its primary order; the next 8 bits are the secondary order and the
  72. * last 8 bits are the tertiary order.
  73. *
  74. * @see Collator
  75. * @see RuleBasedCollator
  76. * @version 1.24 07/27/98
  77. * @author Helena Shih, Laura Werner, Richard Gillam
  78. */
  79. public final class CollationElementIterator
  80. {
  81. /**
  82. * Null order which indicates the end of string is reached by the
  83. * cursor.
  84. */
  85. public final static int NULLORDER = 0xffffffff;
  /**
   * Creates an iterator over the collation elements of a string.
   *
   * <p>The iterator takes its collation tables and its decomposition mode
   * from {@code owner}. For an empty string no normalizer is created, so
   * calls to {@code next()} return {@code NULLORDER} straight away.
   *
   * @param sourceText the string to iterate over.
   * @param owner the collator that supplies the tables and the
   *        decomposition mode.
   */
  CollationElementIterator(String sourceText, RuleBasedCollator owner) {
    this.owner = owner;
    this.ordering = owner.getTables();
    if (sourceText.length() == 0) {
      // Nothing to walk: text stays null, which next()/previous() treat as "done".
      return;
    }
    this.text = new Normalizer(
        sourceText,
        NormalizerUtilities.toNormalizerMode(owner.getDecomposition()));
  }
  /**
   * Creates an iterator over the collation elements of the text supplied
   * by a {@code CharacterIterator}.
   *
   * <p>The iterator takes its collation tables and its decomposition mode
   * from {@code owner}. Unlike the {@code String} constructor, a normalizer
   * is always created here, whatever the iterator contains.
   *
   * @param sourceText the character iterator providing the text.
   * @param owner the collator that supplies the tables and the
   *        decomposition mode.
   */
  CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
    this.owner = owner;
    this.ordering = owner.getTables();
    this.text = new Normalizer(
        sourceText,
        NormalizerUtilities.toNormalizerMode(owner.getDecomposition()));
  }
  118. /**
  119. * Resets the cursor to the beginning of the string. The next call
  120. * to next() will return the first collation element in the string.
  121. */
  122. public void reset()
  123. {
  124. if (text != null) {
  125. text.reset();
  126. Normalizer.Mode mode =
  127. NormalizerUtilities.toNormalizerMode(owner.getDecomposition());
  128. text.setMode(mode);
  129. }
  130. buffer = null;
  131. expIndex = 0;
  132. swapOrder = 0;
  133. }
  134. /**
  135. * Get the next collation element in the string. <p>This iterator iterates
  136. * over a sequence of collation elements that were built from the string.
  137. * Because there isn't necessarily a one-to-one mapping from characters to
  138. * collation elements, this doesn't mean the same thing as "return the
  139. * collation element [or ordering priority] of the next character in the
  140. * string".</p>
  141. * <p>This function returns the collation element that the iterator is currently
  142. * pointing to and then updates the internal pointer to point to the next element.
  143. * previous() updates the pointer first and then returns the element. This
  144. * means that when you change direction while iterating (i.e., call next() and
  145. * then call previous(), or call previous() and then call next()), you'll get
  146. * back the same element twice.</p>
  147. */
  148. public int next()
  149. {
  150. if (text == null) {
  151. return NULLORDER;
  152. }
  153. Normalizer.Mode textMode = text.getMode();
  154. // convert the owner's mode to something the Normalizer understands
  155. Normalizer.Mode ownerMode =
  156. NormalizerUtilities.toNormalizerMode(owner.getDecomposition());
  157. if (textMode != ownerMode) {
  158. text.setMode(ownerMode);
  159. }
  160. // if buffer contains any decomposed char values
  161. // return their strength orders before continuing in
  162. // the the Normalizer's CharacterIterator.
  163. if (buffer != null) {
  164. if (expIndex < buffer.length) {
  165. return strengthOrder(buffer[expIndex++]);
  166. } else {
  167. buffer = null;
  168. expIndex = 0;
  169. }
  170. } else if (swapOrder != 0) {
  171. if (Character.isSupplementaryCodePoint(swapOrder)) {
  172. char[] chars = Character.toChars(swapOrder);
  173. swapOrder = chars[1];
  174. return chars[0] << 16;
  175. }
  176. int order = swapOrder << 16;
  177. swapOrder = 0;
  178. return order;
  179. }
  180. int ch = text.next();
  181. // are we at the end of Normalizer's text?
  182. if (ch == Normalizer.DONE) {
  183. return NULLORDER;
  184. }
  185. int value = ordering.getUnicodeOrder(ch);
  186. if (value == RuleBasedCollator.UNMAPPED) {
  187. swapOrder = ch;
  188. return UNMAPPEDCHARVALUE;
  189. }
  190. else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
  191. value = nextContractChar(ch);
  192. }
  193. if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
  194. buffer = ordering.getExpandValueList(value);
  195. expIndex = 0;
  196. value = buffer[expIndex++];
  197. }
  198. if (ordering.isSEAsianSwapping()) {
  199. int consonant;
  200. if (isThaiPreVowel(ch)) {
  201. consonant = text.next();
  202. if (isThaiBaseConsonant(consonant)) {
  203. buffer = makeReorderedBuffer(consonant, value, buffer, true);
  204. value = buffer[0];
  205. expIndex = 1;
  206. } else {
  207. text.previous();
  208. }
  209. }
  210. if (isLaoPreVowel(ch)) {
  211. consonant = text.next();
  212. if (isLaoBaseConsonant(consonant)) {
  213. buffer = makeReorderedBuffer(consonant, value, buffer, true);
  214. value = buffer[0];
  215. expIndex = 1;
  216. } else {
  217. text.previous();
  218. }
  219. }
  220. }
  221. return strengthOrder(value);
  222. }
  223. /**
  224. * Get the previous collation element in the string. <p>This iterator iterates
  225. * over a sequence of collation elements that were built from the string.
  226. * Because there isn't necessarily a one-to-one mapping from characters to
  227. * collation elements, this doesn't mean the same thing as "return the
  228. * collation element [or ordering priority] of the previous character in the
  229. * string".</p>
  230. * <p>This function updates the iterator's internal pointer to point to the
  231. * collation element preceding the one it's currently pointing to and then
  232. * returns that element, while next() returns the current element and then
  233. * updates the pointer. This means that when you change direction while
  234. * iterating (i.e., call next() and then call previous(), or call previous()
  235. * and then call next()), you'll get back the same element twice.</p>
  236. * @since 1.2
  237. */
  238. public int previous()
  239. {
  240. if (text == null) {
  241. return NULLORDER;
  242. }
  243. Normalizer.Mode textMode = text.getMode();
  244. // convert the owner's mode to something the Normalizer understands
  245. Normalizer.Mode ownerMode =
  246. NormalizerUtilities.toNormalizerMode(owner.getDecomposition());
  247. if (textMode != ownerMode) {
  248. text.setMode(ownerMode);
  249. }
  250. if (buffer != null) {
  251. if (expIndex > 0) {
  252. return strengthOrder(buffer[--expIndex]);
  253. } else {
  254. buffer = null;
  255. expIndex = 0;
  256. }
  257. } else if (swapOrder != 0) {
  258. if (Character.isSupplementaryCodePoint(swapOrder)) {
  259. char[] chars = Character.toChars(swapOrder);
  260. swapOrder = chars[1];
  261. return chars[0] << 16;
  262. }
  263. int order = swapOrder << 16;
  264. swapOrder = 0;
  265. return order;
  266. }
  267. int ch = text.previous();
  268. if (ch == Normalizer.DONE) {
  269. return NULLORDER;
  270. }
  271. int value = ordering.getUnicodeOrder(ch);
  272. if (value == RuleBasedCollator.UNMAPPED) {
  273. swapOrder = UNMAPPEDCHARVALUE;
  274. return ch;
  275. } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
  276. value = prevContractChar(ch);
  277. }
  278. if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
  279. buffer = ordering.getExpandValueList(value);
  280. expIndex = buffer.length;
  281. value = buffer[--expIndex];
  282. }
  283. if (ordering.isSEAsianSwapping()) {
  284. int vowel;
  285. if (isThaiBaseConsonant(ch)) {
  286. vowel = text.previous();
  287. if (isThaiPreVowel(vowel)) {
  288. buffer = makeReorderedBuffer(vowel, value, buffer, false);
  289. expIndex = buffer.length - 1;
  290. value = buffer[expIndex];
  291. } else {
  292. text.next();
  293. }
  294. }
  295. if (isLaoBaseConsonant(ch)) {
  296. vowel = text.previous();
  297. if (isLaoPreVowel(vowel)) {
  298. buffer = makeReorderedBuffer(vowel, value, buffer, false);
  299. expIndex = buffer.length - 1;
  300. value = buffer[expIndex];
  301. } else {
  302. text.next();
  303. }
  304. }
  305. }
  306. return strengthOrder(value);
  307. }
  308. /**
  309. * Return the primary component of a collation element.
  310. * @param order the collation element
  311. * @return the element's primary component
  312. */
  313. public final static int primaryOrder(int order)
  314. {
  315. order &= RBCollationTables.PRIMARYORDERMASK;
  316. return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
  317. }
  318. /**
  319. * Return the secondary component of a collation element.
  320. * @param order the collation element
  321. * @return the element's secondary component
  322. */
  323. public final static short secondaryOrder(int order)
  324. {
  325. order = order & RBCollationTables.SECONDARYORDERMASK;
  326. return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
  327. }
  328. /**
  329. * Return the tertiary component of a collation element.
  330. * @param order the collation element
  331. * @return the element's tertiary component
  332. */
  333. public final static short tertiaryOrder(int order)
  334. {
  335. return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
  336. }
  337. /**
  338. * Get the comparison order in the desired strength. Ignore the other
  339. * differences.
  340. * @param order The order value
  341. */
  342. final int strengthOrder(int order)
  343. {
  344. int s = owner.getStrength();
  345. if (s == Collator.PRIMARY)
  346. {
  347. order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
  348. } else if (s == Collator.SECONDARY)
  349. {
  350. order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
  351. }
  352. return order;
  353. }
  354. /**
  355. * Sets the iterator to point to the collation element corresponding to
  356. * the specified character (the parameter is a CHARACTER offset in the
  357. * original string, not an offset into its corresponding sequence of
  358. * collation elements). The value returned by the next call to next()
  359. * will be the collation element corresponding to the specified position
  360. * in the text. If that position is in the middle of a contracting
  361. * character sequence, the result of the next call to next() is the
  362. * collation element for that sequence. This means that getOffset()
  363. * is not guaranteed to return the same value as was passed to a preceding
  364. * call to setOffset().
  365. *
  366. * @param newOffset The new character offset into the original text.
  367. * @since 1.2
  368. */
  369. public void setOffset(int newOffset)
  370. {
  371. if (text != null) {
  372. if (newOffset < text.getBeginIndex()
  373. || newOffset >= text.getEndIndex()) {
  374. text.setIndexOnly(newOffset);
  375. } else {
  376. int c = text.setIndex(newOffset);
  377. // if the desired character isn't used in a contracting character
  378. // sequence, bypass all the backing-up logic-- we're sitting on
  379. // the right character already
  380. if (ordering.usedInContractSeq(c)) {
  381. // walk backwards through the string until we see a character
  382. // that DOESN'T participate in a contracting character sequence
  383. while (ordering.usedInContractSeq(c)) {
  384. c = text.previous();
  385. }
  386. // now walk forward using this object's next() method until
  387. // we pass the starting point and set our current position
  388. // to the beginning of the last "character" before or at
  389. // our starting position
  390. int last = text.getIndex();
  391. while (text.getIndex() <= newOffset) {
  392. last = text.getIndex();
  393. next();
  394. }
  395. text.setIndexOnly(last);
  396. // we don't need this, since last is the last index
  397. // that is the starting of the contraction which encompass
  398. // newOffset
  399. // text.previous();
  400. }
  401. }
  402. }
  403. buffer = null;
  404. expIndex = 0;
  405. swapOrder = 0;
  406. }
  407. /**
  408. * Returns the character offset in the original text corresponding to the next
  409. * collation element. (That is, getOffset() returns the position in the text
  410. * corresponding to the collation element that will be returned by the next
  411. * call to next().) This value will always be the index of the FIRST character
  412. * corresponding to the collation element (a contracting character sequence is
  413. * when two or more characters all correspond to the same collation element).
  414. * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
  415. * won't necessarily return x.
  416. *
  417. * @return The character offset in the original text corresponding to the collation
  418. * element that will be returned by the next call to next().
  419. * @since 1.2
  420. */
  421. public int getOffset()
  422. {
  423. return (text != null) ? text.getIndex() : 0;
  424. }
  425. /**
  426. * Return the maximum length of any expansion sequences that end
  427. * with the specified comparison order.
  428. * @param order a collation order returned by previous or next.
  429. * @return the maximum length of any expansion sequences ending
  430. * with the specified order.
  431. * @since 1.2
  432. */
  433. public int getMaxExpansion(int order)
  434. {
  435. return ordering.getMaxExpansion(order);
  436. }
  437. /**
  438. * Set a new string over which to iterate.
  439. *
  440. * @param source the new source text
  441. * @since 1.2
  442. */
  443. public void setText(String source)
  444. {
  445. buffer = null;
  446. swapOrder = 0;
  447. expIndex = 0;
  448. Normalizer.Mode mode =
  449. NormalizerUtilities.toNormalizerMode(owner.getDecomposition());
  450. if (text == null) {
  451. text = new Normalizer(source, mode);
  452. } else {
  453. text.setMode(mode);
  454. text.setText(source);
  455. }
  456. }
  457. /**
  458. * Set a new string over which to iterate.
  459. *
  460. * @param source the new source text.
  461. * @since 1.2
  462. */
  463. public void setText(CharacterIterator source)
  464. {
  465. buffer = null;
  466. swapOrder = 0;
  467. expIndex = 0;
  468. Normalizer.Mode mode =
  469. NormalizerUtilities.toNormalizerMode(owner.getDecomposition());
  470. if (text == null) {
  471. text = new Normalizer(source, mode);
  472. } else {
  473. text.setMode(mode);
  474. text.setText(source);
  475. }
  476. }
  477. //============================================================
  478. // privates
  479. //============================================================
  480. /**
  481. * Determine if a character is a Thai vowel (which sorts after
  482. * its base consonant).
  483. */
  484. private final static boolean isThaiPreVowel(int ch) {
  485. return (ch >= 0x0e40) && (ch <= 0x0e44);
  486. }
  487. /**
  488. * Determine if a character is a Thai base consonant
  489. */
  490. private final static boolean isThaiBaseConsonant(int ch) {
  491. return (ch >= 0x0e01) && (ch <= 0x0e2e);
  492. }
  493. /**
  494. * Determine if a character is a Lao vowel (which sorts after
  495. * its base consonant).
  496. */
  497. private final static boolean isLaoPreVowel(int ch) {
  498. return (ch >= 0x0ec0) && (ch <= 0x0ec4);
  499. }
  500. /**
  501. * Determine if a character is a Lao base consonant
  502. */
  503. private final static boolean isLaoBaseConsonant(int ch) {
  504. return (ch >= 0x0e81) && (ch <= 0x0eae);
  505. }
  506. /**
  507. * This method produces a buffer which contains the collation
  508. * elements for the two characters, with colFirst's values preceding
  509. * another character's. Presumably, the other character precedes colFirst
  510. * in logical order (otherwise you wouldn't need this method would you?).
  511. * The assumption is that the other char's value(s) have already been
  512. * computed. If this char has a single element it is passed to this
  513. * method as lastValue, and lastExpansion is null. If it has an
  514. * expansion it is passed in lastExpansion, and colLastValue is ignored.
  515. */
  516. private int[] makeReorderedBuffer(int colFirst,
  517. int lastValue,
  518. int[] lastExpansion,
  519. boolean forward) {
  520. int[] result;
  521. int firstValue = ordering.getUnicodeOrder(colFirst);
  522. if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
  523. firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
  524. }
  525. int[] firstExpansion = null;
  526. if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
  527. firstExpansion = ordering.getExpandValueList(firstValue);
  528. }
  529. if (!forward) {
  530. int temp1 = firstValue;
  531. firstValue = lastValue;
  532. lastValue = temp1;
  533. int[] temp2 = firstExpansion;
  534. firstExpansion = lastExpansion;
  535. lastExpansion = temp2;
  536. }
  537. if (firstExpansion == null && lastExpansion == null) {
  538. result = new int [2];
  539. result[0] = firstValue;
  540. result[1] = lastValue;
  541. }
  542. else {
  543. int firstLength = firstExpansion==null? 1 : firstExpansion.length;
  544. int lastLength = lastExpansion==null? 1 : lastExpansion.length;
  545. result = new int[firstLength + lastLength];
  546. if (firstExpansion == null) {
  547. result[0] = firstValue;
  548. }
  549. else {
  550. System.arraycopy(firstExpansion, 0, result, 0, firstLength);
  551. }
  552. if (lastExpansion == null) {
  553. result[firstLength] = lastValue;
  554. }
  555. else {
  556. System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
  557. }
  558. }
  559. return result;
  560. }
  561. /**
  562. * Check if a comparison order is ignorable.
  563. * @return true if a character is ignorable, false otherwise.
  564. */
  565. final static boolean isIgnorable(int order)
  566. {
  567. return ((primaryOrder(order) == 0) ? true : false);
  568. }
  569. /**
  570. * Get the ordering priority of the next contracting character in the
  571. * string.
  572. * @param ch the starting character of a contracting character token
  573. * @return the next contracting character's ordering. Returns NULLORDER
  574. * if the end of string is reached.
  575. */
  576. private int nextContractChar(int ch)
  577. {
  578. // First get the ordering of this single character,
  579. // which is always the first element in the list
  580. Vector list = ordering.getContractValues(ch);
  581. EntryPair pair = (EntryPair)list.firstElement();
  582. int order = pair.value;
  583. // find out the length of the longest contracting character sequence in the list.
  584. // There's logic in the builder code to make sure the longest sequence is always
  585. // the last.
  586. pair = (EntryPair)list.lastElement();
  587. int maxLength = pair.entryName.length();
  588. // (the Normalizer is cloned here so that the seeking we do in the next loop
  589. // won't affect our real position in the text)
  590. Normalizer tempText = (Normalizer)text.clone();
  591. // extract the next maxLength characters in the string (we have to do this using the
  592. // Normalizer to ensure that our offsets correspond to those the rest of the
  593. // iterator is using) and store it in "fragment".
  594. tempText.previous();
  595. key.setLength(0);
  596. int c = tempText.next();
  597. while (maxLength > 0 && c != Normalizer.DONE) {
  598. if (Character.isSupplementaryCodePoint(c)) {
  599. key.append(Character.toChars(c));
  600. maxLength -= 2;
  601. } else {
  602. key.append((char)c);
  603. --maxLength;
  604. }
  605. c = tempText.next();
  606. }
  607. String fragment = key.toString();
  608. // now that we have that fragment, iterate through this list looking for the
  609. // longest sequence that matches the characters in the actual text. (maxLength
  610. // is used here to keep track of the length of the longest sequence)
  611. // Upon exit from this loop, maxLength will contain the length of the matching
  612. // sequence and order will contain the collation-element value corresponding
  613. // to this sequence
  614. maxLength = 1;
  615. for (int i = list.size() - 1; i > 0; i--) {
  616. pair = (EntryPair)list.elementAt(i);
  617. if (!pair.fwd)
  618. continue;
  619. if (fragment.startsWith(pair.entryName) && pair.entryName.length()
  620. > maxLength) {
  621. maxLength = pair.entryName.length();
  622. order = pair.value;
  623. }
  624. }
  625. // seek our current iteration position to the end of the matching sequence
  626. // and return the appropriate collation-element value (if there was no matching
  627. // sequence, we're already seeked to the right position and order already contains
  628. // the correct collation-element value for the single character)
  629. while (maxLength > 1) {
  630. c = text.next();
  631. maxLength -= Character.charCount(c);
  632. }
  633. return order;
  634. }
  635. /**
  636. * Get the ordering priority of the previous contracting character in the
  637. * string.
  638. * @param ch the starting character of a contracting character token
  639. * @return the next contracting character's ordering. Returns NULLORDER
  640. * if the end of string is reached.
  641. */
  642. private int prevContractChar(int ch)
  643. {
  644. // This function is identical to nextContractChar(), except that we've
  645. // switched things so that the next() and previous() calls on the Normalizer
  646. // are switched and so that we skip entry pairs with the fwd flag turned on
  647. // rather than off. Notice that we still use append() and startsWith() when
  648. // working on the fragment. This is because the entry pairs that are used
  649. // in reverse iteration have their names reversed already.
  650. Vector list = ordering.getContractValues(ch);
  651. EntryPair pair = (EntryPair)list.firstElement();
  652. int order = pair.value;
  653. pair = (EntryPair)list.lastElement();
  654. int maxLength = pair.entryName.length();
  655. Normalizer tempText = (Normalizer)text.clone();
  656. tempText.next();
  657. key.setLength(0);
  658. int c = tempText.previous();
  659. while (maxLength > 0 && c != Normalizer.DONE) {
  660. if (Character.isSupplementaryCodePoint(c)) {
  661. key.append(Character.toChars(c));
  662. maxLength -= 2;
  663. } else {
  664. key.append((char)c);
  665. --maxLength;
  666. }
  667. c = tempText.previous();
  668. }
  669. String fragment = key.toString();
  670. maxLength = 1;
  671. for (int i = list.size() - 1; i > 0; i--) {
  672. pair = (EntryPair)list.elementAt(i);
  673. if (pair.fwd)
  674. continue;
  675. if (fragment.startsWith(pair.entryName) && pair.entryName.length()
  676. > maxLength) {
  677. maxLength = pair.entryName.length();
  678. order = pair.value;
  679. }
  680. }
  681. while (maxLength > 1) {
  682. c = text.previous();
  683. maxLength -= Character.charCount(c);
  684. }
  685. return order;
  686. }
  687. final static int UNMAPPEDCHARVALUE = 0x7FFF0000;
  688. private Normalizer text = null;
  689. private int[] buffer = null;
  690. private int expIndex = 0;
  691. private StringBuffer key = new StringBuffer(5);
  692. private int swapOrder = 0;
  693. private RBCollationTables ordering;
  694. private RuleBasedCollator owner;
  695. }