1. /*
  2. * @(#)CollationElementIterator.java 1.25 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)CollationElementIterator.java 1.25 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1996-1998 Sun Microsystems, Inc. All Rights Reserved.
  14. *
  15. * The original version of this source code and documentation is copyrighted
  16. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  17. * materials are provided under terms of a License Agreement between Taligent
  18. * and Sun. This technology is protected by multiple US and International
  19. * patents. This notice and attribution to Taligent may not be removed.
  20. * Taligent is a registered trademark of Taligent, Inc.
  21. *
  22. * Permission to use, copy, modify, and distribute this software
  23. * and its documentation for NON-COMMERCIAL purposes and without
  24. * fee is hereby granted provided that this copyright notice
  25. * appears in all copies. Please refer to the file "copyright.html"
  26. * for further important copyright and licensing information.
  27. *
  28. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  29. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  30. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  31. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  32. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  33. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  34. *
  35. */
  36. package java.text;
  37. import java.lang.Character;
  38. import java.util.Vector;
  39. /**
  40. * The <code>CollationElementIterator</code> class is used as an iterator
  41. * to walk through each character of an international string. Use the iterator
  42. * to return the ordering priority of the positioned character. The ordering
  43. * priority of a character, which we refer to as a key, defines how a character
  44. * is collated in the given collation object.
  45. *
  46. * <p>
  47. * For example, consider the following in Spanish:
  48. * <blockquote>
  49. * <pre>
  50. * "ca" -> the first key is key('c') and second key is key('a').
  51. * "cha" -> the first key is key('ch') and second key is key('a').
  52. * </pre>
  53. * </blockquote>
  54. * And in German,
  55. * <blockquote>
  56. * <pre>
  57. * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and
  58. * the third key is key('b').
  59. * </pre>
  60. * </blockquote>
  61. * The key of a character is an integer composed of primary order(short),
  62. * secondary order(byte), and tertiary order(byte). Java strictly defines
  63. * the size and signedness of its primitive data types. Therefore, the static
  64. * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
  65. * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
  66. * and <code>short</code> respectively to ensure the correctness of the key
  67. * value.
  68. *
  69. * <p>
  70. * Example of the iterator usage,
  71. * <blockquote>
  72. * <pre>
  73. * // get the first key of the string
  74. * String str = "This is a test";
  75. * CollationElementIterator c =
  76. * new CollationElementIterator(str, 0, str.length(),
  77. * Collator.getInstance());
  78. * int primaryOrder = CollationElementIterator.primaryOrder(c->next());
  79. * </pre>
  80. * </blockquote>
  81. *
  82. * <p>
  83. * <code>CollationElementIterator.next</code> returns the collation order
  84. * of the next character. A collation order consists of primary order,
  85. * secondary order and tertiary order. The data type of the collation
  86. * order is <strong>int</strong>. The first 16 bits of a collation order
  87. * is its primary order; the next 8 bits is the secondary order and the
  88. * last 8 bits is the tertiary order.
  89. *
  90. * @see Collator
  91. * @see RuleBasedCollator
  92. * @version 1.25 11/29/01
  93. * @author Helena Shih
  94. */
  95. public final class CollationElementIterator
  96. {
  97. /**
  98. * Null order which indicates the end of string is reached by the
  99. * cursor.
  100. */
  101. public final static int NULLORDER = 0xffffffff;
  102. /**
  103. * CollationElementIterator constructor. This takes the source string and
  104. * the collation object. The cursor will walk thru the source string based
  105. * on the predefined collation rules. If the source string is empty,
  106. * NULLORDER will be returned on the calls to next().
  107. * @param sourceText the source string.
  108. * @param order the collation object.
  109. */
  110. CollationElementIterator(String sourceText, RuleBasedCollator order) {
  111. ordering = order;
  112. if ( sourceText.length() != 0 ) {
  113. text = new Normalizer(sourceText, order.getDecomposition());
  114. }
  115. }
  116. /**
  117. * CollationElementIterator constructor. This takes the source string and
  118. * the collation object. The cursor will walk thru the source string based
  119. * on the predefined collation rules. If the source string is empty,
  120. * NULLORDER will be returned on the calls to next().
  121. * @param sourceText the source string.
  122. * @param order the collation object.
  123. */
  124. CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator order) {
  125. ordering = order;
  126. text = new Normalizer(sourceText, order.getDecomposition());
  127. }
  128. /**
  129. * Resets the cursor to the beginning of the string.
  130. */
  131. public void reset()
  132. {
  133. if (text != null) {
  134. text.reset();
  135. text.setDecomposition(ordering.getDecomposition());
  136. }
  137. buffer = null;
  138. expIndex = 0;
  139. swapOrder = 0;
  140. }
  141. /**
  142. * Get the ordering priority of the next character in the string.
  143. * @return the next character's ordering. Returns NULLORDER if
  144. * the end of string is reached.
  145. */
  146. public int next()
  147. {
  148. if (text == null) {
  149. return NULLORDER;
  150. } else if (text.getDecomposition() != ordering.getDecomposition()) {
  151. text.setDecomposition(ordering.getDecomposition());
  152. }
  153. if (buffer != null) {
  154. if (expIndex < buffer.length) {
  155. return strengthOrder(buffer[expIndex++]);
  156. } else {
  157. buffer = null;
  158. expIndex = 0;
  159. }
  160. } else if (swapOrder != 0) {
  161. int order = swapOrder << 16;
  162. swapOrder = 0;
  163. return order;
  164. }
  165. char ch = text.next();
  166. if (ch == Normalizer.DONE) {
  167. return NULLORDER;
  168. }
  169. int value = ordering.getUnicodeOrder(ch);
  170. if (value == RuleBasedCollator.UNMAPPED) {
  171. swapOrder = ch;
  172. return UNMAPPEDCHARVALUE;
  173. }
  174. else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
  175. value = nextContractChar(ch);
  176. }
  177. if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
  178. buffer = ordering.getExpandValueList(value);
  179. expIndex = 0;
  180. value = buffer[expIndex++];
  181. }
  182. return strengthOrder(value);
  183. }
  184. /**
  185. * Get the ordering priority of the previous collation element in the string.
  186. * @return the previous element's ordering. Returns NULLORDER if
  187. * the beginning of string is reached.
  188. */
  189. public int previous()
  190. {
  191. if (text == null) {
  192. return NULLORDER;
  193. } else if (text.getDecomposition() != ordering.getDecomposition()) {
  194. text.setDecomposition(ordering.getDecomposition());
  195. }
  196. if (buffer != null) {
  197. if (expIndex > 0) {
  198. return strengthOrder(buffer[--expIndex]);
  199. } else {
  200. buffer = null;
  201. expIndex = 0;
  202. }
  203. } else if (swapOrder != 0) {
  204. int order = swapOrder << 16;
  205. swapOrder = 0;
  206. return order;
  207. }
  208. char ch = text.previous();
  209. if (ch == Normalizer.DONE) {
  210. return NULLORDER;
  211. }
  212. int value = ordering.getUnicodeOrder(ch);
  213. if (value == RuleBasedCollator.UNMAPPED) {
  214. swapOrder = UNMAPPEDCHARVALUE;
  215. return ch;
  216. } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
  217. value = prevContractChar(ch);
  218. }
  219. if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
  220. buffer = ordering.getExpandValueList(value);
  221. expIndex = buffer.length;
  222. value = buffer[--expIndex];
  223. }
  224. return strengthOrder(value);
  225. }
  226. /**
  227. * Get the primary order of a collation order.
  228. * @param order the collation order
  229. * @return the primary order of a collation order.
  230. */
  231. public final static int primaryOrder(int order)
  232. {
  233. order &= RuleBasedCollator.PRIMARYORDERMASK;
  234. return (order >>> RuleBasedCollator.PRIMARYORDERSHIFT);
  235. }
  236. /**
  237. * Get the secondary order of a collation order.
  238. * @param order the collation order
  239. * @return the secondary order of a collation order.
  240. */
  241. public final static short secondaryOrder(int order)
  242. {
  243. order = order & RuleBasedCollator.SECONDARYORDERMASK;
  244. return ((short)(order >> RuleBasedCollator.SECONDARYORDERSHIFT));
  245. }
  246. /**
  247. * Get the tertiary order of a collation order.
  248. * @param order the collation order
  249. * @return the tertiary order of a collation order.
  250. */
  251. public final static short tertiaryOrder(int order)
  252. {
  253. return ((short)(order &= RuleBasedCollator.TERTIARYORDERMASK));
  254. }
  255. /**
  256. * Get the comparison order in the desired strength. Ignore the other
  257. * differences.
  258. * @param order The order value
  259. */
  260. final int strengthOrder(int order)
  261. {
  262. int s = ordering.getStrength();
  263. if (s == Collator.PRIMARY)
  264. {
  265. order &= RuleBasedCollator.PRIMARYDIFFERENCEONLY;
  266. } else if (s == Collator.SECONDARY)
  267. {
  268. order &= RuleBasedCollator.SECONDARYDIFFERENCEONLY;
  269. }
  270. return order;
  271. }
  272. /**
  273. * Sets the position within the source text.
  274. *
  275. * @param newOffset The new offset relative to the start of the text.
  276. */
  277. public void setOffset(int newOffset)
  278. {
  279. if (text != null)
  280. text.setOffset(newOffset);
  281. buffer = null;
  282. expIndex = 0;
  283. swapOrder = 0;
  284. }
  285. /**
  286. * Gets the offset of the current character in the the source text.
  287. * That is, the next call to next() will return the Collation element
  288. * for this character (possibly including more than one character,
  289. * if required by the language).
  290. *
  291. * @return the returned ofset.
  292. */
  293. public int getOffset()
  294. {
  295. return (text != null) ? text.getOffset() : 0;
  296. }
  297. /**
  298. * Return the maximum length of any expansion sequences that end
  299. * with the specified comparison order.
  300. * @param order a collation order returned by previous or next.
  301. * @return the maximum length of any expansion sequences ending
  302. * with the specified order.
  303. */
  304. public int getMaxExpansion(int order)
  305. {
  306. return ordering.getMaxExpansion(order);
  307. }
  308. //============================================================
  309. // Package-visible methods that should eventually be made public
  310. //============================================================
  311. /**
  312. * Set a new string over which to iterate.
  313. *
  314. * @param str the new source text.
  315. */
  316. public void setText(String source)
  317. {
  318. buffer = null;
  319. swapOrder = 0;
  320. expIndex = 0;
  321. if (text == null) {
  322. text = new Normalizer(source, ordering.getDecomposition());
  323. } else {
  324. text.setDecomposition(ordering.getDecomposition());
  325. text.setText(source);
  326. }
  327. }
  328. /**
  329. * Set a new string over which to iterate.
  330. *
  331. * @param str the new source text.
  332. */
  333. public void setText(CharacterIterator source)
  334. {
  335. buffer = null;
  336. swapOrder = 0;
  337. expIndex = 0;
  338. if (text == null) {
  339. text = new Normalizer(source, ordering.getDecomposition());
  340. } else {
  341. text.setDecomposition(ordering.getDecomposition());
  342. text.setText(source);
  343. }
  344. }
  345. //============================================================
  346. // privates
  347. //============================================================
  348. /**
  349. * Check if a comparison order is ignorable.
  350. * @return true if a character is ignorable, false otherwise.
  351. */
  352. final static boolean isIgnorable(int order)
  353. {
  354. return ((primaryOrder(order) == 0) ? true : false);
  355. }
  356. /**
  357. * Get the ordering priority of the next contracting character in the
  358. * string.
  359. * @param ch the starting character of a contracting character token
  360. * @return the next contracting character's ordering. Returns NULLORDER
  361. * if the end of string is reached.
  362. */
  363. private int nextContractChar(char ch)
  364. {
  365. // First get the ordering of this single character
  366. Vector list = ordering.getContractValues(ch);
  367. EntryPair pair = (EntryPair)list.firstElement();
  368. int order = pair.value;
  369. // Now iterate through the chars following it and
  370. // look for the longest match
  371. key.setLength(0);
  372. key.append(ch);
  373. while ((ch = text.next()) != Normalizer.DONE) {
  374. key.append(ch);
  375. int n = RuleBasedCollator.getEntry(list, key.toString(), true);
  376. if (n == RuleBasedCollator.UNMAPPED) {
  377. ch = text.previous();
  378. break;
  379. }
  380. pair = (EntryPair)list.elementAt(n);
  381. order = pair.value;
  382. }
  383. return order;
  384. }
  385. /**
  386. * Get the ordering priority of the previous contracting character in the
  387. * string.
  388. * @param ch the starting character of a contracting character token
  389. * @return the next contracting character's ordering. Returns NULLORDER
  390. * if the end of string is reached.
  391. */
  392. private int prevContractChar(char ch)
  393. {
  394. // First get the ordering of this single character
  395. Vector list = ordering.getContractValues(ch);
  396. EntryPair pair = (EntryPair)list.firstElement();
  397. int order = pair.value;
  398. // Now iterate through the chars following it and
  399. // look for the longest match
  400. key.setLength(0);
  401. key.append(ch);
  402. while ((ch = text.previous()) != Normalizer.DONE) {
  403. //System.out.println("prevContract: '" + key + "' --> " +
  404. // Integer.toHexString(order) );
  405. key.append(ch);
  406. int n = RuleBasedCollator.getEntry(list, key.toString(), false);
  407. if (n == RuleBasedCollator.UNMAPPED) {
  408. //System.out.println(" putting '" + ch + "' back");
  409. ch = text.next();
  410. break;
  411. }
  412. pair = (EntryPair)list.elementAt(n);
  413. order = pair.value;
  414. }
  415. return order;
  416. }
  417. final static int UNMAPPEDCHARVALUE = 0x7FFF0000;
  418. private Normalizer text = null;
  419. private int[] buffer = null;
  420. private int expIndex = 0;
  421. private StringBuffer key = new StringBuffer(5);
  422. private int swapOrder = 0;
  423. private RuleBasedCollator ordering;
  424. }