1. /*
  2. * @(#)Collator.java 1.34 03/01/27
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation is copyrighted
  12. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  13. * materials are provided under terms of a License Agreement between Taligent
  14. * and Sun. This technology is protected by multiple US and International
  15. * patents. This notice and attribution to Taligent may not be removed.
  16. * Taligent is a registered trademark of Taligent, Inc.
  17. *
  18. */
  19. package java.text;
  20. import java.util.Locale;
  21. import java.util.MissingResourceException;
  22. import java.util.ResourceBundle;
  23. import sun.misc.SoftCache;
  24. import sun.text.resources.LocaleData;
  25. /**
  26. * The <code>Collator</code> class performs locale-sensitive
  27. * <code>String</code> comparison. You use this class to build
  28. * searching and sorting routines for natural language text.
  29. *
  30. * <p>
  31. * <code>Collator</code> is an abstract base class. Subclasses
  32. * implement specific collation strategies. One subclass,
  33. * <code>RuleBasedCollator</code>, is currently provided with
  34. * the Java 2 platform and is applicable to a wide set of languages. Other
  35. * subclasses may be created to handle more specialized needs.
  36. *
  37. * <p>
  38. * Like other locale-sensitive classes, you can use the static
  39. * factory method, <code>getInstance</code>, to obtain the appropriate
  40. * <code>Collator</code> object for a given locale. You will only need
  41. * to look at the subclasses of <code>Collator</code> if you need
  42. * to understand the details of a particular collation strategy or
  43. * if you need to modify that strategy.
  44. *
  45. * <p>
  46. * The following example shows how to compare two strings using
  47. * the <code>Collator</code> for the default locale.
  48. * <blockquote>
  49. * <pre>
  50. * // Compare two strings in the default locale
  51. * Collator myCollator = Collator.getInstance();
  52. * if( myCollator.compare("abc", "ABC") < 0 )
  53. * System.out.println("abc is less than ABC");
  54. * else
  55. * System.out.println("abc is greater than or equal to ABC");
  56. * </pre>
  57. * </blockquote>
  58. *
  59. * <p>
  60. * You can set a <code>Collator</code>'s <em>strength</em> property
  61. * to determine the level of difference considered significant in
  62. * comparisons. Four strengths are provided: <code>PRIMARY</code>,
  63. * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
  64. * The exact assignment of strengths to language features is
  65. * locale-dependent. For example, in Czech, "e" and "f" are considered
  66. * primary differences, while "e" and "\u00EA" are secondary differences,
  67. * "e" and "E" are tertiary differences and "e" and "e" are identical.
  68. * The following shows how both case and accents could be ignored for
  69. * US English.
  70. * <blockquote>
  71. * <pre>
  72. * //Get the Collator for US English and set its strength to PRIMARY
  73. * Collator usCollator = Collator.getInstance(Locale.US);
  74. * usCollator.setStrength(Collator.PRIMARY);
  75. * if( usCollator.compare("abc", "ABC") == 0 ) {
  76. * System.out.println("Strings are equivalent");
  77. * }
  78. * </pre>
  79. * </blockquote>
  80. * <p>
  81. * For comparing <code>String</code>s exactly once, the <code>compare</code>
  82. * method provides the best performance. When sorting a list of
  83. * <code>String</code>s however, it is generally necessary to compare each
  84. * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
  85. * provide better performance. The <code>CollationKey</code> class converts
  86. * a <code>String</code> to a series of bits that can be compared bitwise
  87. * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
  88. * created by a <code>Collator</code> object for a given <code>String</code>.
  89. * <br>
  90. * <strong>Note:</strong> <code>CollationKey</code>s from different
  91. * <code>Collator</code>s cannot be compared. See the class description
  92. * for {@link CollationKey}
  93. * for an example using <code>CollationKey</code>s.
  94. *
  95. * @see RuleBasedCollator
  96. * @see CollationKey
  97. * @see CollationElementIterator
  98. * @see Locale
  99. * @version 1.34, 01/27/03
  100. * @author Helena Shih, Laura Werner, Richard Gillam
  101. */
  102. public abstract class Collator implements java.util.Comparator, Cloneable {
  103. /**
  104. * Collator strength value. When set, only PRIMARY differences are
  105. * considered significant during comparison. The assignment of strengths
  106. * to language features is locale dependant. A common example is for
  107. * different base letters ("a" vs "b") to be considered a PRIMARY difference.
  108. * @see java.text.Collator#setStrength
  109. * @see java.text.Collator#getStrength
  110. */
  111. public final static int PRIMARY = 0;
  112. /**
  113. * Collator strength value. When set, only SECONDARY and above differences are
  114. * considered significant during comparison. The assignment of strengths
  115. * to language features is locale dependant. A common example is for
  116. * different accented forms of the same base letter ("a" vs "\u00E4") to be
  117. * considered a SECONDARY difference.
  118. * @see java.text.Collator#setStrength
  119. * @see java.text.Collator#getStrength
  120. */
  121. public final static int SECONDARY = 1;
  122. /**
  123. * Collator strength value. When set, only TERTIARY and above differences are
  124. * considered significant during comparison. The assignment of strengths
  125. * to language features is locale dependant. A common example is for
  126. * case differences ("a" vs "A") to be considered a TERTIARY difference.
  127. * @see java.text.Collator#setStrength
  128. * @see java.text.Collator#getStrength
  129. */
  130. public final static int TERTIARY = 2;
  131. /**
  132. * Collator strength value. When set, all differences are
  133. * considered significant during comparison. The assignment of strengths
  134. * to language features is locale dependant. A common example is for control
  135. * characters ("\u0001" vs "\u0002") to be considered equal at the
  136. * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
  137. * level. Additionally, differences between pre-composed accents such as
  138. * "\u00C0" (A-grave) and combining accents such as "A\u0300"
  139. * (A, combining-grave) will be considered significant at the tertiary
  140. * level if decomposition is set to NO_DECOMPOSITION.
  141. */
  142. public final static int IDENTICAL = 3;
  143. /**
  144. * Decomposition mode value. With NO_DECOMPOSITION
  145. * set, accented characters will not be decomposed for collation. This
  146. * is the default setting and provides the fastest collation but
  147. * will only produce correct results for languages that do not use accents.
  148. * @see java.text.Collator#getDecomposition
  149. * @see java.text.Collator#setDecomposition
  150. */
  151. public final static int NO_DECOMPOSITION = 0;
  152. /**
  153. * Decomposition mode value. With CANONICAL_DECOMPOSITION
  154. * set, characters that are canonical variants according to Unicode 2.0
  155. * will be decomposed for collation. This
  156. * should be used to get correct collation of accented characters.
  157. * <p>
  158. * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
  159. * described in
  160. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  161. * Technical Report #15</a>.
  162. * @see java.text.Collator#getDecomposition
  163. * @see java.text.Collator#setDecomposition
  164. */
  165. public final static int CANONICAL_DECOMPOSITION = 1;
  166. /**
  167. * Decomposition mode value. With FULL_DECOMPOSITION
  168. * set, both Unicode canonical variants and Unicode compatibility variants
  169. * will be decomposed for collation. This causes not only accented
  170. * characters to be collated, but also characters that have special formats
  171. * to be collated with their norminal form. For example, the half-width and
  172. * full-width ASCII and Katakana characters are then collated together.
  173. * FULL_DECOMPOSITION is the most complete and therefore the slowest
  174. * decomposition mode.
  175. * <p>
  176. * FULL_DECOMPOSITION corresponds to Normalization Form KD as
  177. * described in
  178. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  179. * Technical Report #15</a>.
  180. * @see java.text.Collator#getDecomposition
  181. * @see java.text.Collator#setDecomposition
  182. */
  183. public final static int FULL_DECOMPOSITION = 2;
  184. /**
  185. * Gets the Collator for the current default locale.
  186. * The default locale is determined by java.util.Locale.getDefault.
  187. * @return the Collator for the default locale.(for example, en_US)
  188. * @see java.util.Locale#getDefault
  189. */
  190. public static synchronized Collator getInstance() {
  191. return getInstance(Locale.getDefault());
  192. }
  193. /**
  194. * Gets the Collator for the desired locale.
  195. * @param desiredLocale the desired locale.
  196. * @return the Collator for the desired locale.
  197. * @see java.util.Locale
  198. * @see java.util.ResourceBundle
  199. */
  200. public static synchronized
  201. Collator getInstance(Locale desiredLocale)
  202. {
  203. RuleBasedCollator result = null;
  204. result = (RuleBasedCollator) cache.get(desiredLocale);
  205. if (result != null) {
  206. return (Collator)result.clone(); // make the world safe
  207. }
  208. // Load the resource of the desired locale from resource
  209. // manager.
  210. String colString = "";
  211. int decomp = CANONICAL_DECOMPOSITION;
  212. try {
  213. ResourceBundle resource = LocaleData.getLocaleElements(desiredLocale);
  214. colString = resource.getString("CollationElements");
  215. decomp = ((Integer)resource.getObject("CollationDecomp")).intValue();
  216. } catch (MissingResourceException e) {
  217. // Use default values
  218. }
  219. try
  220. {
  221. result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
  222. colString,
  223. decomp );
  224. }
  225. catch(ParseException foo)
  226. {
  227. // predefined tables should contain correct grammar
  228. try {
  229. result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
  230. } catch (ParseException bar) {
  231. // do nothing
  232. }
  233. }
  234. // Now that RuleBasedCollator adds expansions for pre-composed characters
  235. // into their decomposed equivalents, the default collators don't need
  236. // to have decomposition turned on. Laura, 5/5/98, bug 4114077
  237. result.setDecomposition(NO_DECOMPOSITION);
  238. cache.put(desiredLocale,result);
  239. return (Collator)result.clone();
  240. }
  241. /**
  242. * Compares the source string to the target string according to the
  243. * collation rules for this Collator. Returns an integer less than,
  244. * equal to or greater than zero depending on whether the source String is
  245. * less than, equal to or greater than the target string. See the Collator
  246. * class description for an example of use.
  247. * <p>
  248. * For a one time comparison, this method has the best performance. If a
  249. * given String will be involved in multiple comparisons, CollationKey.compareTo
  250. * has the best performance. See the Collator class description for an example
  251. * using CollationKeys.
  252. * @param source the source string.
  253. * @param target the target string.
  254. * @return Returns an integer value. Value is less than zero if source is less than
  255. * target, value is zero if source and target are equal, value is greater than zero
  256. * if source is greater than target.
  257. * @see java.text.CollationKey
  258. * @see java.text.Collator#getCollationKey
  259. */
  260. public abstract int compare(String source, String target);
  261. /**
  262. * Compares its two arguments for order. Returns a negative integer,
  263. * zero, or a positive integer as the first argument is less than, equal
  264. * to, or greater than the second.
  265. * <p>
  266. * This implementation merely returns
  267. * <code> compare((String)o1, (String)o2) </code>.
  268. *
  269. * @return a negative integer, zero, or a positive integer as the
  270. * first argument is less than, equal to, or greater than the
  271. * second.
  272. * @exception ClassCastException the arguments cannot be cast to Strings.
  273. * @see java.util.Comparator
  274. * @since 1.2
  275. */
  276. public int compare(Object o1, Object o2) {
  277. return compare((String)o1, (String)o2);
  278. }
  279. /**
  280. * Transforms the String into a series of bits that can be compared bitwise
  281. * to other CollationKeys. CollationKeys provide better performance than
  282. * Collator.compare when Strings are involved in multiple comparisons.
  283. * See the Collator class description for an example using CollationKeys.
  284. * @param source the string to be transformed into a collation key.
  285. * @return the CollationKey for the given String based on this Collator's collation
  286. * rules. If the source String is null, a null CollationKey is returned.
  287. * @see java.text.CollationKey
  288. * @see java.text.Collator#compare
  289. */
  290. public abstract CollationKey getCollationKey(String source);
  291. /**
  292. * Convenience method for comparing the equality of two strings based on
  293. * this Collator's collation rules.
  294. * @param source the source string to be compared with.
  295. * @param target the target string to be compared with.
  296. * @return true if the strings are equal according to the collation
  297. * rules. false, otherwise.
  298. * @see java.text.Collator#compare
  299. */
  300. public boolean equals(String source, String target)
  301. {
  302. return (compare(source, target) == Collator.EQUAL);
  303. }
  304. /**
  305. * Returns this Collator's strength property. The strength property determines
  306. * the minimum level of difference considered significant during comparison.
  307. * See the Collator class description for an example of use.
  308. * @return this Collator's current strength property.
  309. * @see java.text.Collator#setStrength
  310. * @see java.text.Collator#PRIMARY
  311. * @see java.text.Collator#SECONDARY
  312. * @see java.text.Collator#TERTIARY
  313. * @see java.text.Collator#IDENTICAL
  314. */
  315. public synchronized int getStrength()
  316. {
  317. return strength;
  318. }
  319. /**
  320. * Sets this Collator's strength property. The strength property determines
  321. * the minimum level of difference considered significant during comparison.
  322. * See the Collator class description for an example of use.
  323. * @param newStrength the new strength value.
  324. * @see java.text.Collator#getStrength
  325. * @see java.text.Collator#PRIMARY
  326. * @see java.text.Collator#SECONDARY
  327. * @see java.text.Collator#TERTIARY
  328. * @see java.text.Collator#IDENTICAL
  329. * @exception IllegalArgumentException If the new strength value is not one of
  330. * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
  331. */
  332. public synchronized void setStrength(int newStrength) {
  333. if ((newStrength != PRIMARY) &&
  334. (newStrength != SECONDARY) &&
  335. (newStrength != TERTIARY) &&
  336. (newStrength != IDENTICAL))
  337. throw new IllegalArgumentException("Incorrect comparison level.");
  338. strength = newStrength;
  339. }
  340. /**
  341. * Get the decomposition mode of this Collator. Decomposition mode
  342. * determines how Unicode composed characters are handled. Adjusting
  343. * decomposition mode allows the user to select between faster and more
  344. * complete collation behavior.
  345. * <p>The three values for decomposition mode are:
  346. * <UL>
  347. * <LI>NO_DECOMPOSITION,
  348. * <LI>CANONICAL_DECOMPOSITION
  349. * <LI>FULL_DECOMPOSITION.
  350. * </UL>
  351. * See the documentation for these three constants for a description
  352. * of their meaning.
  353. * @return the decomposition mode
  354. * @see java.text.Collator#setDecomposition
  355. * @see java.text.Collator#NO_DECOMPOSITION
  356. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  357. * @see java.text.Collator#FULL_DECOMPOSITION
  358. */
  359. public synchronized int getDecomposition()
  360. {
  361. return decmp;
  362. }
  363. /**
  364. * Set the decomposition mode of this Collator. See getDecomposition
  365. * for a description of decomposition mode.
  366. * @param decompositionMode the new decomposition mode.
  367. * @see java.text.Collator#getDecomposition
  368. * @see java.text.Collator#NO_DECOMPOSITION
  369. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  370. * @see java.text.Collator#FULL_DECOMPOSITION
  371. * @exception IllegalArgumentException If the given value is not a valid decomposition
  372. * mode.
  373. */
  374. public synchronized void setDecomposition(int decompositionMode) {
  375. if ((decompositionMode != NO_DECOMPOSITION) &&
  376. (decompositionMode != CANONICAL_DECOMPOSITION) &&
  377. (decompositionMode != FULL_DECOMPOSITION))
  378. throw new IllegalArgumentException("Wrong decomposition mode.");
  379. decmp = decompositionMode;
  380. }
  381. /**
  382. * Get the set of Locales for which Collators are installed.
  383. * @return the list of available locales which collators are installed.
  384. */
  385. public static synchronized Locale[] getAvailableLocales() {
  386. return LocaleData.getAvailableLocales("CollationElements");
  387. }
  388. /**
  389. * Overrides Cloneable
  390. */
  391. public Object clone()
  392. {
  393. try {
  394. return (Collator)super.clone();
  395. } catch (CloneNotSupportedException e) {
  396. throw new InternalError();
  397. }
  398. }
  399. /**
  400. * Compares the equality of two Collators.
  401. * @param that the Collator to be compared with this.
  402. * @return true if this Collator is the same as that Collator;
  403. * false otherwise.
  404. */
  405. public boolean equals(Object that)
  406. {
  407. if (this == that) return true;
  408. if (that == null) return false;
  409. if (getClass() != that.getClass()) return false;
  410. Collator other = (Collator) that;
  411. return ((strength == other.strength) &&
  412. (decmp == other.decmp));
  413. }
  414. /**
  415. * Generates the hash code for this Collator.
  416. */
  417. abstract public int hashCode();
  418. /**
  419. * Default constructor. This constructor is
  420. * protected so subclasses can get access to it. Users typically create
  421. * a Collator sub-class by calling the factory method getInstance.
  422. * @see java.text.Collator#getInstance
  423. */
  424. protected Collator()
  425. {
  426. strength = TERTIARY;
  427. decmp = CANONICAL_DECOMPOSITION;
  428. }
  429. private int strength = 0;
  430. private int decmp = 0;
  431. private static SoftCache cache = new SoftCache();
  432. //
  433. // FIXME: These three constants should be removed.
  434. //
  435. /**
  436. * LESS is returned if source string is compared to be less than target
  437. * string in the compare() method.
  438. * @see java.text.Collator#compare
  439. */
  440. final static int LESS = -1;
  441. /**
  442. * EQUAL is returned if source string is compared to be equal to target
  443. * string in the compare() method.
  444. * @see java.text.Collator#compare
  445. */
  446. final static int EQUAL = 0;
  447. /**
  448. * GREATER is returned if source string is compared to be greater than
  449. * target string in the compare() method.
  450. * @see java.text.Collator#compare
  451. */
  452. final static int GREATER = 1;
  453. }