1. /*
  2. * @(#)Collator.java 1.39 04/05/05
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation is copyrighted
  12. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  13. * materials are provided under terms of a License Agreement between Taligent
  14. * and Sun. This technology is protected by multiple US and International
  15. * patents. This notice and attribution to Taligent may not be removed.
  16. * Taligent is a registered trademark of Taligent, Inc.
  17. *
  18. */
  19. package java.text;
  20. import java.util.Locale;
  21. import java.util.MissingResourceException;
  22. import java.util.ResourceBundle;
  23. import sun.misc.SoftCache;
  24. import sun.text.resources.LocaleData;
  25. /**
  26. * The <code>Collator</code> class performs locale-sensitive
  27. * <code>String</code> comparison. You use this class to build
  28. * searching and sorting routines for natural language text.
  29. *
  30. * <p>
  31. * <code>Collator</code> is an abstract base class. Subclasses
  32. * implement specific collation strategies. One subclass,
  33. * <code>RuleBasedCollator</code>, is currently provided with
  34. * the Java 2 platform and is applicable to a wide set of languages. Other
  35. * subclasses may be created to handle more specialized needs.
  36. *
  37. * <p>
  38. * Like other locale-sensitive classes, you can use the static
  39. * factory method, <code>getInstance</code>, to obtain the appropriate
  40. * <code>Collator</code> object for a given locale. You will only need
  41. * to look at the subclasses of <code>Collator</code> if you need
  42. * to understand the details of a particular collation strategy or
  43. * if you need to modify that strategy.
  44. *
  45. * <p>
  46. * The following example shows how to compare two strings using
  47. * the <code>Collator</code> for the default locale.
  48. * <blockquote>
  49. * <pre>
  50. * // Compare two strings in the default locale
  51. * Collator myCollator = Collator.getInstance();
  52. * if( myCollator.compare("abc", "ABC") &lt; 0 )
  53. * System.out.println("abc is less than ABC");
  54. * else
  55. * System.out.println("abc is greater than or equal to ABC");
  56. * </pre>
  57. * </blockquote>
  58. *
  59. * <p>
  60. * You can set a <code>Collator</code>'s <em>strength</em> property
  61. * to determine the level of difference considered significant in
  62. * comparisons. Four strengths are provided: <code>PRIMARY</code>,
  63. * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
  64. * The exact assignment of strengths to language features is
  65. * locale dependent. For example, in Czech, "e" and "f" are considered
  66. * primary differences, while "e" and "\u00EA" are secondary differences,
  67. * "e" and "E" are tertiary differences and "e" and "e" are identical.
  68. * The following shows how both case and accents could be ignored for
  69. * US English.
  70. * <blockquote>
  71. * <pre>
  72. * //Get the Collator for US English and set its strength to PRIMARY
  73. * Collator usCollator = Collator.getInstance(Locale.US);
  74. * usCollator.setStrength(Collator.PRIMARY);
  75. * if( usCollator.compare("abc", "ABC") == 0 ) {
  76. * System.out.println("Strings are equivalent");
  77. * }
  78. * </pre>
  79. * </blockquote>
  80. * <p>
  81. * For comparing <code>String</code>s exactly once, the <code>compare</code>
  82. * method provides the best performance. When sorting a list of
  83. * <code>String</code>s however, it is generally necessary to compare each
  84. * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
  85. * provide better performance. The <code>CollationKey</code> class converts
  86. * a <code>String</code> to a series of bits that can be compared bitwise
  87. * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
  88. * created by a <code>Collator</code> object for a given <code>String</code>.
  89. * <br>
  90. * <strong>Note:</strong> <code>CollationKey</code>s from different
  91. * <code>Collator</code>s can not be compared. See the class description
  92. * for {@link CollationKey}
  93. * for an example using <code>CollationKey</code>s.
  94. *
  95. * @see RuleBasedCollator
  96. * @see CollationKey
  97. * @see CollationElementIterator
  98. * @see Locale
  99. * @version 1.39, 05/05/04
  100. * @author Helena Shih, Laura Werner, Richard Gillam
  101. */
  102. public abstract class Collator
  103. implements java.util.Comparator<Object>, Cloneable
  104. {
  105. /**
  106. * Collator strength value. When set, only PRIMARY differences are
  107. * considered significant during comparison. The assignment of strengths
  108. * to language features is locale dependant. A common example is for
  109. * different base letters ("a" vs "b") to be considered a PRIMARY difference.
  110. * @see java.text.Collator#setStrength
  111. * @see java.text.Collator#getStrength
  112. */
  113. public final static int PRIMARY = 0;
  114. /**
  115. * Collator strength value. When set, only SECONDARY and above differences are
  116. * considered significant during comparison. The assignment of strengths
  117. * to language features is locale dependant. A common example is for
  118. * different accented forms of the same base letter ("a" vs "\u00E4") to be
  119. * considered a SECONDARY difference.
  120. * @see java.text.Collator#setStrength
  121. * @see java.text.Collator#getStrength
  122. */
  123. public final static int SECONDARY = 1;
  124. /**
  125. * Collator strength value. When set, only TERTIARY and above differences are
  126. * considered significant during comparison. The assignment of strengths
  127. * to language features is locale dependant. A common example is for
  128. * case differences ("a" vs "A") to be considered a TERTIARY difference.
  129. * @see java.text.Collator#setStrength
  130. * @see java.text.Collator#getStrength
  131. */
  132. public final static int TERTIARY = 2;
  133. /**
  134. * Collator strength value. When set, all differences are
  135. * considered significant during comparison. The assignment of strengths
  136. * to language features is locale dependant. A common example is for control
  137. * characters ("\u0001" vs "\u0002") to be considered equal at the
  138. * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
  139. * level. Additionally, differences between pre-composed accents such as
  140. * "\u00C0" (A-grave) and combining accents such as "A\u0300"
  141. * (A, combining-grave) will be considered significant at the IDENTICAL
  142. * level if decomposition is set to NO_DECOMPOSITION.
  143. */
  144. public final static int IDENTICAL = 3;
  145. /**
  146. * Decomposition mode value. With NO_DECOMPOSITION
  147. * set, accented characters will not be decomposed for collation. This
  148. * is the default setting and provides the fastest collation but
  149. * will only produce correct results for languages that do not use accents.
  150. * @see java.text.Collator#getDecomposition
  151. * @see java.text.Collator#setDecomposition
  152. */
  153. public final static int NO_DECOMPOSITION = 0;
  154. /**
  155. * Decomposition mode value. With CANONICAL_DECOMPOSITION
  156. * set, characters that are canonical variants according to Unicode
  157. * standard will be decomposed for collation. This should be used to get
  158. * correct collation of accented characters.
  159. * <p>
  160. * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
  161. * described in
  162. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  163. * Technical Report #15</a>.
  164. * @see java.text.Collator#getDecomposition
  165. * @see java.text.Collator#setDecomposition
  166. */
  167. public final static int CANONICAL_DECOMPOSITION = 1;
  168. /**
  169. * Decomposition mode value. With FULL_DECOMPOSITION
  170. * set, both Unicode canonical variants and Unicode compatibility variants
  171. * will be decomposed for collation. This causes not only accented
  172. * characters to be collated, but also characters that have special formats
  173. * to be collated with their norminal form. For example, the half-width and
  174. * full-width ASCII and Katakana characters are then collated together.
  175. * FULL_DECOMPOSITION is the most complete and therefore the slowest
  176. * decomposition mode.
  177. * <p>
  178. * FULL_DECOMPOSITION corresponds to Normalization Form KD as
  179. * described in
  180. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  181. * Technical Report #15</a>.
  182. * @see java.text.Collator#getDecomposition
  183. * @see java.text.Collator#setDecomposition
  184. */
  185. public final static int FULL_DECOMPOSITION = 2;
  186. /**
  187. * Gets the Collator for the current default locale.
  188. * The default locale is determined by java.util.Locale.getDefault.
  189. * @return the Collator for the default locale.(for example, en_US)
  190. * @see java.util.Locale#getDefault
  191. */
  192. public static synchronized Collator getInstance() {
  193. return getInstance(Locale.getDefault());
  194. }
  195. /**
  196. * Gets the Collator for the desired locale.
  197. * @param desiredLocale the desired locale.
  198. * @return the Collator for the desired locale.
  199. * @see java.util.Locale
  200. * @see java.util.ResourceBundle
  201. */
  202. public static synchronized
  203. Collator getInstance(Locale desiredLocale)
  204. {
  205. RuleBasedCollator result = null;
  206. result = (RuleBasedCollator) cache.get(desiredLocale);
  207. if (result != null) {
  208. return (Collator)result.clone(); // make the world safe
  209. }
  210. // Load the resource of the desired locale from resource
  211. // manager.
  212. String colString = "";
  213. int decomp = CANONICAL_DECOMPOSITION;
  214. try {
  215. ResourceBundle resource = LocaleData.getLocaleElements(desiredLocale);
  216. colString = resource.getString("CollationElements");
  217. decomp = ((Integer)resource.getObject("CollationDecomp")).intValue();
  218. } catch (MissingResourceException e) {
  219. // Use default values
  220. }
  221. try
  222. {
  223. result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
  224. colString,
  225. decomp );
  226. }
  227. catch(ParseException foo)
  228. {
  229. // predefined tables should contain correct grammar
  230. try {
  231. result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
  232. } catch (ParseException bar) {
  233. // do nothing
  234. }
  235. }
  236. // Now that RuleBasedCollator adds expansions for pre-composed characters
  237. // into their decomposed equivalents, the default collators don't need
  238. // to have decomposition turned on. Laura, 5/5/98, bug 4114077
  239. result.setDecomposition(NO_DECOMPOSITION);
  240. cache.put(desiredLocale,result);
  241. return (Collator)result.clone();
  242. }
  243. /**
  244. * Compares the source string to the target string according to the
  245. * collation rules for this Collator. Returns an integer less than,
  246. * equal to or greater than zero depending on whether the source String is
  247. * less than, equal to or greater than the target string. See the Collator
  248. * class description for an example of use.
  249. * <p>
  250. * For a one time comparison, this method has the best performance. If a
  251. * given String will be involved in multiple comparisons, CollationKey.compareTo
  252. * has the best performance. See the Collator class description for an example
  253. * using CollationKeys.
  254. * @param source the source string.
  255. * @param target the target string.
  256. * @return Returns an integer value. Value is less than zero if source is less than
  257. * target, value is zero if source and target are equal, value is greater than zero
  258. * if source is greater than target.
  259. * @see java.text.CollationKey
  260. * @see java.text.Collator#getCollationKey
  261. */
  262. public abstract int compare(String source, String target);
  263. /**
  264. * Compares its two arguments for order. Returns a negative integer,
  265. * zero, or a positive integer as the first argument is less than, equal
  266. * to, or greater than the second.
  267. * <p>
  268. * This implementation merely returns
  269. * <code> compare((String)o1, (String)o2) </code>.
  270. *
  271. * @return a negative integer, zero, or a positive integer as the
  272. * first argument is less than, equal to, or greater than the
  273. * second.
  274. * @exception ClassCastException the arguments cannot be cast to Strings.
  275. * @see java.util.Comparator
  276. * @since 1.2
  277. */
  278. public int compare(Object o1, Object o2) {
  279. return compare((String)o1, (String)o2);
  280. }
  281. /**
  282. * Transforms the String into a series of bits that can be compared bitwise
  283. * to other CollationKeys. CollationKeys provide better performance than
  284. * Collator.compare when Strings are involved in multiple comparisons.
  285. * See the Collator class description for an example using CollationKeys.
  286. * @param source the string to be transformed into a collation key.
  287. * @return the CollationKey for the given String based on this Collator's collation
  288. * rules. If the source String is null, a null CollationKey is returned.
  289. * @see java.text.CollationKey
  290. * @see java.text.Collator#compare
  291. */
  292. public abstract CollationKey getCollationKey(String source);
  293. /**
  294. * Convenience method for comparing the equality of two strings based on
  295. * this Collator's collation rules.
  296. * @param source the source string to be compared with.
  297. * @param target the target string to be compared with.
  298. * @return true if the strings are equal according to the collation
  299. * rules. false, otherwise.
  300. * @see java.text.Collator#compare
  301. */
  302. public boolean equals(String source, String target)
  303. {
  304. return (compare(source, target) == Collator.EQUAL);
  305. }
  306. /**
  307. * Returns this Collator's strength property. The strength property determines
  308. * the minimum level of difference considered significant during comparison.
  309. * See the Collator class description for an example of use.
  310. * @return this Collator's current strength property.
  311. * @see java.text.Collator#setStrength
  312. * @see java.text.Collator#PRIMARY
  313. * @see java.text.Collator#SECONDARY
  314. * @see java.text.Collator#TERTIARY
  315. * @see java.text.Collator#IDENTICAL
  316. */
  317. public synchronized int getStrength()
  318. {
  319. return strength;
  320. }
  321. /**
  322. * Sets this Collator's strength property. The strength property determines
  323. * the minimum level of difference considered significant during comparison.
  324. * See the Collator class description for an example of use.
  325. * @param newStrength the new strength value.
  326. * @see java.text.Collator#getStrength
  327. * @see java.text.Collator#PRIMARY
  328. * @see java.text.Collator#SECONDARY
  329. * @see java.text.Collator#TERTIARY
  330. * @see java.text.Collator#IDENTICAL
  331. * @exception IllegalArgumentException If the new strength value is not one of
  332. * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
  333. */
  334. public synchronized void setStrength(int newStrength) {
  335. if ((newStrength != PRIMARY) &&
  336. (newStrength != SECONDARY) &&
  337. (newStrength != TERTIARY) &&
  338. (newStrength != IDENTICAL))
  339. throw new IllegalArgumentException("Incorrect comparison level.");
  340. strength = newStrength;
  341. }
  342. /**
  343. * Get the decomposition mode of this Collator. Decomposition mode
  344. * determines how Unicode composed characters are handled. Adjusting
  345. * decomposition mode allows the user to select between faster and more
  346. * complete collation behavior.
  347. * <p>The three values for decomposition mode are:
  348. * <UL>
  349. * <LI>NO_DECOMPOSITION,
  350. * <LI>CANONICAL_DECOMPOSITION
  351. * <LI>FULL_DECOMPOSITION.
  352. * </UL>
  353. * See the documentation for these three constants for a description
  354. * of their meaning.
  355. * @return the decomposition mode
  356. * @see java.text.Collator#setDecomposition
  357. * @see java.text.Collator#NO_DECOMPOSITION
  358. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  359. * @see java.text.Collator#FULL_DECOMPOSITION
  360. */
  361. public synchronized int getDecomposition()
  362. {
  363. return decmp;
  364. }
  365. /**
  366. * Set the decomposition mode of this Collator. See getDecomposition
  367. * for a description of decomposition mode.
  368. * @param decompositionMode the new decomposition mode.
  369. * @see java.text.Collator#getDecomposition
  370. * @see java.text.Collator#NO_DECOMPOSITION
  371. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  372. * @see java.text.Collator#FULL_DECOMPOSITION
  373. * @exception IllegalArgumentException If the given value is not a valid decomposition
  374. * mode.
  375. */
  376. public synchronized void setDecomposition(int decompositionMode) {
  377. if ((decompositionMode != NO_DECOMPOSITION) &&
  378. (decompositionMode != CANONICAL_DECOMPOSITION) &&
  379. (decompositionMode != FULL_DECOMPOSITION))
  380. throw new IllegalArgumentException("Wrong decomposition mode.");
  381. decmp = decompositionMode;
  382. }
  383. /**
  384. * Returns an array of all locales for which the
  385. * <code>getInstance</code> methods of this class can return
  386. * localized instances.
  387. * The array returned must contain at least a <code>Locale</code>
  388. * instance equal to {@link java.util.Locale#US Locale.US}.
  389. *
  390. * @return An array of locales for which localized
  391. * <code>Collator</code> instances are available.
  392. */
  393. public static synchronized Locale[] getAvailableLocales() {
  394. return LocaleData.getAvailableLocales("CollationElements");
  395. }
  396. /**
  397. * Overrides Cloneable
  398. */
  399. public Object clone()
  400. {
  401. try {
  402. return (Collator)super.clone();
  403. } catch (CloneNotSupportedException e) {
  404. throw new InternalError();
  405. }
  406. }
  407. /**
  408. * Compares the equality of two Collators.
  409. * @param that the Collator to be compared with this.
  410. * @return true if this Collator is the same as that Collator;
  411. * false otherwise.
  412. */
  413. public boolean equals(Object that)
  414. {
  415. if (this == that) return true;
  416. if (that == null) return false;
  417. if (getClass() != that.getClass()) return false;
  418. Collator other = (Collator) that;
  419. return ((strength == other.strength) &&
  420. (decmp == other.decmp));
  421. }
  422. /**
  423. * Generates the hash code for this Collator.
  424. */
  425. abstract public int hashCode();
  426. /**
  427. * Default constructor. This constructor is
  428. * protected so subclasses can get access to it. Users typically create
  429. * a Collator sub-class by calling the factory method getInstance.
  430. * @see java.text.Collator#getInstance
  431. */
  432. protected Collator()
  433. {
  434. strength = TERTIARY;
  435. decmp = CANONICAL_DECOMPOSITION;
  436. }
  437. private int strength = 0;
  438. private int decmp = 0;
  439. private static SoftCache cache = new SoftCache();
  440. //
  441. // FIXME: These three constants should be removed.
  442. //
  443. /**
  444. * LESS is returned if source string is compared to be less than target
  445. * string in the compare() method.
  446. * @see java.text.Collator#compare
  447. */
  448. final static int LESS = -1;
  449. /**
  450. * EQUAL is returned if source string is compared to be equal to target
  451. * string in the compare() method.
  452. * @see java.text.Collator#compare
  453. */
  454. final static int EQUAL = 0;
  455. /**
  456. * GREATER is returned if source string is compared to be greater than
  457. * target string in the compare() method.
  458. * @see java.text.Collator#compare
  459. */
  460. final static int GREATER = 1;
  461. }