1. /*
  2. * @(#)Collator.java 1.22 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)Collator.java 1.22 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1997, 1998 Sun Microsystems, Inc. All Rights Reserved.
  14. *
  15. * The original version of this source code and documentation is copyrighted
  16. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  17. * materials are provided under terms of a License Agreement between Taligent
  18. * and Sun. This technology is protected by multiple US and International
  19. * patents. This notice and attribution to Taligent may not be removed.
  20. * Taligent is a registered trademark of Taligent, Inc.
  21. *
  22. * Permission to use, copy, modify, and distribute this software
  23. * and its documentation for NON-COMMERCIAL purposes and without
  24. * fee is hereby granted provided that this copyright notice
  25. * appears in all copies. Please refer to the file "copyright.html"
  26. * for further important copyright and licensing information.
  27. *
  28. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  29. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  30. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  31. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  32. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  33. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  34. *
  35. */
  36. package java.text;
  37. import java.util.Locale;
  38. import java.util.MissingResourceException;
  39. import java.util.ResourceBundle;
  40. import java.text.resources.*;
  41. import java.util.Hashtable;
  42. /**
  43. * The <code>Collator</code> class performs locale-sensitive
  44. * <code>String</code> comparison. You use this class to build
  45. * searching and sorting routines for natural language text.
  46. *
  47. * <p>
  48. * <code>Collator</code> is an abstract base class. Subclasses
  49. * implement specific collation strategies. One subclass,
  50. * <code>RuleBasedCollator</code>, is currently provided with
  51. * the JDK and is applicable to a wide set of languages. Other
  52. * subclasses may be created to handle more specialized needs.
  53. *
  54. * <p>
  55. * Like other locale-sensitive classes, you can use the static
  56. * factory method, <code>getInstance</code>, to obtain the appropriate
  57. * <code>Collator</code> object for a given locale. You will only need
  58. * to look at the subclasses of <code>Collator</code> if you need
  59. * to understand the details of a particular collation strategy or
  60. * if you need to modify that strategy.
  61. *
  62. * <p>
  63. * The following example shows how to compare two strings using
  64. * the <code>Collator</code> for the default locale.
  65. * <blockquote>
  66. * <pre>
  67. * // Compare two strings in the default locale
  68. * Collator myCollator = Collator.getInstance();
  69. * if( myCollator.compare("abc", "ABC") < 0 )
  70. * System.out.println("abc is less than ABC");
  71. * else
  72. * System.out.println("abc is greater than or equal to ABC");
  73. * </pre>
  74. * </blockquote>
  75. *
  76. * <p>
  77. * You can set a <code>Collator</code>'s <em>strength</em> property
  78. * to determine the level of difference considered significant in
  79. * comparisons. Four strengths are provided: <code>PRIMARY</code>,
  80. * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
  81. * The exact assignment of strengths to language features is
  82. * locale dependant. For example, in Czech, "e" and "f" are considered
  83. * primary differences, while "e" and "\u00EA" are secondary differences,
  84. * "e" and "E" are tertiary differences and "e" and "e" are identical.
  85. * The following shows how both case and accents could be ignored for
  86. * US English.
  87. * <blockquote>
  88. * <pre>
  89. * //Get the Collator for US English and set its strength to PRIMARY
  90. * Collator usCollator = Collator.getInstance(Locale.US);
  91. * usCollator.setStrength(Collator.PRIMARY);
  92. * if( usCollator.compare("abc", "ABC") == 0 ) {
  93. * System.out.println("Strings are equivalent");
  94. * }
  95. * </pre>
  96. * </blockquote>
  97. * <p>
  98. * For comparing <code>String</code>s exactly once, the <code>compare</code>
  99. * method provides the best performance. When sorting a list of
  100. * <code>String</code>s however, it is generally necessary to compare each
  101. * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
  102. * provide better performance. The <code>CollationKey</code> class converts
  103. * a <code>String</code> to a series of bits that can be compared bitwise
  104. * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
  105. * created by a <code>Collator</code> object for a given <code>String</code>.
  106. * <br>
  107. * <strong>Note:</strong> <code>CollationKey</code>s from different
  108. * <code>Collator</code>s can not be compared. See the class description
  109. * for {@link CollationKey}
  110. * for an example using <code>CollationKey</code>s.
  111. *
  112. * @see RuleBasedCollator
  113. * @see CollationKey
  114. * @see CollationElementIterator
  115. * @see Locale
  116. * @version 1.22 11/29/01
  117. * @author Helena Shih
  118. */
  119. public abstract class Collator implements java.util.Comparator, Cloneable {
  120. /**
  121. * Collator strength value. When set, only PRIMARY differences are
  122. * considered significant during comparison. The assignment of strengths
  123. * to language features is locale dependant. A common example is for
  124. * different base letters ("a" vs "b") to be considered a PRIMARY difference.
  125. * @see java.text.Collator#setStrength
  126. * @see java.text.Collator#getStrength
  127. */
  128. public final static int PRIMARY = 0;
  129. /**
  130. * Collator strength value. When set, only SECONDARY and above differences are
  131. * considered significant during comparison. The assignment of strengths
  132. * to language features is locale dependant. A common example is for
  133. * different accented forms of the same base letter ("a" vs "\u00E4") to be
  134. * considered a SECONDARY difference.
  135. * @see java.text.Collator#setStrength
  136. * @see java.text.Collator#getStrength
  137. */
  138. public final static int SECONDARY = 1;
  139. /**
  140. * Collator strength value. When set, only TERTIARY and above differences are
  141. * considered significant during comparison. The assignment of strengths
  142. * to language features is locale dependant. A common example is for
  143. * case differences ("a" vs "A") to be considered a TERTIARY difference.
  144. * @see java.text.Collator#setStrength
  145. * @see java.text.Collator#getStrength
  146. */
  147. public final static int TERTIARY = 2;
  148. /**
  149. * Collator strength value. When set, all differences are
  150. * considered significant during comparison. The assignment of strengths
  151. * to language features is locale dependant. A common example is for control
  152. * characters ("\u0001" vs "\u0002") to be considered equal at the
  153. * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
  154. * level. Additionally, differences between pre-composed accents such as
  155. * "\u00C0" (A-grave) and combining accents such as "A\u0300"
  156. * (A, combining-grave) will be considered significant at the tertiary
  157. * level if decomposition is set to NO_DECOMPOSITION.
  158. */
  159. public final static int IDENTICAL = 3;
  160. /**
  161. * Decomposition mode value. With NO_DECOMPOSITION
  162. * set, accented characters will not be decomposed for collation. This
  163. * provides the fastest collation but will only produce correct results
  164. * for languages that do not use accents.
  165. * @see java.text.Collator#getDecomposition
  166. * @see java.text.Collator#setDecomposition
  167. */
  168. public final static int NO_DECOMPOSITION = 0;
  169. /**
  170. * Decomposition mode value. With CANONICAL_DECOMPOSITION
  171. * set, characters that are canonical variants according to Unicode 2.0
  172. * will be decomposed for collation. This is the default setting and
  173. * should be used to get correct collation of accented characters.
  174. * <p>
  175. * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
  176. * described in
  177. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  178. * Technical Report #15</a>.
  179. * @see java.text.Collator#getDecomposition
  180. * @see java.text.Collator#setDecomposition
  181. */
  182. public final static int CANONICAL_DECOMPOSITION = 1;
  183. /**
  184. * Decomposition mode value. With FULL_DECOMPOSITION
  185. * set, both Unicode canonical variants and Unicode compatibility variants
  186. * will be decomposed for collation. This causes not only accented
  187. * characters to be collated, but also characters that have special formats
  188. * to be collated with their norminal form. For example, the half-width and
  189. * full-width ASCII and Katakana characters are then collated together.
  190. * FULL_DECOMPOSITION is the most complete and therefore the slowest
  191. * decomposition mode.
  192. * <p>
  193. * FULL_DECOMPOSITION corresponds to Normalization Form DC as
  194. * described in
  195. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  196. * Technical Report #15</a>.
  197. * @see java.text.Collator#getDecomposition
  198. * @see java.text.Collator#setDecomposition
  199. */
  200. public final static int FULL_DECOMPOSITION = 2;
  201. /**
  202. * Gets the Collator for the current default locale.
  203. * The default locale is determined by java.util.Locale.getDefault.
  204. * @return the Collator for the default locale.(for example, en_US)
  205. * @see java.util.Locale#getDefault
  206. */
  207. public static synchronized Collator getInstance() {
  208. return getInstance(Locale.getDefault());
  209. }
  210. /**
  211. * Gets the Collator for the desired locale.
  212. * @param desiredLocale the desired locale.
  213. * @return the Collator for the desired locale.
  214. * @see java.util.Locale
  215. * @see java.util.ResourceBundle
  216. */
  217. public static synchronized
  218. Collator getInstance(Locale desiredLocale)
  219. {
  220. RuleBasedCollator result = null;
  221. result = (RuleBasedCollator) cache.get(desiredLocale);
  222. if (result != null) {
  223. return (Collator)result.clone(); // make the world safe
  224. }
  225. // Load the resource of the desired locale from resource
  226. // manager.
  227. String colString = "";
  228. int decomp = CANONICAL_DECOMPOSITION;
  229. try {
  230. ResourceBundle resource = ResourceBundle.getBundle
  231. ("java.text.resources.LocaleElements",
  232. desiredLocale);
  233. colString = resource.getString("CollationElements");
  234. decomp = ((Integer)resource.getObject("CollationDecomp")).intValue();
  235. } catch (MissingResourceException e) {
  236. // Use default values
  237. }
  238. try
  239. {
  240. result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
  241. colString,
  242. decomp );
  243. }
  244. catch(ParseException foo)
  245. {
  246. // predefined tables should contain correct grammar
  247. try {
  248. result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
  249. } catch (ParseException bar) {
  250. // do nothing
  251. }
  252. }
  253. // Now that RuleBasedCollator adds expansions for pre-composed characters
  254. // into their decomposed equivalents, the default collators don't need
  255. // to have decomposition turned on. Laura, 5/5/98, bug 4114077
  256. result.setDecomposition(NO_DECOMPOSITION);
  257. cache.put(desiredLocale,result);
  258. return (Collator)result.clone();
  259. }
  260. /**
  261. * Compares the source string to the target string according to the
  262. * collation rules for this Collator. Returns an integer less than,
  263. * equal to or greater than zero depending on whether the source String is
  264. * less than, equal to or greater than the target string. See the Collator
  265. * class description for an example of use.
  266. * <p>
  267. * For a one time comparison, this method has the best performance. If a
  268. * given String will be involved in multiple comparisons, CollationKey.compareTo
  269. * has the best performance. See the Collator class description for an example
  270. * using CollationKeys.
  271. * @param source the source string.
  272. * @param target the target string.
  273. * @return Returns an integer value. Value is less than zero if source is less than
  274. * target, value is zero if source and target are equal, value is greater than zero
  275. * if source is greater than target.
  276. * @see java.text.CollationKey
  277. * @see java.text.Collator#getCollationKey
  278. */
  279. public abstract int compare(String source, String target);
  280. /**
  281. * Compares its two arguments for order. Returns a negative integer,
  282. * zero, or a positive integer as the first argument is less than, equal
  283. * to, or greater than the second.
  284. * <p>
  285. * This implementation merely returns
  286. * <code> compare((String)o1, (String)o2) </code>.
  287. *
  288. * @return a negative integer, zero, or a positive integer as the
  289. * first argument is less than, equal to, or greater than the
  290. * second.
  291. * @exception ClassCastException the arguments cannot be cast to Strings.
  292. * @see Comparator
  293. * @since JDK1.2
  294. */
  295. public int compare(Object o1, Object o2) {
  296. return compare((String)o1, (String)o2);
  297. }
  298. /**
  299. * Transforms the String into a series of bits that can be compared bitwise
  300. * to other CollationKeys. CollationKeys provide better performance than
  301. * Collator.compare when Strings are involved in multiple comparisons.
  302. * See the Collator class description for an example using CollationKeys.
  303. * @param source the string to be transformed into a collation key.
  304. * @return the CollationKey for the given String based on this Collator's collation
  305. * rules. If the source String is null, a null CollationKey is returned.
  306. * @see java.text.CollationKey
  307. * @see java.text.Collator#compare
  308. */
  309. public abstract CollationKey getCollationKey(String source);
  310. /**
  311. * Convenience method for comparing the equality of two strings based on
  312. * this Collator's collation rules.
  313. * @param source the source string to be compared with.
  314. * @param target the target string to be compared with.
  315. * @return true if the strings are equal according to the collation
  316. * rules. false, otherwise.
  317. * @see java.text.Collator#compare
  318. */
  319. public boolean equals(String source, String target)
  320. {
  321. return (compare(source, target) == Collator.EQUAL);
  322. }
  323. /**
  324. * Returns this Collator's strength property. The strength property determines
  325. * the minimum level of difference considered significant during comparison.
  326. * See the Collator class description for an example of use.
  327. * @return this Collator's current strength property.
  328. * @see java.text.Collator#setStrength
  329. * @see java.text.Collator#PRIMARY
  330. * @see java.text.Collator#SECONDARY
  331. * @see java.text.Collator#TERTIARY
  332. * @see java.text.Collator#IDENTICAL
  333. */
  334. public synchronized int getStrength()
  335. {
  336. return strength;
  337. }
  338. /**
  339. * Sets this Collator's strength property. The strength property determines
  340. * the minimum level of difference considered significant during comparison.
  341. * See the Collator class description for an example of use.
  342. * @param the new strength value.
  343. * @see java.text.Collator#getStrength
  344. * @see java.text.Collator#PRIMARY
  345. * @see java.text.Collator#SECONDARY
  346. * @see java.text.Collator#TERTIARY
  347. * @see java.text.Collator#IDENTICAL
  348. * @exception IllegalArgumentException If the new strength value is not one of
  349. * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
  350. */
  351. public synchronized void setStrength(int newStrength) {
  352. if ((newStrength != PRIMARY) &&
  353. (newStrength != SECONDARY) &&
  354. (newStrength != TERTIARY) &&
  355. (newStrength != IDENTICAL))
  356. throw new IllegalArgumentException("Incorrect comparison level.");
  357. strength = newStrength;
  358. }
  359. /**
  360. * Get the decomposition mode of this Collator. Decomposition mode
  361. * determines how Unicode composed characters are handled. Adjusting
  362. * decomposition mode allows the user to select between faster and more
  363. * complete collation behavior.
  364. * <p>The three values for decomposition mode are:
  365. * <UL>
  366. * <LI>NO_DECOMPOSITION,
  367. * <LI>CANONICAL_DECOMPOSITION
  368. * <LI>FULL_DECOMPOSITION.
  369. * </UL>
  370. * See the documentation for these three constants for a description
  371. * of their meaning.
  372. * @return the decomposition mode
  373. * @see java.text.Collator#setDecomposition
  374. * @see java.text.Collator#NO_DECOMPOSITION
  375. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  376. * @see java.text.Collator#FULL_DECOMPOSITION
  377. */
  378. public synchronized int getDecomposition()
  379. {
  380. return decmp;
  381. }
  382. /**
  383. * Set the decomposition mode of this Collator. See getDecomposition
  384. * for a description of decomposition mode.
  385. * @param the new decomposition mode
  386. * @see java.text.Collator#getDecomposition
  387. * @see java.text.Collator#NO_DECOMPOSITION
  388. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  389. * @see java.text.Collator#FULL_DECOMPOSITION
  390. * @exception IllegalArgumentException If the given value is not a valid decomposition
  391. * mode.
  392. */
  393. public synchronized void setDecomposition(int decompositionMode) {
  394. if ((decompositionMode != NO_DECOMPOSITION) &&
  395. (decompositionMode != CANONICAL_DECOMPOSITION) &&
  396. (decompositionMode != FULL_DECOMPOSITION))
  397. throw new IllegalArgumentException("Wrong decomposition mode.");
  398. decmp = decompositionMode;
  399. }
  400. /**
  401. * Get the set of Locales for which Collators are installed.
  402. * @return the list of available locales which collators are installed.
  403. */
  404. public static synchronized Locale[] getAvailableLocales() {
  405. return LocaleData.getAvailableLocales("CollationElements");
  406. }
  407. /**
  408. * Overrides Cloneable
  409. */
  410. public Object clone()
  411. {
  412. try {
  413. return (Collator)super.clone();
  414. } catch (CloneNotSupportedException e) {
  415. throw new InternalError();
  416. }
  417. }
  418. /**
  419. * Compares the equality of two Collators.
  420. * @param that the Collator to be compared with this.
  421. * @return true if this Collator is the same as that Collator;
  422. * false otherwise.
  423. */
  424. public boolean equals(Object that)
  425. {
  426. if (this == that) return true;
  427. if (that == null) return false;
  428. if (getClass() != that.getClass()) return false;
  429. Collator other = (Collator) that;
  430. return ((strength == other.strength) &&
  431. (decmp == other.decmp));
  432. }
  433. /**
  434. * Generates the hash code for this Collator.
  435. */
  436. abstract public int hashCode();
  437. /**
  438. * Default constructor. This constructor is
  439. * protected so subclasses can get access to it. Users typically create
  440. * a Collator sub-class by calling the factory method getInstance.
  441. * @see java.text.Collator#getInstance
  442. */
  443. protected Collator()
  444. {
  445. strength = TERTIARY;
  446. decmp = CANONICAL_DECOMPOSITION;
  447. }
  448. private int strength = 0;
  449. private int decmp = 0;
  450. private static Hashtable cache = new Hashtable();
  451. //
  452. // FIXME: These three constants should be removed.
  453. //
  454. /**
  455. * LESS is returned if source string is compared to be less than target
  456. * string in the compare() method.
  457. * @see java.text.Collator#compare
  458. */
  459. final static int LESS = -1;
  460. /**
  461. * EQUAL is returned if source string is compared to be equal to target
  462. * string in the compare() method.
  463. * @see java.text.Collator#compare
  464. */
  465. final static int EQUAL = 0;
  466. /**
  467. * GREATER is returned if source string is compared to be greater than
  468. * target string in the compare() method.
  469. * @see java.text.Collator#compare
  470. */
  471. final static int GREATER = 1;
  472. // Proclaims serialization compatibility to 1.1.
  473. static final long serialVersionUID = -7718728969026499504L;
  474. }