1. /*
  2. * @(#)Collator.java 1.27 00/01/19
  3. *
  4. * Copyright 1997-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation is copyrighted
  15. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  16. * materials are provided under terms of a License Agreement between Taligent
  17. * and Sun. This technology is protected by multiple US and International
  18. * patents. This notice and attribution to Taligent may not be removed.
  19. * Taligent is a registered trademark of Taligent, Inc.
  20. *
  21. */
  22. package java.text;
  23. import java.util.Locale;
  24. import java.util.MissingResourceException;
  25. import java.util.ResourceBundle;
  26. import java.text.resources.*;
  27. import sun.misc.SoftCache;
  28. /**
  29. * The <code>Collator</code> class performs locale-sensitive
  30. * <code>String</code> comparison. You use this class to build
  31. * searching and sorting routines for natural language text.
  32. *
  33. * <p>
  34. * <code>Collator</code> is an abstract base class. Subclasses
  35. * implement specific collation strategies. One subclass,
  36. * <code>RuleBasedCollator</code>, is currently provided with
  37. * the Java 2 platform and is applicable to a wide set of languages. Other
  38. * subclasses may be created to handle more specialized needs.
  39. *
  40. * <p>
  41. * Like other locale-sensitive classes, you can use the static
  42. * factory method, <code>getInstance</code>, to obtain the appropriate
  43. * <code>Collator</code> object for a given locale. You will only need
  44. * to look at the subclasses of <code>Collator</code> if you need
  45. * to understand the details of a particular collation strategy or
  46. * if you need to modify that strategy.
  47. *
  48. * <p>
  49. * The following example shows how to compare two strings using
  50. * the <code>Collator</code> for the default locale.
  51. * <blockquote>
  52. * <pre>
  53. * // Compare two strings in the default locale
  54. * Collator myCollator = Collator.getInstance();
  55. * if( myCollator.compare("abc", "ABC") &lt; 0 )
  56. * System.out.println("abc is less than ABC");
  57. * else
  58. * System.out.println("abc is greater than or equal to ABC");
  59. * </pre>
  60. * </blockquote>
  61. *
  62. * <p>
  63. * You can set a <code>Collator</code>'s <em>strength</em> property
  64. * to determine the level of difference considered significant in
  65. * comparisons. Four strengths are provided: <code>PRIMARY</code>,
  66. * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
  67. * The exact assignment of strengths to language features is
  68. * locale dependent. For example, in Czech, "e" and "f" are considered
  69. * primary differences, while "e" and "\u00EA" are secondary differences,
  70. * "e" and "E" are tertiary differences and "e" and "e" are identical.
  71. * The following shows how both case and accents could be ignored for
  72. * US English.
  73. * <blockquote>
  74. * <pre>
  75. * //Get the Collator for US English and set its strength to PRIMARY
  76. * Collator usCollator = Collator.getInstance(Locale.US);
  77. * usCollator.setStrength(Collator.PRIMARY);
  78. * if( usCollator.compare("abc", "ABC") == 0 ) {
  79. * System.out.println("Strings are equivalent");
  80. * }
  81. * </pre>
  82. * </blockquote>
  83. * <p>
  84. * For comparing <code>String</code>s exactly once, the <code>compare</code>
  85. * method provides the best performance. When sorting a list of
  86. * <code>String</code>s however, it is generally necessary to compare each
  87. * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
  88. * provide better performance. The <code>CollationKey</code> class converts
  89. * a <code>String</code> to a series of bits that can be compared bitwise
  90. * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
  91. * created by a <code>Collator</code> object for a given <code>String</code>.
  92. * <br>
  93. * <strong>Note:</strong> <code>CollationKey</code>s from different
  94. * <code>Collator</code>s cannot be compared. See the class description
  95. * for {@link CollationKey}
  96. * for an example using <code>CollationKey</code>s.
  97. *
  98. * @see RuleBasedCollator
  99. * @see CollationKey
  100. * @see CollationElementIterator
  101. * @see Locale
  102. * @version 1.27, 01/19/00
  103. * @author Helena Shih, Laura Werner, Richard Gillam
  104. */
  105. public abstract class Collator implements java.util.Comparator, Cloneable {
  106. /**
  107. * Collator strength value. When set, only PRIMARY differences are
  108. * considered significant during comparison. The assignment of strengths
  109. * to language features is locale dependant. A common example is for
  110. * different base letters ("a" vs "b") to be considered a PRIMARY difference.
  111. * @see java.text.Collator#setStrength
  112. * @see java.text.Collator#getStrength
  113. */
  114. public final static int PRIMARY = 0;
  115. /**
  116. * Collator strength value. When set, only SECONDARY and above differences are
  117. * considered significant during comparison. The assignment of strengths
  118. * to language features is locale dependant. A common example is for
  119. * different accented forms of the same base letter ("a" vs "\u00E4") to be
  120. * considered a SECONDARY difference.
  121. * @see java.text.Collator#setStrength
  122. * @see java.text.Collator#getStrength
  123. */
  124. public final static int SECONDARY = 1;
  125. /**
  126. * Collator strength value. When set, only TERTIARY and above differences are
  127. * considered significant during comparison. The assignment of strengths
  128. * to language features is locale dependant. A common example is for
  129. * case differences ("a" vs "A") to be considered a TERTIARY difference.
  130. * @see java.text.Collator#setStrength
  131. * @see java.text.Collator#getStrength
  132. */
  133. public final static int TERTIARY = 2;
  134. /**
  135. * Collator strength value. When set, all differences are
  136. * considered significant during comparison. The assignment of strengths
  137. * to language features is locale dependant. A common example is for control
  138. * characters ("\u0001" vs "\u0002") to be considered equal at the
  139. * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
  140. * level. Additionally, differences between pre-composed accents such as
  141. * "\u00C0" (A-grave) and combining accents such as "A\u0300"
  142. * (A, combining-grave) will be considered significant at the tertiary
  143. * level if decomposition is set to NO_DECOMPOSITION.
  144. */
  145. public final static int IDENTICAL = 3;
  146. /**
  147. * Decomposition mode value. With NO_DECOMPOSITION
  148. * set, accented characters will not be decomposed for collation. This
  149. * provides the fastest collation but will only produce correct results
  150. * for languages that do not use accents.
  151. * @see java.text.Collator#getDecomposition
  152. * @see java.text.Collator#setDecomposition
  153. */
  154. public final static int NO_DECOMPOSITION = 0;
  155. /**
  156. * Decomposition mode value. With CANONICAL_DECOMPOSITION
  157. * set, characters that are canonical variants according to Unicode 2.0
  158. * will be decomposed for collation. This is the default setting and
  159. * should be used to get correct collation of accented characters.
  160. * <p>
  161. * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
  162. * described in
  163. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  164. * Technical Report #15</a>.
  165. * @see java.text.Collator#getDecomposition
  166. * @see java.text.Collator#setDecomposition
  167. */
  168. public final static int CANONICAL_DECOMPOSITION = 1;
  169. /**
  170. * Decomposition mode value. With FULL_DECOMPOSITION
  171. * set, both Unicode canonical variants and Unicode compatibility variants
  172. * will be decomposed for collation. This causes not only accented
  173. * characters to be collated, but also characters that have special formats
  174. * to be collated with their norminal form. For example, the half-width and
  175. * full-width ASCII and Katakana characters are then collated together.
  176. * FULL_DECOMPOSITION is the most complete and therefore the slowest
  177. * decomposition mode.
  178. * <p>
  179. * FULL_DECOMPOSITION corresponds to Normalization Form DC as
  180. * described in
  181. * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
  182. * Technical Report #15</a>.
  183. * @see java.text.Collator#getDecomposition
  184. * @see java.text.Collator#setDecomposition
  185. */
  186. public final static int FULL_DECOMPOSITION = 2;
  187. /**
  188. * Gets the Collator for the current default locale.
  189. * The default locale is determined by java.util.Locale.getDefault.
  190. * @return the Collator for the default locale.(for example, en_US)
  191. * @see java.util.Locale#getDefault
  192. */
  193. public static synchronized Collator getInstance() {
  194. return getInstance(Locale.getDefault());
  195. }
  196. /**
  197. * Gets the Collator for the desired locale.
  198. * @param desiredLocale the desired locale.
  199. * @return the Collator for the desired locale.
  200. * @see java.util.Locale
  201. * @see java.util.ResourceBundle
  202. */
  203. public static synchronized
  204. Collator getInstance(Locale desiredLocale)
  205. {
  206. RuleBasedCollator result = null;
  207. result = (RuleBasedCollator) cache.get(desiredLocale);
  208. if (result != null) {
  209. return (Collator)result.clone(); // make the world safe
  210. }
  211. // Load the resource of the desired locale from resource
  212. // manager.
  213. String colString = "";
  214. int decomp = CANONICAL_DECOMPOSITION;
  215. try {
  216. ResourceBundle resource = ResourceBundle.getBundle
  217. ("java.text.resources.LocaleElements",
  218. desiredLocale);
  219. colString = resource.getString("CollationElements");
  220. decomp = ((Integer)resource.getObject("CollationDecomp")).intValue();
  221. } catch (MissingResourceException e) {
  222. // Use default values
  223. }
  224. try
  225. {
  226. result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
  227. colString,
  228. decomp );
  229. }
  230. catch(ParseException foo)
  231. {
  232. // predefined tables should contain correct grammar
  233. try {
  234. result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
  235. } catch (ParseException bar) {
  236. // do nothing
  237. }
  238. }
  239. // Now that RuleBasedCollator adds expansions for pre-composed characters
  240. // into their decomposed equivalents, the default collators don't need
  241. // to have decomposition turned on. Laura, 5/5/98, bug 4114077
  242. result.setDecomposition(NO_DECOMPOSITION);
  243. cache.put(desiredLocale,result);
  244. return (Collator)result.clone();
  245. }
  246. /**
  247. * Compares the source string to the target string according to the
  248. * collation rules for this Collator. Returns an integer less than,
  249. * equal to or greater than zero depending on whether the source String is
  250. * less than, equal to or greater than the target string. See the Collator
  251. * class description for an example of use.
  252. * <p>
  253. * For a one time comparison, this method has the best performance. If a
  254. * given String will be involved in multiple comparisons, CollationKey.compareTo
  255. * has the best performance. See the Collator class description for an example
  256. * using CollationKeys.
  257. * @param source the source string.
  258. * @param target the target string.
  259. * @return Returns an integer value. Value is less than zero if source is less than
  260. * target, value is zero if source and target are equal, value is greater than zero
  261. * if source is greater than target.
  262. * @see java.text.CollationKey
  263. * @see java.text.Collator#getCollationKey
  264. */
  265. public abstract int compare(String source, String target);
  266. /**
  267. * Compares its two arguments for order. Returns a negative integer,
  268. * zero, or a positive integer as the first argument is less than, equal
  269. * to, or greater than the second.
  270. * <p>
  271. * This implementation merely returns
  272. * <code> compare((String)o1, (String)o2) </code>.
  273. *
  274. * @return a negative integer, zero, or a positive integer as the
  275. * first argument is less than, equal to, or greater than the
  276. * second.
  277. * @exception ClassCastException the arguments cannot be cast to Strings.
  278. * @see java.util.Comparator
  279. * @since 1.2
  280. */
  281. public int compare(Object o1, Object o2) {
  282. return compare((String)o1, (String)o2);
  283. }
  284. /**
  285. * Transforms the String into a series of bits that can be compared bitwise
  286. * to other CollationKeys. CollationKeys provide better performance than
  287. * Collator.compare when Strings are involved in multiple comparisons.
  288. * See the Collator class description for an example using CollationKeys.
  289. * @param source the string to be transformed into a collation key.
  290. * @return the CollationKey for the given String based on this Collator's collation
  291. * rules. If the source String is null, a null CollationKey is returned.
  292. * @see java.text.CollationKey
  293. * @see java.text.Collator#compare
  294. */
  295. public abstract CollationKey getCollationKey(String source);
  296. /**
  297. * Convenience method for comparing the equality of two strings based on
  298. * this Collator's collation rules.
  299. * @param source the source string to be compared with.
  300. * @param target the target string to be compared with.
  301. * @return true if the strings are equal according to the collation
  302. * rules. false, otherwise.
  303. * @see java.text.Collator#compare
  304. */
  305. public boolean equals(String source, String target)
  306. {
  307. return (compare(source, target) == Collator.EQUAL);
  308. }
  309. /**
  310. * Returns this Collator's strength property. The strength property determines
  311. * the minimum level of difference considered significant during comparison.
  312. * See the Collator class description for an example of use.
  313. * @return this Collator's current strength property.
  314. * @see java.text.Collator#setStrength
  315. * @see java.text.Collator#PRIMARY
  316. * @see java.text.Collator#SECONDARY
  317. * @see java.text.Collator#TERTIARY
  318. * @see java.text.Collator#IDENTICAL
  319. */
  320. public synchronized int getStrength()
  321. {
  322. return strength;
  323. }
  324. /**
  325. * Sets this Collator's strength property. The strength property determines
  326. * the minimum level of difference considered significant during comparison.
  327. * See the Collator class description for an example of use.
  328. * @param the new strength value.
  329. * @see java.text.Collator#getStrength
  330. * @see java.text.Collator#PRIMARY
  331. * @see java.text.Collator#SECONDARY
  332. * @see java.text.Collator#TERTIARY
  333. * @see java.text.Collator#IDENTICAL
  334. * @exception IllegalArgumentException If the new strength value is not one of
  335. * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
  336. */
  337. public synchronized void setStrength(int newStrength) {
  338. if ((newStrength != PRIMARY) &&
  339. (newStrength != SECONDARY) &&
  340. (newStrength != TERTIARY) &&
  341. (newStrength != IDENTICAL))
  342. throw new IllegalArgumentException("Incorrect comparison level.");
  343. strength = newStrength;
  344. }
  345. /**
  346. * Get the decomposition mode of this Collator. Decomposition mode
  347. * determines how Unicode composed characters are handled. Adjusting
  348. * decomposition mode allows the user to select between faster and more
  349. * complete collation behavior.
  350. * <p>The three values for decomposition mode are:
  351. * <UL>
  352. * <LI>NO_DECOMPOSITION,
  353. * <LI>CANONICAL_DECOMPOSITION
  354. * <LI>FULL_DECOMPOSITION.
  355. * </UL>
  356. * See the documentation for these three constants for a description
  357. * of their meaning.
  358. * @return the decomposition mode
  359. * @see java.text.Collator#setDecomposition
  360. * @see java.text.Collator#NO_DECOMPOSITION
  361. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  362. * @see java.text.Collator#FULL_DECOMPOSITION
  363. */
  364. public synchronized int getDecomposition()
  365. {
  366. return decmp;
  367. }
  368. /**
  369. * Set the decomposition mode of this Collator. See getDecomposition
  370. * for a description of decomposition mode.
  371. * @param the new decomposition mode
  372. * @see java.text.Collator#getDecomposition
  373. * @see java.text.Collator#NO_DECOMPOSITION
  374. * @see java.text.Collator#CANONICAL_DECOMPOSITION
  375. * @see java.text.Collator#FULL_DECOMPOSITION
  376. * @exception IllegalArgumentException If the given value is not a valid decomposition
  377. * mode.
  378. */
  379. public synchronized void setDecomposition(int decompositionMode) {
  380. if ((decompositionMode != NO_DECOMPOSITION) &&
  381. (decompositionMode != CANONICAL_DECOMPOSITION) &&
  382. (decompositionMode != FULL_DECOMPOSITION))
  383. throw new IllegalArgumentException("Wrong decomposition mode.");
  384. decmp = decompositionMode;
  385. }
  386. /**
  387. * Get the set of Locales for which Collators are installed.
  388. * @return the list of available locales which collators are installed.
  389. */
  390. public static synchronized Locale[] getAvailableLocales() {
  391. return LocaleData.getAvailableLocales("CollationElements");
  392. }
  393. /**
  394. * Overrides Cloneable
  395. */
  396. public Object clone()
  397. {
  398. try {
  399. return (Collator)super.clone();
  400. } catch (CloneNotSupportedException e) {
  401. throw new InternalError();
  402. }
  403. }
  404. /**
  405. * Compares the equality of two Collators.
  406. * @param that the Collator to be compared with this.
  407. * @return true if this Collator is the same as that Collator;
  408. * false otherwise.
  409. */
  410. public boolean equals(Object that)
  411. {
  412. if (this == that) return true;
  413. if (that == null) return false;
  414. if (getClass() != that.getClass()) return false;
  415. Collator other = (Collator) that;
  416. return ((strength == other.strength) &&
  417. (decmp == other.decmp));
  418. }
  419. /**
  420. * Generates the hash code for this Collator.
  421. */
  422. abstract public int hashCode();
  423. /**
  424. * Default constructor. This constructor is
  425. * protected so subclasses can get access to it. Users typically create
  426. * a Collator sub-class by calling the factory method getInstance.
  427. * @see java.text.Collator#getInstance
  428. */
  429. protected Collator()
  430. {
  431. strength = TERTIARY;
  432. decmp = CANONICAL_DECOMPOSITION;
  433. }
  434. private int strength = 0;
  435. private int decmp = 0;
  436. private static SoftCache cache = new SoftCache();
  437. //
  438. // FIXME: These three constants should be removed.
  439. //
  440. /**
  441. * LESS is returned if source string is compared to be less than target
  442. * string in the compare() method.
  443. * @see java.text.Collator#compare
  444. */
  445. final static int LESS = -1;
  446. /**
  447. * EQUAL is returned if source string is compared to be equal to target
  448. * string in the compare() method.
  449. * @see java.text.Collator#compare
  450. */
  451. final static int EQUAL = 0;
  452. /**
  453. * GREATER is returned if source string is compared to be greater than
  454. * target string in the compare() method.
  455. * @see java.text.Collator#compare
  456. */
  457. final static int GREATER = 1;
  458. }