1. /*
  2. * @(#)RBCollationTables.java 1.9 03/12/19
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation is copyrighted
  12. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  13. * materials are provided under terms of a License Agreement between Taligent
  14. * and Sun. This technology is protected by multiple US and International
  15. * patents. This notice and attribution to Taligent may not be removed.
  16. * Taligent is a registered trademark of Taligent, Inc.
  17. *
  18. */
  19. package java.text;
  20. import java.util.Vector;
  21. import sun.text.UCompactIntArray;
  22. import sun.text.IntHashtable;
  23. /**
  24. * This class contains the static state of a RuleBasedCollator: The various
  25. * tables that are used by the collation routines. Several RuleBasedCollators
  26. * can share a single RBCollationTables object, easing memory requirements and
  27. * improving performance.
  28. */
  29. final class RBCollationTables {
  30. //===========================================================================================
  31. // The following diagram shows the data structure of the RBCollationTables object.
  32. // Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
  33. // "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
  34. // The rule sorts the 'ch' ligatures and 'c' with only a tertiary difference, and
  35. // sorts 'o-umlaut' as if it were always expanded with 'e'.
  36. //
  37. // mapping table              contracting list         expanding list
  38. // (contains all unicode char
  39. //  entries)                   ___    ____________      _________________________
  40. //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
  41. // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
  42. // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |            :            |
  43. // |____:___|               | |_:_|  |------------|  | |-------------------------|
  44. // |____:___|               |        |'cH'|v('cH')|  | |            :            |
  45. // |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
  46. // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |            :            |
  47. // |____:___|               |        |------------|  | |-------------------------|
  48. // |____:___|               |        |'CH'|v('CH')|  | |            :            |
  49. // |___'c'__|----------------         ------------   | |-------------------------|
  50. // |____:___|                                        | |            :            |
  51. // |o-umlaut|----------------------------------------- |_________________________|
  52. // |____:___|
  53. //
  54. // Noted by Helena Shih on 6/23/97
  55. //============================================================================================
  /**
   * Creates the collation tables for the given rule string.
   *
   * <p>The tables are not populated directly here: an RBTableBuilder is handed a
   * BuildAPI object tied to this instance and fills this object in through it.
   *
   * @param rules the collation rules; also retained so that getRules() can return them
   * @param decmp passed unchanged to RBTableBuilder.build (presumably the
   *     decomposition mode -- confirm against RBTableBuilder)
   * @throws ParseException declared by this constructor; NOTE(review): presumably
   *     raised by the build step for malformed rules -- confirm against RBTableBuilder
   */
  public RBCollationTables(String rules, int decmp) throws ParseException {
    this.rules = rules;
    // The builder writes its results back into this object via the BuildAPI instance.
    new RBTableBuilder(new BuildAPI()).build(rules, decmp);
  }
  62. final class BuildAPI {
  63. /**
  64. * Private constructor. Prevents anyone else besides RBTableBuilder
  65. * from gaining direct access to the internals of this class.
  66. */
  67. private BuildAPI() {
  68. }
  69. /**
  70. * This function is used by RBTableBuilder to fill in all the members of this
  71. * object. (Effectively, the builder class functions as a "friend" of this
  72. * class, but to avoid changing too much of the logic, it carries around "shadow"
  73. * copies of all these variables until the end of the build process and then
  74. * copies them en masse into the actual tables object once all the construction
  75. * logic is complete. This function does that "copying en masse".
  76. * @param f2ary The value for frenchSec (the French-secondary flag)
  77. * @param swap The value for SE Asian swapping rule
  78. * @param map The collator's character-mapping table (the value for mapping)
  79. * @param cTbl The collator's contracting-character table (the value for contractTable)
  80. * @param eTbl The collator's expanding-character table (the value for expandTable)
  81. * @param cFlgs The hash table of characters that participate in contracting-
  82. * character sequences (the value for contractFlags)
  83. * @param mso The value for maxSecOrder
  84. * @param mto The value for maxTerOrder
  85. */
  86. void fillInTables(boolean f2ary,
  87. boolean swap,
  88. UCompactIntArray map,
  89. Vector cTbl,
  90. Vector eTbl,
  91. IntHashtable cFlgs,
  92. short mso,
  93. short mto) {
  94. frenchSec = f2ary;
  95. seAsianSwapping = swap;
  96. mapping = map;
  97. contractTable = cTbl;
  98. expandTable = eTbl;
  99. contractFlags = cFlgs;
  100. maxSecOrder = mso;
  101. maxTerOrder = mto;
  102. }
  103. }
  104. /**
  105. * Gets the table-based rules for the collation object.
  106. * @return returns the collation rules that the table collation object
  107. * was created from.
  108. */
  109. public String getRules()
  110. {
  111. return rules;
  112. }
  113. public boolean isFrenchSec() {
  114. return frenchSec;
  115. }
  116. public boolean isSEAsianSwapping() {
  117. return seAsianSwapping;
  118. }
  119. // ==============================================================
  120. // internal (for use by CollationElementIterator)
  121. // ==============================================================
  122. /**
  123. * Get the entry of hash table of the contracting string in the collation
  124. * table.
  125. * @param ch the starting character of the contracting string
  126. */
  127. Vector getContractValues(int ch)
  128. {
  129. int index = mapping.elementAt(ch);
  130. return getContractValuesImpl(index - CONTRACTCHARINDEX);
  131. }
  132. //get contract values from contractTable by index
  133. private Vector getContractValuesImpl(int index)
  134. {
  135. if (index >= 0)
  136. {
  137. return (Vector)contractTable.elementAt(index);
  138. }
  139. else // not found
  140. {
  141. return null;
  142. }
  143. }
  144. /**
  145. * Returns true if this character appears anywhere in a contracting
  146. * character sequence. (Used by CollationElementIterator.setOffset().)
  147. */
  148. boolean usedInContractSeq(int c) {
  149. return contractFlags.get(c) == 1;
  150. }
  151. /**
  152. * Return the maximum length of any expansion sequences that end
  153. * with the specified comparison order.
  154. *
  155. * @param order a collation order returned by previous or next.
  156. * @return the maximum length of any expansion seuences ending
  157. * with the specified order.
  158. *
  159. * @see CollationElementIterator#getMaxExpansion
  160. */
  161. int getMaxExpansion(int order)
  162. {
  163. int result = 1;
  164. if (expandTable != null) {
  165. // Right now this does a linear search through the entire
  166. // expandsion table. If a collator had a large number of expansions,
  167. // this could cause a performance problem, but in practise that
  168. // rarely happens
  169. for (int i = 0; i < expandTable.size(); i++) {
  170. int[] valueList = (int [])expandTable.elementAt(i);
  171. int length = valueList.length;
  172. if (length > result && valueList[length-1] == order) {
  173. result = length;
  174. }
  175. }
  176. }
  177. return result;
  178. }
  179. /**
  180. * Get the entry of hash table of the expanding string in the collation
  181. * table.
  182. * @param idx the index of the expanding string value list
  183. */
  184. final int[] getExpandValueList(int order) {
  185. return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
  186. }
  187. /**
  188. * Get the comarison order of a character from the collation table.
  189. * @return the comparison order of a character.
  190. */
  191. int getUnicodeOrder(int ch)
  192. {
  193. return mapping.elementAt(ch);
  194. }
  195. short getMaxSecOrder() {
  196. return maxSecOrder;
  197. }
  198. short getMaxTerOrder() {
  199. return maxTerOrder;
  200. }
  201. /**
  202. * Reverse a string.
  203. */
  204. //shemran/Note: this is used for secondary order value reverse, no
  205. // need to consider supplementary pair.
  206. static void reverse (StringBuffer result, int from, int to)
  207. {
  208. int i = from;
  209. char swap;
  210. int j = to - 1;
  211. while (i < j) {
  212. swap = result.charAt(i);
  213. result.setCharAt(i, result.charAt(j));
  214. result.setCharAt(j, swap);
  215. i++;
  216. j--;
  217. }
  218. }
  219. final static int getEntry(Vector list, String name, boolean fwd) {
  220. for (int i = 0; i < list.size(); i++) {
  221. EntryPair pair = (EntryPair)list.elementAt(i);
  222. if (pair.fwd == fwd && pair.entryName.equals(name)) {
  223. return i;
  224. }
  225. }
  226. return UNMAPPED;
  227. }
  228. // ==============================================================
  229. // constants
  230. // ==============================================================
  231. //sherman/Todo: is the value big enough?????
  232. final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
  233. final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
  234. final static int UNMAPPED = 0xFFFFFFFF;
  235. final static int PRIMARYORDERMASK = 0xffff0000;
  236. final static int SECONDARYORDERMASK = 0x0000ff00;
  237. final static int TERTIARYORDERMASK = 0x000000ff;
  238. final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
  239. final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
  240. final static int PRIMARYORDERSHIFT = 16;
  241. final static int SECONDARYORDERSHIFT = 8;
  242. // ==============================================================
  243. // instance variables
  244. // ==============================================================
  245. private String rules = null;
  246. private boolean frenchSec = false;
  247. private boolean seAsianSwapping = false;
  248. private UCompactIntArray mapping = null;
  249. private Vector contractTable = null;
  250. private Vector expandTable = null;
  251. private IntHashtable contractFlags = null;
  252. private short maxSecOrder = 0;
  253. private short maxTerOrder = 0;
  254. }