1. /*
  2. * @(#)RBCollationTables.java 1.7 03/01/23
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation is copyrighted
  12. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  13. * materials are provided under terms of a License Agreement between Taligent
  14. * and Sun. This technology is protected by multiple US and International
  15. * patents. This notice and attribution to Taligent may not be removed.
  16. * Taligent is a registered trademark of Taligent, Inc.
  17. *
  18. */
  19. package java.text;
  20. import java.util.Vector;
  21. import sun.text.CompactIntArray;
  22. import sun.text.IntHashtable;
  23. /**
  24. * This class contains the static state of a RuleBasedCollator: The various
  25. * tables that are used by the collation routines. Several RuleBasedCollators
  26. * can share a single RBCollationTables object, easing memory requirements and
  27. * improving performance.
  28. */
  29. final class RBCollationTables {
  30. //===========================================================================================
  31. // The following diagram shows the data structure of the RBCollationTables object.
  32. // Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
  33. // "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
  // The rule sorts the 'ch' ligatures and 'c' with only a tertiary difference, and
  // sorts 'o-umlaut' as if it were always expanded with 'e'.
  36. //
  // mapping table                    contracting list         expanding list
  // (contains all unicode char
  //  entries)                   ___    ____________      _________________________
  //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
  // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
  // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |            :            |
  // |____:___|               | |_:_|  |------------|  | |-------------------------|
  // |____:___|               |        |'cH'|v('cH')|  | |            :            |
  // |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
  // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |            :            |
  // |____:___|               |        |------------|  | |-------------------------|
  // |____:___|               |        |'CH'|v('CH')|  | |            :            |
  // |___'c'__|----------------         ------------   | |-------------------------|
  // |____:___|                                        | |            :            |
  // |o-umlaut|----------------------------------------- |_________________________|
  // |____:___|
  53. //
  54. // Noted by Helena Shih on 6/23/97
  55. //============================================================================================
  /**
   * Creates the collation tables for the given rule string.
   *
   * <p>The rule string is kept so that {@link #getRules()} can return it later. The
   * tables themselves are produced by an {@code RBTableBuilder}, which writes its
   * results back into this object through the {@link BuildAPI} instance it is given.
   *
   * @param rules the collation rules to build the tables from
   * @param decmp passed unchanged to {@code RBTableBuilder.build}; presumably the
   *     decomposition mode -- confirm against RBTableBuilder
   * @throws ParseException propagated from the table build
   */
  public RBCollationTables(String rules, int decmp) throws ParseException {
    this.rules = rules;
    // The builder fills in this instance through the BuildAPI object.
    new RBTableBuilder(new BuildAPI()).build(rules, decmp);
  }
  62. final class BuildAPI {
  63. /**
  64. * Private constructor. Prevents anyone else besides RBTableBuilder
  65. * from gaining direct access to the internals of this class.
  66. */
  67. private BuildAPI() {
  68. }
  69. /**
  70. * This function is used by RBTableBuilder to fill in all the members of this
  71. * object. (Effectively, the builder class functions as a "friend" of this
  72. * class, but to avoid changing too much of the logic, it carries around "shadow"
  73. * copies of all these variables until the end of the build process and then
  74. * copies them en masse into the actual tables object once all the construction
  75. * logic is complete. This function does that "copying en masse".
  76. * @param f2ary The value for frenchSec (the French-secondary flag)
  77. * @param swap The value for SE Asian swapping rule
  78. * @param map The collator's character-mapping table (the value for mapping)
  79. * @param cTbl The collator's contracting-character table (the value for contractTable)
  80. * @param eTbl The collator's expanding-character table (the value for expandTable)
  81. * @param cFlgs The hash table of characters that participate in contracting-
  82. * character sequences (the value for contractFlags)
  83. * @param mso The value for maxSecOrder
  84. * @param mto The value for maxTerOrder
  85. */
  86. void fillInTables(boolean f2ary,
  87. boolean swap,
  88. CompactIntArray map,
  89. Vector cTbl,
  90. Vector eTbl,
  91. IntHashtable cFlgs,
  92. short mso,
  93. short mto) {
  94. frenchSec = f2ary;
  95. seAsianSwapping = swap;
  96. mapping = map;
  97. contractTable = cTbl;
  98. expandTable = eTbl;
  99. contractFlags = cFlgs;
  100. maxSecOrder = mso;
  101. maxTerOrder = mto;
  102. }
  103. }
  104. /**
  105. * Gets the table-based rules for the collation object.
  106. * @return returns the collation rules that the table collation object
  107. * was created from.
  108. */
  109. public String getRules()
  110. {
  111. return rules;
  112. }
  113. public boolean isFrenchSec() {
  114. return frenchSec;
  115. }
  116. public boolean isSEAsianSwapping() {
  117. return seAsianSwapping;
  118. }
  119. // ==============================================================
  120. // internal (for use by CollationElementIterator)
  121. // ==============================================================
  122. /**
  123. * Get the entry of hash table of the contracting string in the collation
  124. * table.
  125. * @param ch the starting character of the contracting string
  126. */
  127. Vector getContractValues(char ch)
  128. {
  129. int index = mapping.elementAt(ch);
  130. return getContractValues(index - CONTRACTCHARINDEX);
  131. }
  132. Vector getContractValues(int index)
  133. {
  134. if (index >= 0)
  135. {
  136. return (Vector)contractTable.elementAt(index);
  137. }
  138. else // not found
  139. {
  140. return null;
  141. }
  142. }
  143. /**
  144. * Returns true if this character appears anywhere in a contracting
  145. * character sequence. (Used by CollationElementIterator.setOffset().)
  146. */
  147. boolean usedInContractSeq(char c) {
  148. return contractFlags.get(c) == 1;
  149. }
  150. /**
  151. * Return the maximum length of any expansion sequences that end
  152. * with the specified comparison order.
  153. *
  154. * @param order a collation order returned by previous or next.
  155. * @return the maximum length of any expansion seuences ending
  156. * with the specified order.
  157. *
  158. * @see CollationElementIterator#getMaxExpansion
  159. */
  160. int getMaxExpansion(int order)
  161. {
  162. int result = 1;
  163. if (expandTable != null) {
  164. // Right now this does a linear search through the entire
  165. // expandsion table. If a collator had a large number of expansions,
  166. // this could cause a performance problem, but in practise that
  167. // rarely happens
  168. for (int i = 0; i < expandTable.size(); i++) {
  169. int[] valueList = (int [])expandTable.elementAt(i);
  170. int length = valueList.length;
  171. if (length > result && valueList[length-1] == order) {
  172. result = length;
  173. }
  174. }
  175. }
  176. return result;
  177. }
  178. /**
  179. * Get the entry of hash table of the expanding string in the collation
  180. * table.
  181. * @param idx the index of the expanding string value list
  182. */
  183. final int[] getExpandValueList(int order) {
  184. return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
  185. }
  186. /**
  187. * Get the comarison order of a character from the collation table.
  188. * @return the comparison order of a character.
  189. */
  190. int getUnicodeOrder(char ch)
  191. {
  192. return mapping.elementAt(ch);
  193. }
  194. short getMaxSecOrder() {
  195. return maxSecOrder;
  196. }
  197. short getMaxTerOrder() {
  198. return maxTerOrder;
  199. }
  200. /**
  201. * Reverse a string.
  202. */
  203. static void reverse (StringBuffer result, int from, int to)
  204. {
  205. int i = from;
  206. char swap;
  207. int j = to - 1;
  208. while (i < j) {
  209. swap = result.charAt(i);
  210. result.setCharAt(i, result.charAt(j));
  211. result.setCharAt(j, swap);
  212. i++;
  213. j--;
  214. }
  215. }
  216. final static int getEntry(Vector list, String name, boolean fwd) {
  217. for (int i = 0; i < list.size(); i++) {
  218. EntryPair pair = (EntryPair)list.elementAt(i);
  219. if (pair.fwd == fwd && pair.entryName.equals(name)) {
  220. return i;
  221. }
  222. }
  223. return UNMAPPED;
  224. }
  225. // ==============================================================
  226. // constants
  227. // ==============================================================
  228. final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
  229. final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
  230. final static int UNMAPPED = 0xFFFFFFFF;
  231. final static int PRIMARYORDERMASK = 0xffff0000;
  232. final static int SECONDARYORDERMASK = 0x0000ff00;
  233. final static int TERTIARYORDERMASK = 0x000000ff;
  234. final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
  235. final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
  236. final static int PRIMARYORDERSHIFT = 16;
  237. final static int SECONDARYORDERSHIFT = 8;
  238. // ==============================================================
  239. // instance variables
  240. // ==============================================================
  241. private String rules = null;
  242. private boolean frenchSec = false;
  243. private boolean seAsianSwapping = false;
  244. private CompactIntArray mapping = null;
  245. private Vector contractTable = null;
  246. private Vector expandTable = null;
  247. private IntHashtable contractFlags = null;
  248. private short maxSecOrder = 0;
  249. private short maxTerOrder = 0;
  250. }