1. /*
  2. * %W% %E%
  3. *
  4. * Copyright 1997-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation is copyrighted
  15. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  16. * materials are provided under terms of a License Agreement between Taligent
  17. * and Sun. This technology is protected by multiple US and International
  18. * patents. This notice and attribution to Taligent may not be removed.
  19. * Taligent is a registered trademark of Taligent, Inc.
  20. *
  21. */
  22. package java.text;
  23. import java.util.Vector;
  24. /**
  25. * This class contains the static state of a RuleBasedCollator: The various
  26. * tables that are used by the collation routines. Several RuleBasedCollators
  27. * can share a single RBCollationTables object, easing memory requirements and
  28. * improving performance.
  29. */
  30. final class RBCollationTables {
  31. //===========================================================================================
  32. // The following diagram shows the data structure of the RBCollationTables object.
  33. // Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
  34. // "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
  35. // What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
  36. // sorts 'o-umlaut' as if it's always expanded with 'e'.
  37. //
  38. // mapping table contracting list expanding list
  39. // (contains all unicode char
  40. // entries) ___ ____________ _________________________
  41. // ________ +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')|
  42. // |_\u0001_|-> v('\u0001') | |_:_| |------------| | |-------------------------|
  43. // |_\u0002_|-> v('\u0002') | |_:_| |'ch'|v('ch')| | | : |
  44. // |____:___| | |_:_| |------------| | |-------------------------|
  45. // |____:___| | |'cH'|v('cH')| | | : |
  46. // |__'a'___|-> v('a') | |------------| | |-------------------------|
  47. // |__'b'___|-> v('b') | |'Ch'|v('Ch')| | | : |
  48. // |____:___| | |------------| | |-------------------------|
  49. // |____:___| | |'CH'|v('CH')| | | : |
  50. // |___'c'__|---------------- ------------ | |-------------------------|
  51. // |____:___| | | : |
  52. // |o-umlaut|---------------------------------------- |_________________________|
  53. // |____:___|
  54. //
  55. // Noted by Helena Shih on 6/23/97
  56. //============================================================================================
  57. public RBCollationTables(String rules, int decmp) throws ParseException {
  58. this.rules = rules;
  59. RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
  60. builder.build(rules, decmp); // this object is filled in through
  61. // the BuildAPI object
  62. }
  63. final class BuildAPI {
  64. /**
  65. * Private constructor. Prevents anyone else besides RBTableBuilder
  66. * from gaining direct access to the internals of this class.
  67. */
  68. private BuildAPI() {
  69. }
  70. /**
  71. * This function is used by RBTableBuilder to fill in all the members of this
  72. * object. (Effectively, the builder class functions as a "friend" of this
  73. * class, but to avoid changing too much of the logic, it carries around "shadow"
  74. * copies of all these variables until the end of the build process and then
  75. * copies them en masse into the actual tables object once all the construction
  76. * logic is complete. This function does that "copying en masse".
  77. * @param f2ary The value for frenchSec (the French-secondary flag)
  78. * @param map The collator's character-mapping table (the value for mapping)
  79. * @param cTbl The collator's contracting-character table (the value for contractTable)
  80. * @param eTbl The collator's expanding-character table (the value for expandTable)
  81. * @param cFlgs The hash table of characters that participate in contracting-
  82. * character sequences (the value for contractFlags)
  83. * @param mso The value for maxSecOrder
  84. * @param mto The value for maxTerOrder
  85. */
  86. void fillInTables(boolean f2ary,
  87. CompactIntArray map,
  88. Vector cTbl,
  89. Vector eTbl,
  90. IntHashtable cFlgs,
  91. short mso,
  92. short mto) {
  93. frenchSec = f2ary;
  94. mapping = map;
  95. contractTable = cTbl;
  96. expandTable = eTbl;
  97. contractFlags = cFlgs;
  98. maxSecOrder = mso;
  99. maxTerOrder = mto;
  100. }
  101. }
  102. /**
  103. * Gets the table-based rules for the collation object.
  104. * @return returns the collation rules that the table collation object
  105. * was created from.
  106. */
  107. public String getRules()
  108. {
  109. return rules;
  110. }
  111. public boolean isFrenchSec() {
  112. return frenchSec;
  113. }
  114. // ==============================================================
  115. // internal (for use by CollationElementIterator)
  116. // ==============================================================
  117. /**
  118. * Get the entry of hash table of the contracting string in the collation
  119. * table.
  120. * @param ch the starting character of the contracting string
  121. */
  122. Vector getContractValues(char ch)
  123. {
  124. int index = mapping.elementAt(ch);
  125. return getContractValues(index - CONTRACTCHARINDEX);
  126. }
  127. Vector getContractValues(int index)
  128. {
  129. if (index >= 0)
  130. {
  131. return (Vector)contractTable.elementAt(index);
  132. }
  133. else // not found
  134. {
  135. return null;
  136. }
  137. }
  138. /**
  139. * Returns true if this character appears anywhere in a contracting
  140. * character sequence. (Used by CollationElementIterator.setOffset().)
  141. */
  142. boolean usedInContractSeq(char c) {
  143. return contractFlags.get(c) == 1;
  144. }
  145. /**
  146. * Return the maximum length of any expansion sequences that end
  147. * with the specified comparison order.
  148. *
  149. * @param order a collation order returned by previous or next.
  150. * @return the maximum length of any expansion seuences ending
  151. * with the specified order.
  152. *
  153. * @see CollationElementIterator#getMaxExpansion
  154. */
  155. int getMaxExpansion(int order)
  156. {
  157. int result = 1;
  158. if (expandTable != null) {
  159. // Right now this does a linear search through the entire
  160. // expandsion table. If a collator had a large number of expansions,
  161. // this could cause a performance problem, but in practise that
  162. // rarely happens
  163. for (int i = 0; i < expandTable.size(); i++) {
  164. int[] valueList = (int [])expandTable.elementAt(i);
  165. int length = valueList.length;
  166. if (length > result && valueList[length-1] == order) {
  167. result = length;
  168. }
  169. }
  170. }
  171. return result;
  172. }
  173. /**
  174. * Get the entry of hash table of the expanding string in the collation
  175. * table.
  176. * @param idx the index of the expanding string value list
  177. */
  178. final int[] getExpandValueList(int order) {
  179. return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
  180. }
  181. /**
  182. * Get the comarison order of a character from the collation table.
  183. * @return the comparison order of a character.
  184. */
  185. int getUnicodeOrder(char ch)
  186. {
  187. return mapping.elementAt(ch);
  188. }
  189. short getMaxSecOrder() {
  190. return maxSecOrder;
  191. }
  192. short getMaxTerOrder() {
  193. return maxTerOrder;
  194. }
  195. /**
  196. * Reverse a string.
  197. */
  198. static void reverse (StringBuffer result, int from, int to)
  199. {
  200. int i = from;
  201. char swap;
  202. int j = to - 1;
  203. while (i < j) {
  204. swap = result.charAt(i);
  205. result.setCharAt(i, result.charAt(j));
  206. result.setCharAt(j, swap);
  207. i++;
  208. j--;
  209. }
  210. }
  211. final static int getEntry(Vector list, String name, boolean fwd) {
  212. for (int i = 0; i < list.size(); i++) {
  213. EntryPair pair = (EntryPair)list.elementAt(i);
  214. if (pair.fwd == fwd && pair.entryName.equals(name)) {
  215. return i;
  216. }
  217. }
  218. return UNMAPPED;
  219. }
  220. // ==============================================================
  221. // constants
  222. // ==============================================================
  223. final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
  224. final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
  225. final static int UNMAPPED = 0xFFFFFFFF;
  226. final static int PRIMARYORDERMASK = 0xffff0000;
  227. final static int SECONDARYORDERMASK = 0x0000ff00;
  228. final static int TERTIARYORDERMASK = 0x000000ff;
  229. final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
  230. final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
  231. final static int PRIMARYORDERSHIFT = 16;
  232. final static int SECONDARYORDERSHIFT = 8;
  233. // ==============================================================
  234. // instance variables
  235. // ==============================================================
  236. private String rules = null;
  237. private boolean frenchSec = false;
  238. private CompactIntArray mapping = null;
  239. private Vector contractTable = null;
  240. private Vector expandTable = null;
  241. private IntHashtable contractFlags = null;
  242. private short maxSecOrder = 0;
  243. private short maxTerOrder = 0;
  244. }