1. /*
  2. * @(#)CompactCharArray.java 1.15 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)CompactCharArray.java 1.15 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1996-1998 Sun Microsystems, Inc. All Rights Reserved.
  14. *
  15. * The original version of this source code and documentation is copyrighted
  16. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  17. * materials are provided under terms of a License Agreement between Taligent
  18. * and Sun. This technology is protected by multiple US and International
  19. * patents. This notice and attribution to Taligent may not be removed.
  20. * Taligent is a registered trademark of Taligent, Inc.
  21. *
  22. * Permission to use, copy, modify, and distribute this software
  23. * and its documentation for NON-COMMERCIAL purposes and without
  24. * fee is hereby granted provided that this copyright notice
  25. * appears in all copies. Please refer to the file "copyright.html"
  26. * for further important copyright and licensing information.
  27. *
  28. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  29. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  30. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  31. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  32. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  33. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  34. *
  35. */
  36. package java.text;
  37. /**
  38. * class CompactATypeArray : use only on primitive data types
  39. * Provides a compact way to store information that is indexed by Unicode
  40. * values, such as character properties, types, keyboard values, etc.This
  41. * is very useful when you have a block of Unicode data that contains
  42. * significant values while the rest of the Unicode data is unused in the
  43. * application or when you have a lot of redundance, such as where all 21,000
  44. * Han ideographs have the same value. However, lookup is much faster than a
  45. * hash table.
  46. * A compact array of any primitive data type serves two purposes:
  47. * <UL type = round>
  48. * <LI>Fast access of the indexed values.
  49. * <LI>Smaller memory footprint.
  50. * </UL>
  51. * A compact array is composed of a index array and value array. The index
  52. * array contains the indicies of Unicode characters to the value array.
  53. *
  54. * @see CompactByteArray
  55. * @see CompactIntArray
  56. * @see CompactShortArray
  57. * @see CompactStringArray
  58. * @version 1.15 11/29/01
  59. * @author Helena Shih
  60. */
  61. final class CompactCharArray implements Cloneable{
  62. /**
  63. * The total number of Unicode characters.
  64. */
  65. public static final int UNICODECOUNT =65536;
  66. /**
  67. * Default constructor for CompactCharArray, the default value of the
  68. * compact array is '\u0000'.
  69. */
  70. public CompactCharArray()
  71. {
  72. this('\u0000');
  73. }
  74. /**
  75. * Contructor for CompactCharArray.
  76. * @param defaultValue the default value of the compact array.
  77. */
  78. public CompactCharArray(char defaultValue)
  79. {
  80. int i;
  81. values = new char[UNICODECOUNT];
  82. indices = new short[INDEXCOUNT];
  83. hashes = new int[INDEXCOUNT];
  84. for (i = 0; i < UNICODECOUNT; ++i) {
  85. values[i] = defaultValue;
  86. }
  87. for (i = 0; i < INDEXCOUNT; ++i) {
  88. indices[i] = (short)(i<<BLOCKSHIFT);
  89. hashes[i] = 0;
  90. }
  91. isCompact = false;
  92. }
  93. /**
  94. * Constructor for CompactCharArray.
  95. * @param indexArray the indicies of the compact array.
  96. * @param newValues the values of the compact array.
  97. * @exception IllegalArgumentException If the index is out of range.
  98. */
  99. public CompactCharArray(short indexArray[], char newValues[])
  100. {
  101. int i;
  102. if (indexArray.length != INDEXCOUNT)
  103. throw new IllegalArgumentException("Index out of bounds.");
  104. for (i = 0; i < INDEXCOUNT; ++i) {
  105. short index = indexArray[i];
  106. if ((index < 0) || (index >= newValues.length+BLOCKCOUNT))
  107. throw new IllegalArgumentException("Index out of bounds.");
  108. }
  109. indices = indexArray;
  110. values = newValues;
  111. isCompact = true;
  112. }
  113. /**
  114. * Get the mapped value of a Unicode character.
  115. * @param index the character to get the mapped value with
  116. * @return the mapped value of the given character
  117. */
  118. public char elementAt(char index) // parameterized on short
  119. {
  120. return (values[(indices[index >> BLOCKSHIFT] & 0xFFFF)
  121. + (index & BLOCKMASK)]);
  122. }
  123. /**
  124. * Set a new value for a Unicode character.
  125. * Set automatically expands the array if it is compacted.
  126. * @param index the character to set the mapped value with
  127. * @param value the new mapped value
  128. */
  129. public void setElementAt(char index, char value)
  130. {
  131. if (isCompact)
  132. expand();
  133. values[(int)index] = value;
  134. touchBlock(index >> BLOCKSHIFT, value);
  135. }
  136. /**
  137. * Set new values for a range of Unicode character.
  138. * @param start the starting offset of the range
  139. * @param end the endding offset of the range
  140. * @param value the new mapped value
  141. */
  142. public void setElementAt(char start, char end, char value)
  143. {
  144. int i;
  145. if (isCompact) {
  146. expand();
  147. }
  148. for (i = start; i <= end; ++i) {
  149. values[i] = value;
  150. touchBlock(i >> BLOCKSHIFT, value);
  151. }
  152. }
  153. /**
  154. *Compact the array.
  155. */
  156. public void compact()
  157. {
  158. if (!isCompact) {
  159. int limitCompacted = 0;
  160. int iBlockStart = 0;
  161. short iUntouched = -1;
  162. for (int i = 0; i < indices.length; ++i, iBlockStart += BLOCKCOUNT) {
  163. indices[i] = -1;
  164. boolean touched = blockTouched(i);
  165. if (!touched && iUntouched != -1) {
  166. // If no values in this block were set, we can just set its
  167. // index to be the same as some other block with no values
  168. // set, assuming we've seen one yet.
  169. indices[i] = iUntouched;
  170. } else {
  171. int jBlockStart = 0;
  172. int j = 0;
  173. for (j = 0; j < limitCompacted;
  174. ++j, jBlockStart += BLOCKCOUNT) {
  175. if (hashes[i] == hashes[j] &&
  176. arrayRegionMatches(values, iBlockStart,
  177. values, jBlockStart, BLOCKCOUNT)) {
  178. indices[i] = (short)jBlockStart;
  179. }
  180. }
  181. if (indices[i] == -1) {
  182. // we didn't match, so copy & update
  183. System.arraycopy(values, iBlockStart,
  184. values, jBlockStart, BLOCKCOUNT);
  185. indices[i] = (short)jBlockStart;
  186. hashes[j] = hashes[i];
  187. ++limitCompacted;
  188. if (!touched) {
  189. // If this is the first untouched block we've seen,
  190. // remember its index.
  191. iUntouched = (short)jBlockStart;
  192. }
  193. }
  194. }
  195. }
  196. // we are done compacting, so now make the array shorter
  197. int newSize = limitCompacted*BLOCKCOUNT;
  198. char[] result = new char[newSize];
  199. System.arraycopy(values, 0, result, 0, newSize);
  200. values = result;
  201. isCompact = true;
  202. hashes = null;
  203. }
  204. }
  205. /**
  206. * Convenience utility to compare two arrays of doubles.
  207. * @param len the length to compare.
  208. * The start indices and start+len must be valid.
  209. */
  210. final static boolean arrayRegionMatches(char[] source, int sourceStart,
  211. char[] target, int targetStart,
  212. int len)
  213. {
  214. int sourceEnd = sourceStart + len;
  215. int delta = targetStart - sourceStart;
  216. for (int i = sourceStart; i < sourceEnd; i++) {
  217. if (source[i] != target[i + delta])
  218. return false;
  219. }
  220. return true;
  221. }
  222. /**
  223. * Remember that a specified block was "touched", i.e. had a value set.
  224. * Untouched blocks can be skipped when compacting the array
  225. */
  226. private final void touchBlock(int i, int value) {
  227. hashes[i] = (hashes[i] + (value<<1)) | 1;
  228. }
  229. /**
  230. * Query whether a specified block was "touched", i.e. had a value set.
  231. * Untouched blocks can be skipped when compacting the array
  232. */
  233. private final boolean blockTouched(int i) {
  234. return hashes[i] != 0;
  235. }
  236. /** For internal use only. Do not modify the result, the behavior of
  237. * modified results are undefined.
  238. */
  239. public short getIndexArray()[]
  240. {
  241. return indices;
  242. }
  243. /** For internal use only. Do not modify the result, the behavior of
  244. * modified results are undefined.
  245. */
  246. public char getStringArray()[]
  247. {
  248. return values;
  249. }
  250. /**
  251. * Overrides Cloneable
  252. */
  253. public Object clone()
  254. {
  255. try {
  256. CompactCharArray other = (CompactCharArray) super.clone();
  257. other.values = (char[])values.clone();
  258. other.indices = (short[])indices.clone();
  259. if (hashes != null) other.hashes = (int[])hashes.clone();
  260. return other;
  261. } catch (CloneNotSupportedException e) {
  262. throw new InternalError();
  263. }
  264. }
  265. /**
  266. * Compares the equality of two compact array objects.
  267. * @param obj the compact array object to be compared with this.
  268. * @return true if the current compact array object is the same
  269. * as the compact array object obj; false otherwise.
  270. */
  271. public boolean equals(Object obj) {
  272. if (obj == null) return false;
  273. if (this == obj) // quick check
  274. return true;
  275. if (getClass() != obj.getClass()) // same class?
  276. return false;
  277. CompactCharArray other = (CompactCharArray) obj;
  278. for (int i = 0; i < UNICODECOUNT; i++) {
  279. // could be sped up later
  280. if (elementAt((char)i) != other.elementAt((char)i))
  281. return false;
  282. }
  283. return true; // we made it through the guantlet.
  284. }
  285. /**
  286. * Generates the hash code for the compact array object
  287. */
  288. public int hashCode() {
  289. int result = 0;
  290. int increment = Math.min(3, values.length16);
  291. for (int i = 0; i < values.length; i+= increment) {
  292. result = result * 37 + values[i];
  293. }
  294. return result;
  295. }
  296. // --------------------------------------------------------------
  297. // private
  298. // --------------------------------------------------------------
  299. /**
  300. * Expanding takes the array back to a 65536 element array.
  301. */
  302. private void expand()
  303. {
  304. int i;
  305. if (isCompact) {
  306. char[] tempArray;
  307. tempArray = new char[UNICODECOUNT];
  308. hashes = new int[INDEXCOUNT];
  309. for (i = 0; i < UNICODECOUNT; ++i) {
  310. char value = elementAt((char)i);
  311. tempArray[i] = value;
  312. touchBlock(i >> BLOCKSHIFT, value);
  313. }
  314. for (i = 0; i < INDEXCOUNT; ++i) {
  315. indices[i] = (short)(i<<BLOCKSHIFT);
  316. }
  317. values = null;
  318. values = tempArray;
  319. isCompact = false;
  320. }
  321. }
  322. private char getArrayValue(int n)
  323. {
  324. return values[n];
  325. }
  326. private short getIndexArrayValue(int n)
  327. {
  328. return indices[n];
  329. }
  330. private static final int BLOCKSHIFT =7;
  331. private static final int BLOCKCOUNT =(1<<BLOCKSHIFT);
  332. private static final int INDEXSHIFT =(16-BLOCKSHIFT);
  333. private static final int INDEXCOUNT =(1<<INDEXSHIFT);
  334. private static final int BLOCKMASK = BLOCKCOUNT - 1;
  335. private char[] values; // char -> short (char parameterized short)
  336. private short indices[];
  337. private int[] hashes;
  338. private boolean isCompact;
  339. };