1. /*
  2. * @(#)CompactStringArray.java 1.13 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)CompactStringArray.java 1.13 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1996-1998 Sun Microsystems, Inc. All Rights Reserved.
  14. *
  15. * The original version of this source code and documentation is copyrighted
  16. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  17. * materials are provided under terms of a License Agreement between Taligent
  18. * and Sun. This technology is protected by multiple US and International
  19. * patents. This notice and attribution to Taligent may not be removed.
  20. * Taligent is a registered trademark of Taligent, Inc.
  21. *
  22. * Permission to use, copy, modify, and distribute this software
  23. * and its documentation for NON-COMMERCIAL purposes and without
  24. * fee is hereby granted provided that this copyright notice
  25. * appears in all copies. Please refer to the file "copyright.html"
  26. * for further important copyright and licensing information.
  27. *
  28. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  29. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  30. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  31. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  32. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  33. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  34. *
  35. */
  36. package java.text;
  37. /**
  38. * class CompactATypeArray : use only on primitive data types
  39. * Provides a compact way to store information that is indexed by Unicode
  40. * values, such as character properties, types, keyboard values, etc.This
  41. * is very useful when you have a block of Unicode data that contains
  42. * significant values while the rest of the Unicode data is unused in the
  43. * application or when you have a lot of redundance, such as where all 21,000
  44. * Han ideographs have the same value. However, lookup is much faster than a
  45. * hash table.
  46. * A compact array of any primitive data type serves two purposes:
  47. * <UL type = round>
  48. * <LI>Fast access of the indexed values.
  49. * <LI>Smaller memory footprint.
  50. * </UL>
  51. * A compact array is composed of a index array and value array. The index
  52. * array contains the indicies of Unicode characters to the value array.
  53. *
  54. * @see CompactShortArray
  55. * @see CompactByteArray
  56. * @see CompactIntArray
  57. * @see CompactCharArray
  58. * @version 1.13 11/29/01
  59. * @author Helena Shih
  60. */
  61. final class CompactStringArray implements Cloneable {
  62. /**
  63. * The total number of Unicode characters.
  64. */
  65. public static final int UNICODECOUNT =65536;
  66. /**
  67. * Default constructor for CompactStringArray, the default value of the
  68. * compact array is "".
  69. */
  70. public CompactStringArray()
  71. {
  72. this("");
  73. }
  74. /**
  75. * Constructor for CompactStringArray.
  76. * @param defaultValue the default value of the compact array.
  77. */
  78. public CompactStringArray(String defaultValue)
  79. {
  80. int i;
  81. values = new char[UNICODECOUNT]; /*type = char*/
  82. indices = new short[INDEXCOUNT];
  83. setElementAt((char)0,'\uFFFF',defaultValue);
  84. for (i = 0; i < INDEXCOUNT; ++i) {
  85. indices[i] = (short)(i<<BLOCKSHIFT);
  86. }
  87. isCompact = false;
  88. }
  89. /**
  90. * Constructor for CompactStringArray.
  91. * @param indexArray the indicies of the compact array.
  92. * @param newValues the values of the compact array.
  93. * @exception IllegalArgumentException If the index is out of range.
  94. */
  95. public CompactStringArray(short indexArray[],
  96. char[] newValues,
  97. String exceptions)
  98. {
  99. int i;
  100. if (indexArray.length != INDEXCOUNT)
  101. throw new IllegalArgumentException("Index out of bounds.");
  102. for (i = 0; i < INDEXCOUNT; ++i) {
  103. short index = indexArray[i];
  104. if ((index < 0) || (index >= newValues.length+BLOCKCOUNT))
  105. throw new IllegalArgumentException("Index out of bounds.");
  106. }
  107. indices = indexArray;
  108. values = newValues;
  109. }
  110. /**
  111. * Get the mapped value (String) of a Unicode character.
  112. * @param index the character to get the mapped value with
  113. * @param toAppendTo the string buffer to append the values to
  114. */
  115. public void elementAt(char index, StringBuffer toAppendTo)
  116. {
  117. char result = (values[(indices[index>>BLOCKSHIFT] & 0xFFFF) +
  118. (index & BLOCKMASK)]);
  119. if (result >= '\uE000' && result <= '\uF800') {
  120. for (int i = (int) result - 0xE000; ; ++i) {
  121. result = exceptions.charAt(i);
  122. if (result == '\uFFFF') return;
  123. toAppendTo.append(result);
  124. }
  125. } else {
  126. toAppendTo.append(result);
  127. }
  128. }
  129. /**
  130. * Get the mapped value of a Unicode character.
  131. * @param index the character to get the mapped value with
  132. * @return the mapped value of the given character
  133. */
  134. public String elementAt(char index) {
  135. StringBuffer result = new StringBuffer();
  136. elementAt(index,result);
  137. return result.toString();
  138. }
  139. /**
  140. * Set a new value for a Unicode character.
  141. * Set automatically expands the array if it is compacted.
  142. * @param index the character to set the mapped value with
  143. * @param value the new mapped value
  144. */
  145. public void setElementAt(char index, String value)
  146. {
  147. if (isCompact)
  148. expand();
  149. if (value.length() == 1) {
  150. char ch = value.charAt(0);
  151. if (ch < '\uE000' || ch >= '\uF800') {
  152. values[(int)index] = ch;
  153. return;
  154. }
  155. }
  156. // search for the string to see if it is already present
  157. String temp = value + '\uFFFF';
  158. int position = exceptions.toString().indexOf(temp);
  159. if (position != -1) {
  160. values[(int)index] = (char)(0xE000 + position);
  161. return;
  162. };
  163. // if not found, append.
  164. values[(int)index] = (char) (0xE000 + exceptions.length());
  165. for (int i = 0; i < value.length(); ++i) {
  166. exceptions.append(value.charAt(i));
  167. }
  168. exceptions.append('\uFFFF'); // termination
  169. }
  170. /**
  171. * Set new values for a range of Unicode character.
  172. * @param start the starting offset of the range
  173. * @param end the ending offset of the range
  174. * @param value the new mapped value
  175. */
  176. public void setElementAt(char start, char end, String value)
  177. {
  178. if (start >= end) return; // catch degenerate case
  179. setElementAt(start,value);
  180. char firstValue = values[(int)start];
  181. for (int i = start + 1; i <= end; ++i) {
  182. values[i] = firstValue;
  183. }
  184. }
  185. /**
  186. * Compact the array.
  187. */
  188. public void compact()
  189. {
  190. if (isCompact == false) {
  191. char[] tempIndex;
  192. int tempIndexCount;
  193. char[] tempArray;
  194. short iBlock, iIndex;
  195. // make temp storage, larger than we need
  196. tempIndex = new char[UNICODECOUNT];
  197. // set up first block.
  198. tempIndexCount = BLOCKCOUNT;
  199. for (iIndex = 0; iIndex < BLOCKCOUNT; ++iIndex) {
  200. tempIndex[iIndex] = (char)iIndex;
  201. }; // endfor (iIndex = 0; .....)
  202. indices[0] = (short)0;
  203. // for each successive block, find out its first position
  204. // in the compacted array
  205. for (iBlock = 1; iBlock < INDEXCOUNT; ++iBlock) {
  206. int newCount, firstPosition, block;
  207. block = iBlock<<BLOCKSHIFT;
  208. if (DEBUGSMALL) if (block > DEBUGSMALLLIMIT) break;
  209. firstPosition = FindOverlappingPosition(block, tempIndex,
  210. tempIndexCount);
  211. newCount = firstPosition + BLOCKCOUNT;
  212. if (newCount > tempIndexCount) {
  213. for (iIndex = (short)tempIndexCount;
  214. iIndex < newCount;
  215. ++iIndex) {
  216. tempIndex[iIndex]
  217. = (char)(iIndex - firstPosition + block);
  218. } // endfor (iIndex = tempIndexCount....)
  219. tempIndexCount = newCount;
  220. } // endif (newCount > tempIndexCount)
  221. indices[iBlock] = (short)firstPosition;
  222. } // endfor (iBlock = 1.....)
  223. // now allocate and copy the items into the array
  224. tempArray = new char[tempIndexCount];
  225. for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) {
  226. tempArray[iIndex] = values[tempIndex[iIndex]];
  227. }
  228. values = null;
  229. values = tempArray;
  230. isCompact = true;
  231. } // endif (isCompact != false)
  232. }
  233. /** For internal use only. Do not modify the result, the behavior of
  234. * modified results are undefined.
  235. */
  236. public short getIndexArray()[]
  237. {
  238. return indices;
  239. }
  240. /** For internal use only. Do not modify the result, the behavior of
  241. * modified results are undefined.
  242. */
  243. public char getStringArray()[]
  244. {
  245. return values;
  246. }
  247. /**
  248. * Overrides Cloneable
  249. */
  250. public Object clone()
  251. {
  252. try {
  253. CompactStringArray other = (CompactStringArray) super.clone();
  254. other.values = (char[])values.clone();
  255. other.indices = (short[])indices.clone();
  256. other.exceptions = new StringBuffer(exceptions.toString());
  257. return other;
  258. } catch (CloneNotSupportedException e) {
  259. throw new InternalError();
  260. }
  261. }
  262. /**
  263. * Compares the equality of two compact array objects.
  264. * @param obj the compact array object to be compared with this.
  265. * @return true if the current compact array object is the same
  266. * as the compact array object obj; false otherwise.
  267. */
  268. public boolean equals(Object obj) {
  269. if (obj == null) return false;
  270. if (this == obj) // quick check
  271. return true;
  272. if (getClass() != obj.getClass()) // same class?
  273. return false;
  274. CompactStringArray other = (CompactStringArray) obj;
  275. for (int i = 0; i < UNICODECOUNT; i++) {
  276. // could be sped up later
  277. if (elementAt((char)i) != other.elementAt((char)i))
  278. return false;
  279. }
  280. return true; // we made it through the guantlet.
  281. }
  282. /**
  283. * Generates the hash code for the compact array object
  284. */
  285. public int hashCode() {
  286. int result = 0;
  287. int increment = Math.min(3, values.length16);
  288. for (int i = 0; i < values.length; i+= increment) {
  289. result = result * 37 + values[i];
  290. }
  291. return result;
  292. }
  293. /**
  294. * package private : for internal use only
  295. */
  296. void writeArrays()
  297. {
  298. int i;
  299. int cnt = 0;
  300. if (values.length > 0)
  301. cnt = values.length;
  302. else
  303. cnt = values.length + UNICODECOUNT;
  304. System.out.println("{");
  305. for (i = 0; i < INDEXCOUNT-1; i++)
  306. {
  307. System.out.print("(short)" + (int)((getIndexArrayValue(i) >= 0) ?
  308. (int)getIndexArrayValue(i) :
  309. (int)(getIndexArrayValue(i)+UNICODECOUNT)) + ", ");
  310. if (i != 0)
  311. if (i % 10 == 0)
  312. System.out.println();
  313. }
  314. System.out.println("(char)" +
  315. (int)((getIndexArrayValue(INDEXCOUNT-1) >= 0) ?
  316. (int)getIndexArrayValue(i) :
  317. (int)(getIndexArrayValue(i)+UNICODECOUNT)) + " }");
  318. System.out.println("{");
  319. for (i = 0; i < cnt-1; i++)
  320. {
  321. char ch = getArrayValue(i);
  322. if (ch < 0x20 || (ch > 0x7E && ch < 0xA0) || ch > 0x100)
  323. System.out.print("(char)0x" +
  324. Integer.toString((int)ch,16).toUpperCase() + ",");
  325. else System.out.print("\'" + ch + "\',");
  326. if (i != 0)
  327. if (i % 10 == 0)
  328. System.out.println();
  329. }
  330. System.out.println("(char)" + (int)getArrayValue(cnt-1) + " }");
  331. System.out.println("\"" + exceptions.toString() + "\"");
  332. }
  333. // Print char Array : Debug only
  334. void printIndex(char start, short count)
  335. {
  336. int i;
  337. for (i = start; i < count; ++i)
  338. {
  339. System.out.println(i + " -> : " +
  340. (int)((indices[i] >= 0) ?
  341. indices[i] : indices[i] + UNICODECOUNT));
  342. }
  343. System.out.println();
  344. }
  345. void printPlainArray(int start,int count, char[] tempIndex)
  346. {
  347. int iIndex;
  348. if (tempIndex != null)
  349. {
  350. for (iIndex = start; iIndex < start + count; ++iIndex)
  351. {
  352. System.out.print(" " + (int)getArrayValue(tempIndex[iIndex]));
  353. }
  354. }
  355. else
  356. {
  357. for (iIndex = start; iIndex < start + count; ++iIndex)
  358. {
  359. System.out.print(" " + (int)getArrayValue(iIndex));
  360. }
  361. }
  362. System.out.println(" Range: start " + start + " , count " + count);
  363. }
  364. /**
  365. * private functions
  366. */
  367. /**
  368. * Expanded takes the array back to a 65536 element array
  369. */
  370. private void expand()
  371. {
  372. int i;
  373. if (isCompact) {
  374. char[] tempArray;
  375. tempArray = new char[UNICODECOUNT];
  376. for (i = 0; i < UNICODECOUNT; ++i) {
  377. tempArray[i] =(values[((int)indices[i>>BLOCKSHIFT] & 0xFFFF) +
  378. (i & BLOCKMASK)]);;
  379. }
  380. for (i = 0; i < INDEXCOUNT; ++i) {
  381. indices[i] = (short)(i<<BLOCKSHIFT);
  382. }
  383. values = null;
  384. values = tempArray;
  385. isCompact = false;
  386. }
  387. }
  388. // # of elements in the indexed array
  389. private short capacity()
  390. {
  391. return (short)values.length;
  392. }
  393. private char getArrayValue(int n)
  394. {
  395. return values[n];
  396. }
  397. private short getIndexArrayValue(int n)
  398. {
  399. return indices[n];
  400. }
  401. private int
  402. FindOverlappingPosition(int start, char[] tempIndex, int tempIndexCount)
  403. {
  404. int i;
  405. short j;
  406. short currentCount;
  407. if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
  408. printPlainArray(start, BLOCKCOUNT, null);
  409. printPlainArray(0, tempIndexCount, tempIndex);
  410. }
  411. for (i = 0; i < tempIndexCount; i += BLOCKCOUNT) {
  412. currentCount = (short)BLOCKCOUNT;
  413. if (i + BLOCKCOUNT > tempIndexCount) {
  414. currentCount = (short)(tempIndexCount - i);
  415. }
  416. for (j = 0; j < currentCount; ++j) {
  417. if (values[start + j] != values[tempIndex[i + j]]) break;
  418. }
  419. if (j == currentCount) break;
  420. }
  421. if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
  422. for (j = 1; j < i; ++j) {
  423. System.out.print(" ");
  424. }
  425. printPlainArray(start, BLOCKCOUNT, null);
  426. System.out.println(" Found At: " + i);
  427. }
  428. return i;
  429. }
  430. private static final int DEBUGSHOWOVERLAPLIMIT = 100;
  431. private static final boolean DEBUGTRACE = false;
  432. private static final boolean DEBUGSMALL = false;
  433. private static final boolean DEBUGOVERLAP = false;
  434. private static final int DEBUGSMALLLIMIT = 30000;
  435. private static final int BLOCKSHIFT =7;
  436. private static final int BLOCKCOUNT =(1<<BLOCKSHIFT);
  437. private static final int INDEXSHIFT =(16-BLOCKSHIFT);
  438. private static final int INDEXCOUNT =(1<<INDEXSHIFT);
  439. private static final int BLOCKMASK = BLOCKCOUNT - 1;
  440. private char[] values; // char -> short (char parameterized short)
  441. private short indices[];
  442. private StringBuffer exceptions = new StringBuffer();
  443. private boolean isCompact;
  444. };