1. /*
  2. * @(#)CompactStringArray.java 1.13 00/01/19
  3. *
  4. * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1996 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation is copyrighted
  15. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  16. * materials are provided under terms of a License Agreement between Taligent
  17. * and Sun. This technology is protected by multiple US and International
  18. * patents. This notice and attribution to Taligent may not be removed.
  19. * Taligent is a registered trademark of Taligent, Inc.
  20. *
  21. */
  22. package java.text;
  23. /**
  24. * class CompactATypeArray : use only on primitive data types
  25. * Provides a compact way to store information that is indexed by Unicode
  26. * values, such as character properties, types, keyboard values, etc.This
  27. * is very useful when you have a block of Unicode data that contains
  28. * significant values while the rest of the Unicode data is unused in the
  29. * application or when you have a lot of redundance, such as where all 21,000
  30. * Han ideographs have the same value. However, lookup is much faster than a
  31. * hash table.
  32. * A compact array of any primitive data type serves two purposes:
  33. * <UL type = round>
  34. * <LI>Fast access of the indexed values.
  35. * <LI>Smaller memory footprint.
  36. * </UL>
  37. * A compact array is composed of a index array and value array. The index
  38. * array contains the indicies of Unicode characters to the value array.
  39. *
  40. * @see CompactShortArray
  41. * @see CompactByteArray
  42. * @see CompactIntArray
  43. * @see CompactCharArray
  44. * @version 1.13 01/19/00
  45. * @author Helena Shih
  46. */
  47. final class CompactStringArray implements Cloneable {
  48. /**
  49. * The total number of Unicode characters.
  50. */
  51. public static final int UNICODECOUNT =65536;
  52. /**
  53. * Default constructor for CompactStringArray, the default value of the
  54. * compact array is "".
  55. */
  56. public CompactStringArray()
  57. {
  58. this("");
  59. }
  60. /**
  61. * Constructor for CompactStringArray.
  62. * @param defaultValue the default value of the compact array.
  63. */
  64. public CompactStringArray(String defaultValue)
  65. {
  66. int i;
  67. values = new char[UNICODECOUNT]; /*type = char*/
  68. indices = new short[INDEXCOUNT];
  69. setElementAt((char)0,'\uFFFF',defaultValue);
  70. for (i = 0; i < INDEXCOUNT; ++i) {
  71. indices[i] = (short)(i<<BLOCKSHIFT);
  72. }
  73. isCompact = false;
  74. }
  75. /**
  76. * Constructor for CompactStringArray.
  77. * @param indexArray the indicies of the compact array.
  78. * @param newValues the values of the compact array.
  79. * @exception IllegalArgumentException If the index is out of range.
  80. */
  81. public CompactStringArray(short indexArray[],
  82. char[] newValues,
  83. String exceptions)
  84. {
  85. int i;
  86. if (indexArray.length != INDEXCOUNT)
  87. throw new IllegalArgumentException("Index out of bounds.");
  88. for (i = 0; i < INDEXCOUNT; ++i) {
  89. short index = indexArray[i];
  90. if ((index < 0) || (index >= newValues.length+BLOCKCOUNT))
  91. throw new IllegalArgumentException("Index out of bounds.");
  92. }
  93. indices = indexArray;
  94. values = newValues;
  95. }
  96. /**
  97. * Get the mapped value (String) of a Unicode character.
  98. * @param index the character to get the mapped value with
  99. * @param toAppendTo the string buffer to append the values to
  100. */
  101. public void elementAt(char index, StringBuffer toAppendTo)
  102. {
  103. char result = (values[(indices[index>>BLOCKSHIFT] & 0xFFFF) +
  104. (index & BLOCKMASK)]);
  105. if (result >= '\uE000' && result <= '\uF800') {
  106. for (int i = (int) result - 0xE000; ; ++i) {
  107. result = exceptions.charAt(i);
  108. if (result == '\uFFFF') return;
  109. toAppendTo.append(result);
  110. }
  111. } else {
  112. toAppendTo.append(result);
  113. }
  114. }
  115. /**
  116. * Get the mapped value of a Unicode character.
  117. * @param index the character to get the mapped value with
  118. * @return the mapped value of the given character
  119. */
  120. public String elementAt(char index) {
  121. StringBuffer result = new StringBuffer();
  122. elementAt(index,result);
  123. return result.toString();
  124. }
  125. /**
  126. * Set a new value for a Unicode character.
  127. * Set automatically expands the array if it is compacted.
  128. * @param index the character to set the mapped value with
  129. * @param value the new mapped value
  130. */
  131. public void setElementAt(char index, String value)
  132. {
  133. if (isCompact)
  134. expand();
  135. if (value.length() == 1) {
  136. char ch = value.charAt(0);
  137. if (ch < '\uE000' || ch >= '\uF800') {
  138. values[(int)index] = ch;
  139. return;
  140. }
  141. }
  142. // search for the string to see if it is already present
  143. String temp = value + '\uFFFF';
  144. int position = exceptions.toString().indexOf(temp);
  145. if (position != -1) {
  146. values[(int)index] = (char)(0xE000 + position);
  147. return;
  148. };
  149. // if not found, append.
  150. values[(int)index] = (char) (0xE000 + exceptions.length());
  151. for (int i = 0; i < value.length(); ++i) {
  152. exceptions.append(value.charAt(i));
  153. }
  154. exceptions.append('\uFFFF'); // termination
  155. }
  156. /**
  157. * Set new values for a range of Unicode character.
  158. * @param start the starting offset of the range
  159. * @param end the ending offset of the range
  160. * @param value the new mapped value
  161. */
  162. public void setElementAt(char start, char end, String value)
  163. {
  164. if (start >= end) return; // catch degenerate case
  165. setElementAt(start,value);
  166. char firstValue = values[(int)start];
  167. for (int i = start + 1; i <= end; ++i) {
  168. values[i] = firstValue;
  169. }
  170. }
  171. /**
  172. * Compact the array.
  173. */
  174. public void compact()
  175. {
  176. if (isCompact == false) {
  177. char[] tempIndex;
  178. int tempIndexCount;
  179. char[] tempArray;
  180. short iBlock, iIndex;
  181. // make temp storage, larger than we need
  182. tempIndex = new char[UNICODECOUNT];
  183. // set up first block.
  184. tempIndexCount = BLOCKCOUNT;
  185. for (iIndex = 0; iIndex < BLOCKCOUNT; ++iIndex) {
  186. tempIndex[iIndex] = (char)iIndex;
  187. }; // endfor (iIndex = 0; .....)
  188. indices[0] = (short)0;
  189. // for each successive block, find out its first position
  190. // in the compacted array
  191. for (iBlock = 1; iBlock < INDEXCOUNT; ++iBlock) {
  192. int newCount, firstPosition, block;
  193. block = iBlock<<BLOCKSHIFT;
  194. if (DEBUGSMALL) if (block > DEBUGSMALLLIMIT) break;
  195. firstPosition = FindOverlappingPosition(block, tempIndex,
  196. tempIndexCount);
  197. newCount = firstPosition + BLOCKCOUNT;
  198. if (newCount > tempIndexCount) {
  199. for (iIndex = (short)tempIndexCount;
  200. iIndex < newCount;
  201. ++iIndex) {
  202. tempIndex[iIndex]
  203. = (char)(iIndex - firstPosition + block);
  204. } // endfor (iIndex = tempIndexCount....)
  205. tempIndexCount = newCount;
  206. } // endif (newCount > tempIndexCount)
  207. indices[iBlock] = (short)firstPosition;
  208. } // endfor (iBlock = 1.....)
  209. // now allocate and copy the items into the array
  210. tempArray = new char[tempIndexCount];
  211. for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) {
  212. tempArray[iIndex] = values[tempIndex[iIndex]];
  213. }
  214. values = null;
  215. values = tempArray;
  216. isCompact = true;
  217. } // endif (isCompact != false)
  218. }
  219. /** For internal use only. Do not modify the result, the behavior of
  220. * modified results are undefined.
  221. */
  222. public short getIndexArray()[]
  223. {
  224. return indices;
  225. }
  226. /** For internal use only. Do not modify the result, the behavior of
  227. * modified results are undefined.
  228. */
  229. public char getStringArray()[]
  230. {
  231. return values;
  232. }
  233. /**
  234. * Overrides Cloneable
  235. */
  236. public Object clone()
  237. {
  238. try {
  239. CompactStringArray other = (CompactStringArray) super.clone();
  240. other.values = (char[])values.clone();
  241. other.indices = (short[])indices.clone();
  242. other.exceptions = new StringBuffer(exceptions.toString());
  243. return other;
  244. } catch (CloneNotSupportedException e) {
  245. throw new InternalError();
  246. }
  247. }
  248. /**
  249. * Compares the equality of two compact array objects.
  250. * @param obj the compact array object to be compared with this.
  251. * @return true if the current compact array object is the same
  252. * as the compact array object obj; false otherwise.
  253. */
  254. public boolean equals(Object obj) {
  255. if (obj == null) return false;
  256. if (this == obj) // quick check
  257. return true;
  258. if (getClass() != obj.getClass()) // same class?
  259. return false;
  260. CompactStringArray other = (CompactStringArray) obj;
  261. for (int i = 0; i < UNICODECOUNT; i++) {
  262. // could be sped up later
  263. if (elementAt((char)i) != other.elementAt((char)i))
  264. return false;
  265. }
  266. return true; // we made it through the guantlet.
  267. }
  268. /**
  269. * Generates the hash code for the compact array object
  270. */
  271. public int hashCode() {
  272. int result = 0;
  273. int increment = Math.min(3, values.length16);
  274. for (int i = 0; i < values.length; i+= increment) {
  275. result = result * 37 + values[i];
  276. }
  277. return result;
  278. }
  279. /**
  280. * package private : for internal use only
  281. */
  282. void writeArrays()
  283. {
  284. int i;
  285. int cnt = 0;
  286. if (values.length > 0)
  287. cnt = values.length;
  288. else
  289. cnt = values.length + UNICODECOUNT;
  290. System.out.println("{");
  291. for (i = 0; i < INDEXCOUNT-1; i++)
  292. {
  293. System.out.print("(short)" + (int)((getIndexArrayValue(i) >= 0) ?
  294. (int)getIndexArrayValue(i) :
  295. (int)(getIndexArrayValue(i)+UNICODECOUNT)) + ", ");
  296. if (i != 0)
  297. if (i % 10 == 0)
  298. System.out.println();
  299. }
  300. System.out.println("(char)" +
  301. (int)((getIndexArrayValue(INDEXCOUNT-1) >= 0) ?
  302. (int)getIndexArrayValue(i) :
  303. (int)(getIndexArrayValue(i)+UNICODECOUNT)) + " }");
  304. System.out.println("{");
  305. for (i = 0; i < cnt-1; i++)
  306. {
  307. char ch = getArrayValue(i);
  308. if (ch < 0x20 || (ch > 0x7E && ch < 0xA0) || ch > 0x100)
  309. System.out.print("(char)0x" +
  310. Integer.toString((int)ch,16).toUpperCase() + ",");
  311. else System.out.print("\'" + ch + "\',");
  312. if (i != 0)
  313. if (i % 10 == 0)
  314. System.out.println();
  315. }
  316. System.out.println("(char)" + (int)getArrayValue(cnt-1) + " }");
  317. System.out.println("\"" + exceptions.toString() + "\"");
  318. }
  319. // Print char Array : Debug only
  320. void printIndex(char start, short count)
  321. {
  322. int i;
  323. for (i = start; i < count; ++i)
  324. {
  325. System.out.println(i + " -> : " +
  326. (int)((indices[i] >= 0) ?
  327. indices[i] : indices[i] + UNICODECOUNT));
  328. }
  329. System.out.println();
  330. }
  331. void printPlainArray(int start,int count, char[] tempIndex)
  332. {
  333. int iIndex;
  334. if (tempIndex != null)
  335. {
  336. for (iIndex = start; iIndex < start + count; ++iIndex)
  337. {
  338. System.out.print(" " + (int)getArrayValue(tempIndex[iIndex]));
  339. }
  340. }
  341. else
  342. {
  343. for (iIndex = start; iIndex < start + count; ++iIndex)
  344. {
  345. System.out.print(" " + (int)getArrayValue(iIndex));
  346. }
  347. }
  348. System.out.println(" Range: start " + start + " , count " + count);
  349. }
  350. /**
  351. * private functions
  352. */
  353. /**
  354. * Expanded takes the array back to a 65536 element array
  355. */
  356. private void expand()
  357. {
  358. int i;
  359. if (isCompact) {
  360. char[] tempArray;
  361. tempArray = new char[UNICODECOUNT];
  362. for (i = 0; i < UNICODECOUNT; ++i) {
  363. tempArray[i] =(values[((int)indices[i>>BLOCKSHIFT] & 0xFFFF) +
  364. (i & BLOCKMASK)]);;
  365. }
  366. for (i = 0; i < INDEXCOUNT; ++i) {
  367. indices[i] = (short)(i<<BLOCKSHIFT);
  368. }
  369. values = null;
  370. values = tempArray;
  371. isCompact = false;
  372. }
  373. }
  374. // # of elements in the indexed array
  375. private short capacity()
  376. {
  377. return (short)values.length;
  378. }
  379. private char getArrayValue(int n)
  380. {
  381. return values[n];
  382. }
  383. private short getIndexArrayValue(int n)
  384. {
  385. return indices[n];
  386. }
  387. private int
  388. FindOverlappingPosition(int start, char[] tempIndex, int tempIndexCount)
  389. {
  390. int i;
  391. short j;
  392. short currentCount;
  393. if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
  394. printPlainArray(start, BLOCKCOUNT, null);
  395. printPlainArray(0, tempIndexCount, tempIndex);
  396. }
  397. for (i = 0; i < tempIndexCount; i += BLOCKCOUNT) {
  398. currentCount = (short)BLOCKCOUNT;
  399. if (i + BLOCKCOUNT > tempIndexCount) {
  400. currentCount = (short)(tempIndexCount - i);
  401. }
  402. for (j = 0; j < currentCount; ++j) {
  403. if (values[start + j] != values[tempIndex[i + j]]) break;
  404. }
  405. if (j == currentCount) break;
  406. }
  407. if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
  408. for (j = 1; j < i; ++j) {
  409. System.out.print(" ");
  410. }
  411. printPlainArray(start, BLOCKCOUNT, null);
  412. System.out.println(" Found At: " + i);
  413. }
  414. return i;
  415. }
  416. private static final int DEBUGSHOWOVERLAPLIMIT = 100;
  417. private static final boolean DEBUGTRACE = false;
  418. private static final boolean DEBUGSMALL = false;
  419. private static final boolean DEBUGOVERLAP = false;
  420. private static final int DEBUGSMALLLIMIT = 30000;
  421. private static final int BLOCKSHIFT =7;
  422. private static final int BLOCKCOUNT =(1<<BLOCKSHIFT);
  423. private static final int INDEXSHIFT =(16-BLOCKSHIFT);
  424. private static final int INDEXCOUNT =(1<<INDEXSHIFT);
  425. private static final int BLOCKMASK = BLOCKCOUNT - 1;
  426. private char[] values; // char -> short (char parameterized short)
  427. private short indices[];
  428. private StringBuffer exceptions = new StringBuffer();
  429. private boolean isCompact;
  430. };