1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 2000-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.util;
  58. /**
  59. * This class is a symbol table implementation that guarantees that
  60. * strings used as identifiers are unique references. Multiple calls
  61. * to <code>addSymbol</code> will always return the same string
  62. * reference.
  63. * <p>
  64. * The symbol table performs the same task as <code>String.intern()</code>
  65. * with the following differences:
  66. * <ul>
  67. * <li>
  68. * A new string object does not need to be created in order to
  69. * retrieve a unique reference. Symbols can be added by using
  70. * a series of characters in a character array.
  71. * </li>
  72. * <li>
  73. * Users of the symbol table can provide their own symbol hashing
  74. * implementation. For example, a simple string hashing algorithm
  75. * may fail to produce a balanced set of hashcodes for symbols
  76. * that are <em>mostly</em> unique. Strings with similar leading
  77. * characters are especially prone to this poor hashing behavior.
  78. * </li>
  79. * </ul>
  80. *
  81. * @see SymbolHash
  82. *
  83. * @author Andy Clark
  84. *
  85. * @version $Id: SymbolTable.java,v 1.7 2002/02/07 22:15:09 neilg Exp $
  86. */
  87. public class SymbolTable {
  88. //
  89. // Constants
  90. //
  91. /** Default table size. */
  92. protected static final int TABLE_SIZE = 101;
  93. //
  94. // Data
  95. //
  96. /** Buckets. */
  97. protected Entry[] fBuckets = null;
  98. // actual table size
  99. protected int fTableSize;
  100. //
  101. // Constructors
  102. //
  103. /** Constructs a symbol table with a default number of buckets. */
  104. public SymbolTable() {
  105. this(TABLE_SIZE);
  106. }
  107. /** Constructs a symbol table with a specified number of buckets. */
  108. public SymbolTable(int tableSize) {
  109. fTableSize = tableSize;
  110. fBuckets = new Entry[fTableSize];
  111. }
  112. //
  113. // Public methods
  114. //
  115. /**
  116. * Adds the specified symbol to the symbol table and returns a
  117. * reference to the unique symbol. If the symbol already exists,
  118. * the previous symbol reference is returned instead, in order
  119. * guarantee that symbol references remain unique.
  120. *
  121. * @param symbol The new symbol.
  122. */
  123. public String addSymbol(String symbol) {
  124. // search for identical symbol
  125. int bucket = hash(symbol) % fTableSize;
  126. int length = symbol.length();
  127. OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
  128. if (length == entry.characters.length) {
  129. for (int i = 0; i < length; i++) {
  130. if (symbol.charAt(i) != entry.characters[i]) {
  131. continue OUTER;
  132. }
  133. }
  134. return entry.symbol;
  135. }
  136. }
  137. // create new entry
  138. Entry entry = new Entry(symbol, fBuckets[bucket]);
  139. fBuckets[bucket] = entry;
  140. return entry.symbol;
  141. } // addSymbol(String):String
  142. /**
  143. * Adds the specified symbol to the symbol table and returns a
  144. * reference to the unique symbol. If the symbol already exists,
  145. * the previous symbol reference is returned instead, in order
  146. * guarantee that symbol references remain unique.
  147. *
  148. * @param buffer The buffer containing the new symbol.
  149. * @param offset The offset into the buffer of the new symbol.
  150. * @param length The length of the new symbol in the buffer.
  151. */
  152. public String addSymbol(char[] buffer, int offset, int length) {
  153. // search for identical symbol
  154. int bucket = hash(buffer, offset, length) % fTableSize;
  155. OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
  156. if (length == entry.characters.length) {
  157. for (int i = 0; i < length; i++) {
  158. if (buffer[offset + i] != entry.characters[i]) {
  159. continue OUTER;
  160. }
  161. }
  162. return entry.symbol;
  163. }
  164. }
  165. // add new entry
  166. Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
  167. fBuckets[bucket] = entry;
  168. return entry.symbol;
  169. } // addSymbol(char[],int,int):String
  170. /**
  171. * Returns a hashcode value for the specified symbol. The value
  172. * returned by this method must be identical to the value returned
  173. * by the <code>hash(char[],int,int)</code> method when called
  174. * with the character array that comprises the symbol string.
  175. *
  176. * @param symbol The symbol to hash.
  177. */
  178. public int hash(String symbol) {
  179. int code = 0;
  180. int length = symbol.length();
  181. for (int i = 0; i < length; i++) {
  182. code = code * 37 + symbol.charAt(i);
  183. }
  184. return code & 0x7FFFFFF;
  185. } // hash(String):int
  186. /**
  187. * Returns a hashcode value for the specified symbol information.
  188. * The value returned by this method must be identical to the value
  189. * returned by the <code>hash(String)</code> method when called
  190. * with the string object created from the symbol information.
  191. *
  192. * @param buffer The character buffer containing the symbol.
  193. * @param offset The offset into the character buffer of the start
  194. * of the symbol.
  195. * @param length The length of the symbol.
  196. */
  197. public int hash(char[] buffer, int offset, int length) {
  198. int code = 0;
  199. for (int i = 0; i < length; i++) {
  200. code = code * 37 + buffer[offset + i];
  201. }
  202. return code & 0x7FFFFFF;
  203. } // hash(char[],int,int):int
  204. /**
  205. * Returns true if the symbol table already contains the specified
  206. * symbol.
  207. *
  208. * @param symbol The symbol to look for.
  209. */
  210. public boolean containsSymbol(String symbol) {
  211. // search for identical symbol
  212. int bucket = hash(symbol) % fTableSize;
  213. int length = symbol.length();
  214. OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
  215. if (length == entry.characters.length) {
  216. for (int i = 0; i < length; i++) {
  217. if (symbol.charAt(i) != entry.characters[i]) {
  218. continue OUTER;
  219. }
  220. }
  221. return true;
  222. }
  223. }
  224. return false;
  225. } // containsSymbol(String):boolean
  226. /**
  227. * Returns true if the symbol table already contains the specified
  228. * symbol.
  229. *
  230. * @param buffer The buffer containing the symbol to look for.
  231. * @param offset The offset into the buffer.
  232. * @param length The length of the symbol in the buffer.
  233. */
  234. public boolean containsSymbol(char[] buffer, int offset, int length) {
  235. // search for identical symbol
  236. int bucket = hash(buffer, offset, length) % fTableSize;
  237. OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
  238. if (length == entry.characters.length) {
  239. for (int i = 0; i < length; i++) {
  240. if (buffer[offset + i] != entry.characters[i]) {
  241. continue OUTER;
  242. }
  243. }
  244. return true;
  245. }
  246. }
  247. return false;
  248. } // containsSymbol(char[],int,int):boolean
  249. //
  250. // Classes
  251. //
  252. /**
  253. * This class is a symbol table entry. Each entry acts as a node
  254. * in a linked list.
  255. */
  256. protected static final class Entry {
  257. //
  258. // Data
  259. //
  260. /** Symbol. */
  261. public String symbol;
  262. /**
  263. * Symbol characters. This information is duplicated here for
  264. * comparison performance.
  265. */
  266. public char[] characters;
  267. /** The next entry. */
  268. public Entry next;
  269. //
  270. // Constructors
  271. //
  272. /**
  273. * Constructs a new entry from the specified symbol and next entry
  274. * reference.
  275. */
  276. public Entry(String symbol, Entry next) {
  277. this.symbol = symbol.intern();
  278. characters = new char[symbol.length()];
  279. symbol.getChars(0, characters.length, characters, 0);
  280. this.next = next;
  281. }
  282. /**
  283. * Constructs a new entry from the specified symbol information and
  284. * next entry reference.
  285. */
  286. public Entry(char[] ch, int offset, int length, Entry next) {
  287. characters = new char[length];
  288. System.arraycopy(ch, offset, characters, 0, length);
  289. symbol = new String(characters).intern();
  290. this.next = next;
  291. }
  292. } // class Entry
  293. } // class SymbolTable