- /*
- * The Apache Software License, Version 1.1
- *
- *
- * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.apache.org. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
- package org.apache.xml.dtm.ref;
-
- // %REVIEW% Should this be based on SuballocatedIntVector instead?
- // (Unclear. Pools will rarely be huge. But if they ever are...)
- import org.apache.xml.utils.IntVector;
- import java.util.Vector;
-
- /** <p>DTMStringPool is an "interning" mechanism for strings. It will
- * create a stable 1:1 mapping between a set of string values and a set of
- * integer index values, so the integers can be used to reliably and
- * uniquely identify (and when necessary retrieve) the strings.</p>
- *
- * <p>Design Priorities:
- * <ul>
- * <li>String-to-index lookup speed is critical.</li>
- * <li>Index-to-String lookup speed is slightly less so.</li>
- * <li>Threadsafety is not guaranteed at this level.
- * Enforce that in the application if needed.</li>
- * <li>Storage efficiency is an issue but not a huge one.
- * It is expected that string pools won't exceed about 2000 entries.</li>
- * </ul>
- * </p>
- *
- * <p>Implementation detail: A standard Hashtable is relatively
- * inefficient when looking up primitive int values, especially when
- * we're already maintaining an int-to-string vector. So I'm
- * maintaining a simple hash chain within this class.</p>
- *
- * <p>NOTE: There is nothing in the code that has a real dependency upon
- * String. It would work with any object type that implements reliable
- * .hashCode() and .equals() operations. The API enforces Strings because
- * it's safer that way, but this could trivially be turned into a general
- * ObjectPool if one was needed.</p>
- *
- * <p>Status: Passed basic test in main().</p>
- * */
- public class DTMStringPool
- {
- Vector m_intToString;
- static final int HASHPRIME=101;
- int[] m_hashStart=new int[HASHPRIME];
- IntVector m_hashChain;
- public static final int NULL=-1;
-
- public DTMStringPool()
- {
- m_intToString=new Vector();
- m_hashChain=new IntVector(512);
- removeAllElements();
-
- // -sb Add this to force empty strings to be index 0.
- stringToIndex("");
- }
-
- public void removeAllElements()
- {
- m_intToString.removeAllElements();
- for(int i=0;i<HASHPRIME;++i)
- m_hashStart[i]=NULL;
- m_hashChain.removeAllElements();
- }
-
- /** @return string whose value is uniquely identified by this integer index.
- * @throws java.lang.ArrayIndexOutOfBoundsException
- * if index doesn't map to a string.
- * */
- public String indexToString(int i)
- throws java.lang.ArrayIndexOutOfBoundsException
- {
- if(i==NULL) return null;
- return (String) m_intToString.elementAt(i);
- }
-
- /** @return integer index uniquely identifying the value of this string. */
- public int stringToIndex(String s)
- {
- if(s==null) return NULL;
-
- int hashslot=s.hashCode()%HASHPRIME;
- if(hashslot<0) hashslot=-hashslot;
-
- // Is it one we already know?
- int hashlast=m_hashStart[hashslot];
- int hashcandidate=hashlast;
- while(hashcandidate!=NULL)
- {
- if(m_intToString.elementAt(hashcandidate).equals(s))
- return hashcandidate;
-
- hashlast=hashcandidate;
- hashcandidate=m_hashChain.elementAt(hashcandidate);
- }
-
- // New value. Add to tables.
- int newIndex=m_intToString.size();
- m_intToString.addElement(s);
-
- m_hashChain.addElement(NULL); // Initialize to no-following-same-hash
- if(hashlast==NULL) // First for this hash
- m_hashStart[hashslot]=newIndex;
- else // Link from previous with same hash
- m_hashChain.setElementAt(newIndex,hashlast);
-
- return newIndex;
- }
-
- /** Command-line unit test driver. This test relies on the fact that
- * this version of the pool assigns indices consecutively, starting
- * from zero, as new unique strings are encountered.
- */
- public static void main(String[] args)
- {
- String[] word={
- "Zero","One","Two","Three","Four","Five",
- "Six","Seven","Eight","Nine","Ten",
- "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
- "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
- "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
- "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
- "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
- "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
- "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
-
- DTMStringPool pool=new DTMStringPool();
-
- System.out.println("If no complaints are printed below, we passed initial test.");
-
- for(int pass=0;pass<=1;++pass)
- {
- int i;
-
- for(i=0;i<word.length;++i)
- {
- int j=pool.stringToIndex(word[i]);
- if(j!=i)
- System.out.println("\tMismatch populating pool: assigned "+
- j+" for create "+i);
- }
-
- for(i=0;i<word.length;++i)
- {
- int j=pool.stringToIndex(word[i]);
- if(j!=i)
- System.out.println("\tMismatch in stringToIndex: returned "+
- j+" for lookup "+i);
- }
-
- for(i=0;i<word.length;++i)
- {
- String w=pool.indexToString(i);
- if(!word[i].equals(w))
- System.out.println("\tMismatch in indexToString: returned"+
- w+" for lookup "+i);
- }
-
- pool.removeAllElements();
-
- System.out.println("\nPass "+pass+" complete\n");
- } // end pass loop
- }
- }