1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xalan" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, Lotus
  53. * Development Corporation., http://www.lotus.com. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package org.apache.xml.dtm.ref.sax2dtm;
  58. import java.util.Hashtable;
  59. import java.util.Vector;
  60. import javax.xml.transform.Source;
  61. import javax.xml.transform.SourceLocator;
  62. import org.apache.xalan.transformer.XalanProperties;
  63. import org.apache.xalan.res.XSLTErrorResources;
  64. import org.apache.xalan.res.XSLMessages;
  65. import org.apache.xml.dtm.*;
  66. import org.apache.xml.dtm.ref.*;
  67. import org.apache.xml.utils.StringVector;
  68. import org.apache.xml.utils.IntVector;
  69. import org.apache.xml.utils.FastStringBuffer;
  70. import org.apache.xml.utils.IntStack;
  71. import org.apache.xml.utils.SuballocatedIntVector;
  72. import org.apache.xml.utils.SystemIDResolver;
  73. import org.apache.xml.utils.WrappedRuntimeException;
  74. import org.apache.xml.utils.XMLCharacterRecognizer;
  75. import org.apache.xml.utils.XMLString;
  76. import org.apache.xml.utils.XMLStringFactory;
  77. import org.xml.sax.*;
  78. import org.xml.sax.ext.*;
  79. /**
  80. * This is a subclass of SAX2DTM which has been modified to meet the needs of
  81. * Result Tree Frameworks (RTFs). The differences are:
  82. *
  83. * 1) Multiple XML trees may be appended to the single DTM. This means
  84. * that the root node of each document is _not_ node 0. Some code has
  85. * had to be deoptimized to support this mode of operation, and an
  86. * explicit mechanism for obtaining the Node Handle of the root node
  87. * has been provided.
  88. *
  89. * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
  90. * most recently added trees off the end of the DTM as stylesheet elements
  91. * (and thus variable contexts) are exited.
  92. *
  93. * PLEASE NOTE that this class may be _heavily_ dependent upon the
  94. * internals of the SAX2DTM superclass, and must be maintained in
  95. * parallel with that code. Arguably, they should be conditionals
  96. * within a single class... but they have deen separated for
  97. * performance reasons. (In fact, one could even argue about which is
  98. * the superclass and which is the subclass; the current arrangement
  99. * is as much about preserving stability of existing code during
  100. * development as anything else.)
  101. *
  102. * %REVIEW% In fact, since the differences are so minor, I think it
  103. * may be possible/practical to fold them back into the base
  104. * SAX2DTM. Consider that as a future code-size optimization.
  105. * */
  106. public class SAX2RTFDTM extends SAX2DTM
  107. {
  108. /** Set true to monitor SAX events and similar diagnostic info. */
  109. private static final boolean DEBUG = false;
  110. /** Most recently started Document, or null if the DTM is empty. */
  111. private int m_currentDocumentNode=NULL;
  112. /** Tail-pruning mark: Number of nodes in use */
  113. IntStack mark_size=new IntStack();
  114. /** Tail-pruning mark: Number of data items in use */
  115. IntStack mark_data_size=new IntStack();
  116. /** Tail-pruning mark: Number of size-of-data fields in use */
  117. IntStack mark_char_size=new IntStack();
  118. /** Tail-pruning mark: Number of dataOrQName slots in use */
  119. IntStack mark_doq_size=new IntStack();
  120. /** Tail-pruning mark: Number of namespace declaration sets in use
  121. * %REVIEW% I don't think number of NS sets is ever different from number
  122. * of NS elements. We can probabably reduce these to a single stack and save
  123. * some storage.
  124. * */
  125. IntStack mark_nsdeclset_size=new IntStack();
  126. /** Tail-pruning mark: Number of naespace declaration elements in use
  127. * %REVIEW% I don't think number of NS sets is ever different from number
  128. * of NS elements. We can probabably reduce these to a single stack and save
  129. * some storage.
  130. */
  131. IntStack mark_nsdeclelem_size=new IntStack();
  132. public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
  133. DTMWSFilter whiteSpaceFilter,
  134. XMLStringFactory xstringfactory,
  135. boolean doIndexing)
  136. {
  137. super(mgr, source, dtmIdentity, whiteSpaceFilter,
  138. xstringfactory, doIndexing);
  139. // NEVER track source locators for RTFs; they aren't meaningful. I think.
  140. // (If we did track them, we'd need to tail-prune these too.)
  141. m_useSourceLocationProperty=false; //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location;
  142. m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector() : null;
  143. m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
  144. m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
  145. }
  146. /**
  147. * Given a DTM, find the owning document node. In the case of
  148. * SAX2RTFDTM, which may contain multiple documents, this returns
  149. * the <b>most recently started</b> document, or null if the DTM is
  150. * empty or no document is currently under construction.
  151. *
  152. * %REVIEW% Should we continue to report the most recent after
  153. * construction has ended? I think not, given that it may have been
  154. * tail-pruned.
  155. *
  156. * @param nodeHandle the id of the node.
  157. * @return int Node handle of Document node, or null if this DTM does not
  158. * contain an "active" document.
  159. * */
  160. public int getDocument()
  161. {
  162. return makeNodeHandle(m_currentDocumentNode);
  163. }
  164. /**
  165. * Given a node handle, find the owning document node, using DTM semantics
  166. * (Document owns itself) rather than DOM semantics (Document has no owner).
  167. *
  168. * (I'm counting on the fact that getOwnerDocument() is implemented on top
  169. * of this call, in the superclass, to avoid having to rewrite that one.
  170. * Be careful if that code changes!)
  171. *
  172. * @param nodeHandle the id of the node.
  173. * @return int Node handle of owning document
  174. */
  175. public int getDocumentRoot(int nodeHandle)
  176. {
  177. for(int id=makeNodeIdentity(nodeHandle);
  178. id!=NULL;
  179. id=_parent(id))
  180. if(_type(id)==DTM.DOCUMENT_NODE)
  181. return makeNodeHandle(id);
  182. return DTM.NULL; // Safety net; should never happen
  183. }
  184. /**
  185. * Given a node identifier, find the owning document node. Unlike the DOM,
  186. * this considers the owningDocument of a Document to be itself. Note that
  187. * in shared DTMs this may not be zero.
  188. *
  189. * @param nodeIdentifier the id of the starting node.
  190. * @return int Node identifier of the root of this DTM tree
  191. */
  192. protected int _documentRoot(int nodeIdentifier)
  193. {
  194. if(nodeIdentifier==NULL) return NULL;
  195. for(int parent=_parent(nodeIdentifier);
  196. parent!=NULL;
  197. nodeIdentifier=parent,parent=_parent(nodeIdentifier))
  198. ;
  199. return nodeIdentifier;
  200. }
  201. /**
  202. * Receive notification of the beginning of a new RTF document.
  203. *
  204. * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
  205. * might want to consider folding the start/endDocument changes back
  206. * into the main SAX2DTM so we don't have to expose so many fields
  207. * (even as Protected) and carry the additional code.
  208. *
  209. * @throws SAXException Any SAX exception, possibly
  210. * wrapping another exception.
  211. * @see org.xml.sax.ContentHandler#startDocument
  212. * */
  213. public void startDocument() throws SAXException
  214. {
  215. // Re-initialize the tree append process
  216. m_endDocumentOccured = false;
  217. m_prefixMappings = new java.util.Vector();
  218. m_contextIndexes = new IntStack();
  219. m_parents = new IntStack();
  220. m_currentDocumentNode=m_size;
  221. super.startDocument();
  222. }
  223. /**
  224. * Receive notification of the end of the document.
  225. *
  226. * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
  227. * might want to consider folding the start/endDocument changes back
  228. * into the main SAX2DTM so we don't have to expose so many fields
  229. * (even as Protected).
  230. *
  231. * @throws SAXException Any SAX exception, possibly
  232. * wrapping another exception.
  233. * @see org.xml.sax.ContentHandler#endDocument
  234. * */
  235. public void endDocument() throws SAXException
  236. {
  237. charactersFlush();
  238. m_nextsib.setElementAt(NULL,m_currentDocumentNode);
  239. if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
  240. m_firstch.setElementAt(NULL,m_currentDocumentNode);
  241. if (DTM.NULL != m_previous)
  242. m_nextsib.setElementAt(DTM.NULL,m_previous);
  243. m_parents = null;
  244. m_prefixMappings = null;
  245. m_contextIndexes = null;
  246. m_currentDocumentNode= NULL; // no longer open
  247. m_endDocumentOccured = true;
  248. }
  249. /** "Tail-pruning" support for RTFs.
  250. *
  251. * This function pushes information about the current size of the
  252. * DTM's data structures onto a stack, for use by popRewindMark()
  253. * (which see).
  254. *
  255. * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
  256. * RTFs will not be indexed, so I can simply panic if that case
  257. * arises. Hey, it works...
  258. * */
  259. public void pushRewindMark()
  260. {
  261. if(m_indexing || m_elemIndexes!=null)
  262. throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
  263. // Values from DTMDefaultBase
  264. // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
  265. mark_size.push(m_size);
  266. mark_nsdeclset_size.push( (m_namespaceDeclSets==null) ? 0 : m_namespaceDeclSets.size() );
  267. mark_nsdeclelem_size.push( (m_namespaceDeclSetElements==null) ? 0 : m_namespaceDeclSetElements.size() );
  268. // Values from SAX2DTM
  269. mark_data_size.push(m_data.size());
  270. mark_char_size.push(m_chars.size());
  271. mark_doq_size.push(m_dataOrQName.size());
  272. }
  273. /** "Tail-pruning" support for RTFs.
  274. *
  275. * This function pops the information previously saved by
  276. * pushRewindMark (which see) and uses it to discard all nodes added
  277. * to the DTM after that time. We expect that this will allow us to
  278. * reuse storage more effectively.
  279. *
  280. * This is _not_ intended to be called while a document is still being
  281. * constructed -- only between endDocument and the next startDocument
  282. *
  283. * %REVIEW% WARNING: This is the first use of some of the truncation
  284. * methods. If Xalan blows up after this is called, that's a likely
  285. * place to check.
  286. *
  287. * %REVIEW% Our original design for DTMs permitted them to share
  288. * string pools. If there any risk that this might be happening, we
  289. * can _not_ rewind and recover the string storage. One solution
  290. * might to assert that DTMs used for RTFs Must Not take advantage
  291. * of that feature, but this seems excessively fragile. Another, much
  292. * less attractive, would be to just let them leak... Nah.
  293. *
  294. * @return true if and only if the pop completely emptied the
  295. * RTF. That response is used when determining how to unspool
  296. * RTF-started-while-RTF-open situations.
  297. * */
  298. public boolean popRewindMark()
  299. {
  300. boolean top=mark_size.empty();
  301. m_size=top ? 0 : mark_size.pop();
  302. m_exptype.setSize(m_size);
  303. m_firstch.setSize(m_size);
  304. m_nextsib.setSize(m_size);
  305. m_prevsib.setSize(m_size);
  306. m_parent.setSize(m_size);
  307. m_elemIndexes=null;
  308. int ds= top ? 0 : mark_nsdeclset_size.pop();
  309. if (m_namespaceDeclSets!=null)
  310. m_namespaceDeclSets.setSize(ds);
  311. int ds1= top ? 0 : mark_nsdeclelem_size.pop();
  312. if (m_namespaceDeclSetElements!=null)
  313. m_namespaceDeclSetElements.setSize(ds1);
  314. // Values from SAX2DTM
  315. m_data.setSize(top ? 0 : mark_data_size.pop());
  316. m_chars.setLength(top ? 0 : mark_char_size.pop());
  317. m_dataOrQName.setSize(top ? 0 : mark_doq_size.pop());
  318. // Return true iff DTM now empty
  319. return m_size==0;
  320. }
  321. /** @return true if a DTM tree is currently under construction.
  322. * */
  323. public boolean isTreeIncomplete()
  324. {
  325. return !m_endDocumentOccured;
  326. }
  327. }