1. /*
  2. * Copyright 1999-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: SAX2RTFDTM.java,v 1.9 2004/02/17 04:07:37 minchau Exp $
  18. */
  19. package com.sun.org.apache.xml.internal.dtm.ref.sax2dtm;
  20. import javax.xml.transform.Source;
  21. import com.sun.org.apache.xml.internal.dtm.DTM;
  22. import com.sun.org.apache.xml.internal.dtm.DTMManager;
  23. import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  24. import com.sun.org.apache.xml.internal.utils.IntStack;
  25. import com.sun.org.apache.xml.internal.utils.IntVector;
  26. import com.sun.org.apache.xml.internal.utils.StringVector;
  27. import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
  28. import org.xml.sax.SAXException;
  29. /**
  30. * This is a subclass of SAX2DTM which has been modified to meet the needs of
  31. * Result Tree Frameworks (RTFs). The differences are:
  32. *
  33. * 1) Multiple XML trees may be appended to the single DTM. This means
  34. * that the root node of each document is _not_ node 0. Some code has
  35. * had to be deoptimized to support this mode of operation, and an
  36. * explicit mechanism for obtaining the Node Handle of the root node
  37. * has been provided.
  38. *
  39. * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
  40. * most recently added trees off the end of the DTM as stylesheet elements
  41. * (and thus variable contexts) are exited.
  42. *
  43. * PLEASE NOTE that this class may be _heavily_ dependent upon the
  44. * internals of the SAX2DTM superclass, and must be maintained in
  45. * parallel with that code. Arguably, they should be conditionals
  46. * within a single class... but they have deen separated for
  47. * performance reasons. (In fact, one could even argue about which is
  48. * the superclass and which is the subclass; the current arrangement
  49. * is as much about preserving stability of existing code during
  50. * development as anything else.)
  51. *
  52. * %REVIEW% In fact, since the differences are so minor, I think it
  53. * may be possible/practical to fold them back into the base
  54. * SAX2DTM. Consider that as a future code-size optimization.
  55. * */
  56. public class SAX2RTFDTM extends SAX2DTM
  57. {
  58. /** Set true to monitor SAX events and similar diagnostic info. */
  59. private static final boolean DEBUG = false;
  60. /** Most recently started Document, or null if the DTM is empty. */
  61. private int m_currentDocumentNode=NULL;
  62. /** Tail-pruning mark: Number of nodes in use */
  63. IntStack mark_size=new IntStack();
  64. /** Tail-pruning mark: Number of data items in use */
  65. IntStack mark_data_size=new IntStack();
  66. /** Tail-pruning mark: Number of size-of-data fields in use */
  67. IntStack mark_char_size=new IntStack();
  68. /** Tail-pruning mark: Number of dataOrQName slots in use */
  69. IntStack mark_doq_size=new IntStack();
  70. /** Tail-pruning mark: Number of namespace declaration sets in use
  71. * %REVIEW% I don't think number of NS sets is ever different from number
  72. * of NS elements. We can probabably reduce these to a single stack and save
  73. * some storage.
  74. * */
  75. IntStack mark_nsdeclset_size=new IntStack();
  76. /** Tail-pruning mark: Number of naespace declaration elements in use
  77. * %REVIEW% I don't think number of NS sets is ever different from number
  78. * of NS elements. We can probabably reduce these to a single stack and save
  79. * some storage.
  80. */
  81. IntStack mark_nsdeclelem_size=new IntStack();
  82. /**
  83. * Tail-pruning mark: initial number of nodes in use
  84. */
  85. int m_emptyNodeCount;
  86. /**
  87. * Tail-pruning mark: initial number of namespace declaration sets
  88. */
  89. int m_emptyNSDeclSetCount;
  90. /**
  91. * Tail-pruning mark: initial number of namespace declaration elements
  92. */
  93. int m_emptyNSDeclSetElemsCount;
  94. /**
  95. * Tail-pruning mark: initial number of data items in use
  96. */
  97. int m_emptyDataCount;
  98. /**
  99. * Tail-pruning mark: initial number of characters in use
  100. */
  101. int m_emptyCharsCount;
  102. /**
  103. * Tail-pruning mark: default initial number of dataOrQName slots in use
  104. */
  105. int m_emptyDataQNCount;
  106. public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
  107. DTMWSFilter whiteSpaceFilter,
  108. XMLStringFactory xstringfactory,
  109. boolean doIndexing)
  110. {
  111. super(mgr, source, dtmIdentity, whiteSpaceFilter,
  112. xstringfactory, doIndexing);
  113. // NEVER track source locators for RTFs; they aren't meaningful. I think.
  114. // (If we did track them, we'd need to tail-prune these too.)
  115. //com.sun.org.apache.xalan.internal.processor.TransformerFactoryImpl.m_source_location;
  116. m_useSourceLocationProperty=false;
  117. m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
  118. : null;
  119. m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
  120. m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
  121. // Record initial sizes of fields that are pushed and restored
  122. // for RTF tail-pruning. More entries can be popped than pushed, so
  123. // we need this to mark the primordial state of the DTM.
  124. m_emptyNodeCount = m_size;
  125. m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
  126. ? 0 : m_namespaceDeclSets.size();
  127. m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
  128. ? 0 : m_namespaceDeclSetElements.size();
  129. m_emptyDataCount = m_data.size();
  130. m_emptyCharsCount = m_chars.size();
  131. m_emptyDataQNCount = m_dataOrQName.size();
  132. }
  133. /**
  134. * Given a DTM, find the owning document node. In the case of
  135. * SAX2RTFDTM, which may contain multiple documents, this returns
  136. * the <b>most recently started</b> document, or null if the DTM is
  137. * empty or no document is currently under construction.
  138. *
  139. * %REVIEW% Should we continue to report the most recent after
  140. * construction has ended? I think not, given that it may have been
  141. * tail-pruned.
  142. *
  143. * @param nodeHandle the id of the node.
  144. * @return int Node handle of Document node, or null if this DTM does not
  145. * contain an "active" document.
  146. * */
  147. public int getDocument()
  148. {
  149. return makeNodeHandle(m_currentDocumentNode);
  150. }
  151. /**
  152. * Given a node handle, find the owning document node, using DTM semantics
  153. * (Document owns itself) rather than DOM semantics (Document has no owner).
  154. *
  155. * (I'm counting on the fact that getOwnerDocument() is implemented on top
  156. * of this call, in the superclass, to avoid having to rewrite that one.
  157. * Be careful if that code changes!)
  158. *
  159. * @param nodeHandle the id of the node.
  160. * @return int Node handle of owning document
  161. */
  162. public int getDocumentRoot(int nodeHandle)
  163. {
  164. for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
  165. if (_type(id)==DTM.DOCUMENT_NODE) {
  166. return makeNodeHandle(id);
  167. }
  168. }
  169. return DTM.NULL; // Safety net; should never happen
  170. }
  171. /**
  172. * Given a node identifier, find the owning document node. Unlike the DOM,
  173. * this considers the owningDocument of a Document to be itself. Note that
  174. * in shared DTMs this may not be zero.
  175. *
  176. * @param nodeIdentifier the id of the starting node.
  177. * @return int Node identifier of the root of this DTM tree
  178. */
  179. protected int _documentRoot(int nodeIdentifier)
  180. {
  181. if(nodeIdentifier==NULL) return NULL;
  182. for (int parent=_parent(nodeIdentifier);
  183. parent!=NULL;
  184. nodeIdentifier=parent,parent=_parent(nodeIdentifier))
  185. ;
  186. return nodeIdentifier;
  187. }
  188. /**
  189. * Receive notification of the beginning of a new RTF document.
  190. *
  191. * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
  192. * might want to consider folding the start/endDocument changes back
  193. * into the main SAX2DTM so we don't have to expose so many fields
  194. * (even as Protected) and carry the additional code.
  195. *
  196. * @throws SAXException Any SAX exception, possibly
  197. * wrapping another exception.
  198. * @see org.xml.sax.ContentHandler#startDocument
  199. * */
  200. public void startDocument() throws SAXException
  201. {
  202. // Re-initialize the tree append process
  203. m_endDocumentOccured = false;
  204. m_prefixMappings = new java.util.Vector();
  205. m_contextIndexes = new IntStack();
  206. m_parents = new IntStack();
  207. m_currentDocumentNode=m_size;
  208. super.startDocument();
  209. }
  210. /**
  211. * Receive notification of the end of the document.
  212. *
  213. * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
  214. * might want to consider folding the start/endDocument changes back
  215. * into the main SAX2DTM so we don't have to expose so many fields
  216. * (even as Protected).
  217. *
  218. * @throws SAXException Any SAX exception, possibly
  219. * wrapping another exception.
  220. * @see org.xml.sax.ContentHandler#endDocument
  221. * */
  222. public void endDocument() throws SAXException
  223. {
  224. charactersFlush();
  225. m_nextsib.setElementAt(NULL,m_currentDocumentNode);
  226. if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
  227. m_firstch.setElementAt(NULL,m_currentDocumentNode);
  228. if (DTM.NULL != m_previous)
  229. m_nextsib.setElementAt(DTM.NULL,m_previous);
  230. m_parents = null;
  231. m_prefixMappings = null;
  232. m_contextIndexes = null;
  233. m_currentDocumentNode= NULL; // no longer open
  234. m_endDocumentOccured = true;
  235. }
  236. /** "Tail-pruning" support for RTFs.
  237. *
  238. * This function pushes information about the current size of the
  239. * DTM's data structures onto a stack, for use by popRewindMark()
  240. * (which see).
  241. *
  242. * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
  243. * RTFs will not be indexed, so I can simply panic if that case
  244. * arises. Hey, it works...
  245. * */
  246. public void pushRewindMark()
  247. {
  248. if(m_indexing || m_elemIndexes!=null)
  249. throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
  250. // Values from DTMDefaultBase
  251. // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
  252. mark_size.push(m_size);
  253. mark_nsdeclset_size.push((m_namespaceDeclSets==null)
  254. ? 0
  255. : m_namespaceDeclSets.size());
  256. mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
  257. ? 0
  258. : m_namespaceDeclSetElements.size());
  259. // Values from SAX2DTM
  260. mark_data_size.push(m_data.size());
  261. mark_char_size.push(m_chars.size());
  262. mark_doq_size.push(m_dataOrQName.size());
  263. }
  264. /** "Tail-pruning" support for RTFs.
  265. *
  266. * This function pops the information previously saved by
  267. * pushRewindMark (which see) and uses it to discard all nodes added
  268. * to the DTM after that time. We expect that this will allow us to
  269. * reuse storage more effectively.
  270. *
  271. * This is _not_ intended to be called while a document is still being
  272. * constructed -- only between endDocument and the next startDocument
  273. *
  274. * %REVIEW% WARNING: This is the first use of some of the truncation
  275. * methods. If Xalan blows up after this is called, that's a likely
  276. * place to check.
  277. *
  278. * %REVIEW% Our original design for DTMs permitted them to share
  279. * string pools. If there any risk that this might be happening, we
  280. * can _not_ rewind and recover the string storage. One solution
  281. * might to assert that DTMs used for RTFs Must Not take advantage
  282. * of that feature, but this seems excessively fragile. Another, much
  283. * less attractive, would be to just let them leak... Nah.
  284. *
  285. * @return true if and only if the pop completely emptied the
  286. * RTF. That response is used when determining how to unspool
  287. * RTF-started-while-RTF-open situations.
  288. * */
  289. public boolean popRewindMark()
  290. {
  291. boolean top=mark_size.empty();
  292. m_size=top ? m_emptyNodeCount : mark_size.pop();
  293. m_exptype.setSize(m_size);
  294. m_firstch.setSize(m_size);
  295. m_nextsib.setSize(m_size);
  296. m_prevsib.setSize(m_size);
  297. m_parent.setSize(m_size);
  298. m_elemIndexes=null;
  299. int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
  300. if (m_namespaceDeclSets!=null) {
  301. m_namespaceDeclSets.setSize(ds);
  302. }
  303. int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
  304. if (m_namespaceDeclSetElements!=null) {
  305. m_namespaceDeclSetElements.setSize(ds1);
  306. }
  307. // Values from SAX2DTM - m_data always has a reserved entry
  308. m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
  309. m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
  310. m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
  311. // Return true iff DTM now empty
  312. return m_size==0;
  313. }
  314. /** @return true if a DTM tree is currently under construction.
  315. * */
  316. public boolean isTreeIncomplete()
  317. {
  318. return !m_endDocumentOccured;
  319. }
  320. }