1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xalan" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, Lotus
  53. * Development Corporation., http://www.lotus.com. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package org.apache.xml.dtm.ref.dom2dtm;
  58. import org.apache.xml.dtm.ref.*;
  59. import org.apache.xml.dtm.*;
  60. import org.apache.xml.utils.SuballocatedIntVector;
  61. import org.apache.xml.utils.IntStack;
  62. import org.apache.xml.utils.BoolStack;
  63. import org.apache.xml.utils.StringBufferPool;
  64. import org.apache.xml.utils.FastStringBuffer;
  65. import org.apache.xml.utils.TreeWalker;
  66. import org.apache.xml.utils.QName;
  67. import org.apache.xml.utils.XMLCharacterRecognizer;
  68. import org.w3c.dom.*;
  69. import java.util.Vector;
  70. import javax.xml.transform.dom.DOMSource;
  71. import javax.xml.transform.SourceLocator;
  72. import org.xml.sax.ContentHandler;
  73. import org.apache.xml.utils.NodeVector;
  74. import org.apache.xml.utils.XMLString;
  75. import org.apache.xml.utils.XMLStringFactory;
  76. import org.apache.xalan.res.XSLTErrorResources;
  77. import org.apache.xalan.res.XSLMessages;
/** The <code>DOM2DTM</code> class serves up a DOM's contents via the
 * DTM API.
 *
 * Note that it doesn't necessarily represent a full Document
 * tree. You can wrap a DOM2DTM around a specific node and its subtree
 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
 * considering.)
 *
 * Note too that we do not currently attempt to track document
 * mutation. If you alter the DOM after wrapping DOM2DTM around it,
 * all bets are off.
 * */
  91. public class DOM2DTM extends DTMDefaultBaseIterators
  92. {
  93. static final boolean JJK_DEBUG=false;
  94. static final boolean JJK_NEWCODE=true;
  95. /** Manefest constant
  96. */
  97. static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
  98. /** The current position in the DOM tree. Last node examined for
  99. * possible copying to DTM. */
  100. transient private Node m_pos;
  101. /** The current position in the DTM tree. Who children get appended to. */
  102. private int m_last_parent=0;
  103. /** The current position in the DTM tree. Who children reference as their
  104. * previous sib. */
  105. private int m_last_kid=NULL;
  106. /** The top of the subtree.
  107. * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
  108. * */
  109. transient private Node m_root;
  110. /** True iff the first element has been processed. This is used to control
  111. synthesis of the implied xml: namespace declaration node. */
  112. boolean m_processedFirstElement=false;
  113. /** true if ALL the nodes in the m_root subtree have been processed;
  114. * false if our incremental build has not yet finished scanning the
  115. * DOM tree. */
  116. transient private boolean m_nodesAreProcessed;
  117. /** The node objects. The instance part of the handle indexes
  118. * directly into this vector. Each DTM node may actually be
  119. * composed of several DOM nodes (for example, if logically-adjacent
  120. * Text/CDATASection nodes in the DOM have been coalesced into a
  121. * single DTM Text node); this table points only to the first in
  122. * that sequence. */
  123. protected Vector m_nodes = new Vector();
  124. /**
  125. * Construct a DOM2DTM object from a DOM node.
  126. *
  127. * @param mgr The DTMManager who owns this DTM.
  128. * @param domSource the DOM source that this DTM will wrap.
  129. * @param dtmIdentity The DTM identity ID for this DTM.
  130. * @param whiteSpaceFilter The white space filter for this DTM, which may
  131. * be null.
  132. * @param xstringfactory XMLString factory for creating character content.
  133. * @param doIndexing true if the caller considers it worth it to use
  134. * indexing schemes.
  135. */
  136. public DOM2DTM(DTMManager mgr, DOMSource domSource,
  137. int dtmIdentity, DTMWSFilter whiteSpaceFilter,
  138. XMLStringFactory xstringfactory,
  139. boolean doIndexing)
  140. {
  141. super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
  142. xstringfactory, doIndexing);
  143. // Initialize DOM navigation
  144. m_pos=m_root = domSource.getNode();
  145. // Initialize DTM navigation
  146. m_last_parent=m_last_kid=NULL;
  147. m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
  148. // Apparently the domSource root may not actually be the
  149. // Document node. If it's an Element node, we need to immediately
  150. // add its attributes. Adapted from nextNode().
  151. // %REVIEW% Move this logic into addNode and recurse? Cleaner!
  152. //
  153. // (If it's an EntityReference node, we're probably scrod. For now
  154. // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
  155. //
  156. // %ISSUE% What about inherited namespaces in this case?
  157. // Do we need to special-case initialize them into the DTM model?
  158. if(ELEMENT_NODE == m_root.getNodeType())
  159. {
  160. NamedNodeMap attrs=m_root.getAttributes();
  161. int attrsize=(attrs==null) ? 0 : attrs.getLength();
  162. if(attrsize>0)
  163. {
  164. int attrIndex=NULL; // start with no previous sib
  165. for(int i=0;i<attrsize;++i)
  166. {
  167. // No need to force nodetype in this case;
  168. // addNode() will take care of switching it from
  169. // Attr to Namespace if necessary.
  170. attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
  171. m_firstch.setElementAt(DTM.NULL,attrIndex);
  172. }
  173. // Terminate list of attrs, and make sure they aren't
  174. // considered children of the element
  175. m_nextsib.setElementAt(DTM.NULL,attrIndex);
  176. // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
  177. } // if attrs exist
  178. } //if(ELEMENT_NODE)
  179. // Initialize DTM-completed status
  180. m_nodesAreProcessed = false;
  181. }
  182. /**
  183. * Construct the node map from the node.
  184. *
  185. * @param node The node that is to be added to the DTM.
  186. * @param parentIndex The current parent index.
  187. * @param previousSibling The previous sibling index.
  188. * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
  189. * Used to force nodes to Text rather than CDATASection when their
  190. * coalesced value includes ordinary Text nodes (current DTM behavior).
  191. *
  192. * @return The index identity of the node that was added.
  193. */
  194. protected int addNode(Node node, int parentIndex,
  195. int previousSibling, int forceNodeType)
  196. {
  197. int nodeIndex = m_nodes.size();
  198. // Have we overflowed a DTM Identity's addressing range?
  199. if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
  200. {
  201. try
  202. {
  203. if(m_mgr==null)
  204. throw new ClassCastException();
  205. // Handle as Extended Addressing
  206. DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
  207. int id=mgrD.getFirstFreeDTMID();
  208. mgrD.addDTM(this,id,nodeIndex);
  209. m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
  210. }
  211. catch(ClassCastException e)
  212. {
  213. // %REVIEW% Wrong error message, but I've been told we're trying
  214. // not to add messages right not for I18N reasons.
  215. // %REVIEW% Should this be a Fatal Error?
  216. error(XSLMessages.createMessage(XSLTErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
  217. }
  218. }
  219. m_size++;
  220. // ensureSize(nodeIndex);
  221. int type;
  222. if(NULL==forceNodeType)
  223. type = node.getNodeType();
  224. else
  225. type=forceNodeType;
  226. // %REVIEW% The Namespace Spec currently says that Namespaces are
  227. // processed in a non-namespace-aware manner, by matching the
  228. // QName, even though there is in fact a namespace assigned to
  229. // these nodes in the DOM. If and when that changes, we will have
  230. // to consider whether we check the namespace-for-namespaces
  231. // rather than the node name.
  232. //
  233. // %TBD% Note that the DOM does not necessarily explicitly declare
  234. // all the namespaces it uses. DOM Level 3 will introduce a
  235. // namespace-normalization operation which reconciles that, and we
  236. // can request that users invoke it or otherwise ensure that the
  237. // tree is namespace-well-formed before passing the DOM to Xalan.
  238. // But if they don't, what should we do about it? We probably
  239. // don't want to alter the source DOM (and may not be able to do
  240. // so if it's read-only). The best available answer might be to
  241. // synthesize additional DTM Namespace Nodes that don't correspond
  242. // to DOM Attr Nodes.
  243. if (Node.ATTRIBUTE_NODE == type)
  244. {
  245. String name = node.getNodeName();
  246. if (name.startsWith("xmlns:") || name.equals("xmlns"))
  247. {
  248. type = DTM.NAMESPACE_NODE;
  249. }
  250. }
  251. m_nodes.addElement(node);
  252. m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
  253. m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
  254. m_prevsib.setElementAt(previousSibling,nodeIndex);
  255. m_parent.setElementAt(parentIndex,nodeIndex);
  256. if(DTM.NULL != parentIndex &&
  257. type != DTM.ATTRIBUTE_NODE &&
  258. type != DTM.NAMESPACE_NODE)
  259. {
  260. // If the DTM parent had no children, this becomes its first child.
  261. if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
  262. m_firstch.setElementAt(nodeIndex,parentIndex);
  263. }
  264. String nsURI = node.getNamespaceURI();
  265. // Deal with the difference between Namespace spec and XSLT
  266. // definitions of local name. (The former says PIs don't have
  267. // localnames; the latter says they do.)
  268. String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ?
  269. node.getNodeName() :
  270. node.getLocalName();
  271. // Hack to make DOM1 sort of work...
  272. if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
  273. && null == localName)
  274. localName = node.getNodeName(); // -sb
  275. ExpandedNameTable exnt = m_expandedNameTable;
  276. // %TBD% Nodes created with the old non-namespace-aware DOM
  277. // calls createElement() and createAttribute() will never have a
  278. // localname. That will cause their expandedNameID to be just the
  279. // nodeType... which will keep them from being matched
  280. // successfully by name. Since the DOM makes no promise that
  281. // those will participate in namespace processing, this is
  282. // officially accepted as Not Our Fault. But it might be nice to
  283. // issue a diagnostic message!
  284. if(node.getLocalName()==null &&
  285. (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
  286. {
  287. // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
  288. }
  289. int expandedNameID = (null != localName)
  290. ? exnt.getExpandedTypeID(nsURI, localName, type) :
  291. exnt.getExpandedTypeID(type);
  292. m_exptype.setElementAt(expandedNameID,nodeIndex);
  293. indexNode(expandedNameID, nodeIndex);
  294. if (DTM.NULL != previousSibling)
  295. m_nextsib.setElementAt(nodeIndex,previousSibling);
  296. // This should be done after m_exptype has been set, and probably should
  297. // always be the last thing we do
  298. if (type == DTM.NAMESPACE_NODE)
  299. declareNamespaceInContext(parentIndex,nodeIndex);
  300. return nodeIndex;
  301. }
  302. /**
  303. * Get the number of nodes that have been added.
  304. */
  305. protected int getNumberOfNodes()
  306. {
  307. return m_nodes.size();
  308. }
  309. /**
  310. * This method iterates to the next node that will be added to the table.
  311. * Each call to this method adds a new node to the table, unless the end
  312. * is reached, in which case it returns null.
  313. *
  314. * @return The true if a next node is found or false if
  315. * there are no more nodes.
  316. */
  317. protected boolean nextNode()
  318. {
  319. // Non-recursive one-fetch-at-a-time depth-first traversal with
  320. // attribute/namespace nodes and white-space stripping.
  321. // Navigating the DOM is simple, navigating the DTM is simple;
  322. // keeping track of both at once is a trifle baroque but at least
  323. // we've avoided most of the special cases.
  324. if (m_nodesAreProcessed)
  325. return false;
  326. // %REVIEW% Is this local copy Really Useful from a performance
  327. // point of view? Or is this a false microoptimization?
  328. Node pos=m_pos;
  329. Node next=null;
  330. int nexttype=NULL;
  331. // Navigate DOM tree
  332. do
  333. {
  334. // Look down to first child.
  335. if (pos.hasChildNodes())
  336. {
  337. next = pos.getFirstChild();
  338. // %REVIEW% There's probably a more elegant way to skip
  339. // the doctype. (Just let it go and Suppress it?
  340. if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
  341. next=next.getNextSibling();
  342. // Push DTM context -- except for children of Entity References,
  343. // which have no DTM equivalent and cause no DTM navigation.
  344. if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
  345. {
  346. m_last_parent=m_last_kid;
  347. m_last_kid=NULL;
  348. // Whitespace-handler context stacking
  349. if(null != m_wsfilter)
  350. {
  351. short wsv =
  352. m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
  353. boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
  354. ? getShouldStripWhitespace()
  355. : (DTMWSFilter.STRIP == wsv);
  356. pushShouldStripWhitespace(shouldStrip);
  357. } // if(m_wsfilter)
  358. }
  359. }
  360. // If that fails, look up and right (but not past root!)
  361. else
  362. {
  363. if(m_last_kid!=NULL)
  364. {
  365. // Last node posted at this level had no more children
  366. // If it has _no_ children, we need to record that.
  367. if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
  368. m_firstch.setElementAt(NULL,m_last_kid);
  369. }
  370. while(m_last_parent != NULL)
  371. {
  372. // %REVIEW% There's probably a more elegant way to
  373. // skip the doctype. (Just let it go and Suppress it?
  374. next = pos.getNextSibling();
  375. if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
  376. next=next.getNextSibling();
  377. if(next!=null)
  378. break; // Found it!
  379. // No next-sibling found. Pop the DOM.
  380. pos=pos.getParentNode();
  381. if(pos==null)
  382. {
  383. // %TBD% Should never arise, but I want to be sure of that...
  384. if(JJK_DEBUG)
  385. {
  386. System.out.println("***** DOM2DTM Pop Control Flow problem");
  387. for(;;); // Freeze right here!
  388. }
  389. }
  390. // The only parents in the DTM are Elements. However,
  391. // the DOM could contain EntityReferences. If we
  392. // encounter one, pop it _without_ popping DTM.
  393. if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
  394. {
  395. // Nothing needs doing
  396. if(JJK_DEBUG)
  397. System.out.println("***** DOM2DTM popping EntRef");
  398. }
  399. else
  400. {
  401. popShouldStripWhitespace();
  402. // Fix and pop DTM
  403. if(m_last_kid==NULL)
  404. m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
  405. else
  406. m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
  407. m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
  408. }
  409. }
  410. if(m_last_parent==NULL)
  411. next=null;
  412. }
  413. if(next!=null)
  414. nexttype=next.getNodeType();
  415. // If it's an entity ref, advance past it.
  416. //
  417. // %REVIEW% Should we let this out the door and just suppress it?
  418. // More work, but simpler code, more likely to be correct, and
  419. // it doesn't happen very often. We'd get rid of the loop too.
  420. if (ENTITY_REFERENCE_NODE == nexttype)
  421. pos=next;
  422. }
  423. while (ENTITY_REFERENCE_NODE == nexttype);
  424. // Did we run out of the tree?
  425. if(next==null)
  426. {
  427. m_nextsib.setElementAt(NULL,0);
  428. m_nodesAreProcessed = true;
  429. m_pos=null;
  430. if(JJK_DEBUG)
  431. {
  432. System.out.println("***** DOM2DTM Crosscheck:");
  433. for(int i=0;i<m_nodes.size();++i)
  434. System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
  435. }
  436. return false;
  437. }
  438. // Text needs some special handling:
  439. //
  440. // DTM may skip whitespace. This is handled by the suppressNode flag, which
  441. // when true will keep the DTM node from being created.
  442. //
  443. // DTM only directly records the first DOM node of any logically-contiguous
  444. // sequence. The lastTextNode value will be set to the last node in the
  445. // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
  446. // advance next over this whole block. Should be simpler than special-casing
  447. // the above loop for "Was the logically-preceeding sibling a text node".
  448. //
  449. // Finally, a DTM node should be considered a CDATASection only if all the
  450. // contiguous text it covers is CDATASections. The first Text should
  451. // force DTM to Text.
  452. boolean suppressNode=false;
  453. Node lastTextNode=null;
  454. nexttype=next.getNodeType();
  455. // nexttype=pos.getNodeType();
  456. if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
  457. {
  458. // If filtering, initially assume we're going to suppress the node
  459. suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
  460. // Scan logically contiguous text (siblings, plus "flattening"
  461. // of entity reference boundaries).
  462. Node n=next;
  463. while(n!=null)
  464. {
  465. lastTextNode=n;
  466. // Any Text node means DTM considers it all Text
  467. if(TEXT_NODE == n.getNodeType())
  468. nexttype=TEXT_NODE;
  469. // Any non-whitespace in this sequence blocks whitespace
  470. // suppression
  471. suppressNode &=
  472. XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
  473. n=logicalNextDOMTextNode(n);
  474. }
  475. }
  476. // Special handling for PIs: Some DOMs represent the XML
  477. // Declaration as a PI. This is officially incorrect, per the DOM
  478. // spec, but is considered a "wrong but tolerable" temporary
  479. // workaround pending proper handling of these fields in DOM Level
  480. // 3. We want to recognize and reject that case.
  481. else if(PROCESSING_INSTRUCTION_NODE==nexttype)
  482. {
  483. suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
  484. }
  485. if(!suppressNode)
  486. {
  487. // Inserting next. NOTE that we force the node type; for
  488. // coalesced Text, this records CDATASections adjacent to
  489. // ordinary Text as Text.
  490. int nextindex=addNode(next,m_last_parent,m_last_kid,
  491. nexttype);
  492. m_last_kid=nextindex;
  493. if(ELEMENT_NODE == nexttype)
  494. {
  495. int attrIndex=NULL; // start with no previous sib
  496. // Process attributes _now_, rather than waiting.
  497. // Simpler control flow, makes NS cache available immediately.
  498. NamedNodeMap attrs=next.getAttributes();
  499. int attrsize=(attrs==null) ? 0 : attrs.getLength();
  500. if(attrsize>0)
  501. {
  502. for(int i=0;i<attrsize;++i)
  503. {
  504. // No need to force nodetype in this case;
  505. // addNode() will take care of switching it from
  506. // Attr to Namespace if necessary.
  507. attrIndex=addNode(attrs.item(i),
  508. nextindex,attrIndex,NULL);
  509. m_firstch.setElementAt(DTM.NULL,attrIndex);
  510. // If the xml: prefix is explicitly declared
  511. // we don't need to synthesize one.
  512. //
  513. // NOTE that XML Namespaces were not originally
  514. // defined as being namespace-aware (grrr), and
  515. // while the W3C is planning to fix this it's
  516. // safer for now to test the QName and trust the
  517. // parsers to prevent anyone from redefining the
  518. // reserved xmlns: prefix
  519. if(!m_processedFirstElement
  520. && "xmlns:xml".equals(attrs.item(i).getNodeName()))
  521. m_processedFirstElement=true;
  522. }
  523. // Terminate list of attrs, and make sure they aren't
  524. // considered children of the element
  525. } // if attrs exist
  526. if(!m_processedFirstElement)
  527. {
  528. // The DOM might not have an explicit declaration for the
  529. // implicit "xml:" prefix, but the XPath data model
  530. // requires that this appear as a Namespace Node so we
  531. // have to synthesize one. You can think of this as
  532. // being a default attribute defined by the XML
  533. // Namespaces spec rather than by the DTD.
  534. attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
  535. (Element)next,"xml",NAMESPACE_DECL_NS,
  536. makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
  537. ),
  538. nextindex,attrIndex,NULL);
  539. m_firstch.setElementAt(DTM.NULL,attrIndex);
  540. m_processedFirstElement=true;
  541. }
  542. if(attrIndex!=NULL)
  543. m_nextsib.setElementAt(DTM.NULL,attrIndex);
  544. } //if(ELEMENT_NODE)
  545. } // (if !suppressNode)
  546. // Text postprocessing: Act on values stored above
  547. if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
  548. {
  549. // %TBD% If nexttype was forced to TEXT, patch the DTM node
  550. next=lastTextNode; // Advance the DOM cursor over contiguous text
  551. }
  552. // Remember where we left off.
  553. m_pos=next;
  554. return true;
  555. }
  556. /**
  557. * Return an DOM node for the given node.
  558. *
  559. * @param nodeHandle The node ID.
  560. *
  561. * @return A node representation of the DTM node.
  562. */
  563. public Node getNode(int nodeHandle)
  564. {
  565. int identity = makeNodeIdentity(nodeHandle);
  566. return (Node) m_nodes.elementAt(identity);
  567. }
  568. /**
  569. * Get a Node from an identity index.
  570. *
  571. * NEEDSDOC @param nodeIdentity
  572. *
  573. * NEEDSDOC ($objectName$) @return
  574. */
  575. protected Node lookupNode(int nodeIdentity)
  576. {
  577. return (Node) m_nodes.elementAt(nodeIdentity);
  578. }
  579. /**
  580. * Get the next node identity value in the list, and call the iterator
  581. * if it hasn't been added yet.
  582. *
  583. * @param identity The node identity (index).
  584. * @return identity+1, or DTM.NULL.
  585. */
  586. protected int getNextNodeIdentity(int identity)
  587. {
  588. identity += 1;
  589. if (identity >= m_nodes.size())
  590. {
  591. if (!nextNode())
  592. identity = DTM.NULL;
  593. }
  594. return identity;
  595. }
  596. /**
  597. * Get the handle from a Node.
  598. * <p>%OPT% This will be pretty slow.</p>
  599. *
  600. * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
  601. * walk down DTM reconstructing path) might be considerably faster
  602. * on later nodes in large documents. That might also imply improving
  603. * this call to handle nodes which would be in this DTM but
  604. * have not yet been built, which might or might not be a Good Thing.</p>
  605. *
  606. * %REVIEW% This relies on being able to test node-identity via
  607. * object-identity. DTM2DOM proxying is a great example of a case where
  608. * that doesn't work. DOM Level 3 will provide the isSameNode() method
  609. * to fix that, but until then this is going to be flaky.
  610. *
  611. * @param node A node, which may be null.
  612. *
  613. * @return The node handle or <code>DTM.NULL</code>.
  614. */
  615. private int getHandleFromNode(Node node)
  616. {
  617. if (null != node)
  618. {
  619. int len = m_nodes.size();
  620. boolean isMore;
  621. int i = 0;
  622. do
  623. {
  624. for (; i < len; i++)
  625. {
  626. if (m_nodes.elementAt(i) == node)
  627. return makeNodeHandle(i);
  628. }
  629. isMore = nextNode();
  630. len = m_nodes.size();
  631. }
  632. while(isMore || i < len);
  633. }
  634. return DTM.NULL;
  635. }
  636. /** Get the handle from a Node. This is a more robust version of
  637. * getHandleFromNode, intended to be usable by the public.
  638. *
  639. * <p>%OPT% This will be pretty slow.</p>
  640. *
  641. * %REVIEW% This relies on being able to test node-identity via
  642. * object-identity. DTM2DOM proxying is a great example of a case where
  643. * that doesn't work. DOM Level 3 will provide the isSameNode() method
  644. * to fix that, but until then this is going to be flaky.
  645. *
  646. * @param node A node, which may be null.
  647. *
  648. * @return The node handle or <code>DTM.NULL</code>. */
  649. public int getHandleOfNode(Node node)
  650. {
  651. if (null != node)
  652. {
  653. // Is Node actually within the same document? If not, don't search!
  654. // This would be easier if m_root was always the Document node, but
  655. // we decided to allow wrapping a DTM around a subtree.
  656. if((m_root==node) ||
  657. (m_root.getNodeType()==DOCUMENT_NODE &&
  658. m_root==node.getOwnerDocument()) ||
  659. (m_root.getNodeType()!=DOCUMENT_NODE &&
  660. m_root.getOwnerDocument()==node.getOwnerDocument())
  661. )
  662. {
  663. // If node _is_ in m_root's tree, find its handle
  664. //
  665. // %OPT% This check may be improved significantly when DOM
  666. // Level 3 nodeKey and relative-order tests become
  667. // available!
  668. for(Node cursor=node;
  669. cursor!=null;
  670. cursor=
  671. (cursor.getNodeType()!=ATTRIBUTE_NODE)
  672. ? cursor.getParentNode()
  673. : ((org.w3c.dom.Attr)cursor).getOwnerElement())
  674. {
  675. if(cursor==m_root)
  676. // We know this node; find its handle.
  677. return getHandleFromNode(node);
  678. } // for ancestors of node
  679. } // if node and m_root in same Document
  680. } // if node!=null
  681. return DTM.NULL;
  682. }
  683. /**
  684. * Retrieves an attribute node by by qualified name and namespace URI.
  685. *
  686. * @param nodeHandle int Handle of the node upon which to look up this attribute..
  687. * @param namespaceURI The namespace URI of the attribute to
  688. * retrieve, or null.
  689. * @param name The local name of the attribute to
  690. * retrieve.
  691. * @return The attribute node handle with the specified name (
  692. * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
  693. * attribute.
  694. */
  695. public int getAttributeNode(int nodeHandle, String namespaceURI,
  696. String name)
  697. {
  698. // %OPT% This is probably slower than it needs to be.
  699. if (null == namespaceURI)
  700. namespaceURI = "";
  701. int type = getNodeType(nodeHandle);
  702. if (DTM.ELEMENT_NODE == type)
  703. {
  704. // Assume that attributes immediately follow the element.
  705. int identity = makeNodeIdentity(nodeHandle);
  706. while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
  707. {
  708. // Assume this can not be null.
  709. type = _type(identity);
  710. // %REVIEW%
  711. // Should namespace nodes be retrievable DOM-style as attrs?
  712. // If not we need a separate function... which may be desirable
  713. // architecturally, but which is ugly from a code point of view.
  714. // (If we REALLY insist on it, this code should become a subroutine
  715. // of both -- retrieve the node, then test if the type matches
  716. // what you're looking for.)
  717. if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
  718. {
  719. Node node = lookupNode(identity);
  720. String nodeuri = node.getNamespaceURI();
  721. if (null == nodeuri)
  722. nodeuri = "";
  723. String nodelocalname = node.getLocalName();
  724. if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
  725. return makeNodeHandle(identity);
  726. }
  727. else // if (DTM.NAMESPACE_NODE != type)
  728. {
  729. break;
  730. }
  731. }
  732. }
  733. return DTM.NULL;
  734. }
  735. /**
  736. * Get the string-value of a node as a String object
  737. * (see http://www.w3.org/TR/xpath#data-model
  738. * for the definition of a node's string-value).
  739. *
  740. * @param nodeHandle The node ID.
  741. *
  742. * @return A string object that represents the string-value of the given node.
  743. */
  744. public XMLString getStringValue(int nodeHandle)
  745. {
  746. int type = getNodeType(nodeHandle);
  747. Node node = getNode(nodeHandle);
  748. // %TBD% If an element only has one text node, we should just use it
  749. // directly.
  750. if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
  751. || DTM.DOCUMENT_FRAGMENT_NODE == type)
  752. {
  753. FastStringBuffer buf = StringBufferPool.get();
  754. String s;
  755. try
  756. {
  757. getNodeData(node, buf);
  758. s = (buf.length() > 0) ? buf.toString() : "";
  759. }
  760. finally
  761. {
  762. StringBufferPool.free(buf);
  763. }
  764. return m_xstrf.newstr( s );
  765. }
  766. else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
  767. {
  768. // If this is a DTM text node, it may be made of multiple DOM text
  769. // nodes -- including navigating into Entity References. DOM2DTM
  770. // records the first node in the sequence and requires that we
  771. // pick up the others when we retrieve the DTM node's value.
  772. //
  773. // %REVIEW% DOM Level 3 is expected to add a "whole text"
  774. // retrieval method which performs this function for us.
  775. FastStringBuffer buf = StringBufferPool.get();
  776. while(node!=null)
  777. {
  778. buf.append(node.getNodeValue());
  779. node=logicalNextDOMTextNode(node);
  780. }
  781. String s=(buf.length() > 0) ? buf.toString() : "";
  782. StringBufferPool.free(buf);
  783. return m_xstrf.newstr( s );
  784. }
  785. else
  786. return m_xstrf.newstr( node.getNodeValue() );
  787. }
  788. /**
  789. * Retrieve the text content of a DOM subtree, appending it into a
  790. * user-supplied FastStringBuffer object. Note that attributes are
  791. * not considered part of the content of an element.
  792. * <p>
  793. * There are open questions regarding whitespace stripping.
  794. * Currently we make no special effort in that regard, since the standard
  795. * DOM doesn't yet provide DTD-based information to distinguish
  796. * whitespace-in-element-context from genuine #PCDATA. Note that we
  797. * should probably also consider xml:space if/when we address this.
  798. * DOM Level 3 may solve the problem for us.
  799. * <p>
  800. * %REVIEW% Actually, since this method operates on the DOM side of the
  801. * fence rather than the DTM side, it SHOULDN'T do
  802. * any special handling. The DOM does what the DOM does; if you want
  803. * DTM-level abstractions, use DTM-level methods.
  804. *
  805. * @param node Node whose subtree is to be walked, gathering the
  806. * contents of all Text or CDATASection nodes.
  807. * @param buf FastStringBuffer into which the contents of the text
  808. * nodes are to be concatenated.
  809. */
  810. protected static void getNodeData(Node node, FastStringBuffer buf)
  811. {
  812. switch (node.getNodeType())
  813. {
  814. case Node.DOCUMENT_FRAGMENT_NODE :
  815. case Node.DOCUMENT_NODE :
  816. case Node.ELEMENT_NODE :
  817. {
  818. for (Node child = node.getFirstChild(); null != child;
  819. child = child.getNextSibling())
  820. {
  821. getNodeData(child, buf);
  822. }
  823. }
  824. break;
  825. case Node.TEXT_NODE :
  826. case Node.CDATA_SECTION_NODE :
  827. case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node
  828. buf.append(node.getNodeValue());
  829. break;
  830. case Node.PROCESSING_INSTRUCTION_NODE :
  831. // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
  832. break;
  833. default :
  834. // ignore
  835. break;
  836. }
  837. }
  838. /**
  839. * Given a node handle, return its DOM-style node name. This will
  840. * include names such as #text or #document.
  841. *
  842. * @param nodeHandle the id of the node.
  843. * @return String Name of this node, which may be an empty string.
  844. * %REVIEW% Document when empty string is possible...
  845. * %REVIEW-COMMENT% It should never be empty, should it?
  846. */
  847. public String getNodeName(int nodeHandle)
  848. {
  849. Node node = getNode(nodeHandle);
  850. // Assume non-null.
  851. return node.getNodeName();
  852. }
  853. /**
  854. * Given a node handle, return the XPath node name. This should be
  855. * the name as described by the XPath data model, NOT the DOM-style
  856. * name.
  857. *
  858. * @param nodeHandle the id of the node.
  859. * @return String Name of this node, which may be an empty string.
  860. */
  861. public String getNodeNameX(int nodeHandle)
  862. {
  863. String name;
  864. short type = getNodeType(nodeHandle);
  865. switch (type)
  866. {
  867. case DTM.NAMESPACE_NODE :
  868. {
  869. Node node = getNode(nodeHandle);
  870. // assume not null.
  871. name = node.getNodeName();
  872. if(name.startsWith("xmlns:"))
  873. {
  874. name = QName.getLocalPart(name);
  875. }
  876. else if(name.equals("xmlns"))
  877. {
  878. name = "";
  879. }
  880. }
  881. break;
  882. case DTM.ATTRIBUTE_NODE :
  883. case DTM.ELEMENT_NODE :
  884. case DTM.ENTITY_REFERENCE_NODE :
  885. case DTM.PROCESSING_INSTRUCTION_NODE :
  886. {
  887. Node node = getNode(nodeHandle);
  888. // assume not null.
  889. name = node.getNodeName();
  890. }
  891. break;
  892. default :
  893. name = "";
  894. }
  895. return name;
  896. }
  897. /**
  898. * Given a node handle, return its XPath-style localname.
  899. * (As defined in Namespaces, this is the portion of the name after any
  900. * colon character).
  901. *
  902. * @param nodeHandle the id of the node.
  903. * @return String Local name of this node.
  904. */
  905. public String getLocalName(int nodeHandle)
  906. {
  907. if(JJK_NEWCODE)
  908. {
  909. int id=makeNodeIdentity(nodeHandle);
  910. if(NULL==id) return null;
  911. Node newnode=(Node)m_nodes.elementAt(id);
  912. String newname=newnode.getLocalName();
  913. if (null == newname)
  914. {
  915. // XSLT treats PIs, and possibly other things, as having QNames.
  916. String qname = newnode.getNodeName();
  917. if('#'==newnode.getNodeName().charAt(0))
  918. {
  919. // Match old default for this function
  920. // This conversion may or may not be necessary
  921. newname="";
  922. }
  923. else
  924. {
  925. int index = qname.indexOf(':');
  926. newname = (index < 0) ? qname : qname.substring(index + 1);
  927. }
  928. }
  929. return newname;
  930. }
  931. else
  932. {
  933. String name;
  934. short type = getNodeType(nodeHandle);
  935. switch (type)
  936. {
  937. case DTM.ATTRIBUTE_NODE :
  938. case DTM.ELEMENT_NODE :
  939. case DTM.ENTITY_REFERENCE_NODE :
  940. case DTM.NAMESPACE_NODE :
  941. case DTM.PROCESSING_INSTRUCTION_NODE :
  942. {
  943. Node node = getNode(nodeHandle);
  944. // assume not null.
  945. name = node.getLocalName();
  946. if (null == name)
  947. {
  948. String qname = node.getNodeName();
  949. int index = qname.indexOf(':');
  950. name = (index < 0) ? qname : qname.substring(index + 1);
  951. }
  952. }
  953. break;
  954. default :
  955. name = "";
  956. }
  957. return name;
  958. }
  959. }
  960. /**
  961. * Given a namespace handle, return the prefix that the namespace decl is
  962. * mapping.
  963. * Given a node handle, return the prefix used to map to the namespace.
  964. *
  965. * <p> %REVIEW% Are you sure you want "" for no prefix? </p>
  966. * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p>
  967. *
  968. * @param nodeHandle the id of the node.
  969. * @return String prefix of this node's name, or "" if no explicit
  970. * namespace prefix was given.
  971. */
  972. public String getPrefix(int nodeHandle)
  973. {
  974. String prefix;
  975. short type = getNodeType(nodeHandle);
  976. switch (type)
  977. {
  978. case DTM.NAMESPACE_NODE :
  979. {
  980. Node node = getNode(nodeHandle);
  981. // assume not null.
  982. String qname = node.getNodeName();
  983. int index = qname.indexOf(':');
  984. prefix = (index < 0) ? "" : qname.substring(index + 1);
  985. }
  986. break;
  987. case DTM.ATTRIBUTE_NODE :
  988. case DTM.ELEMENT_NODE :
  989. {
  990. Node node = getNode(nodeHandle);
  991. // assume not null.
  992. String qname = node.getNodeName();
  993. int index = qname.indexOf(':');
  994. prefix = (index < 0) ? "" : qname.substring(0, index);
  995. }
  996. break;
  997. default :
  998. prefix = "";
  999. }
  1000. return prefix;
  1001. }
  1002. /**
  1003. * Given a node handle, return its DOM-style namespace URI
  1004. * (As defined in Namespaces, this is the declared URI which this node's
  1005. * prefix -- or default in lieu thereof -- was mapped to.)
  1006. *
  1007. * <p>%REVIEW% Null or ""? -sb</p>
  1008. *
  1009. * @param nodeHandle the id of the node.
  1010. * @return String URI value of this node's namespace, or null if no
  1011. * namespace was resolved.
  1012. */
  1013. public String getNamespaceURI(int nodeHandle)
  1014. {
  1015. if(JJK_NEWCODE)
  1016. {
  1017. int id=makeNodeIdentity(nodeHandle);
  1018. if(id==NULL) return null;
  1019. Node node=(Node)m_nodes.elementAt(id);
  1020. return node.getNamespaceURI();
  1021. }
  1022. else
  1023. {
  1024. String nsuri;
  1025. short type = getNodeType(nodeHandle);
  1026. switch (type)
  1027. {
  1028. case DTM.ATTRIBUTE_NODE :
  1029. case DTM.ELEMENT_NODE :
  1030. case DTM.ENTITY_REFERENCE_NODE :
  1031. case DTM.NAMESPACE_NODE :
  1032. case DTM.PROCESSING_INSTRUCTION_NODE :
  1033. {
  1034. Node node = getNode(nodeHandle);
  1035. // assume not null.
  1036. nsuri = node.getNamespaceURI();
  1037. // %TBD% Handle DOM1?
  1038. }
  1039. break;
  1040. default :
  1041. nsuri = null;
  1042. }
  1043. return nsuri;
  1044. }
  1045. }
  1046. /** Utility function: Given a DOM Text node, determine whether it is
  1047. * logically followed by another Text or CDATASection node. This may
  1048. * involve traversing into Entity References.
  1049. *
  1050. * %REVIEW% DOM Level 3 is expected to add functionality which may
  1051. * allow us to retire this.
  1052. */
  1053. private Node logicalNextDOMTextNode(Node n)
  1054. {
  1055. Node p=n.getNextSibling();
  1056. if(p==null)
  1057. {
  1058. // Walk out of any EntityReferenceNodes that ended with text
  1059. for(n=n.getParentNode();
  1060. n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
  1061. n=n.getParentNode())
  1062. {
  1063. p=n.getNextSibling();
  1064. if(p!=null)
  1065. break;
  1066. }
  1067. }
  1068. n=p;
  1069. while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
  1070. {
  1071. // Walk into any EntityReferenceNodes that start with text
  1072. if(n.hasChildNodes())
  1073. n=n.getFirstChild();
  1074. else
  1075. n=n.getNextSibling();
  1076. }
  1077. if(n!=null)
  1078. {
  1079. // Found a logical next sibling. Is it text?
  1080. int ntype=n.getNodeType();
  1081. if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
  1082. n=null;
  1083. }
  1084. return n;
  1085. }
  1086. /**
  1087. * Given a node handle, return its node value. This is mostly
  1088. * as defined by the DOM, but may ignore some conveniences.
  1089. * <p>
  1090. *
  1091. * @param nodeHandle The node id.
  1092. * @return String Value of this node, or null if not
  1093. * meaningful for this node type.
  1094. */
  1095. public String getNodeValue(int nodeHandle)
  1096. {
  1097. // The _type(nodeHandle) call was taking the lion's share of our
  1098. // time, and was wrong anyway since it wasn't coverting handle to
  1099. // identity. Inlined it.
  1100. int type = _exptype(makeNodeIdentity(nodeHandle));
  1101. type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
  1102. if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
  1103. return getNode(nodeHandle).getNodeValue();
  1104. // If this is a DTM text node, it may be made of multiple DOM text
  1105. // nodes -- including navigating into Entity References. DOM2DTM
  1106. // records the first node in the sequence and requires that we
  1107. // pick up the others when we retrieve the DTM node's value.
  1108. //
  1109. // %REVIEW% DOM Level 3 is expected to add a "whole text"
  1110. // retrieval method which performs this function for us.
  1111. Node node = getNode(nodeHandle);
  1112. Node n=logicalNextDOMTextNode(node);
  1113. if(n==null)
  1114. return node.getNodeValue();
  1115. FastStringBuffer buf = StringBufferPool.get();
  1116. buf.append(node.getNodeValue());
  1117. while(n!=null)
  1118. {
  1119. buf.append(n.getNodeValue());
  1120. n=logicalNextDOMTextNode(n);
  1121. }
  1122. String s = (buf.length() > 0) ? buf.toString() : "";
  1123. StringBufferPool.free(buf);
  1124. return s;
  1125. }
  1126. /**
  1127. * A document type declaration information item has the following properties:
  1128. *
  1129. * 1. [system identifier] The system identifier of the external subset, if
  1130. * it exists. Otherwise this property has no value.
  1131. *
  1132. * @return the system identifier String object, or null if there is none.
  1133. */
  1134. public String getDocumentTypeDeclarationSystemIdentifier()
  1135. {
  1136. Document doc;
  1137. if (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1138. doc = (Document) m_root;
  1139. else
  1140. doc = m_root.getOwnerDocument();
  1141. if (null != doc)
  1142. {
  1143. DocumentType dtd = doc.getDoctype();
  1144. if (null != dtd)
  1145. {
  1146. return dtd.getSystemId();
  1147. }
  1148. }
  1149. return null;
  1150. }
  1151. /**
  1152. * Return the public identifier of the external subset,
  1153. * normalized as described in 4.2.2 External Entities [XML]. If there is
  1154. * no external subset or if it has no public identifier, this property
  1155. * has no value.
  1156. *
  1157. * @param the document type declaration handle
  1158. *
  1159. * @return the public identifier String object, or null if there is none.
  1160. */
  1161. public String getDocumentTypeDeclarationPublicIdentifier()
  1162. {
  1163. Document doc;
  1164. if (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1165. doc = (Document) m_root;
  1166. else
  1167. doc = m_root.getOwnerDocument();
  1168. if (null != doc)
  1169. {
  1170. DocumentType dtd = doc.getDoctype();
  1171. if (null != dtd)
  1172. {
  1173. return dtd.getPublicId();
  1174. }
  1175. }
  1176. return null;
  1177. }
  1178. /**
  1179. * Returns the <code>Element</code> whose <code>ID</code> is given by
  1180. * <code>elementId</code>. If no such element exists, returns
  1181. * <code>DTM.NULL</code>. Behavior is not defined if more than one element
  1182. * has this <code>ID</code>. Attributes (including those
  1183. * with the name "ID") are not of type ID unless so defined by DTD/Schema
  1184. * information available to the DTM implementation.
  1185. * Implementations that do not know whether attributes are of type ID or
  1186. * not are expected to return <code>DTM.NULL</code>.
  1187. *
  1188. * <p>%REVIEW% Presumably IDs are still scoped to a single document,
  1189. * and this operation searches only within a single document, right?
  1190. * Wouldn't want collisions between DTMs in the same process.</p>
  1191. *
  1192. * @param elementId The unique <code>id</code> value for an element.
  1193. * @return The handle of the matching element.
  1194. */
  1195. public int getElementById(String elementId)
  1196. {
  1197. Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1198. ? (Document) m_root : m_root.getOwnerDocument();
  1199. if(null != doc)
  1200. {
  1201. Node elem = doc.getElementById(elementId);
  1202. if(null != elem)
  1203. {
  1204. int elemHandle = getHandleFromNode(elem);
  1205. if(DTM.NULL == elemHandle)
  1206. {
  1207. int identity = m_nodes.size()-1;
  1208. while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
  1209. {
  1210. Node node = getNode(identity);
  1211. if(node == elem)
  1212. {
  1213. elemHandle = getHandleFromNode(elem);
  1214. break;
  1215. }
  1216. }
  1217. }
  1218. return elemHandle;
  1219. }
  1220. }
  1221. return DTM.NULL;
  1222. }
  1223. /**
  1224. * The getUnparsedEntityURI function returns the URI of the unparsed
  1225. * entity with the specified name in the same document as the context
  1226. * node (see [3.3 Unparsed Entities]). It returns the empty string if
  1227. * there is no such entity.
  1228. * <p>
  1229. * XML processors may choose to use the System Identifier (if one
  1230. * is provided) to resolve the entity, rather than the URI in the
  1231. * Public Identifier. The details are dependent on the processor, and
  1232. * we would have to support some form of plug-in resolver to handle
  1233. * this properly. Currently, we simply return the System Identifier if
  1234. * present, and hope that it a usable URI or that our caller can
  1235. * map it to one.
  1236. * TODO: Resolve Public Identifiers... or consider changing function name.
  1237. * <p>
  1238. * If we find a relative URI
  1239. * reference, XML expects it to be resolved in terms of the base URI
  1240. * of the document. The DOM doesn't do that for us, and it isn't
  1241. * entirely clear whether that should be done here; currently that's
  1242. * pushed up to a higher level of our application. (Note that DOM Level
  1243. * 1 didn't store the document's base URI.)
  1244. * TODO: Consider resolving Relative URIs.
  1245. * <p>
  1246. * (The DOM's statement that "An XML processor may choose to
  1247. * completely expand entities before the structure model is passed
  1248. * to the DOM" refers only to parsed entities, not unparsed, and hence
  1249. * doesn't affect this function.)
  1250. *
  1251. * @param name A string containing the Entity Name of the unparsed
  1252. * entity.
  1253. *
  1254. * @return String containing the URI of the Unparsed Entity, or an
  1255. * empty string if no such entity exists.
  1256. */
  1257. public String getUnparsedEntityURI(String name)
  1258. {
  1259. String url = "";
  1260. Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1261. ? (Document) m_root : m_root.getOwnerDocument();
  1262. if (null != doc)
  1263. {
  1264. DocumentType doctype = doc.getDoctype();
  1265. if (null != doctype)
  1266. {
  1267. NamedNodeMap entities = doctype.getEntities();
  1268. if(null == entities)
  1269. return url;
  1270. Entity entity = (Entity) entities.getNamedItem(name);
  1271. if(null == entity)
  1272. return url;
  1273. String notationName = entity.getNotationName();
  1274. if (null != notationName) // then it's unparsed
  1275. {
  1276. // The draft says: "The XSLT processor may use the public
  1277. // identifier to generate a URI for the entity instead of the URI
  1278. // specified in the system identifier. If the XSLT processor does
  1279. // not use the public identifier to generate the URI, it must use
  1280. // the system identifier; if the system identifier is a relative
  1281. // URI, it must be resolved into an absolute URI using the URI of
  1282. // the resource containing the entity declaration as the base
  1283. // URI [RFC2396]."
  1284. // So I'm falling a bit short here.
  1285. url = entity.getSystemId();
  1286. if (null == url)
  1287. {
  1288. url = entity.getPublicId();
  1289. }
  1290. else
  1291. {
  1292. // This should be resolved to an absolute URL, but that's hard
  1293. // to do from here.
  1294. }
  1295. }
  1296. }
  1297. }
  1298. return url;
  1299. }
  1300. /**
  1301. * 5. [specified] A flag indicating whether this attribute was actually
  1302. * specified in the start-tag of its element, or was defaulted from the
  1303. * DTD.
  1304. *
  1305. * @param the attribute handle
  1306. *
  1307. * NEEDSDOC @param attributeHandle
  1308. * @return <code>true</code> if the attribute was specified;
  1309. * <code>false</code> if it was defaulted.
  1310. */
  1311. public boolean isAttributeSpecified(int attributeHandle)
  1312. {
  1313. int type = getNodeType(attributeHandle);
  1314. if (DTM.ATTRIBUTE_NODE == type)
  1315. {
  1316. Attr attr = (Attr)getNode(attributeHandle);
  1317. return attr.getSpecified();
  1318. }
  1319. return false;
  1320. }
  1321. /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
  1322. * we're wrapped around an existing DOM.
  1323. *
  1324. * @param source The IncrementalSAXSource that we want to recieve events from
  1325. * on demand.
  1326. */
  1327. public void setIncrementalSAXSource(IncrementalSAXSource source)
  1328. {
  1329. }
  1330. /** getContentHandler returns "our SAX builder" -- the thing that
  1331. * someone else should send SAX events to in order to extend this
  1332. * DTM model.
  1333. *
  1334. * @return null if this model doesn't respond to SAX events,
  1335. * "this" if the DTM object has a built-in SAX ContentHandler,
  1336. * the IncrmentalSAXSource if we're bound to one and should receive
  1337. * the SAX stream via it for incremental build purposes...
  1338. * */
  1339. public org.xml.sax.ContentHandler getContentHandler()
  1340. {
  1341. return null;
  1342. }
  1343. /**
  1344. * Return this DTM's lexical handler.
  1345. *
  1346. * %REVIEW% Should this return null if constrution already done/begun?
  1347. *
  1348. * @return null if this model doesn't respond to lexical SAX events,
  1349. * "this" if the DTM object has a built-in SAX ContentHandler,
  1350. * the IncrementalSAXSource if we're bound to one and should receive
  1351. * the SAX stream via it for incremental build purposes...
  1352. */
  1353. public org.xml.sax.ext.LexicalHandler getLexicalHandler()
  1354. {
  1355. return null;
  1356. }
  1357. /**
  1358. * Return this DTM's EntityResolver.
  1359. *
  1360. * @return null if this model doesn't respond to SAX entity ref events.
  1361. */
  1362. public org.xml.sax.EntityResolver getEntityResolver()
  1363. {
  1364. return null;
  1365. }
  1366. /**
  1367. * Return this DTM's DTDHandler.
  1368. *
  1369. * @return null if this model doesn't respond to SAX dtd events.
  1370. */
  1371. public org.xml.sax.DTDHandler getDTDHandler()
  1372. {
  1373. return null;
  1374. }
  1375. /**
  1376. * Return this DTM's ErrorHandler.
  1377. *
  1378. * @return null if this model doesn't respond to SAX error events.
  1379. */
  1380. public org.xml.sax.ErrorHandler getErrorHandler()
  1381. {
  1382. return null;
  1383. }
  1384. /**
  1385. * Return this DTM's DeclHandler.
  1386. *
  1387. * @return null if this model doesn't respond to SAX Decl events.
  1388. */
  1389. public org.xml.sax.ext.DeclHandler getDeclHandler()
  1390. {
  1391. return null;
  1392. }
  1393. /** @return true iff we're building this model incrementally (eg
  1394. * we're partnered with a IncrementalSAXSource) and thus require that the
  1395. * transformation and the parse run simultaneously. Guidance to the
  1396. * DTMManager.
  1397. * */
  1398. public boolean needsTwoThreads()
  1399. {
  1400. return false;
  1401. }
  1402. // ========== Direct SAX Dispatch, for optimization purposes ========
  1403. /**
  1404. * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
  1405. * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
  1406. * the definition of <CODE>S</CODE></A> for details.
  1407. * @param ch Character to check as XML whitespace.
  1408. * @return =true if <var>ch</var> is XML whitespace; otherwise =false.
  1409. */
  1410. private static boolean isSpace(char ch)
  1411. {
  1412. return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now.
  1413. }
  1414. /**
  1415. * Directly call the
  1416. * characters method on the passed ContentHandler for the
  1417. * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
  1418. * for the definition of a node's string-value). Multiple calls to the
  1419. * ContentHandler's characters methods may well occur for a single call to
  1420. * this method.
  1421. *
  1422. * @param nodeHandle The node ID.
  1423. * @param ch A non-null reference to a ContentHandler.
  1424. *
  1425. * @throws org.xml.sax.SAXException
  1426. */
  1427. public void dispatchCharactersEvents(
  1428. int nodeHandle, org.xml.sax.ContentHandler ch,
  1429. boolean normalize)
  1430. throws org.xml.sax.SAXException
  1431. {
  1432. if(normalize)
  1433. {
  1434. XMLString str = getStringValue(nodeHandle);
  1435. str = str.fixWhiteSpace(true, true, false);
  1436. str.dispatchCharactersEvents(ch);
  1437. }
  1438. else
  1439. {
  1440. int type = getNodeType(nodeHandle);
  1441. Node node = getNode(nodeHandle);
  1442. dispatchNodeData(node, ch, 0);
  1443. // Text coalition -- a DTM text node may represent multiple
  1444. // DOM nodes.
  1445. if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
  1446. {
  1447. while( null != (node=logicalNextDOMTextNode(node)) )
  1448. {
  1449. dispatchNodeData(node, ch, 0);
  1450. }
  1451. }
  1452. }
  1453. }
  1454. /**
  1455. * Retrieve the text content of a DOM subtree, appending it into a
  1456. * user-supplied FastStringBuffer object. Note that attributes are
  1457. * not considered part of the content of an element.
  1458. * <p>
  1459. * There are open questions regarding whitespace stripping.
  1460. * Currently we make no special effort in that regard, since the standard
  1461. * DOM doesn't yet provide DTD-based information to distinguish
  1462. * whitespace-in-element-context from genuine #PCDATA. Note that we
  1463. * should probably also consider xml:space if/when we address this.
  1464. * DOM Level 3 may solve the problem for us.
  1465. * <p>
  1466. * %REVIEW% Note that as a DOM-level operation, it can be argued that this
  1467. * routine _shouldn't_ perform any processing beyond what the DOM already
  1468. * does, and that whitespace stripping and so on belong at the DTM level.
  1469. * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
  1470. *
  1471. * @param node Node whose subtree is to be walked, gathering the
  1472. * contents of all Text or CDATASection nodes.
  1473. * @param buf FastStringBuffer into which the contents of the text
  1474. * nodes are to be concatenated.
  1475. */
  1476. protected static void dispatchNodeData(Node node,
  1477. org.xml.sax.ContentHandler ch,
  1478. int depth)
  1479. throws org.xml.sax.SAXException
  1480. {
  1481. switch (node.getNodeType())
  1482. {
  1483. case Node.DOCUMENT_FRAGMENT_NODE :
  1484. case Node.DOCUMENT_NODE :
  1485. case Node.ELEMENT_NODE :
  1486. {
  1487. for (Node child = node.getFirstChild(); null != child;
  1488. child = child.getNextSibling())
  1489. {
  1490. dispatchNodeData(child, ch, depth+1);
  1491. }
  1492. }
  1493. break;
  1494. case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
  1495. case Node.COMMENT_NODE :
  1496. if(0 != depth)
  1497. break;
  1498. // NOTE: Because this operation works in the DOM space, it does _not_ attempt
  1499. // to perform Text Coalition. That should only be done in DTM space.
  1500. case Node.TEXT_NODE :
  1501. case Node.CDATA_SECTION_NODE :
  1502. case Node.ATTRIBUTE_NODE :
  1503. String str = node.getNodeValue();
  1504. if(ch instanceof CharacterNodeHandler)
  1505. {
  1506. ((CharacterNodeHandler)ch).characters(node);
  1507. }
  1508. else
  1509. {
  1510. ch.characters(str.toCharArray(), 0, str.length());
  1511. }
  1512. break;
  1513. // /* case Node.PROCESSING_INSTRUCTION_NODE :
  1514. // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
  1515. // break; */
  1516. default :
  1517. // ignore
  1518. break;
  1519. }
  1520. }
  1521. TreeWalker m_walker = new TreeWalker(null);
  1522. /**
  1523. * Directly create SAX parser events from a subtree.
  1524. *
  1525. * @param nodeHandle The node ID.
  1526. * @param ch A non-null reference to a ContentHandler.
  1527. *
  1528. * @throws org.xml.sax.SAXException
  1529. */
  1530. public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
  1531. throws org.xml.sax.SAXException
  1532. {
  1533. TreeWalker treeWalker = m_walker;
  1534. ContentHandler prevCH = treeWalker.getContentHandler();
  1535. if(null != prevCH)
  1536. {
  1537. treeWalker = new TreeWalker(null);
  1538. }
  1539. treeWalker.setContentHandler(ch);
  1540. try
  1541. {
  1542. Node node = getNode(nodeHandle);
  1543. treeWalker.traverse(node);
  1544. }
  1545. finally
  1546. {
  1547. treeWalker.setContentHandler(null);
  1548. }
  1549. }
  1550. public interface CharacterNodeHandler
  1551. {
  1552. public void characters(Node node)
  1553. throws org.xml.sax.SAXException;
  1554. }
  1555. /**
  1556. * For the moment all the run time properties are ignored by this
  1557. * class.
  1558. *
  1559. * @param property a <code>String</code> value
  1560. * @param value an <code>Object</code> value
  1561. */
  1562. public void setProperty(String property, Object value)
  1563. {
  1564. }
  1565. /**
  1566. * No source information is available for DOM2DTM, so return
  1567. * <code>null</code> here.
  1568. *
  1569. * @param node an <code>int</code> value
  1570. * @return null
  1571. */
  1572. public SourceLocator getSourceLocatorFor(int node)
  1573. {
  1574. return null;
  1575. }
  1576. }