1. /*
  2. * Copyright 1999-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: DOM2DTM.java,v 1.34 2004/02/16 23:06:53 minchau Exp $
  18. */
  19. package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;
  20. import java.util.Vector;
  21. import javax.xml.transform.SourceLocator;
  22. import javax.xml.transform.dom.DOMSource;
  23. import com.sun.org.apache.xml.internal.dtm.DTM;
  24. import com.sun.org.apache.xml.internal.dtm.DTMManager;
  25. import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  26. import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;
  27. import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
  28. import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;
  29. import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;
  30. import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  31. import com.sun.org.apache.xml.internal.res.XMLMessages;
  32. import com.sun.org.apache.xml.internal.utils.FastStringBuffer;
  33. import com.sun.org.apache.xml.internal.utils.QName;
  34. import com.sun.org.apache.xml.internal.utils.StringBufferPool;
  35. import com.sun.org.apache.xml.internal.utils.TreeWalker;
  36. import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;
  37. import com.sun.org.apache.xml.internal.utils.XMLString;
  38. import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
  39. import org.w3c.dom.Attr;
  40. import org.w3c.dom.Document;
  41. import org.w3c.dom.DocumentType;
  42. import org.w3c.dom.Element;
  43. import org.w3c.dom.Entity;
  44. import org.w3c.dom.NamedNodeMap;
  45. import org.w3c.dom.Node;
  46. import org.xml.sax.ContentHandler;
/** The <code>DOM2DTM</code> class serves up a DOM's contents via the
 * DTM API.
 *
 * Note that it doesn't necessarily represent a full Document
 * tree. You can wrap a DOM2DTM around a specific node and its subtree
 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
 * considering.)
 *
 * Note too that we do not currently attempt to track document
 * mutation. If you alter the DOM after wrapping a DOM2DTM around it,
 * all bets are off.
 * */
  60. public class DOM2DTM extends DTMDefaultBaseIterators
  61. {
  /** Debug switch: when true, nextNode() prints diagnostic traces to
   * System.out. */
  static final boolean JJK_DEBUG=false;
  /** Build-time switch. NOTE(review): no use of it is visible in the
   * portion of the file reviewed here -- confirm before removing. */
  static final boolean JJK_NEWCODE=true;
  /** Manifest constant: the namespace URI that nextNode() passes along when
   * it synthesizes the declaration node for the implied "xml" prefix.
   */
  static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
  /** The current position in the DOM tree. Last node examined for
   * possible copying to DTM. */
  transient private Node m_pos;
  /** The current position in the DTM tree: the identity of the node that
   * newly added children get appended to. */
  private int m_last_parent=0;
  /** The current position in the DTM tree: the identity that the next node
   * added at this level will record as its previous sibling, or NULL if
   * nothing has been added at this level yet. */
  private int m_last_kid=NULL;
  /** The top of the subtree.
   * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
   * */
  transient private Node m_root;
  /** Controls synthesis of the implied xml: namespace declaration node.
   * nextNode() sets it to true either when it sees an explicit
   * "xmlns:xml" attribute or right after it has synthesized the
   * declaration on an element. */
  boolean m_processedFirstElement=false;
  /** true if ALL the nodes in the m_root subtree have been processed;
   * false if our incremental build has not yet finished scanning the
   * DOM tree. */
  transient private boolean m_nodesAreProcessed;
  /** The node objects. The instance part of the handle indexes
   * directly into this vector. Each DTM node may actually be
   * composed of several DOM nodes (for example, if logically-adjacent
   * Text/CDATASection nodes in the DOM have been coalesced into a
   * single DTM Text node); this table points only to the first in
   * that sequence. */
  protected Vector m_nodes = new Vector();
  93. /**
  94. * Construct a DOM2DTM object from a DOM node.
  95. *
  96. * @param mgr The DTMManager who owns this DTM.
  97. * @param domSource the DOM source that this DTM will wrap.
  98. * @param dtmIdentity The DTM identity ID for this DTM.
  99. * @param whiteSpaceFilter The white space filter for this DTM, which may
  100. * be null.
  101. * @param xstringfactory XMLString factory for creating character content.
  102. * @param doIndexing true if the caller considers it worth it to use
  103. * indexing schemes.
  104. */
  105. public DOM2DTM(DTMManager mgr, DOMSource domSource,
  106. int dtmIdentity, DTMWSFilter whiteSpaceFilter,
  107. XMLStringFactory xstringfactory,
  108. boolean doIndexing)
  109. {
  110. super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
  111. xstringfactory, doIndexing);
  112. // Initialize DOM navigation
  113. m_pos=m_root = domSource.getNode();
  114. // Initialize DTM navigation
  115. m_last_parent=m_last_kid=NULL;
  116. m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
  117. // Apparently the domSource root may not actually be the
  118. // Document node. If it's an Element node, we need to immediately
  119. // add its attributes. Adapted from nextNode().
  120. // %REVIEW% Move this logic into addNode and recurse? Cleaner!
  121. //
  122. // (If it's an EntityReference node, we're probably scrod. For now
  123. // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
  124. //
  125. // %ISSUE% What about inherited namespaces in this case?
  126. // Do we need to special-case initialize them into the DTM model?
  127. if(ELEMENT_NODE == m_root.getNodeType())
  128. {
  129. NamedNodeMap attrs=m_root.getAttributes();
  130. int attrsize=(attrs==null) ? 0 : attrs.getLength();
  131. if(attrsize>0)
  132. {
  133. int attrIndex=NULL; // start with no previous sib
  134. for(int i=0;i<attrsize;++i)
  135. {
  136. // No need to force nodetype in this case;
  137. // addNode() will take care of switching it from
  138. // Attr to Namespace if necessary.
  139. attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
  140. m_firstch.setElementAt(DTM.NULL,attrIndex);
  141. }
  142. // Terminate list of attrs, and make sure they aren't
  143. // considered children of the element
  144. m_nextsib.setElementAt(DTM.NULL,attrIndex);
  145. // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
  146. } // if attrs exist
  147. } //if(ELEMENT_NODE)
  148. // Initialize DTM-completed status
  149. m_nodesAreProcessed = false;
  150. }
  151. /**
  152. * Construct the node map from the node.
  153. *
  154. * @param node The node that is to be added to the DTM.
  155. * @param parentIndex The current parent index.
  156. * @param previousSibling The previous sibling index.
  157. * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
  158. * Used to force nodes to Text rather than CDATASection when their
  159. * coalesced value includes ordinary Text nodes (current DTM behavior).
  160. *
  161. * @return The index identity of the node that was added.
  162. */
  163. protected int addNode(Node node, int parentIndex,
  164. int previousSibling, int forceNodeType)
  165. {
  166. int nodeIndex = m_nodes.size();
  167. // Have we overflowed a DTM Identity's addressing range?
  168. if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
  169. {
  170. try
  171. {
  172. if(m_mgr==null)
  173. throw new ClassCastException();
  174. // Handle as Extended Addressing
  175. DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
  176. int id=mgrD.getFirstFreeDTMID();
  177. mgrD.addDTM(this,id,nodeIndex);
  178. m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
  179. }
  180. catch(ClassCastException e)
  181. {
  182. // %REVIEW% Wrong error message, but I've been told we're trying
  183. // not to add messages right not for I18N reasons.
  184. // %REVIEW% Should this be a Fatal Error?
  185. error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
  186. }
  187. }
  188. m_size++;
  189. // ensureSize(nodeIndex);
  190. int type;
  191. if(NULL==forceNodeType)
  192. type = node.getNodeType();
  193. else
  194. type=forceNodeType;
  195. // %REVIEW% The Namespace Spec currently says that Namespaces are
  196. // processed in a non-namespace-aware manner, by matching the
  197. // QName, even though there is in fact a namespace assigned to
  198. // these nodes in the DOM. If and when that changes, we will have
  199. // to consider whether we check the namespace-for-namespaces
  200. // rather than the node name.
  201. //
  202. // %TBD% Note that the DOM does not necessarily explicitly declare
  203. // all the namespaces it uses. DOM Level 3 will introduce a
  204. // namespace-normalization operation which reconciles that, and we
  205. // can request that users invoke it or otherwise ensure that the
  206. // tree is namespace-well-formed before passing the DOM to Xalan.
  207. // But if they don't, what should we do about it? We probably
  208. // don't want to alter the source DOM (and may not be able to do
  209. // so if it's read-only). The best available answer might be to
  210. // synthesize additional DTM Namespace Nodes that don't correspond
  211. // to DOM Attr Nodes.
  212. if (Node.ATTRIBUTE_NODE == type)
  213. {
  214. String name = node.getNodeName();
  215. if (name.startsWith("xmlns:") || name.equals("xmlns"))
  216. {
  217. type = DTM.NAMESPACE_NODE;
  218. }
  219. }
  220. m_nodes.addElement(node);
  221. m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
  222. m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
  223. m_prevsib.setElementAt(previousSibling,nodeIndex);
  224. m_parent.setElementAt(parentIndex,nodeIndex);
  225. if(DTM.NULL != parentIndex &&
  226. type != DTM.ATTRIBUTE_NODE &&
  227. type != DTM.NAMESPACE_NODE)
  228. {
  229. // If the DTM parent had no children, this becomes its first child.
  230. if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
  231. m_firstch.setElementAt(nodeIndex,parentIndex);
  232. }
  233. String nsURI = node.getNamespaceURI();
  234. // Deal with the difference between Namespace spec and XSLT
  235. // definitions of local name. (The former says PIs don't have
  236. // localnames; the latter says they do.)
  237. String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ?
  238. node.getNodeName() :
  239. node.getLocalName();
  240. // Hack to make DOM1 sort of work...
  241. if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
  242. && null == localName)
  243. localName = node.getNodeName(); // -sb
  244. ExpandedNameTable exnt = m_expandedNameTable;
  245. // %TBD% Nodes created with the old non-namespace-aware DOM
  246. // calls createElement() and createAttribute() will never have a
  247. // localname. That will cause their expandedNameID to be just the
  248. // nodeType... which will keep them from being matched
  249. // successfully by name. Since the DOM makes no promise that
  250. // those will participate in namespace processing, this is
  251. // officially accepted as Not Our Fault. But it might be nice to
  252. // issue a diagnostic message!
  253. if(node.getLocalName()==null &&
  254. (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
  255. {
  256. // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
  257. }
  258. int expandedNameID = (null != localName)
  259. ? exnt.getExpandedTypeID(nsURI, localName, type) :
  260. exnt.getExpandedTypeID(type);
  261. m_exptype.setElementAt(expandedNameID,nodeIndex);
  262. indexNode(expandedNameID, nodeIndex);
  263. if (DTM.NULL != previousSibling)
  264. m_nextsib.setElementAt(nodeIndex,previousSibling);
  265. // This should be done after m_exptype has been set, and probably should
  266. // always be the last thing we do
  267. if (type == DTM.NAMESPACE_NODE)
  268. declareNamespaceInContext(parentIndex,nodeIndex);
  269. return nodeIndex;
  270. }
  271. /**
  272. * Get the number of nodes that have been added.
  273. */
  274. public int getNumberOfNodes()
  275. {
  276. return m_nodes.size();
  277. }
  278. /**
  279. * This method iterates to the next node that will be added to the table.
  280. * Each call to this method adds a new node to the table, unless the end
  281. * is reached, in which case it returns null.
  282. *
  283. * @return The true if a next node is found or false if
  284. * there are no more nodes.
  285. */
  286. protected boolean nextNode()
  287. {
  288. // Non-recursive one-fetch-at-a-time depth-first traversal with
  289. // attribute/namespace nodes and white-space stripping.
  290. // Navigating the DOM is simple, navigating the DTM is simple;
  291. // keeping track of both at once is a trifle baroque but at least
  292. // we've avoided most of the special cases.
  293. if (m_nodesAreProcessed)
  294. return false;
  295. // %REVIEW% Is this local copy Really Useful from a performance
  296. // point of view? Or is this a false microoptimization?
  297. Node pos=m_pos;
  298. Node next=null;
  299. int nexttype=NULL;
  300. // Navigate DOM tree
  301. do
  302. {
  303. // Look down to first child.
  304. if (pos.hasChildNodes())
  305. {
  306. next = pos.getFirstChild();
  307. // %REVIEW% There's probably a more elegant way to skip
  308. // the doctype. (Just let it go and Suppress it?
  309. if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
  310. next=next.getNextSibling();
  311. // Push DTM context -- except for children of Entity References,
  312. // which have no DTM equivalent and cause no DTM navigation.
  313. if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
  314. {
  315. m_last_parent=m_last_kid;
  316. m_last_kid=NULL;
  317. // Whitespace-handler context stacking
  318. if(null != m_wsfilter)
  319. {
  320. short wsv =
  321. m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
  322. boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
  323. ? getShouldStripWhitespace()
  324. : (DTMWSFilter.STRIP == wsv);
  325. pushShouldStripWhitespace(shouldStrip);
  326. } // if(m_wsfilter)
  327. }
  328. }
  329. // If that fails, look up and right (but not past root!)
  330. else
  331. {
  332. if(m_last_kid!=NULL)
  333. {
  334. // Last node posted at this level had no more children
  335. // If it has _no_ children, we need to record that.
  336. if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
  337. m_firstch.setElementAt(NULL,m_last_kid);
  338. }
  339. while(m_last_parent != NULL)
  340. {
  341. // %REVIEW% There's probably a more elegant way to
  342. // skip the doctype. (Just let it go and Suppress it?
  343. next = pos.getNextSibling();
  344. if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
  345. next=next.getNextSibling();
  346. if(next!=null)
  347. break; // Found it!
  348. // No next-sibling found. Pop the DOM.
  349. pos=pos.getParentNode();
  350. if(pos==null)
  351. {
  352. // %TBD% Should never arise, but I want to be sure of that...
  353. if(JJK_DEBUG)
  354. {
  355. System.out.println("***** DOM2DTM Pop Control Flow problem");
  356. for(;;); // Freeze right here!
  357. }
  358. }
  359. // The only parents in the DTM are Elements. However,
  360. // the DOM could contain EntityReferences. If we
  361. // encounter one, pop it _without_ popping DTM.
  362. if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
  363. {
  364. // Nothing needs doing
  365. if(JJK_DEBUG)
  366. System.out.println("***** DOM2DTM popping EntRef");
  367. }
  368. else
  369. {
  370. popShouldStripWhitespace();
  371. // Fix and pop DTM
  372. if(m_last_kid==NULL)
  373. m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
  374. else
  375. m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
  376. m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
  377. }
  378. }
  379. if(m_last_parent==NULL)
  380. next=null;
  381. }
  382. if(next!=null)
  383. nexttype=next.getNodeType();
  384. // If it's an entity ref, advance past it.
  385. //
  386. // %REVIEW% Should we let this out the door and just suppress it?
  387. // More work, but simpler code, more likely to be correct, and
  388. // it doesn't happen very often. We'd get rid of the loop too.
  389. if (ENTITY_REFERENCE_NODE == nexttype)
  390. pos=next;
  391. }
  392. while (ENTITY_REFERENCE_NODE == nexttype);
  393. // Did we run out of the tree?
  394. if(next==null)
  395. {
  396. m_nextsib.setElementAt(NULL,0);
  397. m_nodesAreProcessed = true;
  398. m_pos=null;
  399. if(JJK_DEBUG)
  400. {
  401. System.out.println("***** DOM2DTM Crosscheck:");
  402. for(int i=0;i<m_nodes.size();++i)
  403. System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
  404. }
  405. return false;
  406. }
  407. // Text needs some special handling:
  408. //
  409. // DTM may skip whitespace. This is handled by the suppressNode flag, which
  410. // when true will keep the DTM node from being created.
  411. //
  412. // DTM only directly records the first DOM node of any logically-contiguous
  413. // sequence. The lastTextNode value will be set to the last node in the
  414. // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
  415. // advance next over this whole block. Should be simpler than special-casing
  416. // the above loop for "Was the logically-preceeding sibling a text node".
  417. //
  418. // Finally, a DTM node should be considered a CDATASection only if all the
  419. // contiguous text it covers is CDATASections. The first Text should
  420. // force DTM to Text.
  421. boolean suppressNode=false;
  422. Node lastTextNode=null;
  423. nexttype=next.getNodeType();
  424. // nexttype=pos.getNodeType();
  425. if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
  426. {
  427. // If filtering, initially assume we're going to suppress the node
  428. suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
  429. // Scan logically contiguous text (siblings, plus "flattening"
  430. // of entity reference boundaries).
  431. Node n=next;
  432. while(n!=null)
  433. {
  434. lastTextNode=n;
  435. // Any Text node means DTM considers it all Text
  436. if(TEXT_NODE == n.getNodeType())
  437. nexttype=TEXT_NODE;
  438. // Any non-whitespace in this sequence blocks whitespace
  439. // suppression
  440. suppressNode &=
  441. XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
  442. n=logicalNextDOMTextNode(n);
  443. }
  444. }
  445. // Special handling for PIs: Some DOMs represent the XML
  446. // Declaration as a PI. This is officially incorrect, per the DOM
  447. // spec, but is considered a "wrong but tolerable" temporary
  448. // workaround pending proper handling of these fields in DOM Level
  449. // 3. We want to recognize and reject that case.
  450. else if(PROCESSING_INSTRUCTION_NODE==nexttype)
  451. {
  452. suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
  453. }
  454. if(!suppressNode)
  455. {
  456. // Inserting next. NOTE that we force the node type; for
  457. // coalesced Text, this records CDATASections adjacent to
  458. // ordinary Text as Text.
  459. int nextindex=addNode(next,m_last_parent,m_last_kid,
  460. nexttype);
  461. m_last_kid=nextindex;
  462. if(ELEMENT_NODE == nexttype)
  463. {
  464. int attrIndex=NULL; // start with no previous sib
  465. // Process attributes _now_, rather than waiting.
  466. // Simpler control flow, makes NS cache available immediately.
  467. NamedNodeMap attrs=next.getAttributes();
  468. int attrsize=(attrs==null) ? 0 : attrs.getLength();
  469. if(attrsize>0)
  470. {
  471. for(int i=0;i<attrsize;++i)
  472. {
  473. // No need to force nodetype in this case;
  474. // addNode() will take care of switching it from
  475. // Attr to Namespace if necessary.
  476. attrIndex=addNode(attrs.item(i),
  477. nextindex,attrIndex,NULL);
  478. m_firstch.setElementAt(DTM.NULL,attrIndex);
  479. // If the xml: prefix is explicitly declared
  480. // we don't need to synthesize one.
  481. //
  482. // NOTE that XML Namespaces were not originally
  483. // defined as being namespace-aware (grrr), and
  484. // while the W3C is planning to fix this it's
  485. // safer for now to test the QName and trust the
  486. // parsers to prevent anyone from redefining the
  487. // reserved xmlns: prefix
  488. if(!m_processedFirstElement
  489. && "xmlns:xml".equals(attrs.item(i).getNodeName()))
  490. m_processedFirstElement=true;
  491. }
  492. // Terminate list of attrs, and make sure they aren't
  493. // considered children of the element
  494. } // if attrs exist
  495. if(!m_processedFirstElement)
  496. {
  497. // The DOM might not have an explicit declaration for the
  498. // implicit "xml:" prefix, but the XPath data model
  499. // requires that this appear as a Namespace Node so we
  500. // have to synthesize one. You can think of this as
  501. // being a default attribute defined by the XML
  502. // Namespaces spec rather than by the DTD.
  503. attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
  504. (Element)next,"xml",NAMESPACE_DECL_NS,
  505. makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
  506. ),
  507. nextindex,attrIndex,NULL);
  508. m_firstch.setElementAt(DTM.NULL,attrIndex);
  509. m_processedFirstElement=true;
  510. }
  511. if(attrIndex!=NULL)
  512. m_nextsib.setElementAt(DTM.NULL,attrIndex);
  513. } //if(ELEMENT_NODE)
  514. } // (if !suppressNode)
  515. // Text postprocessing: Act on values stored above
  516. if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
  517. {
  518. // %TBD% If nexttype was forced to TEXT, patch the DTM node
  519. next=lastTextNode; // Advance the DOM cursor over contiguous text
  520. }
  521. // Remember where we left off.
  522. m_pos=next;
  523. return true;
  524. }
  525. /**
  526. * Return an DOM node for the given node.
  527. *
  528. * @param nodeHandle The node ID.
  529. *
  530. * @return A node representation of the DTM node.
  531. */
  532. public Node getNode(int nodeHandle)
  533. {
  534. int identity = makeNodeIdentity(nodeHandle);
  535. return (Node) m_nodes.elementAt(identity);
  536. }
  537. /**
  538. * Get a Node from an identity index.
  539. *
  540. * NEEDSDOC @param nodeIdentity
  541. *
  542. * NEEDSDOC ($objectName$) @return
  543. */
  544. protected Node lookupNode(int nodeIdentity)
  545. {
  546. return (Node) m_nodes.elementAt(nodeIdentity);
  547. }
  548. /**
  549. * Get the next node identity value in the list, and call the iterator
  550. * if it hasn't been added yet.
  551. *
  552. * @param identity The node identity (index).
  553. * @return identity+1, or DTM.NULL.
  554. */
  555. protected int getNextNodeIdentity(int identity)
  556. {
  557. identity += 1;
  558. if (identity >= m_nodes.size())
  559. {
  560. if (!nextNode())
  561. identity = DTM.NULL;
  562. }
  563. return identity;
  564. }
  565. /**
  566. * Get the handle from a Node.
  567. * <p>%OPT% This will be pretty slow.</p>
  568. *
  569. * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
  570. * walk down DTM reconstructing path) might be considerably faster
  571. * on later nodes in large documents. That might also imply improving
  572. * this call to handle nodes which would be in this DTM but
  573. * have not yet been built, which might or might not be a Good Thing.</p>
  574. *
  575. * %REVIEW% This relies on being able to test node-identity via
  576. * object-identity. DTM2DOM proxying is a great example of a case where
  577. * that doesn't work. DOM Level 3 will provide the isSameNode() method
  578. * to fix that, but until then this is going to be flaky.
  579. *
  580. * @param node A node, which may be null.
  581. *
  582. * @return The node handle or <code>DTM.NULL</code>.
  583. */
  584. private int getHandleFromNode(Node node)
  585. {
  586. if (null != node)
  587. {
  588. int len = m_nodes.size();
  589. boolean isMore;
  590. int i = 0;
  591. do
  592. {
  593. for (; i < len; i++)
  594. {
  595. if (m_nodes.elementAt(i) == node)
  596. return makeNodeHandle(i);
  597. }
  598. isMore = nextNode();
  599. len = m_nodes.size();
  600. }
  601. while(isMore || i < len);
  602. }
  603. return DTM.NULL;
  604. }
  605. /** Get the handle from a Node. This is a more robust version of
  606. * getHandleFromNode, intended to be usable by the public.
  607. *
  608. * <p>%OPT% This will be pretty slow.</p>
  609. *
  610. * %REVIEW% This relies on being able to test node-identity via
  611. * object-identity. DTM2DOM proxying is a great example of a case where
  612. * that doesn't work. DOM Level 3 will provide the isSameNode() method
  613. * to fix that, but until then this is going to be flaky.
  614. *
  615. * @param node A node, which may be null.
  616. *
  617. * @return The node handle or <code>DTM.NULL</code>. */
  618. public int getHandleOfNode(Node node)
  619. {
  620. if (null != node)
  621. {
  622. // Is Node actually within the same document? If not, don't search!
  623. // This would be easier if m_root was always the Document node, but
  624. // we decided to allow wrapping a DTM around a subtree.
  625. if((m_root==node) ||
  626. (m_root.getNodeType()==DOCUMENT_NODE &&
  627. m_root==node.getOwnerDocument()) ||
  628. (m_root.getNodeType()!=DOCUMENT_NODE &&
  629. m_root.getOwnerDocument()==node.getOwnerDocument())
  630. )
  631. {
  632. // If node _is_ in m_root's tree, find its handle
  633. //
  634. // %OPT% This check may be improved significantly when DOM
  635. // Level 3 nodeKey and relative-order tests become
  636. // available!
  637. for(Node cursor=node;
  638. cursor!=null;
  639. cursor=
  640. (cursor.getNodeType()!=ATTRIBUTE_NODE)
  641. ? cursor.getParentNode()
  642. : ((org.w3c.dom.Attr)cursor).getOwnerElement())
  643. {
  644. if(cursor==m_root)
  645. // We know this node; find its handle.
  646. return getHandleFromNode(node);
  647. } // for ancestors of node
  648. } // if node and m_root in same Document
  649. } // if node!=null
  650. return DTM.NULL;
  651. }
  652. /**
  653. * Retrieves an attribute node by by qualified name and namespace URI.
  654. *
  655. * @param nodeHandle int Handle of the node upon which to look up this attribute..
  656. * @param namespaceURI The namespace URI of the attribute to
  657. * retrieve, or null.
  658. * @param name The local name of the attribute to
  659. * retrieve.
  660. * @return The attribute node handle with the specified name (
  661. * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
  662. * attribute.
  663. */
  664. public int getAttributeNode(int nodeHandle, String namespaceURI,
  665. String name)
  666. {
  667. // %OPT% This is probably slower than it needs to be.
  668. if (null == namespaceURI)
  669. namespaceURI = "";
  670. int type = getNodeType(nodeHandle);
  671. if (DTM.ELEMENT_NODE == type)
  672. {
  673. // Assume that attributes immediately follow the element.
  674. int identity = makeNodeIdentity(nodeHandle);
  675. while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
  676. {
  677. // Assume this can not be null.
  678. type = _type(identity);
  679. // %REVIEW%
  680. // Should namespace nodes be retrievable DOM-style as attrs?
  681. // If not we need a separate function... which may be desirable
  682. // architecturally, but which is ugly from a code point of view.
  683. // (If we REALLY insist on it, this code should become a subroutine
  684. // of both -- retrieve the node, then test if the type matches
  685. // what you're looking for.)
  686. if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
  687. {
  688. Node node = lookupNode(identity);
  689. String nodeuri = node.getNamespaceURI();
  690. if (null == nodeuri)
  691. nodeuri = "";
  692. String nodelocalname = node.getLocalName();
  693. if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
  694. return makeNodeHandle(identity);
  695. }
  696. else // if (DTM.NAMESPACE_NODE != type)
  697. {
  698. break;
  699. }
  700. }
  701. }
  702. return DTM.NULL;
  703. }
  704. /**
  705. * Get the string-value of a node as a String object
  706. * (see http://www.w3.org/TR/xpath#data-model
  707. * for the definition of a node's string-value).
  708. *
  709. * @param nodeHandle The node ID.
  710. *
  711. * @return A string object that represents the string-value of the given node.
  712. */
  713. public XMLString getStringValue(int nodeHandle)
  714. {
  715. int type = getNodeType(nodeHandle);
  716. Node node = getNode(nodeHandle);
  717. // %TBD% If an element only has one text node, we should just use it
  718. // directly.
  719. if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
  720. || DTM.DOCUMENT_FRAGMENT_NODE == type)
  721. {
  722. FastStringBuffer buf = StringBufferPool.get();
  723. String s;
  724. try
  725. {
  726. getNodeData(node, buf);
  727. s = (buf.length() > 0) ? buf.toString() : "";
  728. }
  729. finally
  730. {
  731. StringBufferPool.free(buf);
  732. }
  733. return m_xstrf.newstr( s );
  734. }
  735. else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
  736. {
  737. // If this is a DTM text node, it may be made of multiple DOM text
  738. // nodes -- including navigating into Entity References. DOM2DTM
  739. // records the first node in the sequence and requires that we
  740. // pick up the others when we retrieve the DTM node's value.
  741. //
  742. // %REVIEW% DOM Level 3 is expected to add a "whole text"
  743. // retrieval method which performs this function for us.
  744. FastStringBuffer buf = StringBufferPool.get();
  745. while(node!=null)
  746. {
  747. buf.append(node.getNodeValue());
  748. node=logicalNextDOMTextNode(node);
  749. }
  750. String s=(buf.length() > 0) ? buf.toString() : "";
  751. StringBufferPool.free(buf);
  752. return m_xstrf.newstr( s );
  753. }
  754. else
  755. return m_xstrf.newstr( node.getNodeValue() );
  756. }
  757. /**
  758. * Determine if the string-value of a node is whitespace
  759. *
  760. * @param nodeHandle The node Handle.
  761. *
  762. * @return Return true if the given node is whitespace.
  763. */
  764. public boolean isWhitespace(int nodeHandle)
  765. {
  766. int type = getNodeType(nodeHandle);
  767. Node node = getNode(nodeHandle);
  768. if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
  769. {
  770. // If this is a DTM text node, it may be made of multiple DOM text
  771. // nodes -- including navigating into Entity References. DOM2DTM
  772. // records the first node in the sequence and requires that we
  773. // pick up the others when we retrieve the DTM node's value.
  774. //
  775. // %REVIEW% DOM Level 3 is expected to add a "whole text"
  776. // retrieval method which performs this function for us.
  777. FastStringBuffer buf = StringBufferPool.get();
  778. while(node!=null)
  779. {
  780. buf.append(node.getNodeValue());
  781. node=logicalNextDOMTextNode(node);
  782. }
  783. boolean b = buf.isWhitespace(0, buf.length());
  784. StringBufferPool.free(buf);
  785. return b;
  786. }
  787. return false;
  788. }
  789. /**
  790. * Retrieve the text content of a DOM subtree, appending it into a
  791. * user-supplied FastStringBuffer object. Note that attributes are
  792. * not considered part of the content of an element.
  793. * <p>
  794. * There are open questions regarding whitespace stripping.
  795. * Currently we make no special effort in that regard, since the standard
  796. * DOM doesn't yet provide DTD-based information to distinguish
  797. * whitespace-in-element-context from genuine #PCDATA. Note that we
  798. * should probably also consider xml:space if/when we address this.
  799. * DOM Level 3 may solve the problem for us.
  800. * <p>
  801. * %REVIEW% Actually, since this method operates on the DOM side of the
  802. * fence rather than the DTM side, it SHOULDN'T do
  803. * any special handling. The DOM does what the DOM does; if you want
  804. * DTM-level abstractions, use DTM-level methods.
  805. *
  806. * @param node Node whose subtree is to be walked, gathering the
  807. * contents of all Text or CDATASection nodes.
  808. * @param buf FastStringBuffer into which the contents of the text
  809. * nodes are to be concatenated.
  810. */
  811. protected static void getNodeData(Node node, FastStringBuffer buf)
  812. {
  813. switch (node.getNodeType())
  814. {
  815. case Node.DOCUMENT_FRAGMENT_NODE :
  816. case Node.DOCUMENT_NODE :
  817. case Node.ELEMENT_NODE :
  818. {
  819. for (Node child = node.getFirstChild(); null != child;
  820. child = child.getNextSibling())
  821. {
  822. getNodeData(child, buf);
  823. }
  824. }
  825. break;
  826. case Node.TEXT_NODE :
  827. case Node.CDATA_SECTION_NODE :
  828. case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node
  829. buf.append(node.getNodeValue());
  830. break;
  831. case Node.PROCESSING_INSTRUCTION_NODE :
  832. // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
  833. break;
  834. default :
  835. // ignore
  836. break;
  837. }
  838. }
  839. /**
  840. * Given a node handle, return its DOM-style node name. This will
  841. * include names such as #text or #document.
  842. *
  843. * @param nodeHandle the id of the node.
  844. * @return String Name of this node, which may be an empty string.
  845. * %REVIEW% Document when empty string is possible...
  846. * %REVIEW-COMMENT% It should never be empty, should it?
  847. */
  848. public String getNodeName(int nodeHandle)
  849. {
  850. Node node = getNode(nodeHandle);
  851. // Assume non-null.
  852. return node.getNodeName();
  853. }
  854. /**
  855. * Given a node handle, return the XPath node name. This should be
  856. * the name as described by the XPath data model, NOT the DOM-style
  857. * name.
  858. *
  859. * @param nodeHandle the id of the node.
  860. * @return String Name of this node, which may be an empty string.
  861. */
  862. public String getNodeNameX(int nodeHandle)
  863. {
  864. String name;
  865. short type = getNodeType(nodeHandle);
  866. switch (type)
  867. {
  868. case DTM.NAMESPACE_NODE :
  869. {
  870. Node node = getNode(nodeHandle);
  871. // assume not null.
  872. name = node.getNodeName();
  873. if(name.startsWith("xmlns:"))
  874. {
  875. name = QName.getLocalPart(name);
  876. }
  877. else if(name.equals("xmlns"))
  878. {
  879. name = "";
  880. }
  881. }
  882. break;
  883. case DTM.ATTRIBUTE_NODE :
  884. case DTM.ELEMENT_NODE :
  885. case DTM.ENTITY_REFERENCE_NODE :
  886. case DTM.PROCESSING_INSTRUCTION_NODE :
  887. {
  888. Node node = getNode(nodeHandle);
  889. // assume not null.
  890. name = node.getNodeName();
  891. }
  892. break;
  893. default :
  894. name = "";
  895. }
  896. return name;
  897. }
  898. /**
  899. * Given a node handle, return its XPath-style localname.
  900. * (As defined in Namespaces, this is the portion of the name after any
  901. * colon character).
  902. *
  903. * @param nodeHandle the id of the node.
  904. * @return String Local name of this node.
  905. */
  906. public String getLocalName(int nodeHandle)
  907. {
  908. if(JJK_NEWCODE)
  909. {
  910. int id=makeNodeIdentity(nodeHandle);
  911. if(NULL==id) return null;
  912. Node newnode=(Node)m_nodes.elementAt(id);
  913. String newname=newnode.getLocalName();
  914. if (null == newname)
  915. {
  916. // XSLT treats PIs, and possibly other things, as having QNames.
  917. String qname = newnode.getNodeName();
  918. if('#'==qname.charAt(0))
  919. {
  920. // Match old default for this function
  921. // This conversion may or may not be necessary
  922. newname="";
  923. }
  924. else
  925. {
  926. int index = qname.indexOf(':');
  927. newname = (index < 0) ? qname : qname.substring(index + 1);
  928. }
  929. }
  930. return newname;
  931. }
  932. else
  933. {
  934. String name;
  935. short type = getNodeType(nodeHandle);
  936. switch (type)
  937. {
  938. case DTM.ATTRIBUTE_NODE :
  939. case DTM.ELEMENT_NODE :
  940. case DTM.ENTITY_REFERENCE_NODE :
  941. case DTM.NAMESPACE_NODE :
  942. case DTM.PROCESSING_INSTRUCTION_NODE :
  943. {
  944. Node node = getNode(nodeHandle);
  945. // assume not null.
  946. name = node.getLocalName();
  947. if (null == name)
  948. {
  949. String qname = node.getNodeName();
  950. int index = qname.indexOf(':');
  951. name = (index < 0) ? qname : qname.substring(index + 1);
  952. }
  953. }
  954. break;
  955. default :
  956. name = "";
  957. }
  958. return name;
  959. }
  960. }
  961. /**
  962. * Given a namespace handle, return the prefix that the namespace decl is
  963. * mapping.
  964. * Given a node handle, return the prefix used to map to the namespace.
  965. *
  966. * <p> %REVIEW% Are you sure you want "" for no prefix? </p>
  967. * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p>
  968. *
  969. * @param nodeHandle the id of the node.
  970. * @return String prefix of this node's name, or "" if no explicit
  971. * namespace prefix was given.
  972. */
  973. public String getPrefix(int nodeHandle)
  974. {
  975. String prefix;
  976. short type = getNodeType(nodeHandle);
  977. switch (type)
  978. {
  979. case DTM.NAMESPACE_NODE :
  980. {
  981. Node node = getNode(nodeHandle);
  982. // assume not null.
  983. String qname = node.getNodeName();
  984. int index = qname.indexOf(':');
  985. prefix = (index < 0) ? "" : qname.substring(index + 1);
  986. }
  987. break;
  988. case DTM.ATTRIBUTE_NODE :
  989. case DTM.ELEMENT_NODE :
  990. {
  991. Node node = getNode(nodeHandle);
  992. // assume not null.
  993. String qname = node.getNodeName();
  994. int index = qname.indexOf(':');
  995. prefix = (index < 0) ? "" : qname.substring(0, index);
  996. }
  997. break;
  998. default :
  999. prefix = "";
  1000. }
  1001. return prefix;
  1002. }
  1003. /**
  1004. * Given a node handle, return its DOM-style namespace URI
  1005. * (As defined in Namespaces, this is the declared URI which this node's
  1006. * prefix -- or default in lieu thereof -- was mapped to.)
  1007. *
  1008. * <p>%REVIEW% Null or ""? -sb</p>
  1009. *
  1010. * @param nodeHandle the id of the node.
  1011. * @return String URI value of this node's namespace, or null if no
  1012. * namespace was resolved.
  1013. */
  1014. public String getNamespaceURI(int nodeHandle)
  1015. {
  1016. if(JJK_NEWCODE)
  1017. {
  1018. int id=makeNodeIdentity(nodeHandle);
  1019. if(id==NULL) return null;
  1020. Node node=(Node)m_nodes.elementAt(id);
  1021. return node.getNamespaceURI();
  1022. }
  1023. else
  1024. {
  1025. String nsuri;
  1026. short type = getNodeType(nodeHandle);
  1027. switch (type)
  1028. {
  1029. case DTM.ATTRIBUTE_NODE :
  1030. case DTM.ELEMENT_NODE :
  1031. case DTM.ENTITY_REFERENCE_NODE :
  1032. case DTM.NAMESPACE_NODE :
  1033. case DTM.PROCESSING_INSTRUCTION_NODE :
  1034. {
  1035. Node node = getNode(nodeHandle);
  1036. // assume not null.
  1037. nsuri = node.getNamespaceURI();
  1038. // %TBD% Handle DOM1?
  1039. }
  1040. break;
  1041. default :
  1042. nsuri = null;
  1043. }
  1044. return nsuri;
  1045. }
  1046. }
  1047. /** Utility function: Given a DOM Text node, determine whether it is
  1048. * logically followed by another Text or CDATASection node. This may
  1049. * involve traversing into Entity References.
  1050. *
  1051. * %REVIEW% DOM Level 3 is expected to add functionality which may
  1052. * allow us to retire this.
  1053. */
  1054. private Node logicalNextDOMTextNode(Node n)
  1055. {
  1056. Node p=n.getNextSibling();
  1057. if(p==null)
  1058. {
  1059. // Walk out of any EntityReferenceNodes that ended with text
  1060. for(n=n.getParentNode();
  1061. n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
  1062. n=n.getParentNode())
  1063. {
  1064. p=n.getNextSibling();
  1065. if(p!=null)
  1066. break;
  1067. }
  1068. }
  1069. n=p;
  1070. while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
  1071. {
  1072. // Walk into any EntityReferenceNodes that start with text
  1073. if(n.hasChildNodes())
  1074. n=n.getFirstChild();
  1075. else
  1076. n=n.getNextSibling();
  1077. }
  1078. if(n!=null)
  1079. {
  1080. // Found a logical next sibling. Is it text?
  1081. int ntype=n.getNodeType();
  1082. if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
  1083. n=null;
  1084. }
  1085. return n;
  1086. }
  1087. /**
  1088. * Given a node handle, return its node value. This is mostly
  1089. * as defined by the DOM, but may ignore some conveniences.
  1090. * <p>
  1091. *
  1092. * @param nodeHandle The node id.
  1093. * @return String Value of this node, or null if not
  1094. * meaningful for this node type.
  1095. */
  1096. public String getNodeValue(int nodeHandle)
  1097. {
  1098. // The _type(nodeHandle) call was taking the lion's share of our
  1099. // time, and was wrong anyway since it wasn't coverting handle to
  1100. // identity. Inlined it.
  1101. int type = _exptype(makeNodeIdentity(nodeHandle));
  1102. type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
  1103. if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
  1104. return getNode(nodeHandle).getNodeValue();
  1105. // If this is a DTM text node, it may be made of multiple DOM text
  1106. // nodes -- including navigating into Entity References. DOM2DTM
  1107. // records the first node in the sequence and requires that we
  1108. // pick up the others when we retrieve the DTM node's value.
  1109. //
  1110. // %REVIEW% DOM Level 3 is expected to add a "whole text"
  1111. // retrieval method which performs this function for us.
  1112. Node node = getNode(nodeHandle);
  1113. Node n=logicalNextDOMTextNode(node);
  1114. if(n==null)
  1115. return node.getNodeValue();
  1116. FastStringBuffer buf = StringBufferPool.get();
  1117. buf.append(node.getNodeValue());
  1118. while(n!=null)
  1119. {
  1120. buf.append(n.getNodeValue());
  1121. n=logicalNextDOMTextNode(n);
  1122. }
  1123. String s = (buf.length() > 0) ? buf.toString() : "";
  1124. StringBufferPool.free(buf);
  1125. return s;
  1126. }
  1127. /**
  1128. * A document type declaration information item has the following properties:
  1129. *
  1130. * 1. [system identifier] The system identifier of the external subset, if
  1131. * it exists. Otherwise this property has no value.
  1132. *
  1133. * @return the system identifier String object, or null if there is none.
  1134. */
  1135. public String getDocumentTypeDeclarationSystemIdentifier()
  1136. {
  1137. Document doc;
  1138. if (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1139. doc = (Document) m_root;
  1140. else
  1141. doc = m_root.getOwnerDocument();
  1142. if (null != doc)
  1143. {
  1144. DocumentType dtd = doc.getDoctype();
  1145. if (null != dtd)
  1146. {
  1147. return dtd.getSystemId();
  1148. }
  1149. }
  1150. return null;
  1151. }
  1152. /**
  1153. * Return the public identifier of the external subset,
  1154. * normalized as described in 4.2.2 External Entities [XML]. If there is
  1155. * no external subset or if it has no public identifier, this property
  1156. * has no value.
  1157. *
  1158. * @param the document type declaration handle
  1159. *
  1160. * @return the public identifier String object, or null if there is none.
  1161. */
  1162. public String getDocumentTypeDeclarationPublicIdentifier()
  1163. {
  1164. Document doc;
  1165. if (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1166. doc = (Document) m_root;
  1167. else
  1168. doc = m_root.getOwnerDocument();
  1169. if (null != doc)
  1170. {
  1171. DocumentType dtd = doc.getDoctype();
  1172. if (null != dtd)
  1173. {
  1174. return dtd.getPublicId();
  1175. }
  1176. }
  1177. return null;
  1178. }
  1179. /**
  1180. * Returns the <code>Element</code> whose <code>ID</code> is given by
  1181. * <code>elementId</code>. If no such element exists, returns
  1182. * <code>DTM.NULL</code>. Behavior is not defined if more than one element
  1183. * has this <code>ID</code>. Attributes (including those
  1184. * with the name "ID") are not of type ID unless so defined by DTD/Schema
  1185. * information available to the DTM implementation.
  1186. * Implementations that do not know whether attributes are of type ID or
  1187. * not are expected to return <code>DTM.NULL</code>.
  1188. *
  1189. * <p>%REVIEW% Presumably IDs are still scoped to a single document,
  1190. * and this operation searches only within a single document, right?
  1191. * Wouldn't want collisions between DTMs in the same process.</p>
  1192. *
  1193. * @param elementId The unique <code>id</code> value for an element.
  1194. * @return The handle of the matching element.
  1195. */
  1196. public int getElementById(String elementId)
  1197. {
  1198. Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1199. ? (Document) m_root : m_root.getOwnerDocument();
  1200. if(null != doc)
  1201. {
  1202. Node elem = doc.getElementById(elementId);
  1203. if(null != elem)
  1204. {
  1205. int elemHandle = getHandleFromNode(elem);
  1206. if(DTM.NULL == elemHandle)
  1207. {
  1208. int identity = m_nodes.size()-1;
  1209. while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
  1210. {
  1211. Node node = getNode(identity);
  1212. if(node == elem)
  1213. {
  1214. elemHandle = getHandleFromNode(elem);
  1215. break;
  1216. }
  1217. }
  1218. }
  1219. return elemHandle;
  1220. }
  1221. }
  1222. return DTM.NULL;
  1223. }
  1224. /**
  1225. * The getUnparsedEntityURI function returns the URI of the unparsed
  1226. * entity with the specified name in the same document as the context
  1227. * node (see [3.3 Unparsed Entities]). It returns the empty string if
  1228. * there is no such entity.
  1229. * <p>
  1230. * XML processors may choose to use the System Identifier (if one
  1231. * is provided) to resolve the entity, rather than the URI in the
  1232. * Public Identifier. The details are dependent on the processor, and
  1233. * we would have to support some form of plug-in resolver to handle
  1234. * this properly. Currently, we simply return the System Identifier if
  1235. * present, and hope that it a usable URI or that our caller can
  1236. * map it to one.
  1237. * TODO: Resolve Public Identifiers... or consider changing function name.
  1238. * <p>
  1239. * If we find a relative URI
  1240. * reference, XML expects it to be resolved in terms of the base URI
  1241. * of the document. The DOM doesn't do that for us, and it isn't
  1242. * entirely clear whether that should be done here; currently that's
  1243. * pushed up to a higher level of our application. (Note that DOM Level
  1244. * 1 didn't store the document's base URI.)
  1245. * TODO: Consider resolving Relative URIs.
  1246. * <p>
  1247. * (The DOM's statement that "An XML processor may choose to
  1248. * completely expand entities before the structure model is passed
  1249. * to the DOM" refers only to parsed entities, not unparsed, and hence
  1250. * doesn't affect this function.)
  1251. *
  1252. * @param name A string containing the Entity Name of the unparsed
  1253. * entity.
  1254. *
  1255. * @return String containing the URI of the Unparsed Entity, or an
  1256. * empty string if no such entity exists.
  1257. */
  1258. public String getUnparsedEntityURI(String name)
  1259. {
  1260. String url = "";
  1261. Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
  1262. ? (Document) m_root : m_root.getOwnerDocument();
  1263. if (null != doc)
  1264. {
  1265. DocumentType doctype = doc.getDoctype();
  1266. if (null != doctype)
  1267. {
  1268. NamedNodeMap entities = doctype.getEntities();
  1269. if(null == entities)
  1270. return url;
  1271. Entity entity = (Entity) entities.getNamedItem(name);
  1272. if(null == entity)
  1273. return url;
  1274. String notationName = entity.getNotationName();
  1275. if (null != notationName) // then it's unparsed
  1276. {
  1277. // The draft says: "The XSLT processor may use the public
  1278. // identifier to generate a URI for the entity instead of the URI
  1279. // specified in the system identifier. If the XSLT processor does
  1280. // not use the public identifier to generate the URI, it must use
  1281. // the system identifier; if the system identifier is a relative
  1282. // URI, it must be resolved into an absolute URI using the URI of
  1283. // the resource containing the entity declaration as the base
  1284. // URI [RFC2396]."
  1285. // So I'm falling a bit short here.
  1286. url = entity.getSystemId();
  1287. if (null == url)
  1288. {
  1289. url = entity.getPublicId();
  1290. }
  1291. else
  1292. {
  1293. // This should be resolved to an absolute URL, but that's hard
  1294. // to do from here.
  1295. }
  1296. }
  1297. }
  1298. }
  1299. return url;
  1300. }
  1301. /**
  1302. * 5. [specified] A flag indicating whether this attribute was actually
  1303. * specified in the start-tag of its element, or was defaulted from the
  1304. * DTD.
  1305. *
  1306. * @param the attribute handle
  1307. *
  1308. * NEEDSDOC @param attributeHandle
  1309. * @return <code>true</code> if the attribute was specified;
  1310. * <code>false</code> if it was defaulted.
  1311. */
  1312. public boolean isAttributeSpecified(int attributeHandle)
  1313. {
  1314. int type = getNodeType(attributeHandle);
  1315. if (DTM.ATTRIBUTE_NODE == type)
  1316. {
  1317. Attr attr = (Attr)getNode(attributeHandle);
  1318. return attr.getSpecified();
  1319. }
  1320. return false;
  1321. }
  1322. /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
  1323. * we're wrapped around an existing DOM.
  1324. *
  1325. * @param source The IncrementalSAXSource that we want to recieve events from
  1326. * on demand.
  1327. */
  1328. public void setIncrementalSAXSource(IncrementalSAXSource source)
  1329. {
  1330. }
  1331. /** getContentHandler returns "our SAX builder" -- the thing that
  1332. * someone else should send SAX events to in order to extend this
  1333. * DTM model.
  1334. *
  1335. * @return null if this model doesn't respond to SAX events,
  1336. * "this" if the DTM object has a built-in SAX ContentHandler,
  1337. * the IncrmentalSAXSource if we're bound to one and should receive
  1338. * the SAX stream via it for incremental build purposes...
  1339. * */
  1340. public org.xml.sax.ContentHandler getContentHandler()
  1341. {
  1342. return null;
  1343. }
  1344. /**
  1345. * Return this DTM's lexical handler.
  1346. *
  1347. * %REVIEW% Should this return null if constrution already done/begun?
  1348. *
  1349. * @return null if this model doesn't respond to lexical SAX events,
  1350. * "this" if the DTM object has a built-in SAX ContentHandler,
  1351. * the IncrementalSAXSource if we're bound to one and should receive
  1352. * the SAX stream via it for incremental build purposes...
  1353. */
  1354. public org.xml.sax.ext.LexicalHandler getLexicalHandler()
  1355. {
  1356. return null;
  1357. }
  1358. /**
  1359. * Return this DTM's EntityResolver.
  1360. *
  1361. * @return null if this model doesn't respond to SAX entity ref events.
  1362. */
  1363. public org.xml.sax.EntityResolver getEntityResolver()
  1364. {
  1365. return null;
  1366. }
  1367. /**
  1368. * Return this DTM's DTDHandler.
  1369. *
  1370. * @return null if this model doesn't respond to SAX dtd events.
  1371. */
  1372. public org.xml.sax.DTDHandler getDTDHandler()
  1373. {
  1374. return null;
  1375. }
  1376. /**
  1377. * Return this DTM's ErrorHandler.
  1378. *
  1379. * @return null if this model doesn't respond to SAX error events.
  1380. */
  1381. public org.xml.sax.ErrorHandler getErrorHandler()
  1382. {
  1383. return null;
  1384. }
  1385. /**
  1386. * Return this DTM's DeclHandler.
  1387. *
  1388. * @return null if this model doesn't respond to SAX Decl events.
  1389. */
  1390. public org.xml.sax.ext.DeclHandler getDeclHandler()
  1391. {
  1392. return null;
  1393. }
  1394. /** @return true iff we're building this model incrementally (eg
  1395. * we're partnered with a IncrementalSAXSource) and thus require that the
  1396. * transformation and the parse run simultaneously. Guidance to the
  1397. * DTMManager.
  1398. * */
  1399. public boolean needsTwoThreads()
  1400. {
  1401. return false;
  1402. }
  1403. // ========== Direct SAX Dispatch, for optimization purposes ========
  1404. /**
  1405. * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
  1406. * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
  1407. * the definition of <CODE>S</CODE></A> for details.
  1408. * @param ch Character to check as XML whitespace.
  1409. * @return =true if <var>ch</var> is XML whitespace; otherwise =false.
  1410. */
  1411. private static boolean isSpace(char ch)
  1412. {
  1413. return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now.
  1414. }
  1415. /**
  1416. * Directly call the
  1417. * characters method on the passed ContentHandler for the
  1418. * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
  1419. * for the definition of a node's string-value). Multiple calls to the
  1420. * ContentHandler's characters methods may well occur for a single call to
  1421. * this method.
  1422. *
  1423. * @param nodeHandle The node ID.
  1424. * @param ch A non-null reference to a ContentHandler.
  1425. *
  1426. * @throws org.xml.sax.SAXException
  1427. */
  1428. public void dispatchCharactersEvents(
  1429. int nodeHandle, org.xml.sax.ContentHandler ch,
  1430. boolean normalize)
  1431. throws org.xml.sax.SAXException
  1432. {
  1433. if(normalize)
  1434. {
  1435. XMLString str = getStringValue(nodeHandle);
  1436. str = str.fixWhiteSpace(true, true, false);
  1437. str.dispatchCharactersEvents(ch);
  1438. }
  1439. else
  1440. {
  1441. int type = getNodeType(nodeHandle);
  1442. Node node = getNode(nodeHandle);
  1443. dispatchNodeData(node, ch, 0);
  1444. // Text coalition -- a DTM text node may represent multiple
  1445. // DOM nodes.
  1446. if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
  1447. {
  1448. while( null != (node=logicalNextDOMTextNode(node)) )
  1449. {
  1450. dispatchNodeData(node, ch, 0);
  1451. }
  1452. }
  1453. }
  1454. }
  1455. /**
  1456. * Retrieve the text content of a DOM subtree, appending it into a
  1457. * user-supplied FastStringBuffer object. Note that attributes are
  1458. * not considered part of the content of an element.
  1459. * <p>
  1460. * There are open questions regarding whitespace stripping.
  1461. * Currently we make no special effort in that regard, since the standard
  1462. * DOM doesn't yet provide DTD-based information to distinguish
  1463. * whitespace-in-element-context from genuine #PCDATA. Note that we
  1464. * should probably also consider xml:space if/when we address this.
  1465. * DOM Level 3 may solve the problem for us.
  1466. * <p>
  1467. * %REVIEW% Note that as a DOM-level operation, it can be argued that this
  1468. * routine _shouldn't_ perform any processing beyond what the DOM already
  1469. * does, and that whitespace stripping and so on belong at the DTM level.
  1470. * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
  1471. *
  1472. * @param node Node whose subtree is to be walked, gathering the
  1473. * contents of all Text or CDATASection nodes.
  1474. * @param buf FastStringBuffer into which the contents of the text
  1475. * nodes are to be concatenated.
  1476. */
  1477. protected static void dispatchNodeData(Node node,
  1478. org.xml.sax.ContentHandler ch,
  1479. int depth)
  1480. throws org.xml.sax.SAXException
  1481. {
  1482. switch (node.getNodeType())
  1483. {
  1484. case Node.DOCUMENT_FRAGMENT_NODE :
  1485. case Node.DOCUMENT_NODE :
  1486. case Node.ELEMENT_NODE :
  1487. {
  1488. for (Node child = node.getFirstChild(); null != child;
  1489. child = child.getNextSibling())
  1490. {
  1491. dispatchNodeData(child, ch, depth+1);
  1492. }
  1493. }
  1494. break;
  1495. case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
  1496. case Node.COMMENT_NODE :
  1497. if(0 != depth)
  1498. break;
  1499. // NOTE: Because this operation works in the DOM space, it does _not_ attempt
  1500. // to perform Text Coalition. That should only be done in DTM space.
  1501. case Node.TEXT_NODE :
  1502. case Node.CDATA_SECTION_NODE :
  1503. case Node.ATTRIBUTE_NODE :
  1504. String str = node.getNodeValue();
  1505. if(ch instanceof CharacterNodeHandler)
  1506. {
  1507. ((CharacterNodeHandler)ch).characters(node);
  1508. }
  1509. else
  1510. {
  1511. ch.characters(str.toCharArray(), 0, str.length());
  1512. }
  1513. break;
  1514. // /* case Node.PROCESSING_INSTRUCTION_NODE :
  1515. // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
  1516. // break; */
  1517. default :
  1518. // ignore
  1519. break;
  1520. }
  1521. }
  1522. TreeWalker m_walker = new TreeWalker(null);
  1523. /**
  1524. * Directly create SAX parser events from a subtree.
  1525. *
  1526. * @param nodeHandle The node ID.
  1527. * @param ch A non-null reference to a ContentHandler.
  1528. *
  1529. * @throws org.xml.sax.SAXException
  1530. */
  1531. public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
  1532. throws org.xml.sax.SAXException
  1533. {
  1534. TreeWalker treeWalker = m_walker;
  1535. ContentHandler prevCH = treeWalker.getContentHandler();
  1536. if(null != prevCH)
  1537. {
  1538. treeWalker = new TreeWalker(null);
  1539. }
  1540. treeWalker.setContentHandler(ch);
  1541. try
  1542. {
  1543. Node node = getNode(nodeHandle);
  1544. treeWalker.traverse(node);
  1545. }
  1546. finally
  1547. {
  1548. treeWalker.setContentHandler(null);
  1549. }
  1550. }
  /**
   * Optional extension for ContentHandlers passed to dispatchNodeData:
   * a handler that implements this interface is given the DOM node
   * itself instead of a character array holding the node's value.
   */
  public interface CharacterNodeHandler
  {
    /**
     * Receive a DOM node whose value represents character data.
     *
     * @param node the DOM node carrying the characters.
     *
     * @throws org.xml.sax.SAXException
     */
    void characters(Node node)
      throws org.xml.sax.SAXException;
  }
  1556. /**
  1557. * For the moment all the run time properties are ignored by this
  1558. * class.
  1559. *
  1560. * @param property a <code>String</code> value
  1561. * @param value an <code>Object</code> value
  1562. */
  1563. public void setProperty(String property, Object value)
  1564. {
  1565. }
  1566. /**
  1567. * No source information is available for DOM2DTM, so return
  1568. * <code>null</code> here.
  1569. *
  1570. * @param node an <code>int</code> value
  1571. * @return null
  1572. */
  1573. public SourceLocator getSourceLocatorFor(int node)
  1574. {
  1575. return null;
  1576. }
  1577. }