1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xalan" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, Lotus
  53. * Development Corporation., http://www.lotus.com. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package org.apache.xpath;
  58. import java.util.Hashtable;
  59. import java.util.Vector;
  60. import org.w3c.dom.*;
  61. import javax.xml.transform.TransformerException;
  62. import org.apache.xml.utils.NSInfo;
  63. import org.apache.xml.utils.QName;
  64. import org.apache.xml.utils.StringBufferPool;
  65. import org.apache.xml.utils.FastStringBuffer;
  66. import org.apache.xalan.res.XSLMessages;
  67. import org.apache.xpath.res.XPATHErrorResources;
  68. // Imported JAVA API for XML Parsing 1.0 classes
  69. import javax.xml.parsers.DocumentBuilder;
  70. import javax.xml.parsers.DocumentBuilderFactory;
  71. import javax.xml.parsers.ParserConfigurationException;
  72. import org.apache.xml.dtm.ref.DTMNodeProxy;
  73. /**
  74. * @deprecated Since the introduction of the DTM, this class will be removed.
  75. * This class provides a front-end to DOM implementations, providing
  76. * a number of utility functions that either aren't yet standardized
  77. * by the DOM spec or that are defined in optional DOM modules and
  78. * hence may not be present in all DOMs.
  79. */
  80. public class DOMHelper
  81. {
  82. /**
  83. * DOM Level 1 did not have a standard mechanism for creating a new
  84. * Document object. This function provides a DOM-implementation-independent
  85. * abstraction for that for that concept. It's typically used when
  86. * outputting a new DOM as the result of an operation.
  87. * <p>
  88. * TODO: This isn't directly compatable with DOM Level 2.
  89. * The Level 2 createDocument call also creates the root
  90. * element, and thus requires that you know what that element will be
  91. * before creating the Document. We should think about whether we want
  92. * to change this code, and the callers, so we can use the DOM's own
  93. * method. (It's also possible that DOM Level 3 may relax this
  94. * sequence, but you may give up some intelligence in the DOM by
  95. * doing so; the intent was that knowing the document type and root
  96. * element might let the DOM automatically switch to a specialized
  97. * subclass for particular kinds of documents.)
  98. *
  99. * @return The newly created DOM Document object, with no children, or
  100. * null if we can't find a DOM implementation that permits creating
  101. * new empty Documents.
  102. */
  103. public static Document createDocument()
  104. {
  105. try
  106. {
  107. // Use an implementation of the JAVA API for XML Parsing 1.0 to
  108. // create a DOM Document node to contain the result.
  109. DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
  110. dfactory.setNamespaceAware(true);
  111. dfactory.setValidating(true);
  112. DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
  113. Document outNode = docBuilder.newDocument();
  114. return outNode;
  115. }
  116. catch (ParserConfigurationException pce)
  117. {
  118. throw new RuntimeException(
  119. XSLMessages.createXPATHMessage(
  120. XPATHErrorResources.ER_CREATEDOCUMENT_NOT_SUPPORTED, null)); //"createDocument() not supported in XPathContext!");
  121. // return null;
  122. }
  123. }
  124. /**
  125. * <meta name="usage" content="advanced"/>
  126. * Tells, through the combination of the default-space attribute
  127. * on xsl:stylesheet, xsl:strip-space, xsl:preserve-space, and the
  128. * xml:space attribute, whether or not extra whitespace should be stripped
  129. * from the node. Literal elements from template elements should
  130. * <em>not</em> be tested with this function.
  131. * @param textNode A text node from the source tree.
  132. * @return true if the text node should be stripped of extra whitespace.
  133. *
  134. * @throws javax.xml.transform.TransformerException
  135. */
  136. public boolean shouldStripSourceNode(Node textNode)
  137. throws javax.xml.transform.TransformerException
  138. {
  139. // return (null == m_envSupport) ? false : m_envSupport.shouldStripSourceNode(textNode);
  140. return false;
  141. }
  142. /**
  143. * Supports the XPath function GenerateID by returning a unique
  144. * identifier string for any given DOM Node.
  145. * <p>
  146. * Warning: The base implementation uses the Node object's hashCode(),
  147. * which is NOT guaranteed to be unique. If that method hasn't been
  148. * overridden in this DOM ipmlementation, most Java implementions will
  149. * derive it from the object's address and should be OK... but if
  150. * your DOM uses a different definition of hashCode (eg hashing the
  151. * contents of the subtree), or if your DOM may have multiple objects
  152. * that represent a single Node in the data structure (eg via proxying),
  153. * you may need to find another way to assign a unique identifier.
  154. * <p>
  155. * Also, be aware that if nodes are destroyed and recreated, there is
  156. * an open issue regarding whether an ID may be reused. Currently
  157. * we're assuming that the input document is stable for the duration
  158. * of the XPath/XSLT operation, so this shouldn't arise in this context.
  159. * <p>
  160. * (DOM Level 3 is investigating providing a unique node "key", but
  161. * that won't help Level 1 and Level 2 implementations.)
  162. *
  163. * @param node whose identifier you want to obtain
  164. *
  165. * @return a string which should be different for every Node object.
  166. */
  167. public String getUniqueID(Node node)
  168. {
  169. return "N" + Integer.toHexString(node.hashCode()).toUpperCase();
  170. }
  171. /**
  172. * Figure out whether node2 should be considered as being later
  173. * in the document than node1, in Document Order as defined
  174. * by the XPath model. This may not agree with the ordering defined
  175. * by other XML applications.
  176. * <p>
  177. * There are some cases where ordering isn't defined, and neither are
  178. * the results of this function -- though we'll generally return true.
  179. *
  180. * TODO: Make sure this does the right thing with attribute nodes!!!
  181. *
  182. * @param node1 DOM Node to perform position comparison on.
  183. * @param node2 DOM Node to perform position comparison on .
  184. *
  185. * @return false if node2 comes before node1, otherwise return true.
  186. * You can think of this as
  187. * <code>(node1.documentOrderPosition <= node2.documentOrderPosition)</code>.
  188. */
  189. public static boolean isNodeAfter(Node node1, Node node2)
  190. {
  191. if (node1 == node2 || isNodeTheSame(node1, node2))
  192. return true;
  193. // Default return value, if there is no defined ordering
  194. boolean isNodeAfter = true;
  195. Node parent1 = getParentOfNode(node1);
  196. Node parent2 = getParentOfNode(node2);
  197. // Optimize for most common case
  198. if (parent1 == parent2 || isNodeTheSame(parent1, parent2)) // then we know they are siblings
  199. {
  200. if (null != parent1)
  201. isNodeAfter = isNodeAfterSibling(parent1, node1, node2);
  202. else
  203. {
  204. // If both parents are null, ordering is not defined.
  205. // We're returning a value in lieu of throwing an exception.
  206. // Not a case we expect to arise in XPath, but beware if you
  207. // try to reuse this method.
  208. // We can just fall through in this case, which allows us
  209. // to hit the debugging code at the end of the function.
  210. //return isNodeAfter;
  211. }
  212. }
  213. else
  214. {
  215. // General strategy: Figure out the lengths of the two
  216. // ancestor chains, reconcile the lengths, and look for
  217. // the lowest common ancestor. If that ancestor is one of
  218. // the nodes being compared, it comes before the other.
  219. // Otherwise perform a sibling compare.
  220. //
  221. // NOTE: If no common ancestor is found, ordering is undefined
  222. // and we return the default value of isNodeAfter.
  223. // Count parents in each ancestor chain
  224. int nParents1 = 2, nParents2 = 2; // include node & parent obtained above
  225. while (parent1 != null)
  226. {
  227. nParents1++;
  228. parent1 = getParentOfNode(parent1);
  229. }
  230. while (parent2 != null)
  231. {
  232. nParents2++;
  233. parent2 = getParentOfNode(parent2);
  234. }
  235. // Initially assume scan for common ancestor starts with
  236. // the input nodes.
  237. Node startNode1 = node1, startNode2 = node2;
  238. // If one ancestor chain is longer, adjust its start point
  239. // so we're comparing at the same depths
  240. if (nParents1 < nParents2)
  241. {
  242. // Adjust startNode2 to depth of startNode1
  243. int adjust = nParents2 - nParents1;
  244. for (int i = 0; i < adjust; i++)
  245. {
  246. startNode2 = getParentOfNode(startNode2);
  247. }
  248. }
  249. else if (nParents1 > nParents2)
  250. {
  251. // adjust startNode1 to depth of startNode2
  252. int adjust = nParents1 - nParents2;
  253. for (int i = 0; i < adjust; i++)
  254. {
  255. startNode1 = getParentOfNode(startNode1);
  256. }
  257. }
  258. Node prevChild1 = null, prevChild2 = null; // so we can "back up"
  259. // Loop up the ancestor chain looking for common parent
  260. while (null != startNode1)
  261. {
  262. if (startNode1 == startNode2 || isNodeTheSame(startNode1, startNode2)) // common parent?
  263. {
  264. if (null == prevChild1) // first time in loop?
  265. {
  266. // Edge condition: one is the ancestor of the other.
  267. isNodeAfter = (nParents1 < nParents2) ? true : false;
  268. break; // from while loop
  269. }
  270. else
  271. {
  272. // Compare ancestors below lowest-common as siblings
  273. isNodeAfter = isNodeAfterSibling(startNode1, prevChild1,
  274. prevChild2);
  275. break; // from while loop
  276. }
  277. } // end if(startNode1 == startNode2)
  278. // Move up one level and try again
  279. prevChild1 = startNode1;
  280. startNode1 = getParentOfNode(startNode1);
  281. prevChild2 = startNode2;
  282. startNode2 = getParentOfNode(startNode2);
  283. } // end while(parents exist to examine)
  284. } // end big else (not immediate siblings)
  285. // WARNING: The following diagnostic won't report the early
  286. // "same node" case. Fix if/when needed.
  287. /* -- please do not remove... very useful for diagnostics --
  288. System.out.println("node1 = "+node1.getNodeName()+"("+node1.getNodeType()+")"+
  289. ", node2 = "+node2.getNodeName()
  290. +"("+node2.getNodeType()+")"+
  291. ", isNodeAfter = "+isNodeAfter); */
  292. return isNodeAfter;
  293. } // end isNodeAfter(Node node1, Node node2)
  294. /**
  295. * Use DTMNodeProxy to determine whether two nodes are the same.
  296. *
  297. * @param node1 The first DOM node to compare.
  298. * @param node2 The second DOM node to compare.
  299. * @return true if the two nodes are the same.
  300. */
  301. public static boolean isNodeTheSame(Node node1, Node node2)
  302. {
  303. if (node1 instanceof DTMNodeProxy && node2 instanceof DTMNodeProxy)
  304. return ((DTMNodeProxy)node1).equals((DTMNodeProxy)node2);
  305. else
  306. return (node1 == node2);
  307. }
  308. /**
  309. * Figure out if child2 is after child1 in document order.
  310. * <p>
  311. * Warning: Some aspects of "document order" are not well defined.
  312. * For example, the order of attributes is considered
  313. * meaningless in XML, and the order reported by our model will
  314. * be consistant for a given invocation but may not
  315. * match that of either the source file or the serialized output.
  316. *
  317. * @param parent Must be the parent of both child1 and child2.
  318. * @param child1 Must be the child of parent and not equal to child2.
  319. * @param child2 Must be the child of parent and not equal to child1.
  320. * @return true if child 2 is after child1 in document order.
  321. */
  322. private static boolean isNodeAfterSibling(Node parent, Node child1,
  323. Node child2)
  324. {
  325. boolean isNodeAfterSibling = false;
  326. short child1type = child1.getNodeType();
  327. short child2type = child2.getNodeType();
  328. if ((Node.ATTRIBUTE_NODE != child1type)
  329. && (Node.ATTRIBUTE_NODE == child2type))
  330. {
  331. // always sort attributes before non-attributes.
  332. isNodeAfterSibling = false;
  333. }
  334. else if ((Node.ATTRIBUTE_NODE == child1type)
  335. && (Node.ATTRIBUTE_NODE != child2type))
  336. {
  337. // always sort attributes before non-attributes.
  338. isNodeAfterSibling = true;
  339. }
  340. else if (Node.ATTRIBUTE_NODE == child1type)
  341. {
  342. NamedNodeMap children = parent.getAttributes();
  343. int nNodes = children.getLength();
  344. boolean found1 = false, found2 = false;
  345. // Count from the start until we find one or the other.
  346. for (int i = 0; i < nNodes; i++)
  347. {
  348. Node child = children.item(i);
  349. if (child1 == child || isNodeTheSame(child1, child))
  350. {
  351. if (found2)
  352. {
  353. isNodeAfterSibling = false;
  354. break;
  355. }
  356. found1 = true;
  357. }
  358. else if (child2 == child || isNodeTheSame(child2, child))
  359. {
  360. if (found1)
  361. {
  362. isNodeAfterSibling = true;
  363. break;
  364. }
  365. found2 = true;
  366. }
  367. }
  368. }
  369. else
  370. {
  371. // TODO: Check performance of alternate solution:
  372. // There are two choices here: Count from the start of
  373. // the document until we find one or the other, or count
  374. // from one until we find or fail to find the other.
  375. // Either can wind up scanning all the siblings in the worst
  376. // case, which on a wide document can be a lot of work but
  377. // is more typically is a short list.
  378. // Scanning from the start involves two tests per iteration,
  379. // but it isn't clear that scanning from the middle doesn't
  380. // yield more iterations on average.
  381. // We should run some testcases.
  382. Node child = parent.getFirstChild();
  383. boolean found1 = false, found2 = false;
  384. while (null != child)
  385. {
  386. // Node child = children.item(i);
  387. if (child1 == child || isNodeTheSame(child1, child))
  388. {
  389. if (found2)
  390. {
  391. isNodeAfterSibling = false;
  392. break;
  393. }
  394. found1 = true;
  395. }
  396. else if (child2 == child || isNodeTheSame(child2, child))
  397. {
  398. if (found1)
  399. {
  400. isNodeAfterSibling = true;
  401. break;
  402. }
  403. found2 = true;
  404. }
  405. child = child.getNextSibling();
  406. }
  407. }
  408. return isNodeAfterSibling;
  409. } // end isNodeAfterSibling(Node parent, Node child1, Node child2)
  410. //==========================================================
  411. // SECTION: Namespace resolution
  412. //==========================================================
  413. /**
  414. * <meta name="usage" content="internal"/>
  415. * Get the depth level of this node in the tree (equals 1 for
  416. * a parentless node).
  417. *
  418. * @param n Node to be examined.
  419. * @return the number of ancestors, plus one
  420. */
  421. public short getLevel(Node n)
  422. {
  423. short level = 1;
  424. while (null != (n = getParentOfNode(n)))
  425. {
  426. level++;
  427. }
  428. return level;
  429. }
  430. /**
  431. * Given an XML Namespace prefix and a context in which the prefix
  432. * is to be evaluated, return the Namespace Name this prefix was
  433. * bound to. Note that DOM Level 3 is expected to provide a version of
  434. * this which deals with the DOM's "early binding" behavior.
  435. *
  436. * Default handling:
  437. *
  438. * @param prefix String containing namespace prefix to be resolved,
  439. * without the ':' which separates it from the localname when used
  440. * in a Node Name. The empty sting signifies the default namespace
  441. * at this point in the document.
  442. * @param namespaceContext Element which provides context for resolution.
  443. * (We could extend this to work for other nodes by first seeking their
  444. * nearest Element ancestor.)
  445. *
  446. * @return a String containing the Namespace URI which this prefix
  447. * represents in the specified context.
  448. */
  449. public String getNamespaceForPrefix(String prefix, Element namespaceContext)
  450. {
  451. int type;
  452. Node parent = namespaceContext;
  453. String namespace = null;
  454. if (prefix.equals("xml"))
  455. {
  456. namespace = QName.S_XMLNAMESPACEURI; // Hardcoded, per Namespace spec
  457. }
  458. else if(prefix.equals("xmlns"))
  459. {
  460. // Hardcoded in the DOM spec, expected to be adopted by
  461. // Namespace spec. NOTE: Namespace declarations _must_ use
  462. // the xmlns: prefix; other prefixes declared as belonging
  463. // to this namespace will not be recognized and should
  464. // probably be rejected by parsers as erroneous declarations.
  465. namespace = "http://www.w3.org/2000/xmlns/";
  466. }
  467. else
  468. {
  469. // Attribute name for this prefix's declaration
  470. String declname=(prefix=="")
  471. ? "xmlns"
  472. : "xmlns:"+prefix;
  473. // Scan until we run out of Elements or have resolved the namespace
  474. while ((null != parent) && (null == namespace)
  475. && (((type = parent.getNodeType()) == Node.ELEMENT_NODE)
  476. || (type == Node.ENTITY_REFERENCE_NODE)))
  477. {
  478. if (type == Node.ELEMENT_NODE)
  479. {
  480. // Look for the appropriate Namespace Declaration attribute,
  481. // either "xmlns:prefix" or (if prefix is "") "xmlns".
  482. // TODO: This does not handle "implicit declarations"
  483. // which may be created when the DOM is edited. DOM Level
  484. // 3 will define how those should be interpreted. But
  485. // this issue won't arise in freshly-parsed DOMs.
  486. // NOTE: declname is set earlier, outside the loop.
  487. Attr attr=((Element)parent).getAttributeNode(declname);
  488. if(attr!=null)
  489. {
  490. namespace = attr.getNodeValue();
  491. break;
  492. }
  493. }
  494. parent = getParentOfNode(parent);
  495. }
  496. }
  497. return namespace;
  498. }
  /**
   * An experiment for the moment.
   * <p>
   * Table keyed by DOM Node whose values are NSInfo objects.
   * getNamespaceOfNode reads it to reuse earlier results and writes it
   * both for the node being resolved and for ancestors visited on the way.
   */
  Hashtable m_NSInfos = new Hashtable();

  /** Object to put into the m_NSInfos table that tells that a node has not been
   *  processed, but has xmlns namespace decls.  */
  protected static final NSInfo m_NSInfoUnProcWithXMLNS = new NSInfo(false,
                                                            true);

  /** Object to put into the m_NSInfos table that tells that a node has not been
   *  processed, but has no xmlns namespace decls.  */
  protected static final NSInfo m_NSInfoUnProcWithoutXMLNS = new NSInfo(false,
                                                               false);

  /** Object to put into the m_NSInfos table that tells that a node has not been
   *  processed, and has no xmlns namespace decls, and has no ancestor decls.  */
  protected static final NSInfo m_NSInfoUnProcNoAncestorXMLNS =
    new NSInfo(false, false, NSInfo.ANCESTORNOXMLNS);

  /** Object to put into the m_NSInfos table that tells that a node has been
   *  processed, and has xmlns namespace decls.  */
  protected static final NSInfo m_NSInfoNullWithXMLNS = new NSInfo(true,
                                                          true);

  /** Object to put into the m_NSInfos table that tells that a node has been
   *  processed, and has no xmlns namespace decls.  */
  protected static final NSInfo m_NSInfoNullWithoutXMLNS = new NSInfo(true,
                                                             false);

  /** Object to put into the m_NSInfos table that tells that a node has been
   *  processed, and has no xmlns namespace decls. and has no ancestor decls.  */
  protected static final NSInfo m_NSInfoNullNoAncestorXMLNS =
    new NSInfo(true, false, NSInfo.ANCESTORNOXMLNS);

  /** Scratch list of (node, NSInfo) pairs collected while getNamespaceOfNode
   *  walks up the ancestor chain: each node is stored at an even index and
   *  the NSInfo that was current for it at the following odd index. The
   *  pairs are candidates for being re-marked as having no ancestor
   *  namespace declarations, and the list is emptied again before
   *  getNamespaceOfNode finishes with it.  */
  protected Vector m_candidateNoAncestorXMLNS = new Vector();
  /**
   * Returns the namespace of the given node. Differs from simply getting
   * the node's prefix and using getNamespaceForPrefix in that it attempts
   * to cache some of the data in NSINFO objects, to avoid repeated lookup.
   * TODO: Should we consider moving that logic into getNamespaceForPrefix?
   * <p>
   * Outline of what the code does:
   * <ol>
   * <li>For a non-attribute node, look it up in m_NSInfos; if the cached
   * entry is flagged as processed, its stored namespace is returned.</li>
   * <li>Otherwise split the prefix off the node name. An attribute without
   * a prefix yields null immediately; the prefix "xml" is answered from
   * QName.S_XMLNAMESPACEURI.</li>
   * <li>For any other prefix, climb from the node towards the root,
   * scanning each Element's attributes for a matching "xmlns" /
   * "xmlns:prefix" declaration, recording per-ancestor NSInfo entries in
   * m_NSInfos as it goes.</li>
   * <li>For a non-attribute node, store the outcome in m_NSInfos.</li>
   * </ol>
   * The method reads and writes both m_NSInfos and
   * m_candidateNoAncestorXMLNS.
   *
   * @param n Node to be examined.
   *
   * @return String containing the Namespace Name (uri) for this node,
   * or null if none was resolved.
   * Note that this is undefined for any nodes other than Elements and
   * Attributes.
   */
  public String getNamespaceOfNode(Node n)
  {

    String namespaceOfPrefix;
    boolean hasProcessedNS;
    NSInfo nsInfo;
    short ntype = n.getNodeType();

    if (Node.ATTRIBUTE_NODE != ntype)
    {
      // Non-attribute nodes may already have a cached NSInfo.
      Object nsObj = m_NSInfos.get(n);  // cached entry for n, or null

      nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
      hasProcessedNS = (nsInfo == null) ? false : nsInfo.m_hasProcessedNS;
    }
    else
    {
      // Attribute nodes never consult (or, below, populate) the cache.
      hasProcessedNS = false;
      nsInfo = null;
    }

    if (hasProcessedNS)
    {
      // Cache hit: reuse the stored answer (which may itself be null).
      namespaceOfPrefix = nsInfo.m_namespace;
    }
    else
    {
      namespaceOfPrefix = null;

      String nodeName = n.getNodeName();
      int indexOfNSSep = nodeName.indexOf(':');
      String prefix;

      if (Node.ATTRIBUTE_NODE == ntype)
      {
        if (indexOfNSSep > 0)
        {
          prefix = nodeName.substring(0, indexOfNSSep);
        }
        else
        {

          // Attributes don't use the default namespace, so if
          // there isn't a prefix, we're done.
          return namespaceOfPrefix;
        }
      }
      else
      {
        // No colon means the default namespace, represented by "".
        prefix = (indexOfNSSep >= 0)
                 ? nodeName.substring(0, indexOfNSSep) : "";
      }

      // Set when any xmlns declaration is seen during the walk below.
      boolean ancestorsHaveXMLNS = false;
      // Set when an xmlns declaration is seen on n itself.
      boolean nHasXMLNS = false;

      if (prefix.equals("xml"))
      {
        namespaceOfPrefix = QName.S_XMLNAMESPACEURI;
      }
      else
      {
        int parentType;
        Node parent = n;

        // Walk upward, starting at n itself, until the prefix is
        // resolved or the chain runs out.
        while ((null != parent) && (null == namespaceOfPrefix))
        {
          // A cached entry saying no ancestor carries xmlns attributes
          // means there is nothing further up worth scanning.
          if ((null != nsInfo)
                  && (nsInfo.m_ancestorHasXMLNSAttrs
                      == nsInfo.ANCESTORNOXMLNS))
          {
            break;
          }

          parentType = parent.getNodeType();

          // Scan this node's attributes only if nothing is known about
          // it yet, or it is known to carry xmlns attributes.
          if ((null == nsInfo) || nsInfo.m_hasXMLNSAttrs)
          {
            boolean elementHasXMLNS = false;

            if (parentType == Node.ELEMENT_NODE)
            {
              NamedNodeMap nnm = parent.getAttributes();

              for (int i = 0; i < nnm.getLength(); i++)
              {
                Node attr = nnm.item(i);
                String aname = attr.getNodeName();

                // Cheap first-character filter before the string tests.
                if (aname.charAt(0) == 'x')
                {
                  boolean isPrefix = aname.startsWith("xmlns:");

                  if (aname.equals("xmlns") || isPrefix)
                  {
                    if (n == parent)
                      nHasXMLNS = true;

                    elementHasXMLNS = true;
                    ancestorsHaveXMLNS = true;

                    // Prefix being declared: text after "xmlns:", or ""
                    // for a default-namespace declaration.
                    String p = isPrefix ? aname.substring(6) : "";

                    if (p.equals(prefix))
                    {
                      namespaceOfPrefix = attr.getNodeValue();

                      // Leaves only the attribute loop; the rest of this
                      // while-iteration still runs before the while
                      // condition ends the walk.
                      break;
                    }
                  }
                }
              }
            }

            // Remember what was learned about a previously unknown
            // ancestor (never n itself, never an attribute node).
            if ((Node.ATTRIBUTE_NODE != parentType) && (null == nsInfo)
                    && (n != parent))
            {
              nsInfo = elementHasXMLNS
                       ? m_NSInfoUnProcWithXMLNS : m_NSInfoUnProcWithoutXMLNS;

              m_NSInfos.put(parent, nsInfo);
            }
          }

          if (Node.ATTRIBUTE_NODE == parentType)
          {
            // Attributes are stepped over via getParentOfNode and are
            // not recorded as candidates.
            parent = getParentOfNode(parent);
          }
          else
          {
            // Record the (node, NSInfo) pair: node at the even index,
            // its info (possibly null) at the following odd index.
            m_candidateNoAncestorXMLNS.addElement(parent);
            m_candidateNoAncestorXMLNS.addElement(nsInfo);

            parent = parent.getParentNode();
          }

          if (null != parent)
          {
            // Load whatever is already cached for the next node up.
            Object nsObj = m_NSInfos.get(parent);  // cached entry, or null

            nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
          }
        }

        int nCandidates = m_candidateNoAncestorXMLNS.size();

        if (nCandidates > 0)
        {
          // Only when the walk reached the top (parent is null) without
          // seeing a single xmlns declaration are the visited nodes
          // re-marked as having no ancestor declarations.
          if ((false == ancestorsHaveXMLNS) && (null == parent))
          {
            for (int i = 0; i < nCandidates; i += 2)
            {
              Object candidateInfo = m_candidateNoAncestorXMLNS.elementAt(i
                                       + 1);

              // Identity comparison against the shared sentinel objects;
              // any other info value leaves the cache entry untouched.
              if (candidateInfo == m_NSInfoUnProcWithoutXMLNS)
              {
                m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
                              m_NSInfoUnProcNoAncestorXMLNS);
              }
              else if (candidateInfo == m_NSInfoNullWithoutXMLNS)
              {
                m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
                              m_NSInfoNullNoAncestorXMLNS);
              }
            }
          }

          // The candidate list is scratch space; always empty it.
          m_candidateNoAncestorXMLNS.removeAllElements();
        }
      }

      // Cache the outcome for n (attribute nodes are never cached).
      if (Node.ATTRIBUTE_NODE != ntype)
      {
        if (null == namespaceOfPrefix)
        {
          // Unresolved: store one of the shared "null namespace"
          // sentinels matching what was observed during the walk.
          if (ancestorsHaveXMLNS)
          {
            if (nHasXMLNS)
              m_NSInfos.put(n, m_NSInfoNullWithXMLNS);
            else
              m_NSInfos.put(n, m_NSInfoNullWithoutXMLNS);
          }
          else
          {
            m_NSInfos.put(n, m_NSInfoNullNoAncestorXMLNS);
          }
        }
        else
        {
          // Resolved: store a fresh NSInfo carrying the namespace.
          m_NSInfos.put(n, new NSInfo(namespaceOfPrefix, nHasXMLNS));
        }
      }
    }

    return namespaceOfPrefix;
  }
  707. /**
  708. * Returns the local name of the given node. If the node's name begins
  709. * with a namespace prefix, this is the part after the colon; otherwise
  710. * it's the full node name.
  711. *
  712. * @param n the node to be examined.
  713. *
  714. * @return String containing the Local Name
  715. */
  716. public String getLocalNameOfNode(Node n)
  717. {
  718. String qname = n.getNodeName();
  719. int index = qname.indexOf(':');
  720. return (index < 0) ? qname : qname.substring(index + 1);
  721. }
  722. /**
  723. * Returns the element name with the namespace prefix (if any) replaced
  724. * by the Namespace URI it was bound to. This is not a standard
  725. * representation of a node name, but it allows convenient
  726. * single-string comparison of the "universal" names of two nodes.
  727. *
  728. * @param elem Element to be examined.
  729. *
  730. * @return String in the form "namespaceURI:localname" if the node
  731. * belongs to a namespace, or simply "localname" if it doesn't.
  732. * @see #getExpandedAttributeName
  733. */
  734. public String getExpandedElementName(Element elem)
  735. {
  736. String namespace = getNamespaceOfNode(elem);
  737. return (null != namespace)
  738. ? namespace + ":" + getLocalNameOfNode(elem)
  739. : getLocalNameOfNode(elem);
  740. }
  741. /**
  742. * Returns the attribute name with the namespace prefix (if any) replaced
  743. * by the Namespace URI it was bound to. This is not a standard
  744. * representation of a node name, but it allows convenient
  745. * single-string comparison of the "universal" names of two nodes.
  746. *
  747. * @param attr Attr to be examined
  748. *
  749. * @return String in the form "namespaceURI:localname" if the node
  750. * belongs to a namespace, or simply "localname" if it doesn't.
  751. * @see #getExpandedElementName
  752. */
  753. public String getExpandedAttributeName(Attr attr)
  754. {
  755. String namespace = getNamespaceOfNode(attr);
  756. return (null != namespace)
  757. ? namespace + ":" + getLocalNameOfNode(attr)
  758. : getLocalNameOfNode(attr);
  759. }
  760. //==========================================================
  761. // SECTION: DOM Helper Functions
  762. //==========================================================
  763. /**
  764. * Tell if the node is ignorable whitespace. Note that this can
  765. * be determined only in the context of a DTD or other Schema,
  766. * and that DOM Level 2 has nostandardized DOM API which can
  767. * return that information.
  768. * @deprecated
  769. *
  770. * @param node Node to be examined
  771. *
  772. * @return CURRENTLY HARDCODED TO FALSE, but should return true if
  773. * and only if the node is of type Text, contains only whitespace,
  774. * and does not appear as part of the #PCDATA content of an element.
  775. * (Note that determining this last may require allowing for
  776. * Entity References.)
  777. */
  778. public boolean isIgnorableWhitespace(Text node)
  779. {
  780. boolean isIgnorable = false; // return value
  781. // TODO: I can probably do something to figure out if this
  782. // space is ignorable from just the information in
  783. // the DOM tree.
  784. // -- You need to be able to distinguish whitespace
  785. // that is #PCDATA from whitespace that isn't. That requires
  786. // DTD support, which won't be standardized until DOM Level 3.
  787. return isIgnorable;
  788. }
  789. /**
  790. * Get the first unparented node in the ancestor chain.
  791. * @deprecated
  792. *
  793. * @param node Starting node, to specify which chain to chase
  794. *
  795. * @return the topmost ancestor.
  796. */
  797. public Node getRoot(Node node)
  798. {
  799. Node root = null;
  800. while (node != null)
  801. {
  802. root = node;
  803. node = getParentOfNode(node);
  804. }
  805. return root;
  806. }
  807. /**
  808. * Get the root node of the document tree, regardless of
  809. * whether or not the node passed in is a document node.
  810. * <p>
  811. * TODO: This doesn't handle DocumentFragments or "orphaned" subtrees
  812. * -- it's currently returning ownerDocument even when the tree is
  813. * not actually part of the main Document tree. We should either
  814. * rewrite the description to say that it finds the Document node,
  815. * or change the code to walk up the ancestor chain.
  816. *
  817. * @param n Node to be examined
  818. *
  819. * @return the Document node. Note that this is not the correct answer
  820. * if n was (or was a child of) a DocumentFragment or an orphaned node,
  821. * as can arise if the DOM has been edited rather than being generated
  822. * by a parser.
  823. */
  824. public Node getRootNode(Node n)
  825. {
  826. int nt = n.getNodeType();
  827. return ( (Node.DOCUMENT_NODE == nt) || (Node.DOCUMENT_FRAGMENT_NODE == nt) )
  828. ? n : n.getOwnerDocument();
  829. }
  830. /**
  831. * Test whether the given node is a namespace decl node. In DOM Level 2
  832. * this can be done in a namespace-aware manner, but in Level 1 DOMs
  833. * it has to be done by testing the node name.
  834. *
  835. * @param n Node to be examined.
  836. *
  837. * @return boolean -- true iff the node is an Attr whose name is
  838. * "xmlns" or has the "xmlns:" prefix.
  839. */
  840. public boolean isNamespaceNode(Node n)
  841. {
  842. if (Node.ATTRIBUTE_NODE == n.getNodeType())
  843. {
  844. String attrName = n.getNodeName();
  845. return (attrName.startsWith("xmlns:") || attrName.equals("xmlns"));
  846. }
  847. return false;
  848. }
  849. /**
  850. * Obtain the XPath-model parent of a DOM node -- ownerElement for Attrs,
  851. * parent for other nodes.
  852. * <p>
  853. * Background: The DOM believes that you must be your Parent's
  854. * Child, and thus Attrs don't have parents. XPath said that Attrs
  855. * do have their owning Element as their parent. This function
  856. * bridges the difference, either by using the DOM Level 2 ownerElement
  857. * function or by using a "silly and expensive function" in Level 1
  858. * DOMs.
  859. * <p>
  860. * (There's some discussion of future DOMs generalizing ownerElement
  861. * into ownerNode and making it work on all types of nodes. This
  862. * still wouldn't help the users of Level 1 or Level 2 DOMs)
  863. * <p>
  864. *
  865. * @param node Node whose XPath parent we want to obtain
  866. *
  867. * @return the parent of the node, or the ownerElement if it's an
  868. * Attr node, or null if the node is an orphan.
  869. *
  870. * @throws RuntimeException if the Document has no root element.
  871. * This can't arise if the Document was created
  872. * via the DOM Level 2 factory methods, but is possible if other
  873. * mechanisms were used to obtain it
  874. */
  875. public static Node getParentOfNode(Node node) throws RuntimeException
  876. {
  877. Node parent;
  878. short nodeType = node.getNodeType();
  879. if (Node.ATTRIBUTE_NODE == nodeType)
  880. {
  881. Document doc = node.getOwnerDocument();
  882. /*
  883. TBD:
  884. if(null == doc)
  885. {
  886. throw new RuntimeException(XSLMessages.createXPATHMessage(XPATHErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT, null));//"Attribute child does not have an owner document!");
  887. }
  888. */
  889. // Given how expensive the tree walk may be, we should first ask
  890. // whether this DOM can answer the question for us. The additional
  891. // test does slow down Level 1 DOMs slightly. DOMHelper2, which
  892. // is currently specialized for Xerces, assumes it can use the
  893. // Level 2 solution. We might want to have an intermediate stage,
  894. // which would assume DOM Level 2 but not assume Xerces.
  895. //
  896. // (Shouldn't have to check whether impl is null in a compliant DOM,
  897. // but let's be paranoid for a moment...)
  898. DOMImplementation impl=doc.getImplementation();
  899. if(impl!=null && impl.hasFeature("Core","2.0"))
  900. {
  901. parent=((Attr)node).getOwnerElement();
  902. return parent;
  903. }
  904. // DOM Level 1 solution, as fallback. Hugely expensive.
  905. Element rootElem = doc.getDocumentElement();
  906. if (null == rootElem)
  907. {
  908. throw new RuntimeException(
  909. XSLMessages.createXPATHMessage(
  910. XPATHErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT_ELEMENT,
  911. null)); //"Attribute child does not have an owner document element!");
  912. }
  913. parent = locateAttrParent(rootElem, node);
  914. }
  915. else
  916. {
  917. parent = node.getParentNode();
  918. // if((Node.DOCUMENT_NODE != nodeType) && (null == parent))
  919. // {
  920. // throw new RuntimeException("Child does not have parent!");
  921. // }
  922. }
  923. return parent;
  924. }
  925. /**
  926. * Given an ID, return the element. This can work only if the document
  927. * is interpreted in the context of a DTD or Schema, since otherwise
  928. * we don't know which attributes are or aren't IDs.
  929. * <p>
  930. * Note that DOM Level 1 had no ability to retrieve this information.
  931. * DOM Level 2 introduced it but does not promise that it will be
  932. * supported in all DOMs; those which can't support it will always
  933. * return null.
  934. * <p>
  935. * TODO: getElementByID is currently unimplemented. Support DOM Level 2?
  936. *
  937. * @param id The unique identifier to be searched for.
  938. * @param doc The document to search within.
  939. * @return CURRENTLY HARDCODED TO NULL, but it should be:
  940. * The node which has this unique identifier, or null if there
  941. * is no such node or this DOM can't reliably recognize it.
  942. */
  943. public Element getElementByID(String id, Document doc)
  944. {
  945. return null;
  946. }
  947. /**
  948. * The getUnparsedEntityURI function returns the URI of the unparsed
  949. * entity with the specified name in the same document as the context
  950. * node (see [3.3 Unparsed Entities]). It returns the empty string if
  951. * there is no such entity.
  952. * <p>
  953. * XML processors may choose to use the System Identifier (if one
  954. * is provided) to resolve the entity, rather than the URI in the
  955. * Public Identifier. The details are dependent on the processor, and
  956. * we would have to support some form of plug-in resolver to handle
  957. * this properly. Currently, we simply return the System Identifier if
  958. * present, and hope that it a usable URI or that our caller can
  959. * map it to one.
  960. * TODO: Resolve Public Identifiers... or consider changing function name.
  961. * <p>
  962. * If we find a relative URI
  963. * reference, XML expects it to be resolved in terms of the base URI
  964. * of the document. The DOM doesn't do that for us, and it isn't
  965. * entirely clear whether that should be done here; currently that's
  966. * pushed up to a higher levelof our application. (Note that DOM Level
  967. * 1 didn't store the document's base URI.)
  968. * TODO: Consider resolving Relative URIs.
  969. * <p>
  970. * (The DOM's statement that "An XML processor may choose to
  971. * completely expand entities before the structure model is passed
  972. * to the DOM" refers only to parsed entities, not unparsed, and hence
  973. * doesn't affect this function.)
  974. *
  975. * @param name A string containing the Entity Name of the unparsed
  976. * entity.
  977. * @param doc Document node for the document to be searched.
  978. *
  979. * @return String containing the URI of the Unparsed Entity, or an
  980. * empty string if no such entity exists.
  981. */
  982. public String getUnparsedEntityURI(String name, Document doc)
  983. {
  984. String url = "";
  985. DocumentType doctype = doc.getDoctype();
  986. if (null != doctype)
  987. {
  988. NamedNodeMap entities = doctype.getEntities();
  989. if(null == entities)
  990. return url;
  991. Entity entity = (Entity) entities.getNamedItem(name);
  992. if(null == entity)
  993. return url;
  994. String notationName = entity.getNotationName();
  995. if (null != notationName) // then it's unparsed
  996. {
  997. // The draft says: "The XSLT processor may use the public
  998. // identifier to generate a URI for the entity instead of the URI
  999. // specified in the system identifier. If the XSLT processor does
  1000. // not use the public identifier to generate the URI, it must use
  1001. // the system identifier; if the system identifier is a relative
  1002. // URI, it must be resolved into an absolute URI using the URI of
  1003. // the resource containing the entity declaration as the base
  1004. // URI [RFC2396]."
  1005. // So I'm falling a bit short here.
  1006. url = entity.getSystemId();
  1007. if (null == url)
  1008. {
  1009. url = entity.getPublicId();
  1010. }
  1011. else
  1012. {
  1013. // This should be resolved to an absolute URL, but that's hard
  1014. // to do from here.
  1015. }
  1016. }
  1017. }
  1018. return url;
  1019. }
  1020. /**
  1021. * Support for getParentOfNode; walks a DOM tree until it finds
  1022. * the Element which owns the Attr. This is hugely expensive, and
  1023. * if at all possible you should use the DOM Level 2 Attr.ownerElement()
  1024. * method instead.
  1025. * <p>
  1026. * The DOM Level 1 developers expected that folks would keep track
  1027. * of the last Element they'd seen and could recover the info from
  1028. * that source. Obviously that doesn't work very well if the only
  1029. * information you've been presented with is the Attr. The DOM Level 2
  1030. * getOwnerElement() method fixes that, but only for Level 2 and
  1031. * later DOMs.
  1032. *
  1033. * @param elem Element whose subtree is to be searched for this Attr
  1034. * @param attr Attr whose owner is to be located.
  1035. *
  1036. * @return the first Element whose attribute list includes the provided
  1037. * attr. In modern DOMs, this will also be the only such Element. (Early
  1038. * DOMs had some hope that Attrs might be sharable, but this idea has
  1039. * been abandoned.)
  1040. */
  1041. private static Node locateAttrParent(Element elem, Node attr)
  1042. {
  1043. Node parent = null;
  1044. // This should only be called for Level 1 DOMs, so we don't have to
  1045. // worry about namespace issues. In later levels, it's possible
  1046. // for a DOM to have two Attrs with the same NodeName but
  1047. // different namespaces, and we'd need to get getAttributeNodeNS...
  1048. // but later levels also have Attr.getOwnerElement.
  1049. Attr check=elem.getAttributeNode(attr.getNodeName());
  1050. if(check==attr)
  1051. parent = elem;
  1052. if (null == parent)
  1053. {
  1054. for (Node node = elem.getFirstChild(); null != node;
  1055. node = node.getNextSibling())
  1056. {
  1057. if (Node.ELEMENT_NODE == node.getNodeType())
  1058. {
  1059. parent = locateAttrParent((Element) node, attr);
  1060. if (null != parent)
  1061. break;
  1062. }
  1063. }
  1064. }
  1065. return parent;
  1066. }
  1067. /**
  1068. * The factory object used for creating nodes
  1069. * in the result tree.
  1070. */
  1071. protected Document m_DOMFactory = null;
  1072. /**
  1073. * Store the factory object required to create DOM nodes
  1074. * in the result tree. In fact, that's just the result tree's
  1075. * Document node...
  1076. *
  1077. * @param domFactory The DOM Document Node within whose context
  1078. * the result tree will be built.
  1079. */
  1080. public void setDOMFactory(Document domFactory)
  1081. {
  1082. this.m_DOMFactory = domFactory;
  1083. }
  1084. /**
  1085. * Retrieve the factory object required to create DOM nodes
  1086. * in the result tree.
  1087. *
  1088. * @return The result tree's DOM Document Node.
  1089. */
  1090. public Document getDOMFactory()
  1091. {
  1092. if (null == this.m_DOMFactory)
  1093. {
  1094. this.m_DOMFactory = createDocument();
  1095. }
  1096. return this.m_DOMFactory;
  1097. }
  1098. /**
  1099. * Get the textual contents of the node. See
  1100. * getNodeData(Node,FastStringBuffer) for discussion of how
  1101. * whitespace nodes are handled.
  1102. *
  1103. * @param node DOM Node to be examined
  1104. * @return String containing a concatenation of all the
  1105. * textual content within that node.
  1106. * @see #getNodeData(Node,FastStringBuffer)
  1107. *
  1108. */
  1109. public static String getNodeData(Node node)
  1110. {
  1111. FastStringBuffer buf = StringBufferPool.get();
  1112. String s;
  1113. try
  1114. {
  1115. getNodeData(node, buf);
  1116. s = (buf.length() > 0) ? buf.toString() : "";
  1117. }
  1118. finally
  1119. {
  1120. StringBufferPool.free(buf);
  1121. }
  1122. return s;
  1123. }
  1124. /**
  1125. * Retrieve the text content of a DOM subtree, appending it into a
  1126. * user-supplied FastStringBuffer object. Note that attributes are
  1127. * not considered part of the content of an element.
  1128. * <p>
  1129. * There are open questions regarding whitespace stripping.
  1130. * Currently we make no special effort in that regard, since the standard
  1131. * DOM doesn't yet provide DTD-based information to distinguish
  1132. * whitespace-in-element-context from genuine #PCDATA. Note that we
  1133. * should probably also consider xml:space if/when we address this.
  1134. * DOM Level 3 may solve the problem for us.
  1135. *
  1136. * @param node Node whose subtree is to be walked, gathering the
  1137. * contents of all Text or CDATASection nodes.
  1138. * @param buf FastStringBuffer into which the contents of the text
  1139. * nodes are to be concatenated.
  1140. */
  1141. public static void getNodeData(Node node, FastStringBuffer buf)
  1142. {
  1143. switch (node.getNodeType())
  1144. {
  1145. case Node.DOCUMENT_FRAGMENT_NODE :
  1146. case Node.DOCUMENT_NODE :
  1147. case Node.ELEMENT_NODE :
  1148. {
  1149. for (Node child = node.getFirstChild(); null != child;
  1150. child = child.getNextSibling())
  1151. {
  1152. getNodeData(child, buf);
  1153. }
  1154. }
  1155. break;
  1156. case Node.TEXT_NODE :
  1157. case Node.CDATA_SECTION_NODE :
  1158. buf.append(node.getNodeValue());
  1159. break;
  1160. case Node.ATTRIBUTE_NODE :
  1161. buf.append(node.getNodeValue());
  1162. break;
  1163. case Node.PROCESSING_INSTRUCTION_NODE :
  1164. // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
  1165. break;
  1166. default :
  1167. // ignore
  1168. break;
  1169. }
  1170. }
  1171. }