1. /* $Id: NodeCreateRule.java,v 1.10 2004/05/10 06:52:50 skitching Exp $
  2. *
  3. * Copyright 2002-2004 The Apache Software Foundation.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.commons.digester;
  18. import javax.xml.parsers.DocumentBuilder;
  19. import javax.xml.parsers.DocumentBuilderFactory;
  20. import javax.xml.parsers.ParserConfigurationException;
  21. import org.w3c.dom.Attr;
  22. import org.w3c.dom.DOMException;
  23. import org.w3c.dom.Document;
  24. import org.w3c.dom.Element;
  25. import org.w3c.dom.Node;
  26. import org.xml.sax.Attributes;
  27. import org.xml.sax.ContentHandler;
  28. import org.xml.sax.SAXException;
  29. import org.xml.sax.XMLReader;
  30. import org.xml.sax.helpers.DefaultHandler;
  31. /**
  32. * A rule implementation that creates a DOM
  33. * {@link org.w3c.dom.Node Node} containing the XML at the element that matched
  34. * the rule. Two concrete types of nodes can be created by this rule:
  35. * <ul>
  36. * <li>the default is to create an {@link org.w3c.dom.Element Element} node.
  37. * The created element will correspond to the element that matched the rule,
  38. * containing all XML content underneath that element.</li>
  39. * <li>alternatively, this rule can create nodes of type
  40. * {@link org.w3c.dom.DocumentFragment DocumentFragment}, which will contain
  41. * only the XML content under the element the rule was triggered on.</li>
  42. * </ul>
  43. * The created node will be normalized, meaning it will not contain text nodes
  44. * that only contain white space characters.
  45. *
  46. *
  47. * <p>The created <code>Node</code> will be pushed on Digester's object stack
  48. * when done. To use it in the context of another DOM
  49. * {@link org.w3c.dom.Document Document}, it must be imported first, using the
  50. * Document method
  51. * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
  52. * </p>
  53. *
  54. * <p><strong>Important Note:</strong> This is implemented by replacing the SAX
  55. * {@link org.xml.sax.ContentHandler ContentHandler} in the parser used by
  56. * Digester, and resetting it when the matched element is closed. As a side
  57. * effect, rules that would match XML nodes under the element that matches
  58. * a <code>NodeCreateRule</code> will never be triggered by Digester, which
  59. * usually is the behavior one would expect.</p>
  60. *
  61. * <p><strong>Note</strong> that the current implementation does not set the namespace prefixes
  62. * in the exported nodes. The (usually more important) namespace URIs are set,
  63. * of course.</p>
  64. *
  65. * @since Digester 1.4
  66. */
  67. public class NodeCreateRule extends Rule {
  68. // ---------------------------------------------------------- Inner Classes
  69. /**
  70. * The SAX content handler that does all the actual work of assembling the
  71. * DOM node tree from the SAX events.
  72. */
  73. private class NodeBuilder
  74. extends DefaultHandler {
  75. // ------------------------------------------------------- Constructors
  76. /**
  77. * Constructor.
  78. *
  79. * <p>Stores the content handler currently used by Digester so it can
  80. * be reset when done, and initializes the DOM objects needed to
  81. * build the node.</p>
  82. *
  83. * @param doc the document to use to create nodes
  84. * @param root the root node
  85. * @throws ParserConfigurationException if the DocumentBuilderFactory
  86. * could not be instantiated
  87. * @throws SAXException if the XMLReader could not be instantiated by
  88. * Digester (should not happen)
  89. */
  90. public NodeBuilder(Document doc, Node root)
  91. throws ParserConfigurationException, SAXException {
  92. this.doc = doc;
  93. this.root = root;
  94. this.top = root;
  95. oldContentHandler = digester.getXMLReader().getContentHandler();
  96. }
  97. // ------------------------------------------------- Instance Variables
  98. /**
  99. * The content handler used by Digester before it was set to this
  100. * content handler.
  101. */
  102. protected ContentHandler oldContentHandler = null;
  103. /**
  104. * Depth of the current node, relative to the element where the content
  105. * handler was put into action.
  106. */
  107. protected int depth = 0;
  108. /**
  109. * A DOM Document used to create the various Node instances.
  110. */
  111. protected Document doc = null;
  112. /**
  113. * The DOM node that will be pushed on Digester's stack.
  114. */
  115. protected Node root = null;
  116. /**
  117. * The current top DOM mode.
  118. */
  119. protected Node top = null;
  120. // --------------------------------------------- ContentHandler Methods
  121. /**
  122. * Appends a {@link org.w3c.dom.Text Text} node to the current node.
  123. *
  124. * @param ch the characters from the XML document
  125. * @param start the start position in the array
  126. * @param length the number of characters to read from the array
  127. * @throws SAXException if the DOM implementation throws an exception
  128. */
  129. public void characters(char[] ch, int start, int length)
  130. throws SAXException {
  131. try {
  132. String str = new String(ch, start, length);
  133. if (str.trim().length() > 0) {
  134. top.appendChild(doc.createTextNode(str));
  135. }
  136. } catch (DOMException e) {
  137. throw new SAXException(e.getMessage());
  138. }
  139. }
  140. /**
  141. * Checks whether control needs to be returned to Digester.
  142. *
  143. * @param namespaceURI the namespace URI
  144. * @param localName the local name
  145. * @param qName the qualified (prefixed) name
  146. * @throws SAXException if the DOM implementation throws an exception
  147. */
  148. public void endElement(String namespaceURI, String localName,
  149. String qName)
  150. throws SAXException {
  151. try {
  152. if (depth == 0) {
  153. getDigester().getXMLReader().setContentHandler(
  154. oldContentHandler);
  155. getDigester().push(root);
  156. getDigester().endElement(namespaceURI, localName, qName);
  157. }
  158. top = top.getParentNode();
  159. depth--;
  160. } catch (DOMException e) {
  161. throw new SAXException(e.getMessage());
  162. }
  163. }
  164. /**
  165. * Adds a new
  166. * {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to
  167. * the current node.
  168. *
  169. * @param target the processing instruction target
  170. * @param data the processing instruction data, or null if none was
  171. * supplied
  172. * @throws SAXException if the DOM implementation throws an exception
  173. */
  174. public void processingInstruction(String target, String data)
  175. throws SAXException {
  176. try {
  177. top.appendChild(doc.createProcessingInstruction(target, data));
  178. } catch (DOMException e) {
  179. throw new SAXException(e.getMessage());
  180. }
  181. }
  182. /**
  183. * Adds a new child {@link org.w3c.dom.Element Element} to the current
  184. * node.
  185. *
  186. * @param namespaceURI the namespace URI
  187. * @param localName the local name
  188. * @param qName the qualified (prefixed) name
  189. * @param atts the list of attributes
  190. * @throws SAXException if the DOM implementation throws an exception
  191. */
  192. public void startElement(String namespaceURI, String localName,
  193. String qName, Attributes atts)
  194. throws SAXException {
  195. try {
  196. Node previousTop = top;
  197. if ((localName == null) || (localName.length() == 0)) {
  198. top = doc.createElement(qName);
  199. } else {
  200. top = doc.createElementNS(namespaceURI, localName);
  201. }
  202. for (int i = 0; i < atts.getLength(); i++) {
  203. Attr attr = null;
  204. if ((atts.getLocalName(i) == null) ||
  205. (atts.getLocalName(i).length() == 0)) {
  206. attr = doc.createAttribute(atts.getQName(i));
  207. attr.setNodeValue(atts.getValue(i));
  208. ((Element)top).setAttributeNode(attr);
  209. } else {
  210. attr = doc.createAttributeNS(atts.getURI(i),
  211. atts.getLocalName(i));
  212. attr.setNodeValue(atts.getValue(i));
  213. ((Element)top).setAttributeNodeNS(attr);
  214. }
  215. }
  216. previousTop.appendChild(top);
  217. depth++;
  218. } catch (DOMException e) {
  219. throw new SAXException(e.getMessage());
  220. }
  221. }
  222. }
  223. // ----------------------------------------------------------- Constructors
  224. /**
  225. * Default constructor. Creates an instance of this rule that will create a
  226. * DOM {@link org.w3c.dom.Element Element}.
  227. */
  228. public NodeCreateRule() throws ParserConfigurationException {
  229. this(Node.ELEMENT_NODE);
  230. }
  231. /**
  232. * Constructor. Creates an instance of this rule that will create a DOM
  233. * {@link org.w3c.dom.Element Element}, but lets you specify the JAXP
  234. * <code>DocumentBuilder</code> that should be used when constructing the
  235. * node tree.
  236. *
  237. * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
  238. */
  239. public NodeCreateRule(DocumentBuilder documentBuilder) {
  240. this(Node.ELEMENT_NODE, documentBuilder);
  241. }
  242. /**
  243. * Constructor. Creates an instance of this rule that will create either a
  244. * DOM {@link org.w3c.dom.Element Element} or a DOM
  245. * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
  246. * value of the <code>nodeType</code> parameter.
  247. *
  248. * @param nodeType the type of node to create, which can be either
  249. * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or
  250. * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
  251. * @throws IllegalArgumentException if the node type is not supported
  252. */
  253. public NodeCreateRule(int nodeType) throws ParserConfigurationException {
  254. this(nodeType,
  255. DocumentBuilderFactory.newInstance().newDocumentBuilder());
  256. }
  257. /**
  258. * Constructor. Creates an instance of this rule that will create either a
  259. * DOM {@link org.w3c.dom.Element Element} or a DOM
  260. * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
  261. * value of the <code>nodeType</code> parameter. This constructor lets you
  262. * specify the JAXP <code>DocumentBuilder</code> that should be used when
  263. * constructing the node tree.
  264. *
  265. * @param nodeType the type of node to create, which can be either
  266. * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or
  267. * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
  268. * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
  269. * @throws IllegalArgumentException if the node type is not supported
  270. */
  271. public NodeCreateRule(int nodeType, DocumentBuilder documentBuilder) {
  272. if (!((nodeType == Node.DOCUMENT_FRAGMENT_NODE) ||
  273. (nodeType == Node.ELEMENT_NODE))) {
  274. throw new IllegalArgumentException(
  275. "Can only create nodes of type DocumentFragment and Element");
  276. }
  277. this.nodeType = nodeType;
  278. this.documentBuilder = documentBuilder;
  279. }
  280. // ----------------------------------------------------- Instance Variables
  281. /**
  282. * The JAXP <code>DocumentBuilder</code> to use.
  283. */
  284. private DocumentBuilder documentBuilder = null;
  285. /**
  286. * The type of the node that should be created. Must be one of the
  287. * constants defined in {@link org.w3c.dom.Node Node}, but currently only
  288. * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
  289. * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
  290. * are allowed values.
  291. */
  292. private int nodeType = Node.ELEMENT_NODE;
  293. // ----------------------------------------------------------- Rule Methods
  294. /**
  295. * Implemented to replace the content handler currently in use by a
  296. * {@link NodeBuilder NodeCreateRule.NodeBuilder}.
  297. *
  298. * @param namespaceURI the namespace URI of the matching element, or an
  299. * empty string if the parser is not namespace aware or the element has
  300. * no namespace
  301. * @param name the local name if the parser is namespace aware, or just
  302. * the element name otherwise
  303. * @param attributes The attribute list of this element
  304. * @throws Exception indicates a JAXP configuration problem
  305. */
  306. public void begin(String namespaceURI, String name, Attributes attributes)
  307. throws Exception {
  308. XMLReader xmlReader = getDigester().getXMLReader();
  309. Document doc = documentBuilder.newDocument();
  310. NodeBuilder builder = null;
  311. if (nodeType == Node.ELEMENT_NODE) {
  312. Element element = null;
  313. if (getDigester().getNamespaceAware()) {
  314. element =
  315. doc.createElementNS(namespaceURI, name);
  316. for (int i = 0; i < attributes.getLength(); i++) {
  317. element.setAttributeNS(attributes.getURI(i),
  318. attributes.getLocalName(i),
  319. attributes.getValue(i));
  320. }
  321. } else {
  322. element = doc.createElement(name);
  323. for (int i = 0; i < attributes.getLength(); i++) {
  324. element.setAttribute(attributes.getQName(i),
  325. attributes.getValue(i));
  326. }
  327. }
  328. builder = new NodeBuilder(doc, element);
  329. } else {
  330. builder = new NodeBuilder(doc, doc.createDocumentFragment());
  331. }
  332. xmlReader.setContentHandler(builder);
  333. }
  334. /**
  335. * Pop the Node off the top of the stack.
  336. */
  337. public void end() throws Exception {
  338. Object top = digester.pop();
  339. }
  340. }