1. /*
  2. * Copyright 2000-2004 The Apache Software Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. *
  16. */
  17. package org.apache.tools.ant.util;
  18. import java.io.IOException;
  19. import java.io.OutputStream;
  20. import java.io.OutputStreamWriter;
  21. import java.io.Writer;
  22. import org.w3c.dom.Attr;
  23. import org.w3c.dom.Element;
  24. import org.w3c.dom.NamedNodeMap;
  25. import org.w3c.dom.Node;
  26. import org.w3c.dom.NodeList;
  27. import org.w3c.dom.Text;
  28. /**
  29. * Writes a DOM tree to a given Writer.
  30. *
  31. * <p>Utility class used by {@link org.apache.tools.ant.XmlLogger
  32. * XmlLogger} and
  33. * org.apache.tools.ant.taskdefs.optional.junit.XMLJUnitResultFormatter
  34. * XMLJUnitResultFormatter}.</p>
  35. *
  36. */
  37. public class DOMElementWriter {
  38. private static String lSep = System.getProperty("line.separator");
  39. /**
  40. * Don't try to be too smart but at least recognize the predefined
  41. * entities.
  42. */
  43. protected String[] knownEntities = {"gt", "amp", "lt", "apos", "quot"};
  44. /**
  45. * Writes a DOM tree to a stream in UTF8 encoding. Note that
  46. * it prepends the <?xml version='1.0' encoding='UTF-8'?>.
  47. * The indent number is set to 0 and a 2-space indent.
  48. * @param root the root element of the DOM tree.
  49. * @param out the outputstream to write to.
  50. * @throws IOException if an error happens while writing to the stream.
  51. */
  52. public void write(Element root, OutputStream out) throws IOException {
  53. Writer wri = new OutputStreamWriter(out, "UTF8");
  54. wri.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  55. write(root, wri, 0, " ");
  56. wri.flush();
  57. }
  58. /**
  59. * Writes a DOM tree to a stream.
  60. *
  61. * @param element the Root DOM element of the tree
  62. * @param out where to send the output
  63. * @param indent number of
  64. * @param indentWith string that should be used to indent the corresponding tag.
  65. * @throws IOException if an error happens while writing to the stream.
  66. */
  67. public void write(Element element, Writer out, int indent,
  68. String indentWith)
  69. throws IOException {
  70. // Write indent characters
  71. for (int i = 0; i < indent; i++) {
  72. out.write(indentWith);
  73. }
  74. // Write element
  75. out.write("<");
  76. out.write(element.getTagName());
  77. // Write attributes
  78. NamedNodeMap attrs = element.getAttributes();
  79. for (int i = 0; i < attrs.getLength(); i++) {
  80. Attr attr = (Attr) attrs.item(i);
  81. out.write(" ");
  82. out.write(attr.getName());
  83. out.write("=\"");
  84. out.write(encode(attr.getValue()));
  85. out.write("\"");
  86. }
  87. out.write(">");
  88. // Write child elements and text
  89. boolean hasChildren = false;
  90. NodeList children = element.getChildNodes();
  91. for (int i = 0; i < children.getLength(); i++) {
  92. Node child = children.item(i);
  93. switch (child.getNodeType()) {
  94. case Node.ELEMENT_NODE:
  95. if (!hasChildren) {
  96. out.write(lSep);
  97. hasChildren = true;
  98. }
  99. write((Element) child, out, indent + 1, indentWith);
  100. break;
  101. case Node.TEXT_NODE:
  102. out.write(encode(child.getNodeValue()));
  103. break;
  104. case Node.COMMENT_NODE:
  105. out.write("<!--");
  106. out.write(encode(child.getNodeValue()));
  107. out.write("-->");
  108. break;
  109. case Node.CDATA_SECTION_NODE:
  110. out.write("<![CDATA[");
  111. out.write(encodedata(((Text) child).getData()));
  112. out.write("]]>");
  113. break;
  114. case Node.ENTITY_REFERENCE_NODE:
  115. out.write('&');
  116. out.write(child.getNodeName());
  117. out.write(';');
  118. break;
  119. case Node.PROCESSING_INSTRUCTION_NODE:
  120. out.write("<?");
  121. out.write(child.getNodeName());
  122. String data = child.getNodeValue();
  123. if (data != null && data.length() > 0) {
  124. out.write(' ');
  125. out.write(data);
  126. }
  127. out.write("?>");
  128. break;
  129. }
  130. }
  131. // If we had child elements, we need to indent before we close
  132. // the element, otherwise we're on the same line and don't need
  133. // to indent
  134. if (hasChildren) {
  135. for (int i = 0; i < indent; i++) {
  136. out.write(indentWith);
  137. }
  138. }
  139. // Write element close
  140. out.write("</");
  141. out.write(element.getTagName());
  142. out.write(">");
  143. out.write(lSep);
  144. out.flush();
  145. }
  146. /**
  147. * Escape <, > & ', " as their entities and
  148. * drop characters that are illegal in XML documents.
  149. */
  150. public String encode(String value) {
  151. StringBuffer sb = new StringBuffer();
  152. int len = value.length();
  153. for (int i = 0; i < len; i++) {
  154. char c = value.charAt(i);
  155. switch (c) {
  156. case '<':
  157. sb.append("<");
  158. break;
  159. case '>':
  160. sb.append(">");
  161. break;
  162. case '\'':
  163. sb.append("'");
  164. break;
  165. case '\"':
  166. sb.append(""");
  167. break;
  168. case '&':
  169. int nextSemi = value.indexOf(";", i);
  170. if (nextSemi < 0
  171. || !isReference(value.substring(i, nextSemi + 1))) {
  172. sb.append("&");
  173. } else {
  174. sb.append('&');
  175. }
  176. break;
  177. default:
  178. if (isLegalCharacter(c)) {
  179. sb.append(c);
  180. }
  181. break;
  182. }
  183. }
  184. return sb.substring(0);
  185. }
  186. /**
  187. * Drop characters that are illegal in XML documents.
  188. *
  189. * <p>Also ensure that we are not including an <code>]]></code>
  190. * marker by replacing that sequence with
  191. * <code>&#x5d;&#x5d;&gt;</code>.</p>
  192. *
  193. * <p>See XML 1.0 2.2 <a
  194. * href="http://www.w3.org/TR/1998/REC-xml-19980210#charsets">http://www.w3.org/TR/1998/REC-xml-19980210#charsets</a> and
  195. * 2.7 <a
  196. * href="http://www.w3.org/TR/1998/REC-xml-19980210#sec-cdata-sect">http://www.w3.org/TR/1998/REC-xml-19980210#sec-cdata-sect</a>.</p>
  197. */
  198. public String encodedata(final String value) {
  199. StringBuffer sb = new StringBuffer();
  200. int len = value.length();
  201. for (int i = 0; i < len; ++i) {
  202. char c = value.charAt(i);
  203. if (isLegalCharacter(c)) {
  204. sb.append(c);
  205. }
  206. }
  207. String result = sb.substring(0);
  208. int cdEnd = result.indexOf("]]>");
  209. while (cdEnd != -1) {
  210. sb.setLength(cdEnd);
  211. sb.append("]]>")
  212. .append(result.substring(cdEnd + 3));
  213. result = sb.substring(0);
  214. cdEnd = result.indexOf("]]>");
  215. }
  216. return result;
  217. }
  218. /**
  219. * Is the given argument a character or entity reference?
  220. */
  221. public boolean isReference(String ent) {
  222. if (!(ent.charAt(0) == '&') || !ent.endsWith(";")) {
  223. return false;
  224. }
  225. if (ent.charAt(1) == '#') {
  226. if (ent.charAt(2) == 'x') {
  227. try {
  228. Integer.parseInt(ent.substring(3, ent.length() - 1), 16);
  229. return true;
  230. } catch (NumberFormatException nfe) {
  231. return false;
  232. }
  233. } else {
  234. try {
  235. Integer.parseInt(ent.substring(2, ent.length() - 1));
  236. return true;
  237. } catch (NumberFormatException nfe) {
  238. return false;
  239. }
  240. }
  241. }
  242. String name = ent.substring(1, ent.length() - 1);
  243. for (int i = 0; i < knownEntities.length; i++) {
  244. if (name.equals(knownEntities[i])) {
  245. return true;
  246. }
  247. }
  248. return false;
  249. }
  250. /**
  251. * Is the given character allowed inside an XML document?
  252. *
  253. * <p>See XML 1.0 2.2 <a
  254. * href="http://www.w3.org/TR/1998/REC-xml-19980210#charsets">
  255. * http://www.w3.org/TR/1998/REC-xml-19980210#charsets</a>.</p>
  256. *
  257. * @since 1.10, Ant 1.5
  258. */
  259. public boolean isLegalCharacter(char c) {
  260. if (c == 0x9 || c == 0xA || c == 0xD) {
  261. return true;
  262. } else if (c < 0x20) {
  263. return false;
  264. } else if (c <= 0xD7FF) {
  265. return true;
  266. } else if (c < 0xE000) {
  267. return false;
  268. } else if (c <= 0xFFFD) {
  269. return true;
  270. }
  271. return false;
  272. }
  273. }