1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. // Sep 14, 2000:
  58. // Fixed serializer to report IO exceptions directly, instead of at
  59. // the end of document processing.
  60. // Reported by Patrick Higgins <phiggins@transzap.com>
  61. package com.sun.org.apache.xml.internal.serialize;
  62. import java.io.IOException;
  63. import org.w3c.dom.Element;
  64. import org.w3c.dom.Node;
  65. import org.xml.sax.AttributeList;
  66. import org.xml.sax.Attributes;
  67. import org.xml.sax.SAXException;
  68. /**
  69. * Implements a text serializer supporting both DOM and SAX
  70. * serializing. For usage instructions see {@link Serializer}.
  71. * <p>
  72. * If an output stream is used, the encoding is taken from the
  73. * output format (defaults to <tt>UTF-8</tt>). If a writer is
  74. * used, make sure the writer uses the same encoding (if applies)
  75. * as specified in the output format.
  76. * <p>
  77. * The serializer supports both DOM and SAX. DOM serializing is done
  78. * by calling {@link #serialize} and SAX serializing is done by firing
  79. * SAX events and using the serializer as a document handler.
  80. * <p>
  81. * If an I/O exception occurs while serializing, the serializer
  82. * will not throw an exception directly, but only throw it
  83. * at the end of serializing (either DOM or SAX's {@link
  84. * org.xml.sax.DocumentHandler#endDocument}.
  85. *
  86. *
  87. * @version $Revision: 1.13 $ $Date: 2003/05/13 13:23:49 $
  88. * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  89. * @see Serializer
  90. */
  91. public class TextSerializer
  92. extends BaseMarkupSerializer
  93. {
  94. /**
  95. * Constructs a new serializer. The serializer cannot be used without
  96. * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
  97. * first.
  98. */
  99. public TextSerializer()
  100. {
  101. super( new OutputFormat( Method.TEXT, null, false ) );
  102. }
  103. public void setOutputFormat( OutputFormat format )
  104. {
  105. super.setOutputFormat( format != null ? format : new OutputFormat( Method.TEXT, null, false ) );
  106. }
  107. //-----------------------------------------//
  108. // SAX content handler serializing methods //
  109. //-----------------------------------------//
  110. public void startElement( String namespaceURI, String localName,
  111. String rawName, Attributes attrs )
  112. throws SAXException
  113. {
  114. startElement( rawName == null ? localName : rawName, null );
  115. }
  116. public void endElement( String namespaceURI, String localName,
  117. String rawName )
  118. throws SAXException
  119. {
  120. endElement( rawName == null ? localName : rawName );
  121. }
  122. //------------------------------------------//
  123. // SAX document handler serializing methods //
  124. //------------------------------000---------//
  125. public void startElement( String tagName, AttributeList attrs )
  126. throws SAXException
  127. {
  128. boolean preserveSpace;
  129. ElementState state;
  130. try {
  131. state = getElementState();
  132. if ( isDocumentState() ) {
  133. // If this is the root element handle it differently.
  134. // If the first root element in the document, serialize
  135. // the document's DOCTYPE. Space preserving defaults
  136. // to that of the output format.
  137. if ( ! _started )
  138. startDocument( tagName );
  139. }
  140. // For any other element, if first in parent, then
  141. // use the parnet's space preserving.
  142. preserveSpace = state.preserveSpace;
  143. // Do not change the current element state yet.
  144. // This only happens in endElement().
  145. // Ignore all other attributes of the element, only printing
  146. // its contents.
  147. // Now it's time to enter a new element state
  148. // with the tag name and space preserving.
  149. // We still do not change the curent element state.
  150. state = enterElementState( null, null, tagName, preserveSpace );
  151. } catch ( IOException except ) {
  152. throw new SAXException( except );
  153. }
  154. }
  155. public void endElement( String tagName )
  156. throws SAXException
  157. {
  158. try {
  159. endElementIO( tagName );
  160. } catch ( IOException except ) {
  161. throw new SAXException( except );
  162. }
  163. }
  164. public void endElementIO( String tagName )
  165. throws IOException
  166. {
  167. ElementState state;
  168. // Works much like content() with additions for closing
  169. // an element. Note the different checks for the closed
  170. // element's state and the parent element's state.
  171. state = getElementState();
  172. // Leave the element state and update that of the parent
  173. // (if we're not root) to not empty and after element.
  174. state = leaveElementState();
  175. state.afterElement = true;
  176. state.empty = false;
  177. if ( isDocumentState() )
  178. _printer.flush();
  179. }
  180. public void processingInstructionIO( String target, String code ) throws IOException
  181. {
  182. }
  183. public void comment( String text )
  184. {
  185. }
  186. public void comment( char[] chars, int start, int length )
  187. {
  188. }
  189. public void characters( char[] chars, int start, int length )
  190. throws SAXException
  191. {
  192. ElementState state;
  193. try {
  194. state = content();
  195. state.doCData = state.inCData = false;
  196. printText( chars, start, length, true, true );
  197. } catch ( IOException except ) {
  198. throw new SAXException( except );
  199. }
  200. }
  201. protected void characters( String text, boolean unescaped )
  202. throws IOException
  203. {
  204. ElementState state;
  205. state = content();
  206. state.doCData = state.inCData = false;
  207. printText( text, true, true );
  208. }
  209. //------------------------------------------//
  210. // Generic node serializing methods methods //
  211. //------------------------------------------//
  212. /**
  213. * Called to serialize the document's DOCTYPE by the root element.
  214. * <p>
  215. * This method will check if it has not been called before ({@link #_started}),
  216. * will serialize the document type declaration, and will serialize all
  217. * pre-root comments and PIs that were accumulated in the document
  218. * (see {@link #serializePreRoot}). Pre-root will be serialized even if
  219. * this is not the first root element of the document.
  220. */
  221. protected void startDocument( String rootTagName )
  222. throws IOException
  223. {
  224. // Required to stop processing the DTD, even though the DTD
  225. // is not printed.
  226. _printer.leaveDTD();
  227. _started = true;
  228. // Always serialize these, even if not te first root element.
  229. serializePreRoot();
  230. }
  231. /**
  232. * Called to serialize a DOM element. Equivalent to calling {@link
  233. * #startElement}, {@link #endElement} and serializing everything
  234. * inbetween, but better optimized.
  235. */
  236. protected void serializeElement( Element elem )
  237. throws IOException
  238. {
  239. Node child;
  240. ElementState state;
  241. boolean preserveSpace;
  242. String tagName;
  243. tagName = elem.getTagName();
  244. state = getElementState();
  245. if ( isDocumentState() ) {
  246. // If this is the root element handle it differently.
  247. // If the first root element in the document, serialize
  248. // the document's DOCTYPE. Space preserving defaults
  249. // to that of the output format.
  250. if ( ! _started )
  251. startDocument( tagName );
  252. }
  253. // For any other element, if first in parent, then
  254. // use the parnet's space preserving.
  255. preserveSpace = state.preserveSpace;
  256. // Do not change the current element state yet.
  257. // This only happens in endElement().
  258. // Ignore all other attributes of the element, only printing
  259. // its contents.
  260. // If element has children, then serialize them, otherwise
  261. // serialize en empty tag.
  262. if ( elem.hasChildNodes() ) {
  263. // Enter an element state, and serialize the children
  264. // one by one. Finally, end the element.
  265. state = enterElementState( null, null, tagName, preserveSpace );
  266. child = elem.getFirstChild();
  267. while ( child != null ) {
  268. serializeNode( child );
  269. child = child.getNextSibling();
  270. }
  271. endElementIO( tagName );
  272. } else {
  273. if ( ! isDocumentState() ) {
  274. // After element but parent element is no longer empty.
  275. state.afterElement = true;
  276. state.empty = false;
  277. }
  278. }
  279. }
  280. /**
  281. * Serialize the DOM node. This method is unique to the Text serializer.
  282. *
  283. * @param node The node to serialize
  284. */
  285. protected void serializeNode( Node node )
  286. throws IOException
  287. {
  288. // Based on the node type call the suitable SAX handler.
  289. // Only comments entities and documents which are not
  290. // handled by SAX are serialized directly.
  291. switch ( node.getNodeType() ) {
  292. case Node.TEXT_NODE : {
  293. String text;
  294. text = node.getNodeValue();
  295. if ( text != null )
  296. characters( node.getNodeValue(), true );
  297. break;
  298. }
  299. case Node.CDATA_SECTION_NODE : {
  300. String text;
  301. text = node.getNodeValue();
  302. if ( text != null )
  303. characters( node.getNodeValue(), true );
  304. break;
  305. }
  306. case Node.COMMENT_NODE :
  307. break;
  308. case Node.ENTITY_REFERENCE_NODE :
  309. // Ignore.
  310. break;
  311. case Node.PROCESSING_INSTRUCTION_NODE :
  312. break;
  313. case Node.ELEMENT_NODE :
  314. serializeElement( (Element) node );
  315. break;
  316. case Node.DOCUMENT_NODE :
  317. // !!! Fall through
  318. case Node.DOCUMENT_FRAGMENT_NODE : {
  319. Node child;
  320. // By definition this will happen if the node is a document,
  321. // document fragment, etc. Just serialize its contents. It will
  322. // work well for other nodes that we do not know how to serialize.
  323. child = node.getFirstChild();
  324. while ( child != null ) {
  325. serializeNode( child );
  326. child = child.getNextSibling();
  327. }
  328. break;
  329. }
  330. default:
  331. break;
  332. }
  333. }
  334. protected ElementState content(boolean ignorable)
  335. {
  336. ElementState state;
  337. state = getElementState();
  338. if ( ! isDocumentState() ) {
  339. // If this is the first content in the element,
  340. // change the state to not-empty.
  341. if ( state.empty )
  342. state.empty = false;
  343. // Except for one content type, all of them
  344. // are not last element. That one content
  345. // type will take care of itself.
  346. state.afterElement = false;
  347. }
  348. return state;
  349. }
  350. protected String getEntityRef( int ch )
  351. {
  352. return null;
  353. }
  354. }