1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. // Sep 14, 2000:
  58. // Fixed comments to preserve whitespaces and add a line break
  59. // when indenting. Reported by Gervase Markham <gerv@gerv.net>
  60. // Sep 14, 2000:
  61. // Fixed serializer to report IO exception directly, instead at
  62. // the end of document processing.
  63. // Reported by Patrick Higgins <phiggins@transzap.com>
  64. // Sep 13, 2000:
  65. // CR in character data will print as &#0D;
  66. // Aug 25, 2000:
  67. // Fixed processing instruction printing inside element content
  68. // to not escape content. Reported by Mikael Staldal
  69. // <d96-mst@d.kth.se>
  70. // Aug 25, 2000:
  71. // Added ability to omit comments.
  72. // Contributed by Anupam Bagchi <abagchi@jtcsv.com>
  73. // Aug 26, 2000:
  74. // Fixed bug in newline handling when preserving spaces.
  75. // Contributed by Mike Dusseault <mdusseault@home.com>
  76. // Aug 29, 2000:
  77. // Fixed state.unescaped not being set to false when
  78. // entering element state.
  79. // Reported by Lowell Vaughn <lvaughn@agillion.com>
  80. package com.sun.org.apache.xml.internal.serialize;
  81. import java.io.IOException;
  82. import java.io.OutputStream;
  83. import java.io.Writer;
  84. import java.util.Hashtable;
  85. import java.util.Vector;
  86. import com.sun.org.apache.xerces.internal.dom.DOMErrorImpl;
  87. import com.sun.org.apache.xerces.internal.dom.DOMLocatorImpl;
  88. import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
  89. import com.sun.org.apache.xerces.internal.util.XMLChar;
  90. import org.w3c.dom.DOMImplementation;
  91. import org.w3c.dom.Document;
  92. import org.w3c.dom.DocumentFragment;
  93. import org.w3c.dom.DocumentType;
  94. import org.w3c.dom.DOMError;
  95. import org.w3c.dom.DOMErrorHandler;
  96. import org.w3c.dom.Element;
  97. import org.w3c.dom.Entity;
  98. import org.w3c.dom.NamedNodeMap;
  99. import org.w3c.dom.Node;
  100. import org.w3c.dom.Notation;
  101. import org.w3c.dom.ls.LSSerializerFilter;
  102. import org.w3c.dom.traversal.NodeFilter;
  103. import org.xml.sax.ContentHandler;
  104. import org.xml.sax.DTDHandler;
  105. import org.xml.sax.DocumentHandler;
  106. import org.xml.sax.Locator;
  107. import org.xml.sax.SAXException;
  108. import org.xml.sax.ext.DeclHandler;
  109. import org.xml.sax.ext.LexicalHandler;
  110. /**
  111. * Base class for a serializer supporting both DOM and SAX pretty
  112. * serializing of XML/HTML/XHTML documents. Derived classes perform
  113. * the method-specific serializing, this class provides the common
  114. * serializing mechanisms.
  115. * <p>
  116. * The serializer must be initialized with the proper writer and
  117. * output format before it can be used by calling {@link #setOutputCharStream}
  118. * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat}
  119. * for the output format.
  120. * <p>
  121. * The serializer can be reused any number of times, but cannot
  122. * be used concurrently by two threads.
  123. * <p>
  124. * If an output stream is used, the encoding is taken from the
  125. * output format (defaults to <tt>UTF-8</tt>). If a writer is
  126. * used, make sure the writer uses the same encoding (if applies)
  127. * as specified in the output format.
  128. * <p>
  129. * The serializer supports both DOM and SAX. DOM serializing is done
  130. * by calling {@link #serialize(Document)} and SAX serializing is done by firing
  131. * SAX events and using the serializer as a document handler.
  132. * This also applies to derived class.
  133. * <p>
  134. * If an I/O exception occurs while serializing, the serializer
  135. * will not throw an exception directly, but only throw it
  136. * at the end of serializing (either DOM or SAX's {@link
  137. * org.xml.sax.DocumentHandler#endDocument}).
  138. * <p>
  139. * For elements that are not specified as whitespace preserving,
  140. * the serializer will potentially break long text lines at space
  141. * boundaries, indent lines, and serialize elements on separate
  142. * lines. Line terminators will be regarded as spaces, and
  143. * spaces at beginning of line will be stripped.
  144. * <p>
  145. * When indenting, the serializer is capable of detecting seemingly
  146. * element content, and serializing these elements indented on separate
  147. * lines. An element is serialized indented when it is the first or
  148. * last child of an element, or immediate following or preceding
  149. * another element.
  150. *
  151. *
  152. * @version $Revision: 1.51 $ $Date: 2004/02/12 16:56:07 $
  153. * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  154. * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
  155. * @author Elena Litani, IBM
  156. * @see Serializer
  157. * @see LSSerializer
  158. */
  159. public abstract class BaseMarkupSerializer
  160. implements ContentHandler, DocumentHandler, LexicalHandler,
  161. DTDHandler, DeclHandler, DOMSerializer, Serializer
  162. {
  163. // DOM L3 implementation
  164. protected short features = 0xFFFFFFFF;
  165. protected DOMErrorHandler fDOMErrorHandler;
  166. protected final DOMErrorImpl fDOMError = new DOMErrorImpl();
  167. protected LSSerializerFilter fDOMFilter;
  168. protected EncodingInfo _encodingInfo;
  169. /**
  170. * Holds array of all element states that have been entered.
  171. * The array is automatically resized. When leaving an element,
  172. * it's state is not removed but reused when later returning
  173. * to the same nesting level.
  174. */
  175. private ElementState[] _elementStates;
  176. /**
  177. * The index of the next state to place in the array,
  178. * or one plus the index of the current state. When zero,
  179. * we are in no state.
  180. */
  181. private int _elementStateCount;
  182. /**
  183. * Vector holding comments and PIs that come before the root
  184. * element (even after it), see {@link #serializePreRoot}.
  185. */
  186. private Vector _preRoot;
  187. /**
  188. * If the document has been started (header serialized), this
  189. * flag is set to true so it's not started twice.
  190. */
  191. protected boolean _started;
  192. /**
  193. * True if the serializer has been prepared. This flag is set
  194. * to false when the serializer is reset prior to using it,
  195. * and to true after it has been prepared for usage.
  196. */
  197. private boolean _prepared;
  198. /**
  199. * Association between namespace URIs (keys) and prefixes (values).
  200. * Accumulated here prior to starting an element and placing this
  201. * list in the element state.
  202. */
  203. protected Hashtable _prefixes;
  204. /**
  205. * The public identifier of the document type, if known.
  206. */
  207. protected String _docTypePublicId;
  208. /**
  209. * The system identifier of the document type, if known.
  210. */
  211. protected String _docTypeSystemId;
  212. /**
  213. * The output format associated with this serializer. This will never
  214. * be a null reference. If no format was passed to the constructor,
  215. * the default one for this document type will be used. The format
  216. * object is never changed by the serializer.
  217. */
  218. protected OutputFormat _format;
  219. /**
  220. * The printer used for printing text parts.
  221. */
  222. protected Printer _printer;
  223. /**
  224. * True if indenting printer.
  225. */
  226. protected boolean _indenting;
  227. /** Temporary buffer to store character data */
  228. protected final StringBuffer fStrBuffer = new StringBuffer(40);
  229. /**
  230. * The underlying writer.
  231. */
  232. private Writer _writer;
  233. /**
  234. * The output stream.
  235. */
  236. private OutputStream _output;
  237. /** Current node that is being processed */
  238. protected Node fCurrentNode = null;
  239. //--------------------------------//
  240. // Constructor and initialization //
  241. //--------------------------------//
  242. /**
  243. * Protected constructor can only be used by derived class.
  244. * Must initialize the serializer before serializing any document,
  245. * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
  246. * first
  247. */
  248. protected BaseMarkupSerializer( OutputFormat format )
  249. {
  250. int i;
  251. _elementStates = new ElementState[ 10 ];
  252. for ( i = 0 ; i < _elementStates.length ; ++i )
  253. _elementStates[ i ] = new ElementState();
  254. _format = format;
  255. }
  256. public DocumentHandler asDocumentHandler()
  257. throws IOException
  258. {
  259. prepare();
  260. return this;
  261. }
  262. public ContentHandler asContentHandler()
  263. throws IOException
  264. {
  265. prepare();
  266. return this;
  267. }
  268. public DOMSerializer asDOMSerializer()
  269. throws IOException
  270. {
  271. prepare();
  272. return this;
  273. }
  274. public void setOutputByteStream( OutputStream output )
  275. {
  276. if ( output == null ) {
  277. String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
  278. "ArgumentIsNull", new Object[]{"output"});
  279. throw new NullPointerException(msg);
  280. }
  281. _output = output;
  282. _writer = null;
  283. reset();
  284. }
  285. public void setOutputCharStream( Writer writer )
  286. {
  287. if ( writer == null ) {
  288. String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
  289. "ArgumentIsNull", new Object[]{"writer"});
  290. throw new NullPointerException(msg);
  291. }
  292. _writer = writer;
  293. _output = null;
  294. reset();
  295. }
  296. public void setOutputFormat( OutputFormat format )
  297. {
  298. if ( format == null ) {
  299. String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
  300. "ArgumentIsNull", new Object[]{"format"});
  301. throw new NullPointerException(msg);
  302. }
  303. _format = format;
  304. reset();
  305. }
  306. public boolean reset()
  307. {
  308. if ( _elementStateCount > 1 ) {
  309. String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
  310. "ResetInMiddle", null);
  311. throw new IllegalStateException(msg);
  312. }
  313. _prepared = false;
  314. fCurrentNode = null;
  315. fStrBuffer.setLength(0);
  316. return true;
  317. }
  318. protected void prepare()
  319. throws IOException
  320. {
  321. if ( _prepared )
  322. return;
  323. if ( _writer == null && _output == null ) {
  324. String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
  325. "NoWriterSupplied", null);
  326. throw new IOException(msg);
  327. }
  328. // If the output stream has been set, use it to construct
  329. // the writer. It is possible that the serializer has been
  330. // reused with the same output stream and different encoding.
  331. _encodingInfo = _format.getEncodingInfo();
  332. if ( _output != null ) {
  333. _writer = _encodingInfo.getWriter(_output);
  334. }
  335. if ( _format.getIndenting() ) {
  336. _indenting = true;
  337. _printer = new IndentPrinter( _writer, _format );
  338. } else {
  339. _indenting = false;
  340. _printer = new Printer( _writer, _format );
  341. }
  342. ElementState state;
  343. _elementStateCount = 0;
  344. state = _elementStates[ 0 ];
  345. state.namespaceURI = null;
  346. state.localName = null;
  347. state.rawName = null;
  348. state.preserveSpace = _format.getPreserveSpace();
  349. state.empty = true;
  350. state.afterElement = false;
  351. state.afterComment = false;
  352. state.doCData = state.inCData = false;
  353. state.prefixes = null;
  354. _docTypePublicId = _format.getDoctypePublic();
  355. _docTypeSystemId = _format.getDoctypeSystem();
  356. _started = false;
  357. _prepared = true;
  358. }
  359. //----------------------------------//
  360. // DOM document serializing methods //
  361. //----------------------------------//
  362. /**
  363. * Serializes the DOM element using the previously specified
  364. * writer and output format. Throws an exception only if
  365. * an I/O exception occured while serializing.
  366. *
  367. * @param elem The element to serialize
  368. * @throws IOException An I/O exception occured while
  369. * serializing
  370. */
  371. public void serialize( Element elem )
  372. throws IOException
  373. {
  374. reset();
  375. prepare();
  376. serializeNode( elem );
  377. _printer.flush();
  378. if ( _printer.getException() != null )
  379. throw _printer.getException();
  380. }
  381. /**
  382. * Serializes the DOM document fragmnt using the previously specified
  383. * writer and output format. Throws an exception only if
  384. * an I/O exception occured while serializing.
  385. *
  386. * @param elem The element to serialize
  387. * @throws IOException An I/O exception occured while
  388. * serializing
  389. */
  390. public void serialize( DocumentFragment frag )
  391. throws IOException
  392. {
  393. reset();
  394. prepare();
  395. serializeNode( frag );
  396. _printer.flush();
  397. if ( _printer.getException() != null )
  398. throw _printer.getException();
  399. }
  400. /**
  401. * Serializes the DOM document using the previously specified
  402. * writer and output format. Throws an exception only if
  403. * an I/O exception occured while serializing.
  404. *
  405. * @param doc The document to serialize
  406. * @throws IOException An I/O exception occured while
  407. * serializing
  408. */
  409. public void serialize( Document doc )
  410. throws IOException
  411. {
  412. reset();
  413. prepare();
  414. serializeNode( doc );
  415. serializePreRoot();
  416. _printer.flush();
  417. if ( _printer.getException() != null )
  418. throw _printer.getException();
  419. }
  420. //------------------------------------------//
  421. // SAX document handler serializing methods //
  422. //------------------------------------------//
  423. public void startDocument()
  424. throws SAXException
  425. {
  426. try {
  427. prepare();
  428. } catch ( IOException except ) {
  429. throw new SAXException( except.toString() );
  430. }
  431. // Nothing to do here. All the magic happens in startDocument(String)
  432. }
  433. public void characters( char[] chars, int start, int length )
  434. throws SAXException
  435. {
  436. ElementState state;
  437. try {
  438. state = content();
  439. // Check if text should be print as CDATA section or unescaped
  440. // based on elements listed in the output format (the element
  441. // state) or whether we are inside a CDATA section or entity.
  442. if ( state.inCData || state.doCData ) {
  443. int saveIndent;
  444. // Print a CDATA section. The text is not escaped, but ']]>'
  445. // appearing in the code must be identified and dealt with.
  446. // The contents of a text node is considered space preserving.
  447. if ( ! state.inCData ) {
  448. _printer.printText( "<![CDATA[" );
  449. state.inCData = true;
  450. }
  451. saveIndent = _printer.getNextIndent();
  452. _printer.setNextIndent( 0 );
  453. char ch;
  454. for ( int index = start ; index < length ; ++index ) {
  455. ch = chars[index];
  456. if ( ch == ']' && index + 2 < length &&
  457. chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
  458. _printer.printText("]]]]><![CDATA[>");
  459. index +=2;
  460. continue;
  461. }
  462. if (!XMLChar.isValid(ch)) {
  463. // check if it is surrogate
  464. if (++index <length) {
  465. surrogates(ch, chars[index]);
  466. }
  467. else {
  468. fatalError("The character '"+(char)ch+"' is an invalid XML character");
  469. }
  470. continue;
  471. } else {
  472. if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0xF7 ) ||
  473. ch == '\n' || ch == '\r' || ch == '\t' ) {
  474. _printer.printText((char)ch);
  475. } else {
  476. // The character is not printable -- split CDATA section
  477. _printer.printText("]]>&#x");
  478. _printer.printText(Integer.toHexString(ch));
  479. _printer.printText(";<![CDATA[");
  480. }
  481. }
  482. }
  483. _printer.setNextIndent( saveIndent );
  484. } else {
  485. int saveIndent;
  486. if ( state.preserveSpace ) {
  487. // If preserving space then hold of indentation so no
  488. // excessive spaces are printed at line breaks, escape
  489. // the text content without replacing spaces and print
  490. // the text breaking only at line breaks.
  491. saveIndent = _printer.getNextIndent();
  492. _printer.setNextIndent( 0 );
  493. printText( chars, start, length, true, state.unescaped );
  494. _printer.setNextIndent( saveIndent );
  495. } else {
  496. printText( chars, start, length, false, state.unescaped );
  497. }
  498. }
  499. } catch ( IOException except ) {
  500. throw new SAXException( except );
  501. }
  502. }
  503. public void ignorableWhitespace( char[] chars, int start, int length )
  504. throws SAXException
  505. {
  506. int i;
  507. try {
  508. content();
  509. // Print ignorable whitespaces only when indenting, after
  510. // all they are indentation. Cancel the indentation to
  511. // not indent twice.
  512. if ( _indenting ) {
  513. _printer.setThisIndent( 0 );
  514. for ( i = start ; length-- > 0 ; ++i )
  515. _printer.printText( chars[ i ] );
  516. }
  517. } catch ( IOException except ) {
  518. throw new SAXException( except );
  519. }
  520. }
  521. public final void processingInstruction( String target, String code )
  522. throws SAXException
  523. {
  524. try {
  525. processingInstructionIO( target, code );
  526. } catch ( IOException except ) {
  527. throw new SAXException( except );
  528. }
  529. }
  530. public void processingInstructionIO( String target, String code )
  531. throws IOException
  532. {
  533. int index;
  534. ElementState state;
  535. state = content();
  536. // Create the processing instruction textual representation.
  537. // Make sure we don't have '?>' inside either target or code.
  538. index = target.indexOf( "?>" );
  539. if ( index >= 0 )
  540. fStrBuffer.append( "<?" ).append( target.substring( 0, index ) );
  541. else
  542. fStrBuffer.append( "<?" ).append( target );
  543. if ( code != null ) {
  544. fStrBuffer.append( ' ' );
  545. index = code.indexOf( "?>" );
  546. if ( index >= 0 )
  547. fStrBuffer.append( code.substring( 0, index ) );
  548. else
  549. fStrBuffer.append( code );
  550. }
  551. fStrBuffer.append( "?>" );
  552. // If before the root element (or after it), do not print
  553. // the PI directly but place it in the pre-root vector.
  554. if ( isDocumentState() ) {
  555. if ( _preRoot == null )
  556. _preRoot = new Vector();
  557. _preRoot.addElement( fStrBuffer.toString() );
  558. } else {
  559. _printer.indent();
  560. printText( fStrBuffer.toString(), true, true );
  561. _printer.unindent();
  562. if ( _indenting )
  563. state.afterElement = true;
  564. }
  565. fStrBuffer.setLength(0);
  566. }
  567. public void comment( char[] chars, int start, int length )
  568. throws SAXException
  569. {
  570. try {
  571. comment( new String( chars, start, length ) );
  572. } catch ( IOException except ) {
  573. throw new SAXException( except );
  574. }
  575. }
  576. public void comment( String text )
  577. throws IOException
  578. {
  579. int index;
  580. ElementState state;
  581. if ( _format.getOmitComments() )
  582. return;
  583. state = content();
  584. // Create the processing comment textual representation.
  585. // Make sure we don't have '-->' inside the comment.
  586. index = text.indexOf( "-->" );
  587. if ( index >= 0 )
  588. fStrBuffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
  589. else
  590. fStrBuffer.append( "<!--" ).append( text ).append( "-->" );
  591. // If before the root element (or after it), do not print
  592. // the comment directly but place it in the pre-root vector.
  593. if ( isDocumentState() ) {
  594. if ( _preRoot == null )
  595. _preRoot = new Vector();
  596. _preRoot.addElement( fStrBuffer.toString() );
  597. } else {
  598. // Indent this element on a new line if the first
  599. // content of the parent element or immediately
  600. // following an element.
  601. if ( _indenting && ! state.preserveSpace)
  602. _printer.breakLine();
  603. _printer.indent();
  604. printText( fStrBuffer.toString(), true, true );
  605. _printer.unindent();
  606. if ( _indenting )
  607. state.afterElement = true;
  608. }
  609. fStrBuffer.setLength(0);
  610. state.afterComment = true;
  611. state.afterElement = false;
  612. }
  613. public void startCDATA()
  614. {
  615. ElementState state;
  616. state = getElementState();
  617. state.doCData = true;
  618. }
  619. public void endCDATA()
  620. {
  621. ElementState state;
  622. state = getElementState();
  623. state.doCData = false;
  624. }
  625. public void startNonEscaping()
  626. {
  627. ElementState state;
  628. state = getElementState();
  629. state.unescaped = true;
  630. }
  631. public void endNonEscaping()
  632. {
  633. ElementState state;
  634. state = getElementState();
  635. state.unescaped = false;
  636. }
  637. public void startPreserving()
  638. {
  639. ElementState state;
  640. state = getElementState();
  641. state.preserveSpace = true;
  642. }
  643. public void endPreserving()
  644. {
  645. ElementState state;
  646. state = getElementState();
  647. state.preserveSpace = false;
  648. }
  649. /**
  650. * Called at the end of the document to wrap it up.
  651. * Will flush the output stream and throw an exception
  652. * if any I/O error occured while serializing.
  653. *
  654. * @throws SAXException An I/O exception occured during
  655. * serializing
  656. */
  657. public void endDocument()
  658. throws SAXException
  659. {
  660. try {
  661. // Print all the elements accumulated outside of
  662. // the root element.
  663. serializePreRoot();
  664. // Flush the output, this is necessary for fStrBuffered output.
  665. _printer.flush();
  666. } catch ( IOException except ) {
  667. throw new SAXException( except );
  668. }
  669. }
  670. public void startEntity( String name )
  671. {
  672. // ???
  673. }
  674. public void endEntity( String name )
  675. {
  676. // ???
  677. }
  678. public void setDocumentLocator( Locator locator )
  679. {
  680. // Nothing to do
  681. }
  682. //-----------------------------------------//
  683. // SAX content handler serializing methods //
  684. //-----------------------------------------//
  685. public void skippedEntity ( String name )
  686. throws SAXException
  687. {
  688. try {
  689. endCDATA();
  690. content();
  691. _printer.printText( '&' );
  692. _printer.printText( name );
  693. _printer.printText( ';' );
  694. } catch ( IOException except ) {
  695. throw new SAXException( except );
  696. }
  697. }
  698. public void startPrefixMapping( String prefix, String uri )
  699. throws SAXException
  700. {
  701. if ( _prefixes == null )
  702. _prefixes = new Hashtable();
  703. _prefixes.put( uri, prefix == null ? "" : prefix );
  704. }
  705. public void endPrefixMapping( String prefix )
  706. throws SAXException
  707. {
  708. }
  709. //------------------------------------------//
  710. // SAX DTD/Decl handler serializing methods //
  711. //------------------------------------------//
  712. public final void startDTD( String name, String publicId, String systemId )
  713. throws SAXException
  714. {
  715. try {
  716. _printer.enterDTD();
  717. _docTypePublicId = publicId;
  718. _docTypeSystemId = systemId;
  719. } catch ( IOException except ) {
  720. throw new SAXException( except );
  721. }
  722. }
  723. public void endDTD()
  724. {
  725. // Nothing to do here, all the magic occurs in startDocument(String).
  726. }
  727. public void elementDecl( String name, String model )
  728. throws SAXException
  729. {
  730. try {
  731. _printer.enterDTD();
  732. _printer.printText( "<!ELEMENT " );
  733. _printer.printText( name );
  734. _printer.printText( ' ' );
  735. _printer.printText( model );
  736. _printer.printText( '>' );
  737. if ( _indenting )
  738. _printer.breakLine();
  739. } catch ( IOException except ) {
  740. throw new SAXException( except );
  741. }
  742. }
  743. public void attributeDecl( String eName, String aName, String type,
  744. String valueDefault, String value )
  745. throws SAXException
  746. {
  747. try {
  748. _printer.enterDTD();
  749. _printer.printText( "<!ATTLIST " );
  750. _printer.printText( eName );
  751. _printer.printText( ' ' );
  752. _printer.printText( aName );
  753. _printer.printText( ' ' );
  754. _printer.printText( type );
  755. if ( valueDefault != null ) {
  756. _printer.printText( ' ' );
  757. _printer.printText( valueDefault );
  758. }
  759. if ( value != null ) {
  760. _printer.printText( " \"" );
  761. printEscaped( value );
  762. _printer.printText( '"' );
  763. }
  764. _printer.printText( '>' );
  765. if ( _indenting )
  766. _printer.breakLine();
  767. } catch ( IOException except ) {
  768. throw new SAXException( except );
  769. }
  770. }
  771. public void internalEntityDecl( String name, String value )
  772. throws SAXException
  773. {
  774. try {
  775. _printer.enterDTD();
  776. _printer.printText( "<!ENTITY " );
  777. _printer.printText( name );
  778. _printer.printText( " \"" );
  779. printEscaped( value );
  780. _printer.printText( "\">" );
  781. if ( _indenting )
  782. _printer.breakLine();
  783. } catch ( IOException except ) {
  784. throw new SAXException( except );
  785. }
  786. }
  787. public void externalEntityDecl( String name, String publicId, String systemId )
  788. throws SAXException
  789. {
  790. try {
  791. _printer.enterDTD();
  792. unparsedEntityDecl( name, publicId, systemId, null );
  793. } catch ( IOException except ) {
  794. throw new SAXException( except );
  795. }
  796. }
  797. public void unparsedEntityDecl( String name, String publicId,
  798. String systemId, String notationName )
  799. throws SAXException
  800. {
  801. try {
  802. _printer.enterDTD();
  803. if ( publicId == null ) {
  804. _printer.printText( "<!ENTITY " );
  805. _printer.printText( name );
  806. _printer.printText( " SYSTEM " );
  807. printDoctypeURL( systemId );
  808. } else {
  809. _printer.printText( "<!ENTITY " );
  810. _printer.printText( name );
  811. _printer.printText( " PUBLIC " );
  812. printDoctypeURL( publicId );
  813. _printer.printText( ' ' );
  814. printDoctypeURL( systemId );
  815. }
  816. if ( notationName != null ) {
  817. _printer.printText( " NDATA " );
  818. _printer.printText( notationName );
  819. }
  820. _printer.printText( '>' );
  821. if ( _indenting )
  822. _printer.breakLine();
  823. } catch ( IOException except ) {
  824. throw new SAXException( except );
  825. }
  826. }
  827. public void notationDecl( String name, String publicId, String systemId )
  828. throws SAXException
  829. {
  830. try {
  831. _printer.enterDTD();
  832. if ( publicId != null ) {
  833. _printer.printText( "<!NOTATION " );
  834. _printer.printText( name );
  835. _printer.printText( " PUBLIC " );
  836. printDoctypeURL( publicId );
  837. if ( systemId != null ) {
  838. _printer.printText( ' ' );
  839. printDoctypeURL( systemId );
  840. }
  841. } else {
  842. _printer.printText( "<!NOTATION " );
  843. _printer.printText( name );
  844. _printer.printText( " SYSTEM " );
  845. printDoctypeURL( systemId );
  846. }
  847. _printer.printText( '>' );
  848. if ( _indenting )
  849. _printer.breakLine();
  850. } catch ( IOException except ) {
  851. throw new SAXException( except );
  852. }
  853. }
  854. //------------------------------------------//
  855. // Generic node serializing methods //
  856. //------------------------------------------//
  857. /**
  858. * Serialize the DOM node. This method is shared across XML, HTML and XHTML
  859. * serializers and the differences are masked out in a separate {@link
  860. * #serializeElement}.
  861. *
  862. * @param node The node to serialize
  863. * @see #serializeElement
  864. * @throws IOException An I/O exception occured while
  865. * serializing
  866. */
  867. protected void serializeNode( Node node )
  868. throws IOException
  869. {
  870. fCurrentNode = node;
  871. // Based on the node type call the suitable SAX handler.
  872. // Only comments entities and documents which are not
  873. // handled by SAX are serialized directly.
  874. switch ( node.getNodeType() ) {
  875. case Node.TEXT_NODE : {
  876. String text;
  877. text = node.getNodeValue();
  878. if ( text != null ) {
  879. if (fDOMFilter !=null &&
  880. (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT)!= 0) {
  881. short code = fDOMFilter.acceptNode(node);
  882. switch (code) {
  883. case NodeFilter.FILTER_REJECT:
  884. case NodeFilter.FILTER_SKIP: {
  885. break;
  886. }
  887. default: {
  888. characters(text);
  889. }
  890. }
  891. }
  892. else if ( !_indenting || getElementState().preserveSpace
  893. || (text.replace('\n',' ').trim().length() != 0))
  894. characters( text );
  895. }
  896. break;
  897. }
  898. case Node.CDATA_SECTION_NODE : {
  899. String text = node.getNodeValue();
  900. if ((features & DOMSerializerImpl.CDATA) != 0) {
  901. if (text != null) {
  902. if (fDOMFilter != null
  903. && (fDOMFilter.getWhatToShow()
  904. & NodeFilter.SHOW_CDATA_SECTION)
  905. != 0) {
  906. short code = fDOMFilter.acceptNode(node);
  907. switch (code) {
  908. case NodeFilter.FILTER_REJECT :
  909. case NodeFilter.FILTER_SKIP :
  910. {
  911. // skip the CDATA node
  912. return;
  913. }
  914. default :
  915. {
  916. //fall through..
  917. }
  918. }
  919. }
  920. startCDATA();
  921. characters(text);
  922. endCDATA();
  923. }
  924. } else {
  925. // transform into a text node
  926. characters(text);
  927. }
  928. break;
  929. }
  930. case Node.COMMENT_NODE : {
  931. String text;
  932. if ( ! _format.getOmitComments() ) {
  933. text = node.getNodeValue();
  934. if ( text != null ) {
  935. if (fDOMFilter !=null &&
  936. (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT)!= 0) {
  937. short code = fDOMFilter.acceptNode(node);
  938. switch (code) {
  939. case NodeFilter.FILTER_REJECT:
  940. case NodeFilter.FILTER_SKIP: {
  941. // skip the comment node
  942. return;
  943. }
  944. default: {
  945. // fall through
  946. }
  947. }
  948. }
  949. comment( text );
  950. }
  951. }
  952. break;
  953. }
  954. case Node.ENTITY_REFERENCE_NODE : {
  955. Node child;
  956. endCDATA();
  957. content();
  958. if (((features & DOMSerializerImpl.ENTITIES) != 0)
  959. || (node.getFirstChild() == null)) {
  960. if (fDOMFilter !=null &&
  961. (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) {
  962. short code = fDOMFilter.acceptNode(node);
  963. switch (code) {
  964. case NodeFilter.FILTER_REJECT:{
  965. return; // remove the node
  966. }
  967. case NodeFilter.FILTER_SKIP: {
  968. child = node.getFirstChild();
  969. while ( child != null ) {
  970. serializeNode( child );
  971. child = child.getNextSibling();
  972. }
  973. return;
  974. }
  975. default: {
  976. // fall through
  977. }
  978. }
  979. }
  980. checkUnboundNamespacePrefixedNode(node);
  981. _printer.printText("&");
  982. _printer.printText(node.getNodeName());
  983. _printer.printText(";");
  984. }
  985. else {
  986. child = node.getFirstChild();
  987. while ( child != null ) {
  988. serializeNode( child );
  989. child = child.getNextSibling();
  990. }
  991. }
  992. break;
  993. }
  994. case Node.PROCESSING_INSTRUCTION_NODE : {
  995. if (fDOMFilter !=null &&
  996. (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION)!= 0) {
  997. short code = fDOMFilter.acceptNode(node);
  998. switch (code) {
  999. case NodeFilter.FILTER_REJECT:
  1000. case NodeFilter.FILTER_SKIP: {
  1001. return; // skip this node
  1002. }
  1003. default: { // fall through
  1004. }
  1005. }
  1006. }
  1007. processingInstructionIO( node.getNodeName(), node.getNodeValue() );
  1008. break;
  1009. }
  1010. case Node.ELEMENT_NODE : {
  1011. if (fDOMFilter !=null &&
  1012. (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT)!= 0) {
  1013. short code = fDOMFilter.acceptNode(node);
  1014. switch (code) {
  1015. case NodeFilter.FILTER_REJECT: {
  1016. return;
  1017. }
  1018. case NodeFilter.FILTER_SKIP: {
  1019. Node child = node.getFirstChild();
  1020. while ( child != null ) {
  1021. serializeNode( child );
  1022. child = child.getNextSibling();
  1023. }
  1024. return; // skip this node
  1025. }
  1026. default: { // fall through
  1027. }
  1028. }
  1029. }
  1030. serializeElement( (Element) node );
  1031. break;
  1032. }
  1033. case Node.DOCUMENT_NODE : {
  1034. DocumentType docType;
  1035. DOMImplementation domImpl;
  1036. NamedNodeMap map;
  1037. Entity entity;
  1038. Notation notation;
  1039. int i;
  1040. // If there is a document type, use the SAX events to
  1041. // serialize it.
  1042. docType = ( (Document) node ).getDoctype();
  1043. if (docType != null) {
  1044. // DOM Level 2 (or higher)
  1045. domImpl = ( (Document) node ).getImplementation();
  1046. try {
  1047. String internal;
  1048. _printer.enterDTD();
  1049. _docTypePublicId = docType.getPublicId();
  1050. _docTypeSystemId = docType.getSystemId();
  1051. internal = docType.getInternalSubset();
  1052. if ( internal != null && internal.length() > 0 )
  1053. _printer.printText( internal );
  1054. endDTD();
  1055. }
  1056. // DOM Level 1 -- does implementation have methods?
  1057. catch (NoSuchMethodError nsme) {
  1058. Class docTypeClass = docType.getClass();
  1059. String docTypePublicId = null;
  1060. String docTypeSystemId = null;
  1061. try {
  1062. java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", null);
  1063. if (getPublicId.getReturnType().equals(String.class)) {
  1064. docTypePublicId = (String)getPublicId.invoke(docType, null);
  1065. }
  1066. }
  1067. catch (Exception e) {
  1068. // ignore
  1069. }
  1070. try {
  1071. java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", null);
  1072. if (getSystemId.getReturnType().equals(String.class)) {
  1073. docTypeSystemId = (String)getSystemId.invoke(docType, null);
  1074. }
  1075. }
  1076. catch (Exception e) {
  1077. // ignore
  1078. }
  1079. _printer.enterDTD();
  1080. _docTypePublicId = docTypePublicId;
  1081. _docTypeSystemId = docTypeSystemId;
  1082. endDTD();
  1083. }
  1084. }
  1085. // !! Fall through
  1086. }
  1087. case Node.DOCUMENT_FRAGMENT_NODE : {
  1088. Node child;
  1089. // By definition this will happen if the node is a document,
  1090. // document fragment, etc. Just serialize its contents. It will
  1091. // work well for other nodes that we do not know how to serialize.
  1092. child = node.getFirstChild();
  1093. while ( child != null ) {
  1094. serializeNode( child );
  1095. child = child.getNextSibling();
  1096. }
  1097. break;
  1098. }
  1099. default:
  1100. break;
  1101. }
  1102. }
  1103. /**
  1104. * Must be called by a method about to print any type of content.
  1105. * If the element was just opened, the opening tag is closed and
  1106. * will be matched to a closing tag. Returns the current element
  1107. * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
  1108. *
  1109. * @return The current element state
  1110. * @throws IOException An I/O exception occured while
  1111. * serializing
  1112. */
  1113. protected ElementState content()
  1114. throws IOException
  1115. {
  1116. ElementState state;
  1117. state = getElementState();
  1118. if ( ! isDocumentState() ) {
  1119. // Need to close CData section first
  1120. if ( state.inCData && ! state.doCData ) {
  1121. _printer.printText( "]]>" );
  1122. state.inCData = false;
  1123. }
  1124. // If this is the first content in the element,
  1125. // change the state to not-empty and close the
  1126. // opening element tag.
  1127. if ( state.empty ) {
  1128. _printer.printText( '>' );
  1129. state.empty = false;
  1130. }
  1131. // Except for one content type, all of them
  1132. // are not last element. That one content
  1133. // type will take care of itself.
  1134. state.afterElement = false;
  1135. // Except for one content type, all of them
  1136. // are not last comment. That one content
  1137. // type will take care of itself.
  1138. state.afterComment = false;
  1139. }
  1140. return state;
  1141. }
  1142. /**
  1143. * Called to print the text contents in the prevailing element format.
  1144. * Since this method is capable of printing text as CDATA, it is used
  1145. * for that purpose as well. White space handling is determined by the
  1146. * current element state. In addition, the output format can dictate
  1147. * whether the text is printed as CDATA or unescaped.
  1148. *
  1149. * @param text The text to print
  1150. * @param unescaped True is should print unescaped
  1151. * @throws IOException An I/O exception occured while
  1152. * serializing
  1153. */
  1154. protected void characters( String text )
  1155. throws IOException
  1156. {
  1157. ElementState state;
  1158. state = content();
  1159. // Check if text should be print as CDATA section or unescaped
  1160. // based on elements listed in the output format (the element
  1161. // state) or whether we are inside a CDATA section or entity.
  1162. if ( state.inCData || state.doCData ) {
  1163. int index;
  1164. int saveIndent;
  1165. // Print a CDATA section. The text is not escaped, but ']]>'
  1166. // appearing in the code must be identified and dealt with.
  1167. // The contents of a text node is considered space preserving.
  1168. if ( ! state.inCData ) {
  1169. _printer.printText("<![CDATA[");
  1170. state.inCData = true;
  1171. }
  1172. saveIndent = _printer.getNextIndent();
  1173. _printer.setNextIndent( 0 );
  1174. printCDATAText( text);
  1175. _printer.setNextIndent( saveIndent );
  1176. } else {
  1177. int saveIndent;
  1178. if ( state.preserveSpace ) {
  1179. // If preserving space then hold of indentation so no
  1180. // excessive spaces are printed at line breaks, escape
  1181. // the text content without replacing spaces and print
  1182. // the text breaking only at line breaks.
  1183. saveIndent = _printer.getNextIndent();
  1184. _printer.setNextIndent( 0 );
  1185. printText( text, true, state.unescaped );
  1186. _printer.setNextIndent( saveIndent );
  1187. } else {
  1188. printText( text, false, state.unescaped );
  1189. }
  1190. }
  1191. }
  /**
   * Returns the suitable entity reference for this character value,
   * or null if no such entity exists. Calling this method with <tt>'&'</tt>
   * will return <tt>"&amp;"</tt>.
   * <p>
   * NOTE(review): {@link #printEscaped(int)} wraps the returned string in
   * <tt>'&amp;'</tt> and <tt>';'</tt> itself, so implementations presumably
   * return the bare entity name (e.g. <tt>amp</tt>) -- confirm against the
   * concrete serializers.
   *
   * @param ch Character value
   * @return Character entity name, or null
   */
  protected abstract String getEntityRef( int ch );
  /**
   * Called to serialize the DOM element. The element is serialized based on
   * the serializer's method (XML, HTML, XHTML).
   *
   * @param elem The element to serialize
   * @throws IOException An I/O exception occurred while
   *   serializing
   */
  protected abstract void serializeElement( Element elem )
    throws IOException;
  1211. /**
  1212. * Comments and PIs cannot be serialized before the root element,
  1213. * because the root element serializes the document type, which
  1214. * generally comes first. Instead such PIs and comments are
  1215. * accumulated inside a vector and serialized by calling this
  1216. * method. Will be called when the root element is serialized
  1217. * and when the document finished serializing.
  1218. *
  1219. * @throws IOException An I/O exception occured while
  1220. * serializing
  1221. */
  1222. protected void serializePreRoot()
  1223. throws IOException
  1224. {
  1225. int i;
  1226. if ( _preRoot != null ) {
  1227. for ( i = 0 ; i < _preRoot.size() ; ++i ) {
  1228. printText( (String) _preRoot.elementAt( i ), true, true );
  1229. if ( _indenting )
  1230. _printer.breakLine();
  1231. }
  1232. _preRoot.removeAllElements();
  1233. }
  1234. }
  1235. //---------------------------------------------//
  1236. // Text pretty printing and formatting methods //
  1237. //---------------------------------------------//
  1238. protected void printCDATAText( String text ) throws IOException {
  1239. int length = text.length();
  1240. char ch;
  1241. for ( int index = 0 ; index < length; ++index ) {
  1242. ch = text.charAt( index );
  1243. if (ch == ']'
  1244. && index + 2 < length
  1245. && text.charAt(index + 1) == ']'
  1246. && text.charAt(index + 2) == '>') { // check for ']]>'
  1247. if (fDOMErrorHandler != null) {
  1248. // REVISIT: this means that if DOM Error handler is not registered we don't report any
  1249. // fatal errors and might serialize not wellformed document
  1250. if ((features & DOMSerializerImpl.SPLITCDATA) == 0
  1251. && (features & DOMSerializerImpl.WELLFORMED) == 0) {
  1252. // issue fatal error
  1253. String msg =
  1254. DOMMessageFormatter.formatMessage(
  1255. DOMMessageFormatter.SERIALIZER_DOMAIN,
  1256. "EndingCDATA",
  1257. null);
  1258. modifyDOMError(
  1259. msg,
  1260. DOMError.SEVERITY_FATAL_ERROR,
  1261. fCurrentNode);
  1262. boolean continueProcess =
  1263. fDOMErrorHandler.handleError(fDOMError);
  1264. if (!continueProcess) {
  1265. throw new IOException();
  1266. }
  1267. } else {
  1268. // issue warning
  1269. String msg =
  1270. DOMMessageFormatter.formatMessage(
  1271. DOMMessageFormatter.SERIALIZER_DOMAIN,
  1272. "SplittingCDATA",
  1273. null);
  1274. modifyDOMError(
  1275. msg,
  1276. DOMError.SEVERITY_WARNING,
  1277. fCurrentNode);
  1278. fDOMErrorHandler.handleError(fDOMError);
  1279. }
  1280. }
  1281. // split CDATA section
  1282. _printer.printText("]]]]><![CDATA[>");
  1283. index += 2;
  1284. continue;
  1285. }
  1286. if (!XMLChar.isValid(ch)) {
  1287. // check if it is surrogate
  1288. if (++index <length) {
  1289. surrogates(ch, text.charAt(index));
  1290. }
  1291. else {
  1292. fatalError("The character '"+(char)ch+"' is an invalid XML character");
  1293. }
  1294. continue;
  1295. } else {
  1296. if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0xF7 ) ||
  1297. ch == '\n' || ch == '\r' || ch == '\t' ) {
  1298. _printer.printText((char)ch);
  1299. } else {
  1300. // The character is not printable -- split CDATA section
  1301. _printer.printText("]]>&#x");
  1302. _printer.printText(Integer.toHexString(ch));
  1303. _printer.printText(";<![CDATA[");
  1304. }
  1305. }
  1306. }
  1307. }
  1308. protected void surrogates(int high, int low) throws IOException{
  1309. if (XMLChar.isHighSurrogate(high)) {
  1310. if (!XMLChar.isLowSurrogate(low)) {
  1311. //Invalid XML
  1312. fatalError("The character '"+(char)low+"' is an invalid XML character");
  1313. }
  1314. else {
  1315. int supplemental = XMLChar.supplemental((char)high, (char)low);
  1316. if (!XMLChar.isValid(supplemental)) {
  1317. //Invalid XML
  1318. fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
  1319. }
  1320. else {
  1321. if (content().inCData ) {
  1322. _printer.printText("]]>&#x");
  1323. _printer.printText(Integer.toHexString(supplemental));
  1324. _printer.printText(";<![CDATA[");
  1325. }
  1326. else {
  1327. printHex(supplemental);
  1328. }
  1329. }
  1330. }
  1331. } else {
  1332. fatalError("The character '"+(char)high+"' is an invalid XML character");
  1333. }
  1334. }
  1335. /**
  1336. * Called to print additional text with whitespace handling.
  1337. * If spaces are preserved, the text is printed as if by calling
  1338. * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine}
  1339. * for each new line. If spaces are not preserved, the text is
  1340. * broken at space boundaries if longer than the line width;
  1341. * Multiple spaces are printed as such, but spaces at beginning
  1342. * of line are removed.
  1343. *
  1344. * @param text The text to print
  1345. * @param preserveSpace Space preserving flag
  1346. * @param unescaped Print unescaped
  1347. */
  1348. protected void printText( char[] chars, int start, int length,
  1349. boolean preserveSpace, boolean unescaped )
  1350. throws IOException
  1351. {
  1352. int index;
  1353. char ch;
  1354. if ( preserveSpace ) {
  1355. // Preserving spaces: the text must print exactly as it is,
  1356. // without breaking when spaces appear in the text and without
  1357. // consolidating spaces. If a line terminator is used, a line
  1358. // break will occur.
  1359. while ( length-- > 0 ) {
  1360. ch = chars[ start ];
  1361. ++start;
  1362. if ( ch == '\n' || ch == '\r' || unescaped )
  1363. _printer.printText( ch );
  1364. else
  1365. printEscaped( ch );
  1366. }
  1367. } else {
  1368. // Not preserving spaces: print one part at a time, and
  1369. // use spaces between parts to break them into different
  1370. // lines. Spaces at beginning of line will be stripped
  1371. // by printing mechanism. Line terminator is treated
  1372. // no different than other text part.
  1373. while ( length-- > 0 ) {
  1374. ch = chars[ start ];
  1375. ++start;
  1376. if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
  1377. _printer.printSpace();
  1378. else if ( unescaped )
  1379. _printer.printText( ch );
  1380. else
  1381. printEscaped( ch );
  1382. }
  1383. }
  1384. }
  1385. protected void printText( String text, boolean preserveSpace, boolean unescaped )
  1386. throws IOException
  1387. {
  1388. int index;
  1389. char ch;
  1390. if ( preserveSpace ) {
  1391. // Preserving spaces: the text must print exactly as it is,
  1392. // without breaking when spaces appear in the text and without
  1393. // consolidating spaces. If a line terminator is used, a line
  1394. // break will occur.
  1395. for ( index = 0 ; index < text.length() ; ++index ) {
  1396. ch = text.charAt( index );
  1397. if ( ch == '\n' || ch == '\r' || unescaped )
  1398. _printer.printText( ch );
  1399. else
  1400. printEscaped( ch );
  1401. }
  1402. } else {
  1403. // Not preserving spaces: print one part at a time, and
  1404. // use spaces between parts to break them into different
  1405. // lines. Spaces at beginning of line will be stripped
  1406. // by printing mechanism. Line terminator is treated
  1407. // no different than other text part.
  1408. for ( index = 0 ; index < text.length() ; ++index ) {
  1409. ch = text.charAt( index );
  1410. if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
  1411. _printer.printSpace();
  1412. else if ( unescaped )
  1413. _printer.printText( ch );
  1414. else
  1415. printEscaped( ch );
  1416. }
  1417. }
  1418. }
  1419. /**
  1420. * Print a document type public or system identifier URL.
  1421. * Encapsulates the URL in double quotes, escapes non-printing
  1422. * characters and print it equivalent to {@link #printText}.
  1423. *
  1424. * @param url The document type url to print
  1425. */
  1426. protected void printDoctypeURL( String url )
  1427. throws IOException
  1428. {
  1429. int i;
  1430. _printer.printText( '"' );
  1431. for( i = 0 ; i < url.length() ; ++i ) {
  1432. if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
  1433. _printer.printText( '%' );
  1434. _printer.printText( Integer.toHexString( url.charAt( i ) ) );
  1435. } else
  1436. _printer.printText( url.charAt( i ) );
  1437. }
  1438. _printer.printText( '"' );
  1439. }
  1440. protected void printEscaped( int ch )
  1441. throws IOException
  1442. {
  1443. String charRef;
  1444. // If there is a suitable entity reference for this
  1445. // character, print it. The list of available entity
  1446. // references is almost but not identical between
  1447. // XML and HTML.
  1448. charRef = getEntityRef( ch );
  1449. if ( charRef != null ) {
  1450. _printer.printText( '&' );
  1451. _printer.printText( charRef );
  1452. _printer.printText( ';' );
  1453. } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0xF7 ) ||
  1454. ch == '\n' || ch == '\r' || ch == '\t' ) {
  1455. // Non printables are below ASCII space but not tab or line
  1456. // terminator, ASCII delete, or above a certain Unicode threshold.
  1457. if (ch < 0x10000) {
  1458. _printer.printText((char)ch );
  1459. } else {
  1460. _printer.printText((char)(((ch-0x10000)>>10)+0xd800));
  1461. _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
  1462. }
  1463. } else {
  1464. printHex(ch);
  1465. }
  1466. }
  1467. /**
  1468. * Escapes chars
  1469. */
  1470. final void printHex( int ch) throws IOException {
  1471. _printer.printText( "&#x" );
  1472. _printer.printText(Integer.toHexString(ch));
  1473. _printer.printText( ';' );
  1474. }
  1475. /**
  1476. * Escapes a string so it may be printed as text content or attribute
  1477. * value. Non printable characters are escaped using character references.
  1478. * Where the format specifies a deault entity reference, that reference
  1479. * is used (e.g. <tt>&lt;</tt>).
  1480. *
  1481. * @param source The string to escape
  1482. */
  1483. protected void printEscaped( String source )
  1484. throws IOException
  1485. {
  1486. for ( int i = 0 ; i < source.length() ; ++i ) {
  1487. int ch = source.charAt(i);
  1488. if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
  1489. int lowch = source.charAt(i+1);
  1490. if ((lowch & 0xfc00) == 0xdc00) {
  1491. ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
  1492. i++;
  1493. }
  1494. }
  1495. printEscaped(ch);
  1496. }
  1497. }
  1498. //--------------------------------//
  1499. // Element state handling methods //
  1500. //--------------------------------//
  1501. /**
  1502. * Return the state of the current element.
  1503. *
  1504. * @return Current element state
  1505. */
  1506. protected ElementState getElementState()
  1507. {
  1508. return _elementStates[ _elementStateCount ];
  1509. }
  1510. /**
  1511. * Enter a new element state for the specified element.
  1512. * Tag name and space preserving is specified, element
  1513. * state is initially empty.
  1514. *
  1515. * @return Current element state, or null
  1516. */
  1517. protected ElementState enterElementState( String namespaceURI, String localName,
  1518. String rawName, boolean preserveSpace )
  1519. {
  1520. ElementState state;
  1521. if ( _elementStateCount + 1 == _elementStates.length ) {
  1522. ElementState[] newStates;
  1523. // Need to create a larger array of states. This does not happen
  1524. // often, unless the document is really deep.
  1525. newStates = new ElementState[ _elementStates.length + 10 ];
  1526. for ( int i = 0 ; i < _elementStates.length ; ++i )
  1527. newStates[ i ] = _elementStates[ i ];
  1528. for ( int i = _elementStates.length ; i < newStates.length ; ++i )
  1529. newStates[ i ] = new ElementState();
  1530. _elementStates = newStates;
  1531. }
  1532. ++_elementStateCount;
  1533. state = _elementStates[ _elementStateCount ];
  1534. state.namespaceURI = namespaceURI;
  1535. state.localName = localName;
  1536. state.rawName = rawName;
  1537. state.preserveSpace = preserveSpace;
  1538. state.empty = true;
  1539. state.afterElement = false;
  1540. state.afterComment = false;
  1541. state.doCData = state.inCData = false;
  1542. state.unescaped = false;
  1543. state.prefixes = _prefixes;
  1544. _prefixes = null;
  1545. return state;
  1546. }
  1547. /**
  1548. * Leave the current element state and return to the
  1549. * state of the parent element. If this was the root
  1550. * element, return to the state of the document.
  1551. *
  1552. * @return Previous element state
  1553. */
  1554. protected ElementState leaveElementState()
  1555. {
  1556. if ( _elementStateCount > 0 ) {
  1557. /*Corrected by David Blondeau (blondeau@intalio.com)*/
  1558. _prefixes = null;
  1559. //_prefixes = _elementStates[ _elementStateCount ].prefixes;
  1560. -- _elementStateCount;
  1561. return _elementStates[ _elementStateCount ];
  1562. } else {
  1563. String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null);
  1564. throw new IllegalStateException(msg);
  1565. }
  1566. }
  1567. /**
  1568. * Returns true if in the state of the document.
  1569. * Returns true before entering any element and after
  1570. * leaving the root element.
  1571. *
  1572. * @return True if in the state of the document
  1573. */
  1574. protected boolean isDocumentState()
  1575. {
  1576. return _elementStateCount == 0;
  1577. }
  1578. /**
  1579. * Returns the namespace prefix for the specified URI.
  1580. * If the URI has been mapped to a prefix, returns the
  1581. * prefix, otherwise returns null.
  1582. *
  1583. * @param namespaceURI The namespace URI
  1584. * @return The namespace prefix if known, or null
  1585. */
  1586. protected String getPrefix( String namespaceURI )
  1587. {
  1588. String prefix;
  1589. if ( _prefixes != null ) {
  1590. prefix = (String) _prefixes.get( namespaceURI );
  1591. if ( prefix != null )
  1592. return prefix;
  1593. }
  1594. if ( _elementStateCount == 0 )
  1595. return null;
  1596. else {
  1597. for ( int i = _elementStateCount ; i > 0 ; --i ) {
  1598. if ( _elementStates[ i ].prefixes != null ) {
  1599. prefix = (String) _elementStates[ i ].prefixes.get( namespaceURI );
  1600. if ( prefix != null )
  1601. return prefix;
  1602. }
  1603. }
  1604. }
  1605. return null;
  1606. }
  1607. /**
  1608. * The method modifies global DOM error object
  1609. *
  1610. * @param message
  1611. * @param severity
  1612. * @return a DOMError
  1613. */
  1614. protected DOMError modifyDOMError(String message, short severity, Node node){
  1615. fDOMError.reset();
  1616. fDOMError.fMessage = message;
  1617. fDOMError.fSeverity = severity;
  1618. fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
  1619. return fDOMError;
  1620. }
  1621. protected void fatalError(String message) throws IOException{
  1622. if (fDOMErrorHandler != null) {
  1623. modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, fCurrentNode);
  1624. fDOMErrorHandler.handleError(fDOMError);
  1625. }
  1626. else {
  1627. throw new IOException(message);
  1628. }
  1629. }
  /**
   * DOM level 3:
   * Check a node to determine if it contains unbound namespace prefixes.
   * <p>
   * This implementation is empty and performs no check; presumably
   * subclasses override it where such a check is required.
   *
   * @param node The node to check for unbound namespace prefixes
   * @throws IOException Never thrown by this empty implementation
   */
  protected void checkUnboundNamespacePrefixedNode (Node node) throws IOException{
    // Intentionally empty.
  }
  1638. }