1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2004 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. // Sep 14, 2000:
  58. // Fixed serializer to report IO exception directly, instead at
  59. // the end of document processing.
  60. // Reported by Patrick Higgins <phiggins@transzap.com>
  61. // Aug 21, 2000:
  62. // Fixed bug in startDocument not calling prepare.
  63. // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
  64. // Aug 21, 2000:
  65. // Added ability to omit DOCTYPE declaration.
  66. // Sep 1, 2000:
  67. // If no output format is provided the serializer now defaults
  68. // to ISO-8859-1 encoding. Reported by Mikael Staldal
  69. // <d96-mst@d.kth.se>
  70. package com.sun.org.apache.xml.internal.serialize;
  71. import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
  72. import java.io.IOException;
  73. import java.io.OutputStream;
  74. import java.io.Writer;
  75. import java.util.Enumeration;
  76. import java.util.Locale;
  77. import org.w3c.dom.Attr;
  78. import org.w3c.dom.Element;
  79. import org.w3c.dom.NamedNodeMap;
  80. import org.w3c.dom.Node;
  81. import org.xml.sax.AttributeList;
  82. import org.xml.sax.Attributes;
  83. import org.xml.sax.SAXException;
  84. /**
  85. * Implements an HTML/XHTML serializer supporting both DOM and SAX
  86. * pretty serializing. HTML/XHTML mode is determined in the
  87. * constructor. For usage instructions see {@link Serializer}.
  88. * <p>
  * If an output stream is used, the encoding is taken from the
  * output format (defaults to <tt>UTF-8</tt>). If no output format
  * is supplied at all, the serializer creates an HTML output format
  * that uses <tt>ISO-8859-1</tt>. If a writer is used, make sure the
  * writer uses the same encoding (if applicable) as specified in the
  * output format.
  93. * <p>
  94. * The serializer supports both DOM and SAX. DOM serializing is done
  95. * by calling {@link #serialize} and SAX serializing is done by firing
  96. * SAX events and using the serializer as a document handler.
  97. * <p>
  * If an I/O exception occurs while serializing, the serializer
  * reports it directly instead of deferring it to the end of
  * document processing: the SAX event methods rethrow it wrapped
  * in a {@link org.xml.sax.SAXException}, and the DOM serializing
  * methods throw the {@link java.io.IOException} itself.
  102. * <p>
  103. * For elements that are not specified as whitespace preserving,
  104. * the serializer will potentially break long text lines at space
  105. * boundaries, indent lines, and serialize elements on separate
  106. * lines. Line terminators will be regarded as spaces, and
  107. * spaces at beginning of line will be stripped.
  108. * <p>
  109. * XHTML is slightly different than HTML:
  110. * <ul>
  111. * <li>Element/attribute names are lower case and case matters
  112. * <li>Attributes must specify value, even if empty string
  113. * <li>Empty elements must have '/' in empty tag
  114. * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
  115. * </ul>
  116. *
  117. * @deprecated
  118. * @version $Revision: 1.26 $ $Date: 2004/02/16 05:24:55 $
  119. * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  120. * @see Serializer
  121. */
  122. public class HTMLSerializer
  123. extends BaseMarkupSerializer
  124. {
  125. /**
  126. * True if serializing in XHTML format.
  127. */
  128. private boolean _xhtml;
  129. public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml";
  130. // for users to override XHTMLNamespace if need be.
  131. private String fUserXHTMLNamespace = null;
  132. /**
  133. * Constructs a new HTML/XHTML serializer depending on the value of
  134. * <tt>xhtml</tt>. The serializer cannot be used without calling
  135. * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
  136. *
  137. * @param xhtml True if XHTML serializing
  138. */
  139. protected HTMLSerializer( boolean xhtml, OutputFormat format )
  140. {
  141. super( format );
  142. _xhtml = xhtml;
  143. }
  144. /**
  145. * Constructs a new serializer. The serializer cannot be used without
  146. * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
  147. * first.
  148. */
  149. public HTMLSerializer()
  150. {
  151. this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
  152. }
  153. /**
  154. * Constructs a new serializer. The serializer cannot be used without
  155. * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
  156. * first.
  157. */
  158. public HTMLSerializer( OutputFormat format )
  159. {
  160. this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
  161. }
  162. /**
  163. * Constructs a new serializer that writes to the specified writer
  164. * using the specified output format. If <tt>format</tt> is null,
  165. * will use a default output format.
  166. *
  167. * @param writer The writer to use
  168. * @param format The output format to use, null for the default
  169. */
  170. public HTMLSerializer( Writer writer, OutputFormat format )
  171. {
  172. this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
  173. setOutputCharStream( writer );
  174. }
  175. /**
  176. * Constructs a new serializer that writes to the specified output
  177. * stream using the specified output format. If <tt>format</tt>
  178. * is null, will use a default output format.
  179. *
  180. * @param output The output stream to use
  181. * @param format The output format to use, null for the default
  182. */
  183. public HTMLSerializer( OutputStream output, OutputFormat format )
  184. {
  185. this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
  186. setOutputByteStream( output );
  187. }
  188. public void setOutputFormat( OutputFormat format )
  189. {
  190. super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
  191. }
  192. // Set value for alternate XHTML namespace.
  193. public void setXHTMLNamespace(String newNamespace) {
  194. fUserXHTMLNamespace = newNamespace;
  195. } // setXHTMLNamespace(String)
  196. //-----------------------------------------//
  197. // SAX content handler serializing methods //
  198. //-----------------------------------------//
  199. public void startElement( String namespaceURI, String localName,
  200. String rawName, Attributes attrs )
  201. throws SAXException
  202. {
  203. int i;
  204. boolean preserveSpace;
  205. ElementState state;
  206. String name;
  207. String value;
  208. String htmlName;
  209. boolean addNSAttr = false;
  210. try {
  211. if ( _printer == null )
  212. throw new IllegalStateException(
  213. DOMMessageFormatter.formatMessage(
  214. DOMMessageFormatter.SERIALIZER_DOMAIN,
  215. "NoWriterSupplied", null));
  216. state = getElementState();
  217. if ( isDocumentState() ) {
  218. // If this is the root element handle it differently.
  219. // If the first root element in the document, serialize
  220. // the document's DOCTYPE. Space preserving defaults
  221. // to that of the output format.
  222. if ( ! _started )
  223. startDocument( (localName == null || localName.length() == 0)
  224. ? rawName : localName );
  225. } else {
  226. // For any other element, if first in parent, then
  227. // close parent's opening tag and use the parnet's
  228. // space preserving.
  229. if ( state.empty )
  230. _printer.printText( '>' );
  231. // Indent this element on a new line if the first
  232. // content of the parent element or immediately
  233. // following an element.
  234. if ( _indenting && ! state.preserveSpace &&
  235. ( state.empty || state.afterElement ) )
  236. _printer.breakLine();
  237. }
  238. preserveSpace = state.preserveSpace;
  239. // Do not change the current element state yet.
  240. // This only happens in endElement().
  241. // As per SAX2, the namespace URI is an empty string if the element has no
  242. // namespace URI, or namespaces is turned off. The check against null protects
  243. // against broken SAX implementations, so I've left it there. - mrglavas
  244. boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
  245. // SAX2: rawName (QName) could be empty string if
  246. // namespace-prefixes property is false.
  247. if ( rawName == null || rawName.length() == 0) {
  248. rawName = localName;
  249. if ( hasNamespaceURI ) {
  250. String prefix;
  251. prefix = getPrefix( namespaceURI );
  252. if ( prefix != null && prefix.length() != 0 )
  253. rawName = prefix + ":" + localName;
  254. }
  255. addNSAttr = true;
  256. }
  257. if ( !hasNamespaceURI )
  258. htmlName = rawName;
  259. else {
  260. if ( namespaceURI.equals( XHTMLNamespace ) ||
  261. (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
  262. htmlName = localName;
  263. else
  264. htmlName = null;
  265. }
  266. // XHTML: element names are lower case, DOM will be different
  267. _printer.printText( '<' );
  268. if ( _xhtml )
  269. _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
  270. else
  271. _printer.printText( rawName );
  272. _printer.indent();
  273. // For each attribute serialize it's name and value as one part,
  274. // separated with a space so the element can be broken on
  275. // multiple lines.
  276. if ( attrs != null ) {
  277. for ( i = 0 ; i < attrs.getLength() ; ++i ) {
  278. _printer.printSpace();
  279. name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
  280. value = attrs.getValue( i );
  281. if ( _xhtml || hasNamespaceURI ) {
  282. // XHTML: print empty string for null values.
  283. if ( value == null ) {
  284. _printer.printText( name );
  285. _printer.printText( "=\"\"" );
  286. } else {
  287. _printer.printText( name );
  288. _printer.printText( "=\"" );
  289. printEscaped( value );
  290. _printer.printText( '"' );
  291. }
  292. } else {
  293. // HTML: Empty values print as attribute name, no value.
  294. // HTML: URI attributes will print unescaped
  295. if ( value == null ) {
  296. value = "";
  297. }
  298. if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
  299. _printer.printText( name );
  300. else if ( HTMLdtd.isURI( rawName, name ) ) {
  301. _printer.printText( name );
  302. _printer.printText( "=\"" );
  303. _printer.printText( escapeURI( value ) );
  304. _printer.printText( '"' );
  305. } else if ( HTMLdtd.isBoolean( rawName, name ) )
  306. _printer.printText( name );
  307. else {
  308. _printer.printText( name );
  309. _printer.printText( "=\"" );
  310. printEscaped( value );
  311. _printer.printText( '"' );
  312. }
  313. }
  314. }
  315. }
  316. if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
  317. preserveSpace = true;
  318. if ( addNSAttr ) {
  319. Enumeration keys;
  320. keys = _prefixes.keys();
  321. while ( keys.hasMoreElements() ) {
  322. _printer.printSpace();
  323. value = (String) keys.nextElement();
  324. name = (String) _prefixes.get( value );
  325. if ( name.length() == 0 ) {
  326. _printer.printText( "xmlns=\"" );
  327. printEscaped( value );
  328. _printer.printText( '"' );
  329. } else {
  330. _printer.printText( "xmlns:" );
  331. _printer.printText( name );
  332. _printer.printText( "=\"" );
  333. printEscaped( value );
  334. _printer.printText( '"' );
  335. }
  336. }
  337. }
  338. // Now it's time to enter a new element state
  339. // with the tag name and space preserving.
  340. // We still do not change the curent element state.
  341. state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
  342. // Prevents line breaks inside A/TD
  343. if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
  344. htmlName.equalsIgnoreCase( "TD" ) ) ) {
  345. state.empty = false;
  346. _printer.printText( '>' );
  347. }
  348. // Handle SCRIPT and STYLE specifically by changing the
  349. // state of the current element to CDATA (XHTML) or
  350. // unescaped (HTML).
  351. if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
  352. rawName.equalsIgnoreCase( "STYLE" ) ) ) {
  353. if ( _xhtml ) {
  354. // XHTML: Print contents as CDATA section
  355. state.doCData = true;
  356. } else {
  357. // HTML: Print contents unescaped
  358. state.unescaped = true;
  359. }
  360. }
  361. } catch ( IOException except ) {
  362. throw new SAXException( except );
  363. }
  364. }
  365. public void endElement( String namespaceURI, String localName,
  366. String rawName )
  367. throws SAXException
  368. {
  369. try {
  370. endElementIO( namespaceURI, localName, rawName );
  371. } catch ( IOException except ) {
  372. throw new SAXException( except );
  373. }
  374. }
  375. public void endElementIO( String namespaceURI, String localName,
  376. String rawName )
  377. throws IOException
  378. {
  379. ElementState state;
  380. String htmlName;
  381. // Works much like content() with additions for closing
  382. // an element. Note the different checks for the closed
  383. // element's state and the parent element's state.
  384. _printer.unindent();
  385. state = getElementState();
  386. if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
  387. htmlName = state.rawName;
  388. else {
  389. if ( state.namespaceURI.equals( XHTMLNamespace ) ||
  390. (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
  391. htmlName = state.localName;
  392. else
  393. htmlName = null;
  394. }
  395. if ( _xhtml) {
  396. if ( state.empty ) {
  397. _printer.printText( " />" );
  398. } else {
  399. // Must leave CData section first
  400. if ( state.inCData )
  401. _printer.printText( "]]>" );
  402. // XHTML: element names are lower case, DOM will be different
  403. _printer.printText( "</" );
  404. _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
  405. _printer.printText( '>' );
  406. }
  407. } else {
  408. if ( state.empty )
  409. _printer.printText( '>' );
  410. // This element is not empty and that last content was
  411. // another element, so print a line break before that
  412. // last element and this element's closing tag.
  413. // [keith] Provided this is not an anchor.
  414. // HTML: some elements do not print closing tag (e.g. LI)
  415. if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
  416. if ( _indenting && ! state.preserveSpace && state.afterElement )
  417. _printer.breakLine();
  418. // Must leave CData section first (Illegal in HTML, but still)
  419. if ( state.inCData )
  420. _printer.printText( "]]>" );
  421. _printer.printText( "</" );
  422. _printer.printText( state.rawName );
  423. _printer.printText( '>' );
  424. }
  425. }
  426. // Leave the element state and update that of the parent
  427. // (if we're not root) to not empty and after element.
  428. state = leaveElementState();
  429. // Temporary hack to prevent line breaks inside A/TD
  430. if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
  431. ! htmlName.equalsIgnoreCase( "TD" ) ) )
  432. state.afterElement = true;
  433. state.empty = false;
  434. if ( isDocumentState() )
  435. _printer.flush();
  436. }
  437. //------------------------------------------//
  438. // SAX document handler serializing methods //
  439. //------------------------------------------//
  440. public void characters( char[] chars, int start, int length )
  441. throws SAXException
  442. {
  443. ElementState state;
  444. try {
  445. // HTML: no CDATA section
  446. state = content();
  447. state.doCData = false;
  448. super.characters( chars, start, length );
  449. } catch ( IOException except ) {
  450. throw new SAXException( except );
  451. }
  452. }
  453. public void startElement( String tagName, AttributeList attrs )
  454. throws SAXException
  455. {
  456. int i;
  457. boolean preserveSpace;
  458. ElementState state;
  459. String name;
  460. String value;
  461. try {
  462. if ( _printer == null )
  463. throw new IllegalStateException(
  464. DOMMessageFormatter.formatMessage(
  465. DOMMessageFormatter.SERIALIZER_DOMAIN,
  466. "NoWriterSupplied", null));
  467. state = getElementState();
  468. if ( isDocumentState() ) {
  469. // If this is the root element handle it differently.
  470. // If the first root element in the document, serialize
  471. // the document's DOCTYPE. Space preserving defaults
  472. // to that of the output format.
  473. if ( ! _started )
  474. startDocument( tagName );
  475. } else {
  476. // For any other element, if first in parent, then
  477. // close parent's opening tag and use the parnet's
  478. // space preserving.
  479. if ( state.empty )
  480. _printer.printText( '>' );
  481. // Indent this element on a new line if the first
  482. // content of the parent element or immediately
  483. // following an element.
  484. if ( _indenting && ! state.preserveSpace &&
  485. ( state.empty || state.afterElement ) )
  486. _printer.breakLine();
  487. }
  488. preserveSpace = state.preserveSpace;
  489. // Do not change the current element state yet.
  490. // This only happens in endElement().
  491. // XHTML: element names are lower case, DOM will be different
  492. _printer.printText( '<' );
  493. if ( _xhtml )
  494. _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
  495. else
  496. _printer.printText( tagName );
  497. _printer.indent();
  498. // For each attribute serialize it's name and value as one part,
  499. // separated with a space so the element can be broken on
  500. // multiple lines.
  501. if ( attrs != null ) {
  502. for ( i = 0 ; i < attrs.getLength() ; ++i ) {
  503. _printer.printSpace();
  504. name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
  505. value = attrs.getValue( i );
  506. if ( _xhtml ) {
  507. // XHTML: print empty string for null values.
  508. if ( value == null ) {
  509. _printer.printText( name );
  510. _printer.printText( "=\"\"" );
  511. } else {
  512. _printer.printText( name );
  513. _printer.printText( "=\"" );
  514. printEscaped( value );
  515. _printer.printText( '"' );
  516. }
  517. } else {
  518. // HTML: Empty values print as attribute name, no value.
  519. // HTML: URI attributes will print unescaped
  520. if ( value == null ) {
  521. value = "";
  522. }
  523. if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
  524. _printer.printText( name );
  525. else if ( HTMLdtd.isURI( tagName, name ) ) {
  526. _printer.printText( name );
  527. _printer.printText( "=\"" );
  528. _printer.printText( escapeURI( value ) );
  529. _printer.printText( '"' );
  530. } else if ( HTMLdtd.isBoolean( tagName, name ) )
  531. _printer.printText( name );
  532. else {
  533. _printer.printText( name );
  534. _printer.printText( "=\"" );
  535. printEscaped( value );
  536. _printer.printText( '"' );
  537. }
  538. }
  539. }
  540. }
  541. if ( HTMLdtd.isPreserveSpace( tagName ) )
  542. preserveSpace = true;
  543. // Now it's time to enter a new element state
  544. // with the tag name and space preserving.
  545. // We still do not change the curent element state.
  546. state = enterElementState( null, null, tagName, preserveSpace );
  547. // Prevents line breaks inside A/TD
  548. if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
  549. state.empty = false;
  550. _printer.printText( '>' );
  551. }
  552. // Handle SCRIPT and STYLE specifically by changing the
  553. // state of the current element to CDATA (XHTML) or
  554. // unescaped (HTML).
  555. if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
  556. tagName.equalsIgnoreCase( "STYLE" ) ) {
  557. if ( _xhtml ) {
  558. // XHTML: Print contents as CDATA section
  559. state.doCData = true;
  560. } else {
  561. // HTML: Print contents unescaped
  562. state.unescaped = true;
  563. }
  564. }
  565. } catch ( IOException except ) {
  566. throw new SAXException( except );
  567. }
  568. }
  569. public void endElement( String tagName )
  570. throws SAXException
  571. {
  572. endElement( null, null, tagName );
  573. }
  574. //------------------------------------------//
  575. // Generic node serializing methods methods //
  576. //------------------------------------------//
  577. /**
  578. * Called to serialize the document's DOCTYPE by the root element.
  579. * The document type declaration must name the root element,
  580. * but the root element is only known when that element is serialized,
  581. * and not at the start of the document.
  582. * <p>
  583. * This method will check if it has not been called before ({@link #_started}),
  584. * will serialize the document type declaration, and will serialize all
  585. * pre-root comments and PIs that were accumulated in the document
  586. * (see {@link #serializePreRoot}). Pre-root will be serialized even if
  587. * this is not the first root element of the document.
  588. */
  589. protected void startDocument( String rootTagName )
  590. throws IOException
  591. {
  592. StringBuffer buffer;
  593. // Not supported in HTML/XHTML, but we still have to switch
  594. // out of DTD mode.
  595. _printer.leaveDTD();
  596. if ( ! _started ) {
  597. // If the public and system identifiers were not specified
  598. // in the output format, use the appropriate ones for HTML
  599. // or XHTML.
  600. if ( _docTypePublicId == null && _docTypeSystemId == null ) {
  601. if ( _xhtml ) {
  602. _docTypePublicId = HTMLdtd.XHTMLPublicId;
  603. _docTypeSystemId = HTMLdtd.XHTMLSystemId;
  604. } else {
  605. _docTypePublicId = HTMLdtd.HTMLPublicId;
  606. _docTypeSystemId = HTMLdtd.HTMLSystemId;
  607. }
  608. }
  609. if ( ! _format.getOmitDocumentType() ) {
  610. // XHTML: If public identifier and system identifier
  611. // specified, print them, else print just system identifier
  612. // HTML: If public identifier specified, print it with
  613. // system identifier, if specified.
  614. // XHTML requires that all element names are lower case, so the
  615. // root on the DOCTYPE must be 'html'. - mrglavas
  616. if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null ) ) {
  617. if (_xhtml) {
  618. _printer.printText( "<!DOCTYPE html PUBLIC " );
  619. }
  620. else {
  621. _printer.printText( "<!DOCTYPE HTML PUBLIC " );
  622. }
  623. printDoctypeURL( _docTypePublicId );
  624. if ( _docTypeSystemId != null ) {
  625. if ( _indenting ) {
  626. _printer.breakLine();
  627. _printer.printText( " " );
  628. } else
  629. _printer.printText( ' ' );
  630. printDoctypeURL( _docTypeSystemId );
  631. }
  632. _printer.printText( '>' );
  633. _printer.breakLine();
  634. } else if ( _docTypeSystemId != null ) {
  635. if (_xhtml) {
  636. _printer.printText( "<!DOCTYPE html SYSTEM " );
  637. }
  638. else {
  639. _printer.printText( "<!DOCTYPE HTML SYSTEM " );
  640. }
  641. printDoctypeURL( _docTypeSystemId );
  642. _printer.printText( '>' );
  643. _printer.breakLine();
  644. }
  645. }
  646. }
  647. _started = true;
  648. // Always serialize these, even if not te first root element.
  649. serializePreRoot();
  650. }
  651. /**
  652. * Called to serialize a DOM element. Equivalent to calling {@link
  653. * #startElement}, {@link #endElement} and serializing everything
  654. * inbetween, but better optimized.
  655. */
  656. protected void serializeElement( Element elem )
  657. throws IOException
  658. {
  659. Attr attr;
  660. NamedNodeMap attrMap;
  661. int i;
  662. Node child;
  663. ElementState state;
  664. boolean preserveSpace;
  665. String name;
  666. String value;
  667. String tagName;
  668. tagName = elem.getTagName();
  669. state = getElementState();
  670. if ( isDocumentState() ) {
  671. // If this is the root element handle it differently.
  672. // If the first root element in the document, serialize
  673. // the document's DOCTYPE. Space preserving defaults
  674. // to that of the output format.
  675. if ( ! _started )
  676. startDocument( tagName );
  677. } else {
  678. // For any other element, if first in parent, then
  679. // close parent's opening tag and use the parnet's
  680. // space preserving.
  681. if ( state.empty )
  682. _printer.printText( '>' );
  683. // Indent this element on a new line if the first
  684. // content of the parent element or immediately
  685. // following an element.
  686. if ( _indenting && ! state.preserveSpace &&
  687. ( state.empty || state.afterElement ) )
  688. _printer.breakLine();
  689. }
  690. preserveSpace = state.preserveSpace;
  691. // Do not change the current element state yet.
  692. // This only happens in endElement().
  693. // XHTML: element names are lower case, DOM will be different
  694. _printer.printText( '<' );
  695. if ( _xhtml )
  696. _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
  697. else
  698. _printer.printText( tagName );
  699. _printer.indent();
  700. // Lookup the element's attribute, but only print specified
  701. // attributes. (Unspecified attributes are derived from the DTD.
  702. // For each attribute print it's name and value as one part,
  703. // separated with a space so the element can be broken on
  704. // multiple lines.
  705. attrMap = elem.getAttributes();
  706. if ( attrMap != null ) {
  707. for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
  708. attr = (Attr) attrMap.item( i );
  709. name = attr.getName().toLowerCase(Locale.ENGLISH);
  710. value = attr.getValue();
  711. if ( attr.getSpecified() ) {
  712. _printer.printSpace();
  713. if ( _xhtml ) {
  714. // XHTML: print empty string for null values.
  715. if ( value == null ) {
  716. _printer.printText( name );
  717. _printer.printText( "=\"\"" );
  718. } else {
  719. _printer.printText( name );
  720. _printer.printText( "=\"" );
  721. printEscaped( value );
  722. _printer.printText( '"' );
  723. }
  724. } else {
  725. // HTML: Empty values print as attribute name, no value.
  726. // HTML: URI attributes will print unescaped
  727. if ( value == null ) {
  728. value = "";
  729. }
  730. if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
  731. _printer.printText( name );
  732. else if ( HTMLdtd.isURI( tagName, name ) ) {
  733. _printer.printText( name );
  734. _printer.printText( "=\"" );
  735. _printer.printText( escapeURI( value ) );
  736. _printer.printText( '"' );
  737. } else if ( HTMLdtd.isBoolean( tagName, name ) )
  738. _printer.printText( name );
  739. else {
  740. _printer.printText( name );
  741. _printer.printText( "=\"" );
  742. printEscaped( value );
  743. _printer.printText( '"' );
  744. }
  745. }
  746. }
  747. }
  748. }
  749. if ( HTMLdtd.isPreserveSpace( tagName ) )
  750. preserveSpace = true;
  751. // If element has children, or if element is not an empty tag,
  752. // serialize an opening tag.
  753. if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
  754. // Enter an element state, and serialize the children
  755. // one by one. Finally, end the element.
  756. state = enterElementState( null, null, tagName, preserveSpace );
  757. // Prevents line breaks inside A/TD
  758. if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
  759. state.empty = false;
  760. _printer.printText( '>' );
  761. }
  762. // Handle SCRIPT and STYLE specifically by changing the
  763. // state of the current element to CDATA (XHTML) or
  764. // unescaped (HTML).
  765. if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
  766. tagName.equalsIgnoreCase( "STYLE" ) ) {
  767. if ( _xhtml ) {
  768. // XHTML: Print contents as CDATA section
  769. state.doCData = true;
  770. } else {
  771. // HTML: Print contents unescaped
  772. state.unescaped = true;
  773. }
  774. }
  775. child = elem.getFirstChild();
  776. while ( child != null ) {
  777. serializeNode( child );
  778. child = child.getNextSibling();
  779. }
  780. endElementIO( null, null, tagName );
  781. } else {
  782. _printer.unindent();
  783. // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
  784. // HTML: Empty tags are defined as such in DTD no in document.
  785. if ( _xhtml )
  786. _printer.printText( " />" );
  787. else
  788. _printer.printText( '>' );
  789. // After element but parent element is no longer empty.
  790. state.afterElement = true;
  791. state.empty = false;
  792. if ( isDocumentState() )
  793. _printer.flush();
  794. }
  795. }
  796. protected void characters( String text )
  797. throws IOException
  798. {
  799. ElementState state;
  800. // HTML: no CDATA section
  801. state = content();
  802. super.characters( text );
  803. }
  804. protected String getEntityRef( int ch )
  805. {
  806. return HTMLdtd.fromChar( ch );
  807. }
  808. protected String escapeURI( String uri )
  809. {
  810. int index;
  811. // XXX Apparently Netscape doesn't like if we escape the URI
  812. // using %nn, so we leave it as is, just remove any quotes.
  813. index = uri.indexOf( "\"" );
  814. if ( index >= 0 )
  815. return uri.substring( 0, index );
  816. else
  817. return uri;
  818. }
  819. }