1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. // Aug 21, 2000:
  58. // Added ability to omit DOCTYPE declaration.
  59. // Reported by Lars Martin <lars@smb-tec.com>
  60. // Aug 25, 2000:
  61. // Added ability to omit comments.
  62. // Contributed by Anupam Bagchi <abagchi@jtcsv.com>
  63. package com.sun.org.apache.xml.internal.serialize;
  64. import java.io.UnsupportedEncodingException;
  65. import org.w3c.dom.Document;
  66. import org.w3c.dom.DocumentType;
  67. import org.w3c.dom.Node;
  68. import org.w3c.dom.html.HTMLDocument;
  69. /**
  70. * Specifies an output format to control the serializer. Based on the
  71. * XSLT specification for output format, plus additional parameters.
  72. * Used to select the suitable serializer and determine how the
  73. * document should be formatted on output.
  74. * <p>
  75. * The two interesting constructors are:
  76. * <ul>
  77. * <li>{@link #OutputFormat(String,String,boolean)} creates a format
  78. * for the specified method (XML, HTML, Text, etc), encoding and indentation
  79. * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
  80. * compatible with the document type (XML, HTML, Text, etc), encoding and
  81. * indentation
  82. * </ul>
  83. *
  84. *
  85. * @version $Revision: 1.20 $ $Date: 2003/12/10 17:14:17 $
  86. * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  87. * <a href="mailto:visco@intalio.com">Keith Visco</a>
  88. * @see Serializer
  89. * @see Method
  90. * @see LineSeparator
  91. */
  92. public class OutputFormat
  93. {
  94. public static class DTD
  95. {
  96. /**
  97. * Public identifier for HTML 4.01 (Strict) document type.
  98. */
  99. public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN";
  100. /**
  101. * System identifier for HTML 4.01 (Strict) document type.
  102. */
  103. public static final String HTMLSystemId =
  104. "http://www.w3.org/TR/html4/strict.dtd";
  105. /**
  106. * Public identifier for XHTML 1.0 (Strict) document type.
  107. */
  108. public static final String XHTMLPublicId =
  109. "-//W3C//DTD XHTML 1.0 Strict//EN";
  110. /**
  111. * System identifier for XHTML 1.0 (Strict) document type.
  112. */
  113. public static final String XHTMLSystemId =
  114. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
  115. }
  116. public static class Defaults
  117. {
  118. /**
  119. * If indentation is turned on, the default identation
  120. * level is 4.
  121. *
  122. * @see #setIndenting(boolean)
  123. */
  124. public static final int Indent = 4;
  125. /**
  126. * The default encoding for Web documents it UTF-8.
  127. *
  128. * @see #getEncoding()
  129. */
  130. public static final String Encoding = "UTF-8";
  131. /**
  132. * The default line width at which to break long lines
  133. * when identing. This is set to 72.
  134. */
  135. public static final int LineWidth = 72;
  136. }
  137. /**
  138. * Holds the output method specified for this document,
  139. * or null if no method was specified.
  140. */
  141. private String _method;
  142. /**
  143. * Specifies the version of the output method.
  144. */
  145. private String _version;
  146. /**
  147. * The indentation level, or zero if no indentation
  148. * was requested.
  149. */
  150. private int _indent = 0;
  151. /**
  152. * The encoding to use, if an input stream is used.
  153. * The default is always UTF-8.
  154. */
  155. private String _encoding = Defaults.Encoding;
  156. /**
  157. * The EncodingInfo instance for _encoding.
  158. */
  159. private EncodingInfo _encodingInfo = null;
  160. // whether java names for encodings are permitted
  161. private boolean _allowJavaNames = false;
  162. /**
  163. * The specified media type or null.
  164. */
  165. private String _mediaType;
  166. /**
  167. * The specified document type system identifier, or null.
  168. */
  169. private String _doctypeSystem;
  170. /**
  171. * The specified document type public identifier, or null.
  172. */
  173. private String _doctypePublic;
  174. /**
  175. * Ture if the XML declaration should be ommited;
  176. */
  177. private boolean _omitXmlDeclaration = false;
  178. /**
  179. * Ture if the DOCTYPE declaration should be ommited;
  180. */
  181. private boolean _omitDoctype = false;
  182. /**
  183. * Ture if comments should be ommited;
  184. */
  185. private boolean _omitComments = false;
  186. /**
  187. * Ture if the comments should be ommited;
  188. */
  189. private boolean _stripComments = false;
  190. /**
  191. * True if the document type should be marked as standalone.
  192. */
  193. private boolean _standalone = false;
  194. /**
  195. * List of element tag names whose text node children must
  196. * be output as CDATA.
  197. */
  198. private String[] _cdataElements;
  199. /**
  200. * List of element tag names whose text node children must
  201. * be output unescaped.
  202. */
  203. private String[] _nonEscapingElements;
  204. /**
  205. * The selected line separator.
  206. */
  207. private String _lineSeparator = LineSeparator.Web;
  208. /**
  209. * The line width at which to wrap long lines when indenting.
  210. */
  211. private int _lineWidth = Defaults.LineWidth;
  212. /**
  213. * True if spaces should be preserved in elements that do not
  214. * specify otherwise, or specify the default behavior.
  215. */
  216. private boolean _preserve = false;
  217. /** If true, an empty string valued attribute is output as "". If false and
  218. * and we are using the HTMLSerializer, then only the attribute name is
  219. * serialized. Defaults to false for backwards compatibility.
  220. */
  221. private boolean _preserveEmptyAttributes = false;
  222. /**
  223. * Constructs a new output format with the default values.
  224. */
  225. public OutputFormat()
  226. {
  227. }
  228. /**
  229. * Constructs a new output format with the default values for
  230. * the specified method and encoding. If <tt>indent</tt>
  231. * is true, the document will be pretty printed with the default
  232. * indentation level and default line wrapping.
  233. *
  234. * @param method The specified output method
  235. * @param encoding The specified encoding
  236. * @param indenting True for pretty printing
  237. * @see #setEncoding
  238. * @see #setIndenting
  239. * @see #setMethod
  240. */
  241. public OutputFormat( String method, String encoding, boolean indenting )
  242. {
  243. setMethod( method );
  244. setEncoding( encoding );
  245. setIndenting( indenting );
  246. }
  247. /**
  248. * Constructs a new output format with the proper method,
  249. * document type identifiers and media type for the specified
  250. * document.
  251. *
  252. * @param doc The document to output
  253. * @see #whichMethod
  254. */
  255. public OutputFormat( Document doc )
  256. {
  257. setMethod( whichMethod( doc ) );
  258. setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
  259. setMediaType( whichMediaType( getMethod() ) );
  260. }
  261. /**
  262. * Constructs a new output format with the proper method,
  263. * document type identifiers and media type for the specified
  264. * document, and with the specified encoding. If <tt>indent</tt>
  265. * is true, the document will be pretty printed with the default
  266. * indentation level and default line wrapping.
  267. *
  268. * @param doc The document to output
  269. * @param encoding The specified encoding
  270. * @param indenting True for pretty printing
  271. * @see #setEncoding
  272. * @see #setIndenting
  273. * @see #whichMethod
  274. */
  275. public OutputFormat( Document doc, String encoding, boolean indenting )
  276. {
  277. this( doc );
  278. setEncoding( encoding );
  279. setIndenting( indenting );
  280. }
  281. /**
  282. * Returns the method specified for this output format.
  283. * Typically the method will be <tt>xml</tt>, <tt>html</tt>
  284. * or <tt>text</tt>, but it might be other values.
  285. * If no method was specified, null will be returned
  286. * and the most suitable method will be determined for
  287. * the document by calling {@link #whichMethod}.
  288. *
  289. * @return The specified output method, or null
  290. */
  291. public String getMethod()
  292. {
  293. return _method;
  294. }
  295. /**
  296. * Sets the method for this output format.
  297. *
  298. * @see #getMethod
  299. * @param method The output method, or null
  300. */
  301. public void setMethod( String method )
  302. {
  303. _method = method;
  304. }
  305. /**
  306. * Returns the version for this output method.
  307. * If no version was specified, will return null
  308. * and the default version number will be used.
  309. * If the serializerr does not support that particular
  310. * version, it should default to a supported version.
  311. *
  312. * @return The specified method version, or null
  313. */
  314. public String getVersion()
  315. {
  316. return _version;
  317. }
  318. /**
  319. * Sets the version for this output method.
  320. * For XML the value would be "1.0", for HTML
  321. * it would be "4.0".
  322. *
  323. * @see #getVersion
  324. * @param version The output method version, or null
  325. */
  326. public void setVersion( String version )
  327. {
  328. _version = version;
  329. }
  330. /**
  331. * Returns the indentation specified. If no indentation
  332. * was specified, zero is returned and the document
  333. * should not be indented.
  334. *
  335. * @return The indentation or zero
  336. * @see #setIndenting
  337. */
  338. public int getIndent()
  339. {
  340. return _indent;
  341. }
  342. /**
  343. * Returns true if indentation was specified.
  344. */
  345. public boolean getIndenting()
  346. {
  347. return ( _indent > 0 );
  348. }
  349. /**
  350. * Sets the indentation. The document will not be
  351. * indented if the indentation is set to zero.
  352. * Calling {@link #setIndenting} will reset this
  353. * value to zero (off) or the default (on).
  354. *
  355. * @param indent The indentation, or zero
  356. */
  357. public void setIndent( int indent )
  358. {
  359. if ( indent < 0 )
  360. _indent = 0;
  361. else
  362. _indent = indent;
  363. }
  364. /**
  365. * Sets the indentation on and off. When set on, the default
  366. * indentation level and default line wrapping is used
  367. * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}).
  368. * To specify a different indentation level or line wrapping,
  369. * use {@link #setIndent} and {@link #setLineWidth}.
  370. *
  371. * @param on True if indentation should be on
  372. */
  373. public void setIndenting( boolean on )
  374. {
  375. if ( on ) {
  376. _indent = Defaults.Indent;
  377. _lineWidth = Defaults.LineWidth;
  378. } else {
  379. _indent = 0;
  380. _lineWidth = 0;
  381. }
  382. }
  383. /**
  384. * Returns the specified encoding. If no encoding was
  385. * specified, the default is always "UTF-8".
  386. *
  387. * @return The encoding
  388. */
  389. public String getEncoding()
  390. {
  391. return _encoding;
  392. }
  393. /**
  394. * Sets the encoding for this output method. If no
  395. * encoding was specified, the default is always "UTF-8".
  396. * Make sure the encoding is compatible with the one
  397. * used by the {@link java.io.Writer}.
  398. *
  399. * @see #getEncoding
  400. * @param encoding The encoding, or null
  401. */
  402. public void setEncoding( String encoding )
  403. {
  404. _encoding = encoding;
  405. _encodingInfo = null;
  406. }
  407. /**
  408. * Sets the encoding for this output method with an <code>EncodingInfo</code>
  409. * instance.
  410. */
  411. public void setEncoding(EncodingInfo encInfo) {
  412. _encoding = encInfo.getIANAName();
  413. _encodingInfo = encInfo;
  414. }
  415. /**
  416. * Returns an <code>EncodingInfo<code> instance for the encoding.
  417. *
  418. * @see #setEncoding
  419. */
  420. public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException {
  421. if (_encodingInfo == null)
  422. _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames);
  423. return _encodingInfo;
  424. }
  425. /**
  426. * Sets whether java encoding names are permitted
  427. */
  428. public void setAllowJavaNames (boolean allow) {
  429. _allowJavaNames = allow;
  430. }
  431. /**
  432. * Returns whether java encoding names are permitted
  433. */
  434. public boolean setAllowJavaNames () {
  435. return _allowJavaNames;
  436. }
  437. /**
  438. * Returns the specified media type, or null.
  439. * To determine the media type based on the
  440. * document type, use {@link #whichMediaType}.
  441. *
  442. * @return The specified media type, or null
  443. */
  444. public String getMediaType()
  445. {
  446. return _mediaType;
  447. }
  448. /**
  449. * Sets the media type.
  450. *
  451. * @see #getMediaType
  452. * @param mediaType The specified media type
  453. */
  454. public void setMediaType( String mediaType )
  455. {
  456. _mediaType = mediaType;
  457. }
  458. /**
  459. * Sets the document type public and system identifiers.
  460. * Required only if the DOM Document or SAX events do not
  461. * specify the document type, and one must be present in
  462. * the serialized document. Any document type specified
  463. * by the DOM Document or SAX events will override these
  464. * values.
  465. *
  466. * @param publicId The public identifier, or null
  467. * @param systemId The system identifier, or null
  468. */
  469. public void setDoctype( String publicId, String systemId )
  470. {
  471. _doctypePublic = publicId;
  472. _doctypeSystem = systemId;
  473. }
  474. /**
  475. * Returns the specified document type public identifier,
  476. * or null.
  477. */
  478. public String getDoctypePublic()
  479. {
  480. return _doctypePublic;
  481. }
  482. /**
  483. * Returns the specified document type system identifier,
  484. * or null.
  485. */
  486. public String getDoctypeSystem()
  487. {
  488. return _doctypeSystem;
  489. }
  490. /**
  491. * Returns true if comments should be ommited.
  492. * The default is false.
  493. */
  494. public boolean getOmitComments()
  495. {
  496. return _omitComments;
  497. }
  498. /**
  499. * Sets comment omitting on and off.
  500. *
  501. * @param omit True if comments should be ommited
  502. */
  503. public void setOmitComments( boolean omit )
  504. {
  505. _omitComments = omit;
  506. }
  507. /**
  508. * Returns true if the DOCTYPE declaration should
  509. * be ommited. The default is false.
  510. */
  511. public boolean getOmitDocumentType()
  512. {
  513. return _omitDoctype;
  514. }
  515. /**
  516. * Sets DOCTYPE declaration omitting on and off.
  517. *
  518. * @param omit True if DOCTYPE declaration should be ommited
  519. */
  520. public void setOmitDocumentType( boolean omit )
  521. {
  522. _omitDoctype = omit;
  523. }
  524. /**
  525. * Returns true if the XML document declaration should
  526. * be ommited. The default is false.
  527. */
  528. public boolean getOmitXMLDeclaration()
  529. {
  530. return _omitXmlDeclaration;
  531. }
  532. /**
  533. * Sets XML declaration omitting on and off.
  534. *
  535. * @param omit True if XML declaration should be ommited
  536. */
  537. public void setOmitXMLDeclaration( boolean omit )
  538. {
  539. _omitXmlDeclaration = omit;
  540. }
  541. /**
  542. * Returns true if the document type is standalone.
  543. * The default is false.
  544. */
  545. public boolean getStandalone()
  546. {
  547. return _standalone;
  548. }
  549. /**
  550. * Sets document DTD standalone. The public and system
  551. * identifiers must be null for the document to be
  552. * serialized as standalone.
  553. *
  554. * @param standalone True if document DTD is standalone
  555. */
  556. public void setStandalone( boolean standalone )
  557. {
  558. _standalone = standalone;
  559. }
  560. /**
  561. * Returns a list of all the elements whose text node children
  562. * should be output as CDATA, or null if no such elements were
  563. * specified.
  564. */
  565. public String[] getCDataElements()
  566. {
  567. return _cdataElements;
  568. }
  569. /**
  570. * Returns true if the text node children of the given elements
  571. * should be output as CDATA.
  572. *
  573. * @param tagName The element's tag name
  574. * @return True if should serialize as CDATA
  575. */
  576. public boolean isCDataElement( String tagName )
  577. {
  578. int i;
  579. if ( _cdataElements == null )
  580. return false;
  581. for ( i = 0 ; i < _cdataElements.length ; ++i )
  582. if ( _cdataElements[ i ].equals( tagName ) )
  583. return true;
  584. return false;
  585. }
  586. /**
  587. * Sets the list of elements for which text node children
  588. * should be output as CDATA.
  589. *
  590. * @param cdataElements List of CDATA element tag names
  591. */
  592. public void setCDataElements( String[] cdataElements )
  593. {
  594. _cdataElements = cdataElements;
  595. }
  596. /**
  597. * Returns a list of all the elements whose text node children
  598. * should be output unescaped (no character references), or null
  599. * if no such elements were specified.
  600. */
  601. public String[] getNonEscapingElements()
  602. {
  603. return _nonEscapingElements;
  604. }
  605. /**
  606. * Returns true if the text node children of the given elements
  607. * should be output unescaped.
  608. *
  609. * @param tagName The element's tag name
  610. * @return True if should serialize unescaped
  611. */
  612. public boolean isNonEscapingElement( String tagName )
  613. {
  614. int i;
  615. if ( _nonEscapingElements == null ) {
  616. return false;
  617. }
  618. for ( i = 0 ; i < _nonEscapingElements.length ; ++i )
  619. if ( _nonEscapingElements[ i ].equals( tagName ) )
  620. return true;
  621. return false;
  622. }
  623. /**
  624. * Sets the list of elements for which text node children
  625. * should be output unescaped (no character references).
  626. *
  627. * @param nonEscapingElements List of unescaped element tag names
  628. */
  629. public void setNonEscapingElements( String[] nonEscapingElements )
  630. {
  631. _nonEscapingElements = nonEscapingElements;
  632. }
  633. /**
  634. * Returns a specific line separator to use. The default is the
  635. * Web line separator (<tt>\n</tt>). A string is returned to
  636. * support double codes (CR + LF).
  637. *
  638. * @return The specified line separator
  639. */
  640. public String getLineSeparator()
  641. {
  642. return _lineSeparator;
  643. }
  644. /**
  645. * Sets the line separator. The default is the Web line separator
  646. * (<tt>\n</tt>). The machine's line separator can be obtained
  647. * from the system property <tt>line.separator</tt>, but is only
  648. * useful if the document is edited on machines of the same type.
  649. * For general documents, use the Web line separator.
  650. *
  651. * @param lineSeparator The specified line separator
  652. */
  653. public void setLineSeparator( String lineSeparator )
  654. {
  655. if ( lineSeparator == null )
  656. _lineSeparator = LineSeparator.Web;
  657. else
  658. _lineSeparator = lineSeparator;
  659. }
  660. /**
  661. * Returns true if the default behavior for this format is to
  662. * preserve spaces. All elements that do not specify otherwise
  663. * or specify the default behavior will be formatted based on
  664. * this rule. All elements that specify space preserving will
  665. * always preserve space.
  666. */
  667. public boolean getPreserveSpace()
  668. {
  669. return _preserve;
  670. }
  671. /**
  672. * Sets space preserving as the default behavior. The default is
  673. * space stripping and all elements that do not specify otherwise
  674. * or use the default value will not preserve spaces.
  675. *
  676. * @param preserve True if spaces should be preserved
  677. */
  678. public void setPreserveSpace( boolean preserve )
  679. {
  680. _preserve = preserve;
  681. }
  682. /**
  683. * Return the selected line width for breaking up long lines.
  684. * When indenting, and only when indenting, long lines will be
  685. * broken at space boundaries based on this line width.
  686. * No line wrapping occurs if this value is zero.
  687. */
  688. public int getLineWidth()
  689. {
  690. return _lineWidth;
  691. }
  692. /**
  693. * Sets the line width. If zero then no line wrapping will
  694. * occur. Calling {@link #setIndenting} will reset this
  695. * value to zero (off) or the default (on).
  696. *
  697. * @param lineWidth The line width to use, zero for default
  698. * @see #getLineWidth
  699. * @see #setIndenting
  700. */
  701. public void setLineWidth( int lineWidth )
  702. {
  703. if ( lineWidth <= 0 )
  704. _lineWidth = 0;
  705. else
  706. _lineWidth = lineWidth;
  707. }
  708. /**
  709. * Returns the preserveEmptyAttribute flag. If flag is false, then'
  710. * attributes with empty string values are output as the attribute
  711. * name only (in HTML mode).
  712. * @return preserve the preserve flag
  713. */ public boolean getPreserveEmptyAttributes () { return _preserveEmptyAttributes; } /**
  714. * Sets the preserveEmptyAttribute flag. If flag is false, then'
  715. * attributes with empty string values are output as the attribute
  716. * name only (in HTML mode).
  717. * @param preserve the preserve flag
  718. */ public void setPreserveEmptyAttributes (boolean preserve) { _preserveEmptyAttributes = preserve; }
  719. /**
  720. * Returns the last printable character based on the selected
  721. * encoding. Control characters and non-printable characters
  722. * are always printed as character references.
  723. */
  724. public char getLastPrintable()
  725. {
  726. if ( getEncoding() != null &&
  727. ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
  728. return 0xFF;
  729. else
  730. return 0xFFFF;
  731. }
  732. /**
  733. * Determine the output method for the specified document.
  734. * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
  735. * then the method is said to be <tt>html</tt>. If the root
  736. * element is 'html' and all text nodes preceding the root
  737. * element are all whitespace, then the method is said to be
  738. * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
  739. *
  740. * @param doc The document to check
  741. * @return The suitable method
  742. */
  743. public static String whichMethod( Document doc )
  744. {
  745. Node node;
  746. String value;
  747. int i;
  748. // If document is derived from HTMLDocument then the default
  749. // method is html.
  750. if ( doc instanceof HTMLDocument )
  751. return Method.HTML;
  752. // Lookup the root element and the text nodes preceding it.
  753. // If root element is html and all text nodes contain whitespace
  754. // only, the method is html.
  755. // FIXME (SM) should we care about namespaces here?
  756. node = doc.getFirstChild();
  757. while (node != null) {
  758. // If the root element is html, the method is html.
  759. if ( node.getNodeType() == Node.ELEMENT_NODE ) {
  760. if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
  761. return Method.HTML;
  762. } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
  763. return Method.FOP;
  764. } else {
  765. return Method.XML;
  766. }
  767. } else if ( node.getNodeType() == Node.TEXT_NODE ) {
  768. // If a text node preceding the root element contains
  769. // only whitespace, this might be html, otherwise it's
  770. // definitely xml.
  771. value = node.getNodeValue();
  772. for ( i = 0 ; i < value.length() ; ++i )
  773. if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
  774. value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
  775. return Method.XML;
  776. }
  777. node = node.getNextSibling();
  778. }
  779. // Anything else, the method is xml.
  780. return Method.XML;
  781. }
  782. /**
  783. * Returns the document type public identifier
  784. * specified for this document, or null.
  785. */
  786. public static String whichDoctypePublic( Document doc )
  787. {
  788. DocumentType doctype;
  789. /* DOM Level 2 was introduced into the code base*/
  790. doctype = doc.getDoctype();
  791. if ( doctype != null ) {
  792. // Note on catch: DOM Level 1 does not specify this method
  793. // and the code will throw a NoSuchMethodError
  794. try {
  795. return doctype.getPublicId();
  796. } catch ( Error except ) { }
  797. }
  798. if ( doc instanceof HTMLDocument )
  799. return DTD.XHTMLPublicId;
  800. return null;
  801. }
  802. /**
  803. * Returns the document type system identifier
  804. * specified for this document, or null.
  805. */
  806. public static String whichDoctypeSystem( Document doc )
  807. {
  808. DocumentType doctype;
  809. /* DOM Level 2 was introduced into the code base*/
  810. doctype = doc.getDoctype();
  811. if ( doctype != null ) {
  812. // Note on catch: DOM Level 1 does not specify this method
  813. // and the code will throw a NoSuchMethodError
  814. try {
  815. return doctype.getSystemId();
  816. } catch ( Error except ) { }
  817. }
  818. if ( doc instanceof HTMLDocument )
  819. return DTD.XHTMLSystemId;
  820. return null;
  821. }
  822. /**
  823. * Returns the suitable media format for a document
  824. * output with the specified method.
  825. */
  826. public static String whichMediaType( String method )
  827. {
  828. if ( method.equalsIgnoreCase( Method.XML ) )
  829. return "text/xml";
  830. if ( method.equalsIgnoreCase( Method.HTML ) )
  831. return "text/html";
  832. if ( method.equalsIgnoreCase( Method.XHTML ) )
  833. return "text/html";
  834. if ( method.equalsIgnoreCase( Method.TEXT ) )
  835. return "text/plain";
  836. if ( method.equalsIgnoreCase( Method.FOP ) )
  837. return "application/pdf";
  838. return null;
  839. }
  840. }