- /*
- * The Apache Software License, Version 1.1
- *
- *
- * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.apache.org. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-
-
- // Sep 14, 2000:
- // Fixed problem with namespace handling. Contributed by
- // David Blondeau <blondeau@intalio.com>
- // Sep 14, 2000:
- // Fixed serializer to report IO exception directly, instead at
- // the end of document processing.
- // Reported by Patrick Higgins <phiggins@transzap.com>
- // Aug 21, 2000:
- // Fixed bug in startDocument not calling prepare.
- // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
- // Aug 21, 2000:
- // Added ability to omit DOCTYPE declaration.
-
-
- package com.sun.org.apache.xml.internal.serialize;
-
-
- import java.io.IOException;
- import java.io.OutputStream;
- import java.io.Writer;
-
- import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
- import org.w3c.dom.DOMError;
- import com.sun.org.apache.xerces.internal.impl.Constants;
- import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
- import com.sun.org.apache.xerces.internal.util.SymbolTable;
- import com.sun.org.apache.xerces.internal.util.XML11Char;
- import com.sun.org.apache.xerces.internal.util.XMLChar;
- import org.xml.sax.SAXException;
-
- /**
- * Implements an XML serializer supporting both DOM and SAX pretty
- * serializing. For usage instructions see {@link Serializer}.
- * <p>
- * If an output stream is used, the encoding is taken from the
- * output format (defaults to <tt>UTF-8</tt>). If a writer is
- * used, make sure the writer uses the same encoding (if applies)
- * as specified in the output format.
- * <p>
- * The serializer supports both DOM and SAX. SAX serializing is done by firing
- * SAX events and using the serializer as a document handler. DOM serializing is done
- * by calling {@link #serialize(Document)} or by using DOM Level 3
- * {@link org.w3c.dom.ls.DOMSerializer} and
- * serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
- * {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
- * <p>
- * If an I/O exception occurs while serializing, the serializer
- * will not throw an exception directly, but only throw it
- * at the end of serializing (either DOM or SAX's {@link
- * org.xml.sax.DocumentHandler#endDocument}.
- * <p>
- * For elements that are not specified as whitespace preserving,
- * the serializer will potentially break long text lines at space
- * boundaries, indent lines, and serialize elements on separate
- * lines. Line terminators will be regarded as spaces, and
- * spaces at beginning of line will be stripped.
- * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
- * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
- * @author Elena Litani IBM
- * @version $Revision: 1.8 $ $Date: 2004/01/29 21:11:30 $
- * @see Serializer
- */
- public class XML11Serializer
- extends XMLSerializer {
-
- //
- // constants
- //
-
- protected static final boolean DEBUG = false;
-
- //
- // data
- //
-
- //
- // DOM Level 3 implementation: variables intialized in DOMSerializerImpl
- //
-
- /** stores namespaces in scope */
- protected NamespaceSupport fNSBinder;
-
- /** stores all namespace bindings on the current element */
- protected NamespaceSupport fLocalNSBinder;
-
- /** symbol table for serialization */
- protected SymbolTable fSymbolTable;
-
- // is node dom level 1 node?
- protected boolean fDOML1 = false;
- // counter for new prefix names
- protected int fNamespaceCounter = 1;
- protected final static String PREFIX = "NS";
-
- /**
- * Controls whether namespace fixup should be performed during
- * the serialization.
- * NOTE: if this field is set to true the following
- * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
- * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
- */
- protected boolean fNamespaces = false;
-
-
- private boolean fPreserveSpace;
-
-
- /**
- * Constructs a new serializer. The serializer cannot be used without
- * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
- * first.
- */
- public XML11Serializer() {
- super( );
- _format.setVersion("1.1");
- }
-
-
- /**
- * Constructs a new serializer. The serializer cannot be used without
- * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
- * first.
- */
- public XML11Serializer( OutputFormat format ) {
- super( format );
- _format.setVersion("1.1");
- }
-
-
- /**
- * Constructs a new serializer that writes to the specified writer
- * using the specified output format. If <tt>format</tt> is null,
- * will use a default output format.
- *
- * @param writer The writer to use
- * @param format The output format to use, null for the default
- */
- public XML11Serializer( Writer writer, OutputFormat format ) {
- super( writer, format );
- _format.setVersion("1.1");
- }
-
-
- /**
- * Constructs a new serializer that writes to the specified output
- * stream using the specified output format. If <tt>format</tt>
- * is null, will use a default output format.
- *
- * @param output The output stream to use
- * @param format The output format to use, null for the default
- */
- public XML11Serializer( OutputStream output, OutputFormat format ) {
- super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
- _format.setVersion("1.1");
- }
-
- //-----------------------------------------//
- // SAX content handler serializing methods //
- //-----------------------------------------//
-
-
- public void characters( char[] chars, int start, int length )
- throws SAXException
- {
- ElementState state;
-
- try {
- state = content();
-
- // Check if text should be print as CDATA section or unescaped
- // based on elements listed in the output format (the element
- // state) or whether we are inside a CDATA section or entity.
-
- if ( state.inCData || state.doCData ) {
- int saveIndent;
-
- // Print a CDATA section. The text is not escaped, but ']]>'
- // appearing in the code must be identified and dealt with.
- // The contents of a text node is considered space preserving.
- if ( ! state.inCData ) {
- _printer.printText( "<![CDATA[" );
- state.inCData = true;
- }
- saveIndent = _printer.getNextIndent();
- _printer.setNextIndent( 0 );
- char ch;
- for ( int index = start ; index < length ; ++index ) {
- ch = chars[index];
- if ( ch == ']' && index + 2 < length &&
- chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
- _printer.printText("]]]]><![CDATA[>");
- index +=2;
- continue;
- }
- if (!XML11Char.isXML11Valid(ch)) {
- // check if it is surrogate
- if (++index <length) {
- surrogates(ch, chars[index]);
- }
- else {
- fatalError("The character '"+(char)ch+"' is an invalid XML character");
- }
- continue;
- } else {
- if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
- _printer.printText((char)ch);
- } else {
- // The character is not printable -- split CDATA section
- _printer.printText("]]>");
- _printer.printText(Integer.toHexString(ch));
- _printer.printText(";<![CDATA[");
- }
- }
- }
- _printer.setNextIndent( saveIndent );
-
- } else {
-
- int saveIndent;
-
- if ( state.preserveSpace ) {
- // If preserving space then hold of indentation so no
- // excessive spaces are printed at line breaks, escape
- // the text content without replacing spaces and print
- // the text breaking only at line breaks.
- saveIndent = _printer.getNextIndent();
- _printer.setNextIndent( 0 );
- printText( chars, start, length, true, state.unescaped );
- _printer.setNextIndent( saveIndent );
- } else {
- printText( chars, start, length, false, state.unescaped );
- }
- }
- } catch ( IOException except ) {
- throw new SAXException( except );
- }
- }
-
-
- //
- // overwrite printing functions to make sure serializer prints out valid XML
- //
- protected void printEscaped( String source ) throws IOException {
- int length = source.length();
- for ( int i = 0 ; i < length ; ++i ) {
- int ch = source.charAt(i);
- if (!XML11Char.isXML11Valid(ch)) {
- if (++i <length) {
- surrogates(ch, source.charAt(i));
- } else {
- fatalError("The character '"+(char)ch+"' is an invalid XML character");
- }
- continue;
- }
- if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){
- printHex(ch);
- } else if (ch == '<') {
- _printer.printText("<");
- } else if (ch == '&') {
- _printer.printText("&");
- } else if (ch == '"') {
- _printer.printText(""");
- } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
- _printer.printText((char) ch);
- } else {
- printHex(ch);
- }
- }
- }
-
- protected final void printCDATAText(String text) throws IOException {
- int length = text.length();
- char ch;
-
- for (int index = 0; index < length; ++index) {
- ch = text.charAt(index);
-
- if (ch == ']'
- && index + 2 < length
- && text.charAt(index + 1) == ']'
- && text.charAt(index + 2) == '>') { // check for ']]>'
- if (fDOMErrorHandler != null){
- // REVISIT: this means that if DOM Error handler is not registered we don't report any
- // fatal errors and might serialize not wellformed document
- if ((features & DOMSerializerImpl.SPLITCDATA) == 0
- && (features & DOMSerializerImpl.WELLFORMED) == 0) {
- // issue fatal error
- String msg =
- DOMMessageFormatter.formatMessage(
- DOMMessageFormatter.SERIALIZER_DOMAIN,
- "EndingCDATA",
- null);
- modifyDOMError(
- msg,
- DOMError.SEVERITY_FATAL_ERROR,
- fCurrentNode);
- boolean continueProcess =
- fDOMErrorHandler.handleError(fDOMError);
- if (!continueProcess) {
- throw new IOException();
- }
- } else {
- // issue warning
- String msg =
- DOMMessageFormatter.formatMessage(
- DOMMessageFormatter.SERIALIZER_DOMAIN,
- "SplittingCDATA",
- null);
- modifyDOMError(
- msg,
- DOMError.SEVERITY_WARNING,
- fCurrentNode);
- fDOMErrorHandler.handleError(fDOMError);
- }
- }
- // split CDATA section
- _printer.printText("]]]]><![CDATA[>");
- index += 2;
- continue;
- }
-
- if (!XML11Char.isXML11Valid(ch)) {
- // check if it is surrogate
- if (++index < length) {
- surrogates(ch, text.charAt(index));
- } else {
- fatalError(
- "The character '"
- + (char) ch
- + "' is an invalid XML character");
- }
- continue;
- } else {
- if (_encodingInfo.isPrintable((char) ch)
- && XML11Char.isXML11ValidLiteral(ch)) {
- _printer.printText((char) ch);
- } else {
-
- // The character is not printable -- split CDATA section
- _printer.printText("]]>");
- _printer.printText(Integer.toHexString(ch));
- _printer.printText(";<![CDATA[");
- }
- }
- }
- }
-
-
- // note that this "int" should, in all cases, be a char.
- // REVISIT: make it a char...
- protected final void printXMLChar( int ch ) throws IOException {
-
- if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
- printHex(ch);
- } else if ( ch == '<') {
- _printer.printText("<");
- } else if (ch == '&') {
- _printer.printText("&");
- } else if (ch == '>'){
- // character sequence "]]>" can't appear in content, therefore
- // we should escape '>'
- _printer.printText(">");
- } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
- _printer.printText((char)ch);
- } else {
- printHex(ch);
- }
- }
-
-
-
- protected final void surrogates(int high, int low) throws IOException{
- if (XMLChar.isHighSurrogate(high)) {
- if (!XMLChar.isLowSurrogate(low)) {
- //Invalid XML
- fatalError("The character '"+(char)low+"' is an invalid XML character");
- }
- else {
- int supplemental = XMLChar.supplemental((char)high, (char)low);
- if (!XML11Char.isXML11Valid(supplemental)) {
- //Invalid XML
- fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
- }
- else {
- if (content().inCData ) {
- _printer.printText("]]>");
- _printer.printText(Integer.toHexString(supplemental));
- _printer.printText(";<![CDATA[");
- }
- else {
- printHex(supplemental);
- }
- }
- }
- } else {
- fatalError("The character '"+(char)high+"' is an invalid XML character");
- }
-
- }
-
-
- protected void printText( String text, boolean preserveSpace, boolean unescaped )
- throws IOException {
- int index;
- char ch;
- int length = text.length();
- if ( preserveSpace ) {
- // Preserving spaces: the text must print exactly as it is,
- // without breaking when spaces appear in the text and without
- // consolidating spaces. If a line terminator is used, a line
- // break will occur.
- for ( index = 0 ; index < length ; ++index ) {
- ch = text.charAt( index );
- if (!XML11Char.isXML11Valid(ch)) {
- // check if it is surrogate
- if (++index <length) {
- surrogates(ch, text.charAt(index));
- } else {
- fatalError("The character '"+(char)ch+"' is an invalid XML character");
- }
- continue;
- }
- if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
- _printer.printText( ch );
- } else
- printXMLChar( ch );
- }
- } else {
- // Not preserving spaces: print one part at a time, and
- // use spaces between parts to break them into different
- // lines. Spaces at beginning of line will be stripped
- // by printing mechanism. Line terminator is treated
- // no different than other text part.
- for ( index = 0 ; index < length ; ++index ) {
- ch = text.charAt( index );
- if (!XML11Char.isXML11Valid(ch)) {
- // check if it is surrogate
- if (++index <length) {
- surrogates(ch, text.charAt(index));
- } else {
- fatalError("The character '"+(char)ch+"' is an invalid XML character");
- }
- continue;
- }
-
- if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
- _printer.printText( ch );
- else
- printXMLChar( ch);
- }
- }
- }
-
-
-
- protected void printText( char[] chars, int start, int length,
- boolean preserveSpace, boolean unescaped ) throws IOException {
- int index;
- char ch;
-
- if ( preserveSpace ) {
- // Preserving spaces: the text must print exactly as it is,
- // without breaking when spaces appear in the text and without
- // consolidating spaces. If a line terminator is used, a line
- // break will occur.
- while ( length-- > 0 ) {
- ch = chars[ start ];
- ++start;
- if (!XML11Char.isXML11Valid(ch)) {
- // check if it is surrogate
- if (++start <length) {
- surrogates(ch, chars[start]);
- } else {
- fatalError("The character '"+(char)ch+"' is an invalid XML character");
- }
- continue;
- }
- if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
- _printer.printText( ch );
- else
- printXMLChar( ch );
- }
- } else {
- // Not preserving spaces: print one part at a time, and
- // use spaces between parts to break them into different
- // lines. Spaces at beginning of line will be stripped
- // by printing mechanism. Line terminator is treated
- // no different than other text part.
- while ( length-- > 0 ) {
- ch = chars[ start ];
- ++start;
-
- if (!XML11Char.isXML11Valid(ch)) {
- // check if it is surrogate
- if (++start <length) {
- surrogates(ch, chars[start]);
- } else {
- fatalError("The character '"+(char)ch+"' is an invalid XML character");
- }
- continue;
- }
-
- if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
- _printer.printText( ch );
- else
- printXMLChar( ch );
- }
- }
- }
-
-
- public boolean reset() {
- super.reset();
- return true;
-
- }
-
- }
-
-
-
-