- /*
- * $Id: XmlDocumentBuilder.java,v 1.6 2001/09/14 00:50:25 edwingo Exp $
- *
- * The Apache Software License, Version 1.1
- *
- *
- * Copyright (c) 2000 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Crimson" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation and was
- * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
- * http://www.sun.com. For more information on the Apache Software
- * Foundation, please see <http://www.apache.org/>.
- */
-
- package org.apache.crimson.tree;
-
-
- import java.io.IOException;
-
- import java.net.URL;
-
- import java.util.Dictionary;
- import java.util.Hashtable;
- import java.util.Locale;
- import java.util.Vector;
-
- import org.w3c.dom.CDATASection;
- import org.w3c.dom.Comment;
- import org.w3c.dom.Document;
- import org.w3c.dom.EntityReference;
- import org.w3c.dom.DOMException;
- import org.w3c.dom.DOMImplementation;
-
- import org.xml.sax.ErrorHandler;
- import org.xml.sax.InputSource;
- import org.xml.sax.Locator;
- import org.xml.sax.SAXException;
- import org.xml.sax.SAXParseException;
- import org.xml.sax.ContentHandler;
- import org.xml.sax.DTDHandler;
- import org.xml.sax.Attributes;
- import org.xml.sax.ext.LexicalHandler;
- import org.xml.sax.ext.DeclHandler;
-
- import org.apache.crimson.parser.AttributesEx;
-
-
- /**
- * This class is a SAX2 ContentHandler which converts a stream of parse
- * events into an in-memory DOM document. After each <em>Parser.parse()</em>
- * invocation returns, a resulting DOM Document may be accessed via the
- * <em>getDocument</em> method. The parser and its builder should be used
- * together; the builder may be used with only one parser at a time.
- *
- * <P> This builder optionally does XML namespace processing, reporting
- * conformance problems as recoverable errors using the parser's error
- * handler.
- *
- * <P> Note: element factories are deprecated because they are non-standard
- * and are provided here only for backwards compatibility. To customize
- * the document, a powerful technique involves using an element factory
- * specifying what element tags (from a given XML namespace) correspond to
- * what implementation classes. Parse trees produced by such a builder can
- * have nodes which add behaviors to achieve application-specific
- * functionality, such as modifing the tree as it is parsed.
- *
- * <P> The object model here is that XML elements are polymorphic, with
- * semantic intelligence embedded through customized internal nodes.
- * Those nodes are created as the parse tree is built. Such trees now
- * build on the W3C Document Object Model (DOM), and other models may be
- * supported by the customized nodes. This allows both generic tools
- * (understanding generic interfaces such as the DOM core) and specialized
- * tools (supporting specialized behaviors, such as the HTML extensions
- * to the DOM core; or for XSL elements) to share data structures.
- *
- * <P> Normally only "model" semantics are in document data structures,
- * but "view" or "controller" semantics can be supported if desired.
- *
- * <P> Elements may choose to intercept certain parsing events directly.
- * They do this by overriding the default implementations of methods
- * in the <em>XmlReadable</em> interface. This is normally done to make
- * the DOM tree represent application level modeling requirements, rather
- * than matching an XML structure that may not be optimized appropriately.
- *
- * @author David Brownell
- * @version $Revision: 1.6 $
- */
- public class XmlDocumentBuilder implements ContentHandler, LexicalHandler,
- DeclHandler, DTDHandler
- {
- // used during parsing
- protected XmlDocument document;
- protected Locator locator;
- private Locale locale = Locale.getDefault ();
-
- private ElementFactory factory;
- private Vector attrTmp = new Vector ();
-
- protected ParentNode elementStack[];
- protected int topOfStack;
- private boolean inDTD;
- private boolean inCDataSection;
-
- private Doctype doctype;
-
- // parser modes
- private boolean disableNamespaces = true; /* Keep this for
- backward API
- compatibility,
- but it does
- not change any
- behavior. */
- private boolean ignoreWhitespace = false;
- private boolean expandEntityRefs = true;
- private boolean ignoreComments = false;
- private boolean putCDATAIntoText = false;
-
-
- /**
- * Default constructor is for use in conjunction with a SAX2 parser.
- */
- public XmlDocumentBuilder() {
- // No-op
- }
-
-
- /**
- * Returns true if certain lexical information is automatically
- * discarded when a DOM tree is built, producing smaller parse trees
- * that are easier to use.
- * <b>Obsolete:</b> for backwards compatibility
- */
- public boolean isIgnoringLexicalInfo () {
- return ignoreWhitespace && expandEntityRefs
- && ignoreComments && putCDATAIntoText;
- }
-
- /**
- * Controls whether certain lexical information is discarded.
- *
- * <P> That information includes whitespace in element content which
- * is ignorable (note that some nonvalidating XML parsers will not
- * report that information); all comments; which text is found in
- * CDATA sections; and boundaries of entity references.
- *
- * <P> "Ignorable whitespace" as reported by parsers is whitespace
- * used to format XML markup. That is, all whitespace except that in
- * "mixed" or ANY content models is ignorable. When it is discarded,
- * pretty-printing may be necessary to make the document be readable
- * again by humans.
- *
- * <P> Whitespace inside "mixed" and ANY content models needs different
- * treatment, since it could be part of the document content. In such
- * cases XML defines a <em>xml:space</em> attribute which applications
- * should use to determine whether whitespace must be preserved (value
- * of the attribute is <em>preserve</em>) or whether default behavior
- * (such as eliminating leading and trailing space, and normalizing
- * consecutive internal whitespace to a single space) is allowed.
- *
- * @param value true indicates that such lexical information should
- * be discarded during parsing.
- * <b>Obsolete:</b> for backwards compatibility
- */
- public void setIgnoringLexicalInfo (boolean value) {
- ignoreWhitespace = value;
- expandEntityRefs = value;
- ignoreComments = value;
- putCDATAIntoText = value;
- }
-
- /**
- * Internal API used by JAXP implementation. Access is set to "public"
- * to enable inter-package access. Use JAXP DocumentBuilderFactory
- * class to access this functionality.
- */
- public void setIgnoreWhitespace(boolean value) {
- ignoreWhitespace = value;
- }
-
- /**
- * Internal API used by JAXP implementation. Access is set to "public"
- * to enable inter-package access. Use JAXP DocumentBuilderFactory
- * class to access this functionality.
- */
- public void setExpandEntityReferences(boolean value) {
- expandEntityRefs = value;
- }
-
- /**
- * Internal API used by JAXP implementation. Access is set to "public"
- * to enable inter-package access. Use JAXP DocumentBuilderFactory
- * class to access this functionality.
- */
- public void setIgnoreComments(boolean value) {
- ignoreComments = value;
- }
-
- /**
- * Internal API used by JAXP implementation. Access is set to "public"
- * to enable inter-package access. Use JAXP DocumentBuilderFactory
- * class to access this functionality.
- */
- public void setPutCDATAIntoText(boolean value) {
- putCDATAIntoText = value;
- }
-
-
- /**
- * Returns true if namespace conformance is not checked as the
- * DOM tree is built.
- */
- public boolean getDisableNamespaces () {
- return disableNamespaces;
- }
-
- /**
- * Controls whether namespace conformance is checked during DOM
- * tree construction, or (the default) not. In this framework, the
- * DOM Builder is responsible for enforcing all namespace constraints.
- * When enabled, this makes constructing a DOM tree slightly slower.
- * (However, at this time it can't enforce the requirement that
- * parameter entity names not contain colons.)
- */
- public void setDisableNamespaces (boolean value) {
- disableNamespaces = value;
- }
-
- /**
- * Return the result of parsing, after a SAX parser has used this as a
- * content handler during parsing.
- */
- public XmlDocument getDocument() {
- return document;
- }
-
-
- /**
- * Returns the locale to be used for diagnostic messages by
- * this builder, and by documents it produces. This uses
- * the locale of any associated parser.
- */
- public Locale getLocale() {
- return locale;
- }
-
- /**
- * Assigns the locale to be used for diagnostic messages.
- * Multi-language applications, such as web servers dealing with
- * clients from different locales, need the ability to interact
- * with clients in languages other than the server's default.
- *
- * <P>When an XmlDocument is created, its locale is the default
- * locale for the virtual machine. If a parser was recorded,
- * the locale will be associated with that parser.
- *
- * @see #chooseLocale
- */
- public void setLocale(Locale locale)
- throws SAXException
- {
- if (locale == null) {
- locale = Locale.getDefault();
- }
- this.locale = locale;
- }
-
- /**
- * Chooses a client locale to use for diagnostics, using the first
- * language specified in the list that is supported by this builder.
- * That locale is then automatically assigned using <a
- * href="#setLocale(java.util.Locale)">setLocale()</a>. Such a list
- * could be provided by a variety of user preference mechanisms,
- * including the HTTP <em>Accept-Language</em> header field.
- *
- * @see org.apache.crimson.util.MessageCatalog
- *
- * @param languages Array of language specifiers, ordered with the most
- * preferable one at the front. For example, "en-ca" then "fr-ca",
- * followed by "zh_CN". Both RFC 1766 and Java styles are supported.
- * @return The chosen locale, or null.
- */
- public Locale chooseLocale (String languages [])
- throws SAXException
- {
- Locale l = XmlDocument.catalog.chooseLocale (languages);
-
- if (l != null)
- setLocale (l);
- return l;
- }
-
- /*
- * Gets the messages from the resource bundles for the given messageId.
- */
- String getMessage (String messageId) {
- return getMessage (messageId, null);
- }
-
- /*
- * Gets the messages from the resource bundles for the given messageId
- * after formatting it with the parameters passed to it.
- */
- String getMessage (String messageId, Object[] parameters) {
- if (locale == null) {
- getLocale ();
- }
- return XmlDocument.catalog.getMessage (locale, messageId, parameters);
- }
-
-
- //////////////////////////////////////////////////////////////////////
- // ContentHandler callbacks
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Receive an object for locating the origin of SAX document events.
- */
- public void setDocumentLocator(Locator locator) {
- this.locator = locator;
- }
-
- /**
- * This is a factory method, used to create an XmlDocument.
- * Subclasses may override this method, for example to provide
- * document classes with particular behaviors, or provide
- * particular factory behaviours (such as returning elements
- * that support the HTML DOM methods, if they have the right
- * name and are in the right namespace).
- */
- public XmlDocument createDocument ()
- {
- XmlDocument retval = new XmlDocument ();
-
- if (factory != null) {
- retval.setElementFactory(factory);
- }
- return retval;
- }
-
-
- /**
- * Assigns the factory to be associated with documents produced
- * by this builder.
- * @deprecated
- */
- final public void setElementFactory(ElementFactory factory) {
- this.factory = factory;
- }
-
-
- /**
- * Returns the factory to be associated with documents produced
- * by this builder.
- * @deprecated
- */
- final public ElementFactory getElementFactory() {
- return factory;
- }
-
-
- /**
- * Receive notification of the beginning of a document.
- */
- public void startDocument () throws SAXException
- {
- document = createDocument ();
-
- if (locator != null)
- document.setSystemId (locator.getSystemId ());
-
- //
- // XXX don't want fixed size limits! Fix someday. For
- // now, wide trees predominate, not deep ones. This is
- // allowing a _very_ deep tree ... we typically observe
- // depths on the order of a dozen.
- //
- elementStack = new ParentNode [200];
- topOfStack = 0;
- elementStack [topOfStack] = document;
-
- inDTD = false;
- }
-
- /**
- * Receive notification of the end of a document.
- */
- public void endDocument () throws SAXException
- {
- if (topOfStack != 0)
- throw new IllegalStateException (getMessage ("XDB-000"));
- document.trimToSize ();
- }
-
- /**
- * Begin the scope of a prefix-URI Namespace mapping.
- */
- public void startPrefixMapping(String prefix, String uri)
- throws SAXException
- {
- // No-op
- }
-
- /**
- * End the scope of a prefix-URI mapping.
- */
- public void endPrefixMapping(String prefix) throws SAXException {
- // No-op
- }
-
- /**
- * Receive notification of the beginning of an element.
- */
- public void startElement(String namespaceURI, String localName,
- String qName, Attributes attributes)
- throws SAXException
- {
- //
- // Convert set of attributes to DOM representation.
- //
- AttributeSet attSet = null;
- int length = attributes.getLength();
- if (length != 0) {
- try {
- attSet = AttributeSet.createAttributeSet1(attributes);
- } catch (DOMException ex) {
- throw new SAXParseException(getMessage("XDB-002",
- new Object[] { ex.getMessage() }), locator, ex);
- }
- }
-
- //
- // Then create the element, associate its attributes, and
- // stack it for later addition.
- //
- ElementNode e = null;
- try {
- e = (ElementNode) document.createElementEx(qName);
- } catch (DOMException ex) {
- throw new SAXParseException(getMessage("XDB-004",
- new Object[] { ex.getMessage() }), locator, ex);
- }
- if (attributes instanceof AttributesEx) {
- e.setIdAttributeName(
- ((AttributesEx)attributes).getIdAttributeName());
- }
- if (length != 0) {
- e.setAttributes(attSet);
- }
-
- elementStack[topOfStack++].appendChild(e);
- elementStack[topOfStack] = e;
- }
-
- /**
- * Receive notification of the end of an element.
- */
- public void endElement(String namespaceURI, String localName,
- String qName)
- throws SAXException
- {
- ParentNode e = (ParentNode) elementStack[topOfStack];
-
- elementStack[topOfStack--] = null;
-
- // Trusting that the SAX parser is correct, and hasn't
- // mismatched start/end element callbacks.
- // if (!tag.equals (e.getTagName ()))
- // throw new SAXParseException ((getMessage ("XDB-009", new
- // Object[] { tag, e.getTagName () })), locator);
-
- e.reduceWaste(); // use less space
- }
-
- /**
- * Receive notification of character data.
- */
- public void characters(char buf [], int offset, int len)
- throws SAXException
- {
- ParentNode top = elementStack [topOfStack];
-
- if (inCDataSection) {
- String temp = new String (buf, offset, len);
- CDATASection section;
-
- section = (CDATASection) top.getLastChild ();
- section.appendData (temp);
- return;
- }
-
-
- try {
- NodeBase lastChild = (NodeBase) top.getLastChild ();
- if (lastChild != null && lastChild.getClass() == TextNode.class) {
- // Merge only TextNode data and not CDataNode data
- String tmp = new String (buf, offset, len);
- ((TextNode)lastChild).appendData (tmp);
- } else {
- TextNode text = document.newText (buf, offset, len);
- top.appendChild (text);
- }
- } catch (DOMException ex) {
- throw new SAXParseException(getMessage("XDB-004",
- new Object[] { ex.getMessage() }), locator, ex);
- }
- }
-
- /**
- * Receive notification of ignorable whitespace in element content.
- *
- * Reports ignorable whitespace; if lexical information is not ignored
- * the whitespace reported here is recorded in a DOM text (or CDATA, as
- * appropriate) node.
- *
- * @param buf holds text characters
- * @param offset initial index of characters in <em>buf</em>
- * @param len how many characters are being passed
- * @exception SAXException as appropriate
- */
- public void ignorableWhitespace(char buf [], int offset, int len)
- throws SAXException
- {
- if (ignoreWhitespace)
- return;
-
- characters(buf, offset, len);
- }
-
- /**
- * Receive notification of a processing instruction.
- */
- public void processingInstruction(String name, String instruction)
- throws SAXException
- {
- // Ignore PIs in DTD for DOM support
- if (inDTD)
- return;
-
- ParentNode top = elementStack [topOfStack];
- PINode pi;
-
- try {
- pi = (PINode) document.createProcessingInstruction (name,
- instruction);
- top.appendChild (pi);
- } catch (DOMException ex) {
- throw new SAXParseException(getMessage("XDB-004",
- new Object[] { ex.getMessage() }), locator, ex);
- }
- }
-
- /**
- * Receive notification of a skipped entity.
- */
- public void skippedEntity(String name) throws SAXException {
- // No-op
- }
-
-
- //////////////////////////////////////////////////////////////////////
- // org.xml.sax.ext.LexicalHandler callbacks
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Report the start of DTD declarations, if any.
- */
- public void startDTD(String name, String publicId, String systemId)
- throws SAXException
- {
- DOMImplementation impl = document.getImplementation();
- doctype = (Doctype)impl.createDocumentType(name, publicId, systemId);
-
- // Set the owner since DOM2 specifies this to be null
- doctype.setOwnerDocument(document);
-
- inDTD = true;
- }
-
- /**
- * Report the end of DTD declarations.
- */
- public void endDTD() throws SAXException {
- document.appendChild(doctype);
- inDTD = false;
- }
-
- /**
- * Report the beginning of an entity in content.
- */
- public void startEntity(String name) throws SAXException {
- // Our parser doesn't report Paramater entities. Need to make
- // changes for that.
-
- // Ignore entity refs while parsing DTD
- if (expandEntityRefs || inDTD) {
- return;
- }
-
- EntityReference e = document.createEntityReference(name);
- elementStack[topOfStack++].appendChild(e);
- elementStack[topOfStack] = (ParentNode)e;
- }
-
- /**
- * Report the end of an entity.
- */
- public void endEntity(String name) throws SAXException {
- // Ignore entity refs while parsing DTD
- if (inDTD) {
- return;
- }
-
- ParentNode entity = elementStack[topOfStack];
-
- if (!(entity instanceof EntityReference))
- return;
-
- entity.setReadonly(true);
- elementStack[topOfStack--] = null;
- if (!name.equals(entity.getNodeName())) {
- throw new SAXParseException(getMessage("XDB-011",
- new Object[] { name, entity.getNodeName() }), locator);
- }
- }
-
- /**
- * Report the start of a CDATA section.
- *
- * <P>If this builder is set to record lexical information then this
- * callback arranges that character data (and ignorable whitespace) be
- * recorded as part of a CDATA section, until the matching
- * <em>endCDATA</em> method is called.
- */
- public void startCDATA() throws SAXException {
- if (putCDATAIntoText) {
- return;
- }
-
- CDATASection text = document.createCDATASection("");
- ParentNode top = elementStack[topOfStack];
-
- try {
- inCDataSection = true;
- top.appendChild(text);
- } catch (DOMException ex) {
- throw new SAXParseException(getMessage("XDB-004",
- new Object[] { ex.getMessage() }), locator, ex);
- }
- }
-
- /**
- * Report the end of a CDATA section.
- */
- public void endCDATA() throws SAXException {
- inCDataSection = false;
- }
-
- /**
- * Report an XML comment anywhere in the document.
- */
- public void comment(char[] ch, int start, int length) throws SAXException {
- // Ignore comments if lexical info is to be ignored,
- // or if parsing the DTD
- if (ignoreComments || inDTD) {
- return;
- }
-
- String text = new String(ch, start, length);
- Comment comment = document.createComment(text);
- ParentNode top = elementStack[topOfStack];
-
- try {
- top.appendChild(comment);
- } catch (DOMException ex) {
- throw new SAXParseException(getMessage("XDB-004",
- new Object[] { ex.getMessage() }), locator, ex);
- }
- }
-
-
- //////////////////////////////////////////////////////////////////////
- // org.xml.sax.ext.DeclHandler callbacks
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Report an element type declaration.
- */
- public void elementDecl(String name, String model) throws SAXException {
- // ignored
- }
-
- /**
- * Report an attribute type declaration.
- */
- public void attributeDecl(String eName, String aName, String type,
- String valueDefault, String value)
- throws SAXException
- {
- // ignored
- }
-
- /**
- * Report an internal entity declaration.
- */
- public void internalEntityDecl(String name, String value)
- throws SAXException
- {
- // SAX2 reports PEDecls which we ignore for DOM2. SAX2 also reports
- // only the first defined GEDecl which matches with DOM2.
- if (!name.startsWith("%")) {
- doctype.addEntityNode(name, value);
- }
- }
-
- /**
- * Report a parsed external entity declaration.
- */
- public void externalEntityDecl(String name, String publicId,
- String systemId)
- throws SAXException
- {
- // SAX2 reports PEDecls which we ignore for DOM2. SAX2 also reports
- // only the first defined GEDecl which matches with DOM2.
- if (!name.startsWith("%")) {
- doctype.addEntityNode(name, publicId, systemId, null);
- }
- }
-
-
- //////////////////////////////////////////////////////////////////////
- // org.xml.sax.DTDHandler callbacks
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Receive notification of a notation declaration event.
- */
- public void notationDecl(String n, String p, String s)
- throws SAXException
- {
- doctype.addNotation(n, p, s);
- }
-
- /**
- * Receive notification of an unparsed entity declaration event.
- */
- public void unparsedEntityDecl(String name, String publicId,
- String systemId, String notation)
- throws SAXException
- {
- doctype.addEntityNode(name, publicId, systemId, notation);
- }
- }