- /*
- * The Apache Software License, Version 1.1
- *
- *
- * Copyright (c) 1999-2004 The Apache Software Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.apache.org. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
- package com.sun.org.apache.xerces.internal.impl;
-
- import java.io.CharConversionException;
- import java.io.EOFException;
- import java.io.IOException;
-
- import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription;
- import com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException;
- import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager;
- import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
- import com.sun.org.apache.xerces.internal.util.XMLChar;
- import com.sun.org.apache.xerces.internal.util.XMLEntityDescriptionImpl;
- import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
- import com.sun.org.apache.xerces.internal.xni.Augmentations;
- import com.sun.org.apache.xerces.internal.xni.NamespaceContext;
- import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
- import com.sun.org.apache.xerces.internal.xni.XMLString;
- import com.sun.org.apache.xerces.internal.xni.XNIException;
- import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
- import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
- import com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner;
- import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
-
- /**
- * This class is responsible for scanning XML document structure
- * and content. The scanner acts as the source for the document
- * information which is communicated to the document handler.
- * <p>
- * This component requires the following features and properties from the
- * component manager that uses it:
- * <ul>
- * <li>http://xml.org/sax/features/namespaces</li>
- * <li>http://xml.org/sax/features/validation</li>
- * <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
- * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
- * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
- * <li>http://apache.org/xml/properties/internal/symbol-table</li>
- * <li>http://apache.org/xml/properties/internal/error-reporter</li>
- * <li>http://apache.org/xml/properties/internal/entity-manager</li>
- * <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
- * </ul>
- *
- * @author Glenn Marcy, IBM
- * @author Andy Clark, IBM
- * @author Arnaud Le Hors, IBM
- * @author Eric Ye, IBM
- *
- * @version $Id: XMLDocumentScannerImpl.java,v 1.45 2004/04/30 15:36:38 mrglavas Exp $
- */
- public class XMLDocumentScannerImpl
- extends XMLDocumentFragmentScannerImpl {
-
- //
- // Constants
- //
-
- // scanner states
-
- /** Scanner state: XML declaration. */
- protected static final int SCANNER_STATE_XML_DECL = 0;
-
- /** Scanner state: prolog. */
- protected static final int SCANNER_STATE_PROLOG = 5;
-
- /** Scanner state: trailing misc. */
- protected static final int SCANNER_STATE_TRAILING_MISC = 12;
-
- /** Scanner state: DTD internal declarations. */
- protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 17;
-
- /** Scanner state: open DTD external subset. */
- protected static final int SCANNER_STATE_DTD_EXTERNAL = 18;
-
- /** Scanner state: DTD external declarations. */
- protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 19;
-
- // feature identifiers
-
- /** Feature identifier: load external DTD. */
- protected static final String LOAD_EXTERNAL_DTD =
- Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;
-
- /** Feature identifier: load external DTD. */
- protected static final String DISALLOW_DOCTYPE_DECL_FEATURE =
- Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE;
-
- // property identifiers
-
- /** Property identifier: DTD scanner. */
- protected static final String DTD_SCANNER =
- Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;
-
- /** property identifier: ValidationManager */
- protected static final String VALIDATION_MANAGER =
- Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;
-
- /** property identifier: NamespaceContext */
- protected static final String NAMESPACE_CONTEXT =
- Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY;
-
-
-
- // recognized features and properties
-
- /** Recognized features. */
- private static final String[] RECOGNIZED_FEATURES = {
- LOAD_EXTERNAL_DTD,
- DISALLOW_DOCTYPE_DECL_FEATURE,
- };
-
- /** Feature defaults. */
- private static final Boolean[] FEATURE_DEFAULTS = {
- Boolean.TRUE,
- Boolean.FALSE,
- };
-
- /** Recognized properties. */
- private static final String[] RECOGNIZED_PROPERTIES = {
- DTD_SCANNER,
- VALIDATION_MANAGER,
- NAMESPACE_CONTEXT,
- };
-
- /** Property defaults. */
- private static final Object[] PROPERTY_DEFAULTS = {
- null,
- null,
- null,
- };
-
- //
- // Data
- //
-
- // properties
-
- /** DTD scanner. */
- protected XMLDTDScanner fDTDScanner;
- /** Validation manager . */
- protected ValidationManager fValidationManager;
-
- // protected data
-
- /** Scanning DTD. */
- protected boolean fScanningDTD;
-
- // other info
-
- /** Doctype name. */
- protected String fDoctypeName;
-
- /** Doctype declaration public identifier. */
- protected String fDoctypePublicId;
-
- /** Doctype declaration system identifier. */
- protected String fDoctypeSystemId;
-
- /** Namespace support. */
- protected NamespaceContext fNamespaceContext = new NamespaceSupport();
-
- // features
-
- /** Load external DTD. */
- protected boolean fLoadExternalDTD = true;
-
- /** Disallow doctype declaration. */
- protected boolean fDisallowDoctype = false;
-
- // state
-
- /** Seen doctype declaration. */
- protected boolean fSeenDoctypeDecl;
-
- // dispatchers
-
- /** XML declaration dispatcher. */
- protected Dispatcher fXMLDeclDispatcher = new XMLDeclDispatcher();
-
- /** Prolog dispatcher. */
- protected Dispatcher fPrologDispatcher = new PrologDispatcher();
-
- /** DTD dispatcher. */
- protected Dispatcher fDTDDispatcher = new DTDDispatcher();
-
- /** Trailing miscellaneous section dispatcher. */
- protected Dispatcher fTrailingMiscDispatcher = new TrailingMiscDispatcher();
-
- // temporary variables
-
- /** Array of 3 strings. */
- private String[] fStrings = new String[3];
-
- /** String. */
- private XMLString fString = new XMLString();
-
- /** String buffer. */
- private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
-
- /** External subset source. */
- private XMLInputSource fExternalSubsetSource = null;
-
- //
- // Constructors
- //
-
- /** Default constructor. */
- public XMLDocumentScannerImpl() {} // <init>()
-
- //
- // XMLDocumentScanner methods
- //
-
- /**
- * Sets the input source.
- *
- * @param inputSource The input source.
- *
- * @throws IOException Thrown on i/o error.
- */
- public void setInputSource(XMLInputSource inputSource) throws IOException {
- fEntityManager.setEntityHandler(this);
- fEntityManager.startDocumentEntity(inputSource);
- //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
- } // setInputSource(XMLInputSource)
-
- //
- // XMLComponent methods
- //
-
- /**
- * Resets the component. The component can query the component manager
- * about any features and properties that affect the operation of the
- * component.
- *
- * @param componentManager The component manager.
- *
- * @throws SAXException Thrown by component on initialization error.
- * For example, if a feature or property is
- * required for the operation of the component, the
- * component manager may throw a
- * SAXNotRecognizedException or a
- * SAXNotSupportedException.
- */
- public void reset(XMLComponentManager componentManager)
- throws XMLConfigurationException {
-
- super.reset(componentManager);
-
- // other settings
- fDoctypeName = null;
- fDoctypePublicId = null;
- fDoctypeSystemId = null;
- fSeenDoctypeDecl = false;
- fScanningDTD = false;
- fExternalSubsetSource = null;
-
- if (!fParserSettings) {
- // parser settings have not been changed
- fNamespaceContext.reset();
- // setup dispatcher
- setScannerState(SCANNER_STATE_XML_DECL);
- setDispatcher(fXMLDeclDispatcher);
- return;
- }
-
- // xerces features
- try {
- fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD);
- }
- catch (XMLConfigurationException e) {
- fLoadExternalDTD = true;
- }
- try {
- fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE);
- }
- catch (XMLConfigurationException e) {
- fDisallowDoctype = false;
- }
-
- // xerces properties
- fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER);
- try {
- fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER);
- }
- catch (XMLConfigurationException e) {
- fValidationManager = null;
- }
-
- try {
- fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT);
- }
- catch (XMLConfigurationException e) { }
- if (fNamespaceContext == null) {
- fNamespaceContext = new NamespaceSupport();
- }
- fNamespaceContext.reset();
-
- // setup dispatcher
- setScannerState(SCANNER_STATE_XML_DECL);
- setDispatcher(fXMLDeclDispatcher);
-
- } // reset(XMLComponentManager)
-
- /**
- * Returns a list of feature identifiers that are recognized by
- * this component. This method may return null if no features
- * are recognized by this component.
- */
- public String[] getRecognizedFeatures() {
- String[] featureIds = super.getRecognizedFeatures();
- int length = featureIds != null ? featureIds.length : 0;
- String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length];
- if (featureIds != null) {
- System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length);
- }
- System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length);
- return combinedFeatureIds;
- } // getRecognizedFeatures():String[]
-
- /**
- * Sets the state of a feature. This method is called by the component
- * manager any time after reset when a feature changes state.
- * <p>
- * <strong>Note:</strong> Components should silently ignore features
- * that do not affect the operation of the component.
- *
- * @param featureId The feature identifier.
- * @param state The state of the feature.
- *
- * @throws SAXNotRecognizedException The component should not throw
- * this exception.
- * @throws SAXNotSupportedException The component should not throw
- * this exception.
- */
- public void setFeature(String featureId, boolean state)
- throws XMLConfigurationException {
-
- super.setFeature(featureId, state);
-
- // Xerces properties
- if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
- final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
-
- if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() &&
- featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) {
- fLoadExternalDTD = state;
- return;
- }
- else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() &&
- featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) {
- fDisallowDoctype = state;
- return;
- }
- }
-
- } // setFeature(String,boolean)
-
- /**
- * Returns a list of property identifiers that are recognized by
- * this component. This method may return null if no properties
- * are recognized by this component.
- */
- public String[] getRecognizedProperties() {
- String[] propertyIds = super.getRecognizedProperties();
- int length = propertyIds != null ? propertyIds.length : 0;
- String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length];
- if (propertyIds != null) {
- System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length);
- }
- System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length);
- return combinedPropertyIds;
- } // getRecognizedProperties():String[]
-
- /**
- * Sets the value of a property. This method is called by the component
- * manager any time after reset when a property changes value.
- * <p>
- * <strong>Note:</strong> Components should silently ignore properties
- * that do not affect the operation of the component.
- *
- * @param propertyId The property identifier.
- * @param value The value of the property.
- *
- * @throws SAXNotRecognizedException The component should not throw
- * this exception.
- * @throws SAXNotSupportedException The component should not throw
- * this exception.
- */
- public void setProperty(String propertyId, Object value)
- throws XMLConfigurationException {
-
- super.setProperty(propertyId, value);
-
- // Xerces properties
- if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
- final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
-
- if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() &&
- propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) {
- fDTDScanner = (XMLDTDScanner)value;
- }
- if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() &&
- propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) {
- if (value != null) {
- fNamespaceContext = (NamespaceContext)value;
- }
- }
-
- return;
- }
-
- } // setProperty(String,Object)
-
- /**
- * Returns the default state for a feature, or null if this
- * component does not want to report a default value for this
- * feature.
- *
- * @param featureId The feature identifier.
- *
- * @since Xerces 2.2.0
- */
- public Boolean getFeatureDefault(String featureId) {
-
- for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
- if (RECOGNIZED_FEATURES[i].equals(featureId)) {
- return FEATURE_DEFAULTS[i];
- }
- }
- return super.getFeatureDefault(featureId);
- } // getFeatureDefault(String):Boolean
-
- /**
- * Returns the default state for a property, or null if this
- * component does not want to report a default value for this
- * property.
- *
- * @param propertyId The property identifier.
- *
- * @since Xerces 2.2.0
- */
- public Object getPropertyDefault(String propertyId) {
- for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
- if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
- return PROPERTY_DEFAULTS[i];
- }
- }
- return super.getPropertyDefault(propertyId);
- } // getPropertyDefault(String):Object
-
- //
- // XMLEntityHandler methods
- //
-
- /**
- * This method notifies of the start of an entity. The DTD has the
- * pseudo-name of "[dtd]" parameter entity names start with '%'; and
- * general entities are just specified by their name.
- *
- * @param name The name of the entity.
- * @param identifier The resource identifier.
- * @param encoding The auto-detected IANA encoding name of the entity
- * stream. This value will be null in those situations
- * where the entity encoding is not auto-detected (e.g.
- * internal entities or a document entity that is
- * parsed from a java.io.Reader).
- *
- * @throws XNIException Thrown by handler to signal an error.
- */
- public void startEntity(String name,
- XMLResourceIdentifier identifier,
- String encoding, Augmentations augs) throws XNIException {
-
- super.startEntity(name, identifier, encoding, augs);
-
- // prepare to look for a TextDecl if external general entity
- if (!name.equals("[xml]") && fEntityScanner.isExternal()) {
- setScannerState(SCANNER_STATE_TEXT_DECL);
- }
-
- // call handler
- if (fDocumentHandler != null && name.equals("[xml]")) {
- fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);
- }
-
- } // startEntity(String,identifier,String)
-
- /**
- * This method notifies the end of an entity. The DTD has the pseudo-name
- * of "[dtd]" parameter entity names start with '%'; and general entities
- * are just specified by their name.
- *
- * @param name The name of the entity.
- *
- * @throws XNIException Thrown by handler to signal an error.
- */
- public void endEntity(String name, Augmentations augs) throws XNIException {
-
- super.endEntity(name, augs);
-
- // call handler
- if (fDocumentHandler != null && name.equals("[xml]")) {
- fDocumentHandler.endDocument(null);
- }
-
- } // endEntity(String)
-
- //
- // Protected methods
- //
-
- // dispatcher factory methods
-
- /** Creates a content dispatcher. */
- protected Dispatcher createContentDispatcher() {
- return new ContentDispatcher();
- } // createContentDispatcher():Dispatcher
-
- // scanning methods
-
- /** Scans a doctype declaration. */
- protected boolean scanDoctypeDecl() throws IOException, XNIException {
-
- // spaces
- if (!fEntityScanner.skipSpaces()) {
- reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL",
- null);
- }
-
- // root element name
- fDoctypeName = fEntityScanner.scanName();
- if (fDoctypeName == null) {
- reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null);
- }
-
- // external id
- if (fEntityScanner.skipSpaces()) {
- scanExternalID(fStrings, false);
- fDoctypeSystemId = fStrings[0];
- fDoctypePublicId = fStrings[1];
- fEntityScanner.skipSpaces();
- }
-
- fHasExternalDTD = fDoctypeSystemId != null;
-
- // Attempt to locate an external subset with an external subset resolver.
- if (!fHasExternalDTD && fExternalSubsetResolver != null) {
- XMLDTDDescription desc = new XMLDTDDescription(null,
- null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null, fDoctypeName);
- fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(desc);
- fHasExternalDTD = fExternalSubsetSource != null;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- // NOTE: I don't like calling the doctypeDecl callback until
- // end of the *full* doctype line (including internal
- // subset) is parsed correctly but SAX2 requires that
- // it knows the root element name and public and system
- // identifier for the startDTD call. -Ac
- if (fExternalSubsetSource == null) {
- fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
- }
- else {
- fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null);
- }
- }
-
- // is there an internal subset?
- boolean internalSubset = true;
- if (!fEntityScanner.skipChar('[')) {
- internalSubset = false;
- fEntityScanner.skipSpaces();
- if (!fEntityScanner.skipChar('>')) {
- reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
- }
- fMarkupDepth--;
- }
-
- return internalSubset;
-
- } // scanDoctypeDecl():boolean
-
- //
- // Private methods
- //
-
- /** Returns the scanner state name. */
- protected String getScannerStateName(int state) {
-
- switch (state) {
- case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL";
- case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG";
- case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC";
- case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS";
- case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL";
- case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS";
- }
- return super.getScannerStateName(state);
-
- } // getScannerStateName(int):String
-
- //
- // Classes
- //
-
- /**
- * Dispatcher to handle XMLDecl scanning.
- *
- * @author Andy Clark, IBM
- */
- protected final class XMLDeclDispatcher
- implements Dispatcher {
-
- //
- // Dispatcher methods
- //
-
- /**
- * Dispatch an XML "event".
- *
- * @param complete True if this dispatcher is intended to scan
- * and dispatch as much as possible.
- *
- * @return True if there is more to dispatch either from this
- * or a another dispatcher.
- *
- * @throws IOException Thrown on i/o error.
- * @throws XNIException Thrown on parse error.
- */
- public boolean dispatch(boolean complete)
- throws IOException, XNIException {
-
- // next dispatcher is prolog regardless of whether there
- // is an XMLDecl in this document
- setScannerState(SCANNER_STATE_PROLOG);
- setDispatcher(fPrologDispatcher);
-
- // scan XMLDecl
- try {
- if (fEntityScanner.skipString("<?xml")) {
- fMarkupDepth++;
- // NOTE: special case where document starts with a PI
- // whose name starts with "xml" (e.g. "xmlfoo")
- if (XMLChar.isName(fEntityScanner.peekChar())) {
- fStringBuffer.clear();
- fStringBuffer.append("xml");
- if (fNamespaces) {
- while (XMLChar.isNCName(fEntityScanner.peekChar())) {
- fStringBuffer.append((char)fEntityScanner.scanChar());
- }
- }
- else {
- while (XMLChar.isName(fEntityScanner.peekChar())) {
- fStringBuffer.append((char)fEntityScanner.scanChar());
- }
- }
- String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
- scanPIData(target, fString);
- }
-
- // standard XML declaration
- else {
- scanXMLDeclOrTextDecl(false);
- }
- }
- fEntityManager.fCurrentEntity.mayReadChunks = true;
-
- // if no XMLDecl, then scan piece of prolog
- return true;
- }
- // encoding errors
- catch (MalformedByteSequenceException e) {
- fErrorReporter.reportError(e.getDomain(), e.getKey(),
- e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
- return false;
- }
- catch (CharConversionException e) {
- reportFatalError("CharConversionFailure", null);
- return false;
- }
- // premature end of file
- catch (EOFException e) {
- reportFatalError("PrematureEOF", null);
- return false;
- //throw e;
- }
-
-
- } // dispatch(boolean):boolean
-
- } // class XMLDeclDispatcher
-
- /**
- * Dispatcher to handle prolog scanning.
- *
- * @author Andy Clark, IBM
- */
- protected final class PrologDispatcher
- implements Dispatcher {
-
- //
- // Dispatcher methods
- //
-
- /**
- * Dispatch an XML "event".
- *
- * @param complete True if this dispatcher is intended to scan
- * and dispatch as much as possible.
- *
- * @return True if there is more to dispatch either from this
- * or a another dispatcher.
- *
- * @throws IOException Thrown on i/o error.
- * @throws XNIException Thrown on parse error.
- */
- public boolean dispatch(boolean complete)
- throws IOException, XNIException {
-
- try {
- boolean again;
- do {
- again = false;
- switch (fScannerState) {
- case SCANNER_STATE_PROLOG: {
- fEntityScanner.skipSpaces();
- if (fEntityScanner.skipChar('<')) {
- setScannerState(SCANNER_STATE_START_OF_MARKUP);
- again = true;
- }
- else if (fEntityScanner.skipChar('&')) {
- setScannerState(SCANNER_STATE_REFERENCE);
- again = true;
- }
- else {
- setScannerState(SCANNER_STATE_CONTENT);
- again = true;
- }
- break;
- }
- case SCANNER_STATE_START_OF_MARKUP: {
- fMarkupDepth++;
- if (fEntityScanner.skipChar('!')) {
- if (fEntityScanner.skipChar('-')) {
- if (!fEntityScanner.skipChar('-')) {
- reportFatalError("InvalidCommentStart",
- null);
- }
- setScannerState(SCANNER_STATE_COMMENT);
- again = true;
- }
- else if (fEntityScanner.skipString("DOCTYPE")) {
- setScannerState(SCANNER_STATE_DOCTYPE);
- again = true;
- }
- else {
- reportFatalError("MarkupNotRecognizedInProlog",
- null);
- }
- }
- else if (isValidNameStartChar(fEntityScanner.peekChar())) {
- setScannerState(SCANNER_STATE_ROOT_ELEMENT);
- setDispatcher(fContentDispatcher);
- return true;
- }
- else if (fEntityScanner.skipChar('?')) {
- setScannerState(SCANNER_STATE_PI);
- again = true;
- }
- else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
- setScannerState(SCANNER_STATE_ROOT_ELEMENT);
- setDispatcher(fContentDispatcher);
- return true;
- }
- else {
- reportFatalError("MarkupNotRecognizedInProlog",
- null);
- }
- break;
- }
- case SCANNER_STATE_COMMENT: {
- scanComment();
- setScannerState(SCANNER_STATE_PROLOG);
- break;
- }
- case SCANNER_STATE_PI: {
- scanPI();
- setScannerState(SCANNER_STATE_PROLOG);
- break;
- }
- case SCANNER_STATE_DOCTYPE: {
- if (fDisallowDoctype) {
- reportFatalError("DoctypeNotAllowed", null);
- }
- if (fSeenDoctypeDecl) {
- reportFatalError("AlreadySeenDoctype", null);
- }
- fSeenDoctypeDecl = true;
-
- // scanDoctypeDecl() sends XNI doctypeDecl event that
- // in SAX is converted to startDTD() event.
- if (scanDoctypeDecl()) {
- setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
- setDispatcher(fDTDDispatcher);
- return true;
- }
-
- // handle external subset
- if (fDoctypeSystemId != null) {
- if (((fValidation || fLoadExternalDTD)
- && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
- setScannerState(SCANNER_STATE_DTD_EXTERNAL);
- setDispatcher(fDTDDispatcher);
- return true;
- }
- }
- else if (fExternalSubsetSource != null) {
- if (((fValidation || fLoadExternalDTD)
- && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
- // This handles the case of a DOCTYPE that had neither an internal subset or an external subset.
- fDTDScanner.setInputSource(fExternalSubsetSource);
- fExternalSubsetSource = null;
- setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
- setDispatcher(fDTDDispatcher);
- return true;
- }
- }
-
- // Send endDTD() call if:
- // a) systemId is null or if an external subset resolver could not locate an external subset.
- // b) "load-external-dtd" and validation are false
- // c) DTD grammar is cached
-
- // in XNI this results in 3 events: doctypeDecl, startDTD, endDTD
- // in SAX this results in 2 events: startDTD, endDTD
- fDTDScanner.setInputSource(null);
- setScannerState(SCANNER_STATE_PROLOG);
- break;
- }
- case SCANNER_STATE_CONTENT: {
- reportFatalError("ContentIllegalInProlog", null);
- fEntityScanner.scanChar();
- }
- case SCANNER_STATE_REFERENCE: {
- reportFatalError("ReferenceIllegalInProlog", null);
- }
- }
- } while (complete || again);
-
- if (complete) {
- if (fEntityScanner.scanChar() != '<') {
- reportFatalError("RootElementRequired", null);
- }
- setScannerState(SCANNER_STATE_ROOT_ELEMENT);
- setDispatcher(fContentDispatcher);
- }
- }
- // encoding errors
- catch (MalformedByteSequenceException e) {
- fErrorReporter.reportError(e.getDomain(), e.getKey(),
- e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
- return false;
- }
- catch (CharConversionException e) {
- reportFatalError("CharConversionFailure", null);
- return false;
- }
- // premature end of file
- catch (EOFException e) {
- reportFatalError("PrematureEOF", null);
- return false;
- //throw e;
- }
-
- return true;
-
- } // dispatch(boolean):boolean
-
- } // class PrologDispatcher
-
- /**
- * Dispatcher to handle the internal and external DTD subsets.
- *
- * @author Andy Clark, IBM
- */
- protected final class DTDDispatcher
- implements Dispatcher {
-
- //
- // Dispatcher methods
- //
-
- /**
- * Dispatch an XML "event".
- *
- * @param complete True if this dispatcher is intended to scan
- * and dispatch as much as possible.
- *
- * @return True if there is more to dispatch either from this
- * or a another dispatcher.
- *
- * @throws IOException Thrown on i/o error.
- * @throws XNIException Thrown on parse error.
- */
- public boolean dispatch(boolean complete)
- throws IOException, XNIException {
- fEntityManager.setEntityHandler(null);
- try {
- boolean again;
- XMLEntityDescriptionImpl entityDescription = new XMLEntityDescriptionImpl();
- do {
- again = false;
- switch (fScannerState) {
- case SCANNER_STATE_DTD_INTERNAL_DECLS: {
- // REVISIT: Should there be a feature for
- // the "complete" parameter?
- boolean completeDTD = true;
-
- boolean moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD);
- if (!moreToScan) {
- // end doctype declaration
- if (!fEntityScanner.skipChar(']')) {
- reportFatalError("EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET",
- null);
- }
- fEntityScanner.skipSpaces();
- if (!fEntityScanner.skipChar('>')) {
- reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
- }
- fMarkupDepth--;
-
- // scan external subset next
- if (fDoctypeSystemId != null) {
- if ((fValidation || fLoadExternalDTD)
- && (fValidationManager == null || !fValidationManager.isCachedDTD())) {
- setScannerState(SCANNER_STATE_DTD_EXTERNAL);
- break;
- }
- }
- else if (fExternalSubsetSource != null) {
- if ((fValidation || fLoadExternalDTD)
- && (fValidationManager == null || !fValidationManager.isCachedDTD())) {
- // This handles the case of a DOCTYPE that only had an internal subset.
- fDTDScanner.setInputSource(fExternalSubsetSource);
- fExternalSubsetSource = null;
- setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
- break;
- }
- }
-
- // break out of this dispatcher.
- setScannerState(SCANNER_STATE_PROLOG);
- setDispatcher(fPrologDispatcher);
- fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
- return true;
- }
- break;
- }
- case SCANNER_STATE_DTD_EXTERNAL: {
- entityDescription.setDescription("[dtd]", fDoctypePublicId, fDoctypeSystemId, null, null);
- XMLInputSource xmlInputSource =
- fEntityManager.resolveEntity(entityDescription);
- fDTDScanner.setInputSource(xmlInputSource);
- setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
- again = true;
- break;
- }
- case SCANNER_STATE_DTD_EXTERNAL_DECLS: {
- // REVISIT: Should there be a feature for
- // the "complete" parameter?
- boolean completeDTD = true;
- boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD);
- if (!moreToScan) {
- setScannerState(SCANNER_STATE_PROLOG);
- setDispatcher(fPrologDispatcher);
- fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
- return true;
- }
- break;
- }
- default: {
- throw new XNIException("DTDDispatcher#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')');
- }
- }
- } while (complete || again);
- }
- // encoding errors
- catch (MalformedByteSequenceException e) {
- fErrorReporter.reportError(e.getDomain(), e.getKey(),
- e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
- return false;
- }
- catch (CharConversionException e) {
- reportFatalError("CharConversionFailure", null);
- return false;
- }
- // premature end of file
- catch (EOFException e) {
- reportFatalError("PrematureEOF", null);
- return false;
- //throw e;
- }
-
- // cleanup
- finally {
- fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
- }
-
- return true;
-
- } // dispatch(boolean):boolean
-
- } // class DTDDispatcher
-
- /**
- * Dispatcher to handle content scanning.
- *
- * @author Andy Clark, IBM
- * @author Eric Ye, IBM
- */
- protected class ContentDispatcher
- extends FragmentContentDispatcher {
-
- //
- // Protected methods
- //
-
- // hooks
-
- // NOTE: These hook methods are added so that the full document
- // scanner can share the majority of code with this class.
-
- /**
- * Scan for DOCTYPE hook. This method is a hook for subclasses
- * to add code to handle scanning for a the "DOCTYPE" string
- * after the string "<!" has been scanned.
- *
- * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
- * was not scanned.
- */
- protected boolean scanForDoctypeHook()
- throws IOException, XNIException {
-
- if (fEntityScanner.skipString("DOCTYPE")) {
- setScannerState(SCANNER_STATE_DOCTYPE);
- return true;
- }
- return false;
-
- } // scanForDoctypeHook():boolean
-
- /**
- * Element depth iz zero. This methos is a hook for subclasses
- * to add code to handle when the element depth hits zero. When
- * scanning a document fragment, an element depth of zero is
- * normal. However, when scanning a full XML document, the
- * scanner must handle the trailing miscellanous section of
- * the document after the end of the document's root element.
- *
- * @return True if the caller should stop and return true which
- * allows the scanner to switch to a new scanning
- * dispatcher. A return value of false indicates that
- * the content dispatcher should continue as normal.
- */
- protected boolean elementDepthIsZeroHook()
- throws IOException, XNIException {
-
- setScannerState(SCANNER_STATE_TRAILING_MISC);
- setDispatcher(fTrailingMiscDispatcher);
- return true;
-
- } // elementDepthIsZeroHook():boolean
-
- /**
- * Scan for root element hook. This method is a hook for
- * subclasses to add code that handles scanning for the root
- * element. When scanning a document fragment, there is no
- * "root" element. However, when scanning a full XML document,
- * the scanner must handle the root element specially.
- *
- * @return True if the caller should stop and return true which
- * allows the scanner to switch to a new scanning
- * dispatcher. A return value of false indicates that
- * the content dispatcher should continue as normal.
- */
- protected boolean scanRootElementHook()
- throws IOException, XNIException {
-
- if (fExternalSubsetResolver != null && !fSeenDoctypeDecl
- && !fDisallowDoctype && (fValidation || fLoadExternalDTD)) {
- scanStartElementName();
- resolveExternalSubsetAndRead();
- if (scanStartElementAfterName()) {
- setScannerState(SCANNER_STATE_TRAILING_MISC);
- setDispatcher(fTrailingMiscDispatcher);
- return true;
- }
- }
- else if (scanStartElement()) {
- setScannerState(SCANNER_STATE_TRAILING_MISC);
- setDispatcher(fTrailingMiscDispatcher);
- return true;
- }
- return false;
-
- } // scanRootElementHook():boolean
-
- /**
- * End of file hook. This method is a hook for subclasses to
- * add code that handles the end of file. The end of file in
- * a document fragment is OK if the markup depth is zero.
- * However, when scanning a full XML document, an end of file
- * is always premature.
- */
- protected void endOfFileHook(EOFException e)
- throws IOException, XNIException {
-
- reportFatalError("PrematureEOF", null);
- // in case continue-after-fatal-error set, should not do this...
- //throw e;
-
- } // endOfFileHook()
-
- /**
- * <p>Attempt to locate an external subset for a document that does not otherwise
- * have one. If an external subset is located, then it is scanned.</p>
- */
- protected void resolveExternalSubsetAndRead()
- throws IOException, XNIException {
- XMLDTDDescription desc = new XMLDTDDescription(null,
- null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null, fElementQName.rawname);
- XMLInputSource src = fExternalSubsetResolver.getExternalSubset(desc);
- if (src != null) {
- fDoctypeName = fElementQName.rawname;
- fDoctypePublicId = src.getPublicId();
- fDoctypeSystemId = src.getSystemId();
- // call document handler
- if (fDocumentHandler != null) {
- // This inserts a doctypeDecl event into the stream though no
- // DOCTYPE existed in the instance document.
- fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
- }
- try {
- fDTDScanner.setInputSource(src);
- while (fDTDScanner.scanDTDExternalSubset(true));
- }
- finally {
- fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
- }
- }
- } // resolveExternalSubsetAndRead()
-
- } // class ContentDispatcher
-
- /**
- * Dispatcher to handle trailing miscellaneous section scanning.
- *
- * @author Andy Clark, IBM
- * @author Eric Ye, IBM
- */
- protected final class TrailingMiscDispatcher
- implements Dispatcher {
-
- //
- // Dispatcher methods
- //
-
- /**
- * Dispatch an XML "event".
- *
- * @param complete True if this dispatcher is intended to scan
- * and dispatch as much as possible.
- *
- * @return True if there is more to dispatch either from this
- * or a another dispatcher.
- *
- * @throws IOException Thrown on i/o error.
- * @throws XNIException Thrown on parse error.
- */
- public boolean dispatch(boolean complete)
- throws IOException, XNIException {
-
- try {
- boolean again;
- do {
- again = false;
- switch (fScannerState) {
- case SCANNER_STATE_TRAILING_MISC: {
- fEntityScanner.skipSpaces();
- if (fEntityScanner.skipChar('<')) {
- setScannerState(SCANNER_STATE_START_OF_MARKUP);
- again = true;
- }
- else {
- setScannerState(SCANNER_STATE_CONTENT);
- again = true;
- }
- break;
- }
- case SCANNER_STATE_START_OF_MARKUP: {
- fMarkupDepth++;
- if (fEntityScanner.skipChar('?')) {
- setScannerState(SCANNER_STATE_PI);
- again = true;
- }
- else if (fEntityScanner.skipChar('!')) {
- setScannerState(SCANNER_STATE_COMMENT);
- again = true;
- }
- else if (fEntityScanner.skipChar('/')) {
- reportFatalError("MarkupNotRecognizedInMisc",
- null);
- again = true;
- }
- else if (isValidNameStartChar(fEntityScanner.peekChar())) {
- reportFatalError("MarkupNotRecognizedInMisc",
- null);
- scanStartElement();
- setScannerState(SCANNER_STATE_CONTENT);
- }
- else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
- reportFatalError("MarkupNotRecognizedInMisc",
- null);
- scanStartElement();
- setScannerState(SCANNER_STATE_CONTENT);
- }
- else {
- reportFatalError("MarkupNotRecognizedInMisc",
- null);
- }
- break;
- }
- case SCANNER_STATE_PI: {
- scanPI();
- setScannerState(SCANNER_STATE_TRAILING_MISC);
- break;
- }
- case SCANNER_STATE_COMMENT: {
- if (!fEntityScanner.skipString("--")) {
- reportFatalError("InvalidCommentStart", null);
- }
- scanComment();
- setScannerState(SCANNER_STATE_TRAILING_MISC);
- break;
- }
- case SCANNER_STATE_CONTENT: {
- int ch = fEntityScanner.peekChar();
- if (ch == -1) {
- setScannerState(SCANNER_STATE_TERMINATED);
- return false;
- }
- reportFatalError("ContentIllegalInTrailingMisc",
- null);
- fEntityScanner.scanChar();
- setScannerState(SCANNER_STATE_TRAILING_MISC);
- break;
- }
- case SCANNER_STATE_REFERENCE: {
- reportFatalError("ReferenceIllegalInTrailingMisc",
- null);
- setScannerState(SCANNER_STATE_TRAILING_MISC);
- break;
- }
- case SCANNER_STATE_TERMINATED: {
- return false;
- }
- }
- } while (complete || again);
- }
- // encoding errors
- catch (MalformedByteSequenceException e) {
- fErrorReporter.reportError(e.getDomain(), e.getKey(),
- e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
- return false;
- }
- catch (CharConversionException e) {
- reportFatalError("CharConversionFailure", null);
- return false;
- }
- catch (EOFException e) {
- // NOTE: This is the only place we're allowed to reach
- // the real end of the document stream. Unless the
- // end of file was reached prematurely.
- if (fMarkupDepth != 0) {
- reportFatalError("PrematureEOF", null);
- return false;
- //throw e;
- }
-
- setScannerState(SCANNER_STATE_TERMINATED);
- return false;
- }
-
- return true;
-
- } // dispatch(boolean):boolean
-
- } // class TrailingMiscDispatcher
-
- } // class XMLDocumentScannerImpl