- /*
- * $Id: Parser2.java,v 1.16 2001/09/29 04:17:47 edwingo Exp $
- *
- * The Apache Software License, Version 1.1
- *
- *
- * Copyright (c) 2000 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Crimson" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation and was
- * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
- * http://www.sun.com. For more information on the Apache Software
- * Foundation, please see <http://www.apache.org/>.
- */
-
-
- package org.apache.crimson.parser;
-
- import java.io.InputStream;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.io.Reader;
- import java.io.FileNotFoundException;
-
- import java.util.Enumeration;
- import java.util.Hashtable;
- import java.util.Locale;
- import java.util.Vector;
- import java.util.Properties;
-
- import java.security.AccessController;
- import java.security.PrivilegedAction;
- import java.security.PrivilegedExceptionAction;
-
- import org.xml.sax.*;
- import org.xml.sax.helpers.*;
- import org.xml.sax.ext.*;
-
- import org.apache.crimson.util.MessageCatalog;
- import org.apache.crimson.util.XmlChars;
- import org.apache.crimson.util.XmlNames;
-
-
- //
- // NOTE: when maintaining this code, take care to keep the message
- // catalogue(s) up to date!! It's important that the diagnostics
- // be informative.
- //
-
-
- /**
- * This implements a fast non-validating SAX2 parser. This one always
- * processes external parsed entities, strictly adheres to the XML 1.0
- * specification, and provides useful diagnostics. It supports an
- * optimization allowing faster processing of valid standalone XML
- * documents. For multi-language applications (such as web servers using
- * XML processing to create dynamic content), a method supports choosing a
- * locale for parser diagnostics which is both understood by the message
- * recipient and supported by the parser.
- *
- * <P> This conforms to the XML 1.0 specification. To configure an XML
- * processor which tests document conformance against XML Namespaces,
- * provide a <em>DtdEventListener</em> which examines declarations of
- * entities and notations, and have your document listener check other
- * constraints such as ensuring <em>xmlns*</em> attribute values properly
- * declare all namespace prefixes. (Only element and attribute names may
- * contain colons, and even then the name prefix before the colon must be
- * properly declared.)
- *
- * <P> SAX parsers produce a stream of parse events, which applications
- * process to create an object model which is specific to their tasks.
- * Applications which do not want to process event streams in that way
- * should use an API producing a standardized object model, such as the
- * W3C's <em>Document Object Model</em> (DOM). This parser supports
- * building fully conformant DOM <em>Document</em> objects, through
- * use of DtdEventListener extensions to SAX in conjunction with an
- * appropriate implementation of a SAX <em>DocumentHandler</em>. In
- * addition, it supports some features (exposing comments, CDATA sections,
- * and entity references) which are allowed by DOM but not required to
- * be reported by conformant XML processors. (As usual, the default
- * handler for parsing events other than fatal errors ignores them.)
- *
- * @see ValidatingParser
- *
- * @author David Brownell
- * @author Rajiv Mordani
- * @author Edwin Goei
- * @version $Revision: 1.16 $
- */
- public class Parser2
- {
- // stack of input entities being merged
- // (set up in parseInternal(), popped as nested entities end, and closed
- // and nulled again when the parse finishes)
- private InputEntity in;
-
- // temporaries reused during parsing
- // (allocated by init() and released in parseInternal()'s finally block,
- // except charTmp and namePartsTmp which live as long as the parser)
- private AttributesExImpl attTmp;
- private StringBuffer strTmp; // receives literals and quoted strings
- private char nameTmp []; // scratch buffer for nameCharString(), grown on demand
- private NameCache nameCache;
- private char charTmp [] = new char [2]; // surrogate pair output of surrogatesToCharTmp()
- private String[] namePartsTmp = new String[3];
-
- // temporaries local to namespace attribute processing in elements
- private boolean seenNSDecl;
- private NamespaceSupport nsSupport; // created by init() only when namespaces is true
- /**
- * nsAttTmp holds a list of namespace attributes used to check for
- * #REQUIRED when validating and (namespaces == true && prefixes ==
- * false)
- */
- private Vector nsAttTmp;
-
- // NOTE: odd heap behavior, at least with classic VM: if "strTmp" is
- // reused, LOTS of extra memory is consumed in some simple situations.
- // JVM bug filed; it's no longer a win to reuse it as much, in any case.
-
- // parsing modes
- private boolean isValidating = false;
- private boolean fastStandalone = false; // forced off whenever validation is enabled
- private boolean isInAttribute = false; // true only while parseLiteral() scans an attribute value
- private boolean namespaces; // new in SAX2
- private boolean prefixes; // new in SAX2
-
- // temporary DTD parsing state
- private boolean inExternalPE;
- private boolean doLexicalPE;
- private boolean donePrologue; // set once the prolog has been consumed
-
- // info about the document
- private boolean isStandalone;
- private String rootElementName;
-
- // DTD state, used during parsing
- // (all four tables below are cleared before and after every parse)
- private boolean ignoreDeclarations;
- private SimpleHashtable elements = new SimpleHashtable (47);
- private SimpleHashtable params = new SimpleHashtable (7);
-
- // exposed to package-private subclass
- Hashtable notations = new Hashtable (7);
- SimpleHashtable entities = new SimpleHashtable (17);
-
- // stuff associated with SAX
- // (setHandlers() replaces any null handler with the shared no-op handler)
- private ContentHandler contentHandler;
- private DTDHandler dtdHandler;
- private EntityResolver resolver;
- private ErrorHandler errHandler;
- private Locale locale;
- private Locator locator;
-
- // SAX2 extension API support
- private DeclHandler declHandler;
- private LexicalHandler lexicalHandler;
-
- // Security constraints. The property* strings hold the raw text read by
- // setSecurityConstraintValues(); the typed fields hold the parsed result.
- private boolean disallowDoctypeDecl = false ;
- private String propertyEntityExpansionLimit = null;
- private String propertyDisallowDoctypeDecl = null ;
- private String propertyElementAttributeLimit = null;
-
- // Restricting entity expansions.
- // Number of entity expansions performed in the current parse; init()
- // resets it to zero before each parse.
- private int entityExpansionCount = 0 ;
- // A negative value means "not configured": init() then substitutes the
- // matching DEFAULT_* constant. Otherwise the value parsed from the
- // system property / jaxp.properties is used.
- private int entityExpansionLimit = -1 ;
- private int elementAttributeLimit = -1;
-
- private static final int DEFAULT_ENTITY_EXPANSION_LIMIT = 64000 ;
- private static final int DEFAULT_ELEMENT_ATTRIBUTE_LIMIT = 10000;
-
- // Compile time option: disable validation support for a better
- // fit in memory-critical environments (P-Java etc). Doing that
- // and removing the validating parser support saves (at this time)
- // about 15% in size.
-
- private static final boolean supportValidation = true;
-
-
- // string constants -- use these copies so "==" works
- // package private
- static final String strANY = "ANY";
- static final String strEMPTY = "EMPTY";
-
- // Names of the system properties (also used as keys when reading
- // jaxp.properties) that configure the security constraints.
- static final String SYSTEM_PROPERTY_ENTITY_EXPANSION_LIMIT = "entityExpansionLimit" ;
- static final String SYSTEM_PROPERTY_DISALLOW_DOCTYPE_DECL = "disallowDoctypeDecl" ;
- static final String SYSTEM_PROPERTY_ELEMENT_ATTRIBUTE_LIMIT = "elementAttributeLimit";
-
- // When true, the configured and effective limits are printed to System.out.
- static final boolean SECURITY_DEBUG = false ;
-
- ////////////////////////////////////////////////////////////////
- //
- // PARSER methods
- //
- ////////////////////////////////////////////////////////////////
-
- /**
-  * Creates a SAX2 parser. Installs the document locator, fills every
-  * unset handler slot with the shared no-op handler, and reads the
-  * security constraints (entity expansion limit, DOCTYPE prohibition,
-  * attribute limit) from the environment.
-  */
- public Parser2 ()
- {
-     this.locator = new DocLocator ();
-     this.setHandlers ();
-     // pick up security limits from system properties / jaxp.properties
-     this.setSecurityConstraintValues ();
- }
-
- /**
-  * Stores the two SAX2 namespace-related feature flags. SAX2 specifies
-  * that they are read-only during a parse and read-write otherwise; this
-  * method itself does not enforce that.
-  *
-  * @param namespaces value for the "namespaces" feature
-  * @param prefixes value for the "namespace-prefixes" feature
-  */
- void setNamespaceFeatures(boolean namespaces, boolean prefixes)
- {
-     this.prefixes = prefixes;
-     this.namespaces = namespaces;
- }
-
- /**
-  * Installs the resolver used for external entities. If it is still
-  * null when a parse starts, init() supplies a default Resolver.
-  */
- void setEntityResolver(EntityResolver resolver)
- {
-     this.resolver = resolver;
- }
-
- public void setDTDHandler(DTDHandler handler) {
- dtdHandler = handler;
- }
-
- /**
-  * Installs the handler that receives document content events. A null
-  * handler is replaced by the shared no-op handler the next time
-  * setHandlers() runs.
-  */
- void setContentHandler(ContentHandler handler)
- {
-     this.contentHandler = handler;
- }
-
- /**
-  * Installs the handler that receives parser diagnostics. A null
-  * handler is replaced by the shared no-op handler the next time
-  * setHandlers() runs.
-  */
- void setErrorHandler (ErrorHandler handler)
- {
-     this.errHandler = handler;
- }
-
- /**
-  * Installs the SAX2 extension handler for lexical events. A null
-  * handler is replaced by the shared no-op handler the next time
-  * setHandlers() runs.
-  */
- void setLexicalHandler (LexicalHandler handler)
- {
-     this.lexicalHandler = handler;
- }
-
- /**
-  * Installs the SAX2 extension handler for DTD declaration events. A
-  * null handler is replaced by the shared no-op handler the next time
-  * setHandlers() runs.
-  */
- void setDeclHandler (DeclHandler handler)
- {
-     this.declHandler = handler;
- }
-
-
- // XXX Maybe we can remove some of these old locale methods
- /**
- * <b>SAX:</b> Used by applications to request locale for diagnostics.
- *
- * @param l The locale to use, or null to use system defaults
- * (which may include only message IDs).
- * @throws SAXException If no diagnostic messages are available
- * in that locale.
- */
- public void setLocale (Locale l)
- throws SAXException
- {
- if (l != null && !messages.isLocaleSupported (l.toString ()))
- throw new SAXException (messages.getMessage (locale,
- "P-078", new Object [] { l }));
- locale = l;
- }
-
- /** Returns the diagnostic locale. */
- public Locale getLocale ()
- { return locale; }
-
- /**
-  * Picks a diagnostic locale from a list of client preferences: the
-  * message catalog selects a locale from the list, and that locale is
-  * then installed through <a href="#setLocale(java.util.Locale)">
-  * setLocale()</a>. Such a list could come from a variety of user
-  * preference mechanisms, including the HTTP <em>Accept-Language</em>
-  * header field.
-  *
-  * @see org.apache.crimson.util.MessageCatalog
-  *
-  * @param languages Array of language specifiers, ordered with the most
-  *  preferable one at the front. For example, "en-ca" then "fr-ca",
-  *  followed by "zh_CN". Both RFC 1766 and Java styles are supported.
-  * @return The chosen locale, or null (in which case the current locale
-  *  is left untouched).
-  * @throws SAXException propagated from setLocale()
-  */
- public Locale chooseLocale (String languages [])
- throws SAXException
- {
-     Locale chosen = messages.chooseLocale (languages);
-
-     if (chosen == null) {
-         return null;
-     }
-     setLocale (chosen);
-     return chosen;
- }
-
-
- /**
-  * <b>SAX:</b> Parse a document. The parser state is reset first, then
-  * the whole document is read from the given source.
-  *
-  * @param in the document to parse
-  * @throws SAXException on a parse error
-  * @throws IOException on an I/O error while reading the input
-  */
- public void parse (InputSource in)
- throws SAXException, IOException
- {
-     this.init ();
-     this.parseInternal (in);
- }
-
- /**
- * Setting this flag enables faster processing of valid standalone
- * documents: external DTD information is not processed, and no
- * attribute normalization or defaulting is done. This optimization
- * is only permitted in non-validating parsers; for validating
- * parsers, this mode is silently disabled.
- *
- * <P> For documents which are declared as standalone, but which are
- * not valid, a fatal error may be reported for references to externally
- * defined entities. That could happen in any nonvalidating parser which
- * did not read externally defined entities. Also, if any attribute
- * values need normalization or defaulting, it will not be done.
- */
- public void setFastStandalone (boolean value)
- { fastStandalone = value && !isValidating; }
-
- /**
- * Returns true if standalone documents skip processing of
- * all external DTD information.
- */
- public boolean isFastStandalone ()
- { return fastStandalone; }
-
-
- /**
-  * In support of the HTML DOM model of client side
-  * <em>&lt;xhtml:script&gt;</em> tag processing, this method permits
-  * data to be spliced into the input stream. It would normally be
-  * called from an <em>endElement</em> callback to put the buffered
-  * result of calls such as DOM <em>HTMLDocument.write</em> into the
-  * input stream.
-  *
-  * @param buf characters to splice in
-  * @param offset index of the first character of buf to use
-  * @param len number of characters to use; nothing happens if it is
-  *  zero or negative
-  * @throws SAXException propagated from pushing the new reader
-  */
- public void pushInputBuffer (char buf [], int offset, int len)
- throws SAXException
- {
-     if (len <= 0) {
-         return;
-     }
-
-     char [] data = buf;
-     boolean coversWholeArray = (offset == 0) && (len == buf.length);
-
-     // Only a slice was requested: copy it out so that the pushed
-     // buffer is exactly the requested text.
-     if (!coversWholeArray) {
-         data = new char [len];
-         System.arraycopy (buf, offset, data, 0, len);
-     }
-     pushReader (data, null, false);
- }
-
-
- /**
-  * Package private: turns validation on or off. Turning it on also
-  * switches the fast standalone mode off.
-  *
-  * @param value true to validate
-  * @throws RuntimeException (message "V-000") if this build was compiled
-  *  without validation support, whatever the requested value
-  */
- void setIsValidating (boolean value)
- {
-     if (!supportValidation) {
-         throw new RuntimeException (messages.getMessage (locale, "V-000"));
-     }
-     isValidating = value;
-     if (value) {
-         fastStandalone = false;
-     }
- }
-
-
- /**
-  * Makes sure the parser is reset to "before a document": allocates the
-  * per-parse scratch objects, clears document and DTD state, re-creates
-  * the five predefined entities, supplies defaults for locale, resolver
-  * and handlers, and puts the security limits into effect.
-  */
- private void init ()
- {
-     // no input entity is active yet
-     in = null;
-
-     // scratch objects used while parsing
-     attTmp = new AttributesExImpl ();
-     strTmp = new StringBuffer ();
-     nameTmp = new char [20];
-     nameCache = new NameCache ();
-
-     if (namespaces) {
-         nsSupport = new NamespaceSupport ();
-         boolean needNsAttList =
-             supportValidation && isValidating && !prefixes;
-         if (needNsAttList) {
-             nsAttTmp = new Vector ();
-         }
-     }
-
-     // document info
-     isStandalone = false;
-     rootElementName = null;
-     isInAttribute = false;
-
-     // DTD parsing state
-     inExternalPE = false;
-     doLexicalPE = false;
-     donePrologue = false;
-
-     // DTD tables
-     entities.clear ();
-     notations.clear ();
-     params.clear ();
-     elements.clear ();
-     ignoreDeclarations = false;
-
-     // predefined references ... re-interpreted later
-     String [][] predefined = {
-         { "amp", "&" },
-         { "lt", "<" },
-         { "gt", ">" },
-         { "quot", "\"" },
-         { "apos", "'" }
-     };
-     for (int i = 0; i < predefined.length; i++) {
-         builtin (predefined [i][0], predefined [i][1]);
-     }
-
-     if (locale == null) {
-         locale = Locale.getDefault ();
-     }
-     if (resolver == null) {
-         resolver = new Resolver ();
-     }
-
-     setHandlers ();
-
-     // debugging aid: show the count left over from the previous parse
-     if (SECURITY_DEBUG) {
-         System.out.println(" Last Entity expansion count = " + entityExpansionCount );
-     }
-
-     // every parse starts counting expansions from zero
-     entityExpansionCount = 0;
-
-     // a negative limit means the application configured nothing:
-     // fall back to the built-in defaults
-     if (entityExpansionLimit < 0) {
-         entityExpansionLimit = DEFAULT_ENTITY_EXPANSION_LIMIT;
-     }
-     if (elementAttributeLimit < 0) {
-         elementAttributeLimit = DEFAULT_ELEMENT_ATTRIBUTE_LIMIT;
-     }
-
-     if (SECURITY_DEBUG) {
-         System.out.println(" Entity expansion limit in effect = " + entityExpansionLimit );
-         System.out.println(" DisallowDoctypeDecl in effect = " + disallowDoctypeDecl );
-         System.out.println(" Element Attribute limit in effect = " + elementAttributeLimit );
-     }
- }
-
- void setSecurityConstraintValues(){
-
- //SYSTEM PROPERTY ENTITY EXPANSION LIMIT
- //get the value of entityExpansionLimit from SYSTEM PROPERTY
- //put this code in doPriviliged block so it can still be executed if the caller have less privileges
- try {
- propertyEntityExpansionLimit = (String)AccessController.doPrivileged(new PrivilegedAction(){
- public Object run(){
- return System.getProperty(SYSTEM_PROPERTY_ENTITY_EXPANSION_LIMIT);
- }
- });
- } catch ( SecurityException se ) {
- //This exception can happen in case we are running as an applet
- }
-
- //SYSTEM PROPERTY DISALLOW DOCTYPE DECL
- //get the value of disallowDoctypeDecl from system property
- //put this code in doPriviliged block so it can still be executed if the caller have less privileges
- try {
- propertyDisallowDoctypeDecl = (String) AccessController.doPrivileged( new PrivilegedAction(){
- public Object run(){
- return System.getProperty(SYSTEM_PROPERTY_DISALLOW_DOCTYPE_DECL);
- }
- });
-
- } catch ( SecurityException se ) {
- //This exception can happen in case we are running as an applet
- }
-
- //SYSTEM PROPERTY ELEMENT ATTRIBUTE LIMIT
- //get the value of elementAttributeLimit from system property
- //put this code in doPriviliged block so it can still be executed if the caller have less privileges
- try {
- propertyElementAttributeLimit = (String) AccessController.doPrivileged( new PrivilegedAction(){
- public Object run(){
- return System.getProperty(SYSTEM_PROPERTY_ELEMENT_ATTRIBUTE_LIMIT);
- }
- });
-
- } catch ( SecurityException se ) {
- //This exception can happen in case we are running as an applet
- }
-
- if(SECURITY_DEBUG){
- System.out.println(" ENTITY_EXPANSION_LIMIT SET FROM SYSTEM PROPERTY = " + propertyEntityExpansionLimit );
- System.out.println(" DISALLOW_DOCTYPE_DECL SET FROM SYSTEM PROPERTY = " + propertyDisallowDoctypeDecl );
- System.out.println(" ELEMENT_ATTRIBUT_LIMIT SET FROM SYSTEM PROPERTY = " + propertyElementAttributeLimit );
- }
-
- //if either of the value is not set.. try to get the value from jaxp.properties file..
- if( propertyEntityExpansionLimit == null || propertyDisallowDoctypeDecl == null ){
- //for performance reasons don't read jaxp.properties file again and again..
-
- // try to read from $java.home/lib/jaxp.properties
- try {
- //put this code in doPriviliged block so it can still be executed if the caller have less privileges
- FileInputStream fis = (FileInputStream) AccessController.doPrivileged(new PrivilegedExceptionAction() {
- public Object run() throws FileNotFoundException {
- String javah = System.getProperty( "java.home" );
- String configFile = javah + File.separator +
- "lib" + File.separator + "jaxp.properties";
- File f = new File( configFile );
- if(f.exists()){
- return new FileInputStream(f);
- }
- else{
- return null ;
- }
- }
- });
- if(fis != null){
- Properties props = new Properties();
- props.load( fis );
- //we dont know which one was null..
- if(propertyEntityExpansionLimit == null){
- propertyEntityExpansionLimit = props.getProperty(SYSTEM_PROPERTY_ENTITY_EXPANSION_LIMIT);
- if(SECURITY_DEBUG)System.out.println("Value from jaxp.properites file, propertyEntityExpansionLimit = " + propertyEntityExpansionLimit );
- }
- //we dont know which one was null..
- if(propertyDisallowDoctypeDecl == null){
- propertyDisallowDoctypeDecl = props.getProperty(SYSTEM_PROPERTY_DISALLOW_DOCTYPE_DECL);
- if(SECURITY_DEBUG)System.out.println("Value from jaxp.properites file, propertyDisallowDoctypeDecl = " + propertyDisallowDoctypeDecl );
- }
- }
- } catch(Exception ex ) {
- //ignore the exception
- }
- }
-
- //get the value of entityExpansionLimit
- try{
- if(propertyEntityExpansionLimit != null){
- entityExpansionLimit = Integer.parseInt(propertyEntityExpansionLimit);
- }
- }catch(NumberFormatException nfe){
- //ignore the exception.. or
- }
-
- //get the value of disallowDoctypeDecl
- if(propertyDisallowDoctypeDecl != null && (propertyDisallowDoctypeDecl.equals("true") || propertyDisallowDoctypeDecl.equals("TRUE"))){
- disallowDoctypeDecl = true;
- }
-
- //get the value of elementAttributeLimit
- try{
- if(propertyElementAttributeLimit != null){
- elementAttributeLimit = Integer.parseInt(propertyElementAttributeLimit);
- }
- }catch(NumberFormatException nfe){
- //ignore the exception..
- }
-
-
- }//getSecurityConstraintValues()
-
- /** Shared do-nothing handler standing in for every handler left unset. */
- private static final NullHandler nullHandler = new NullHandler();
-
- /**
-  * Fills each handler slot that is still null with the shared no-op
-  * handler; handlers that are already set are left alone.
-  */
- private void setHandlers ()
- {
-     // SAX2 extension handlers
-     if (declHandler == null)
-         declHandler = nullHandler;
-     if (lexicalHandler == null)
-         lexicalHandler = nullHandler;
-
-     // core SAX handlers
-     if (dtdHandler == null)
-         dtdHandler = nullHandler;
-     if (errHandler == null)
-         errHandler = nullHandler;
-     if (contentHandler == null)
-         contentHandler = nullHandler;
- }
-
- /**
-  * Registers a predefined general entity in the entity table.
-  *
-  * @param entityName entity name, e.g. "amp"
-  * @param entityValue its replacement text
-  */
- private void builtin (String entityName, String entityValue)
- {
-     char [] text = entityValue.toCharArray ();
-     entities.put (entityName, new InternalEntity (entityName, text));
- }
-
-
-
- ////////////////////////////////////////////////////////////////
- //
- // parsing is by recursive descent, code roughly
- // following the BNF rules except tweaked for simple
- // lookahead. rules are more or less in numeric order,
- // except where code sharing suggests other structures.
- //
- // a classic benefit of recursive descent parsers: it's
- // relatively easy to get diagnostics that make sense.
- //
- ////////////////////////////////////////////////////////////////
-
-
- //
- // CHAPTER 2: Documents
- //
-
- /**
-  * Parses one complete document: optional XML declaration, prolog with
-  * optional DOCTYPE, a single root element, then trailing Misc up to end
-  * of input. Whatever happens, the per-parse temporaries are dropped,
-  * the input is closed, the DTD tables are cleared and afterDocument()
-  * is invoked.
-  *
-  * @param input the document source; null is reported as fatal "P-000"
-  * @throws SAXException on any parse error; runtime exceptions raised
-  *  during the parse are rethrown as SAXParseException carrying the
-  *  current location
-  * @throws IOException on input failure
-  */
- private void parseInternal (InputSource input)
- throws SAXException, IOException
- {
-     if (input == null)
-         fatal ("P-000");
-
-     try {
-         in = InputEntity.getInputEntity (errHandler, locale);
-         in.init (input, null, null, false);
-
-         //
-         // The doc handler sees the locator, lots of PIs, DTD info
-         // about external entities and notations, then the body.
-         // The locator must be handed out only after the InputEntity is
-         // initialized because it uses the InputEntity's system id,
-         // public id, line number etc.
-         //
-         contentHandler.setDocumentLocator (locator);
-         contentHandler.startDocument ();
-
-         // [1] document ::= prolog element Misc*
-         // [22] prolog ::= XMLDecl? Misc* (DoctypeDecl Misc *)?
-         maybeXmlDecl ();
-         maybeMisc (false);
-
-         boolean sawDoctype = maybeDoctypeDecl ();
-         if (!sawDoctype && supportValidation && isValidating) {
-             warning ("V-001", null);
-         }
-
-         maybeMisc (false);
-         donePrologue = true;
-
-         //
-         // One root element ... then basically PIs before EOF.
-         //
-         boolean rootParsed = in.peekc ('<') && maybeElement (null);
-         if (!rootParsed) {
-             fatal ("P-067");
-         }
-
-         // Hook for the subclass; used for validation of id refs.
-         afterRoot ();
-         maybeMisc (true);
-
-         if (!in.isEOF ()) {
-             int stray = getc ();
-             fatal ("P-001", new Object [] { Integer.toHexString (stray) });
-         }
-         contentHandler.endDocument ();
-
-     } catch (EndOfInputException e) {
-         if (in.isDocument ()) {
-             fatal ("P-003", null, e);
-         } else {
-             String name = in.getName ();
-
-             // pop at least once, and keep popping past internal
-             // entities, to force a relevant URI and line number
-             in = in.pop ();
-             while (in.isInternal ()) {
-                 in = in.pop ();
-             }
-             fatal ("P-002", new Object [] { name }, e);
-         }
-
-     } catch (RuntimeException e) {
-         // Don't discard the location that triggered the exception
-         String text = (e.getMessage () == null)
-             ? e.getClass ().getName ()
-             : e.getMessage ();
-
-         throw new SAXParseException (
-             text,
-             locator.getPublicId (), locator.getSystemId (),
-             locator.getLineNumber (), locator.getColumnNumber (),
-             e);
-
-     } finally {
-         // drop temporary data used during parsing
-         nsAttTmp = null;
-         nameCache = null;
-         nameTmp = null;
-         attTmp = null;
-         strTmp = null;
-
-         // ditto input sources etc
-         if (in != null) {
-             in.close ();
-             in = null;
-         }
-
-         // get rid of all DTD info ... some of it would be
-         // useful for editors etc, investigate later.
-         params.clear ();
-         entities.clear ();
-         notations.clear ();
-         elements.clear ();
-
-         afterDocument ();
-     }
- }
-
- /**
-  * Package private hook for the subclass, called by parseInternal()
-  * right after the root element has been parsed. Does nothing here.
-  *
-  * @throws SAXException never thrown by this implementation
-  */
- void afterRoot () throws SAXException
- {
- }
-
- /**
-  * Package private hook for the subclass, called last in
-  * parseInternal()'s cleanup whether or not the parse succeeded.
-  * Does nothing here.
-  */
- void afterDocument ()
- {
- }
-
- /**
-  * Consumes mandatory whitespace and reports fatal "P-004" if there is
-  * none.
-  *
-  * <pre>[3] S ::= (#x20 | #x9 | #xd | #xa)+</pre>
-  *
-  * @param roleId message id describing what the whitespace separates;
-  *  only used to build the diagnostic
-  */
- private void whitespace (String roleId) throws IOException, SAXException
- {
-     if (maybeWhitespace ()) {
-         return;
-     }
-     String role = messages.getMessage (locale, roleId);
-     fatal ("P-004", new Object [] { role });
- }
-
- /**
-  * Consumes optional whitespace (S?).
-  *
-  * @return true if at least one whitespace character was consumed
-  */
- private boolean maybeWhitespace () throws IOException, SAXException
- {
-     // Common case: not lexically expanding PEs inside an external PE,
-     // so the input entity can skip whitespace by itself.
-     if (!inExternalPE || !doLexicalPE) {
-         return in.maybeWhitespace ();
-     }
-
-     // see getc() for the PE logic -- this lets us splice
-     // expansions of PEs in "anywhere". getc() has smarts,
-     // so for external PEs we don't bypass it.
-
-     // XXX we can marginally speed PE handling, and certainly
-     // be cleaner (hence potentially more correct), by using
-     // the observations that expanded PEs only start and stop
-     // where whitespace is allowed. getc wouldn't need any
-     // "lexical" PE expansion logic, and no other method needs
-     // to handle termination of PEs. (parsing of literals would
-     // still need to pop entities, but not parsing of references
-     // in content.)
-
-     boolean sawSpace = false;
-
-     for (;;) {
-         char ch = getc ();
-         boolean isSpace =
-             ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
-
-         if (!isSpace) {
-             break;
-         }
-         sawSpace = true;
-
-         // this gracefully ends things when we stop playing
-         // with internal parameters. caller should have a
-         // grammar rule allowing whitespace at end of entity.
-         if (in.isEOF () && !in.isInternal ()) {
-             return sawSpace;
-         }
-     }
-
-     // the last character read was not whitespace: give it back
-     ungetc ();
-     return sawSpace;
- }
-
- /**
-  * Reads an XML Name if one starts at the current position.
-  *
-  * @return the name, or null if the next character cannot start a name
-  */
- private String maybeGetName ()
- throws IOException, SAXException
- {
-     NameCacheEntry found = maybeGetNameCacheEntry ();
-
-     if (found == null) {
-         return null;
-     }
-     return found.name;
- }
-
- /**
-  * Reads an XML Name if one starts at the current position and returns
-  * its name cache entry.
-  *
-  * <pre>[5] Name ::= (Letter|'_'|':') (Namechar)*</pre>
-  *
-  * @return the cache entry for the name, or null (with the character
-  *  pushed back) if the next character cannot start a name
-  */
- private NameCacheEntry maybeGetNameCacheEntry ()
- throws IOException, SAXException
- {
-     char first = getc ();
-     boolean startsName =
-         XmlChars.isLetter (first) || first == ':' || first == '_';
-
-     if (startsName) {
-         return nameCharString (first);
-     }
-     ungetc ();
-     return null;
- }
-
- /**
-  * Reads a name token; used when parsing enumerations. Reports fatal
-  * "P-006" if the next character is not a name character.
-  *
-  * <pre>[7] Nmtoken ::= (Namechar)+</pre>
-  *
-  * @return the token text
-  */
- private String getNmtoken ()
- throws SAXException, IOException
- {
-     char first = getc ();
-
-     if (!XmlChars.isNameChar (first)) {
-         fatal ("P-006", new Object [] { new Character (first) });
-     }
-     NameCacheEntry entry = nameCharString (first);
-     return entry.name;
- }
-
- /**
-  * Collects the rest of a name whose first character has already been
-  * read, and looks the result up in the name cache.
-  *
-  * <P> n.b. this gets used when parsing attribute values (for internal
-  * references) so it can't use strTmp; it's also a hotspot for CPU and
-  * memory in the parser (called at least once for each element), which
-  * is why it works on a reusable char buffer.
-  *
-  * @param c the first character of the name
-  * @return the name cache entry for the collected characters
-  */
- private NameCacheEntry nameCharString (char c)
- throws IOException, SAXException
- {
-     nameTmp [0] = c;
-     int used = 1;
-     char next;
-
-     // getNameChar() yields 0 once the next character is not a name char
-     while ((next = in.getNameChar ()) != 0) {
-         if (used >= nameTmp.length) {
-             // buffer full: enlarge by ten characters
-             char [] bigger = new char [nameTmp.length + 10];
-             System.arraycopy (nameTmp, 0, bigger, 0, nameTmp.length);
-             nameTmp = bigger;
-         }
-         nameTmp [used] = next;
-         used++;
-     }
-     return nameCache.lookupEntry (nameTmp, used);
- }
-
- //
- // much similarity between parsing entity values in DTD
- // and attribute values (in DTD or content) ... both follow
- // literal parsing rules, newline canonicalization, etc
- //
- // leaves value in 'strTmp' ... either a "replacement text" (4.5),
- // or else partially normalized attribute value (the first bit
- // of 3.3.3's spec, without the "if not CDATA" bits).
- //
- /**
-  * Parses a quoted literal and leaves the result in strTmp.
-  *
-  * @param isEntityValue true to parse an EntityValue [9] (general entity
-  *  references are copied through unexpanded, parameter entity references
-  *  are expanded); false to parse an AttValue [10] (general entity
-  *  references are expanded, whitespace becomes a space, '<' is fatal)
-  */
- private void parseLiteral (boolean isEntityValue)
- throws IOException, SAXException
- {
- // [9] EntityValue ::=
- // '"' ([^"&%] | Reference | PEReference)* '"'
- // | "'" ([^'&%] | Reference | PEReference)* "'"
- // [10] AttValue ::=
- // '"' ([^"&] | Reference )* '"'
- // | "'" ([^'&] | Reference )* "'"
-
- // Only expand PEs in getc() when processing entity value literals
- // and do not expand when processing AttValue. Save state of
- // doLexicalPE and restore it before returning.
- // NOTE: the assignment and the restore are both commented out, so
- // savedLexicalPE is currently unused and doLexicalPE is left as is.
- boolean savedLexicalPE = doLexicalPE;
- // doLexicalPE = isEntityValue;
-
- char quote = getc ();
- char c;
- // the entity the literal started in; only a quote read from this
- // same entity may terminate the literal
- InputEntity source = in;
-
- if (quote != '\'' && quote != '"')
- fatal ("P-007");
-
- // don't report entity expansions within attributes,
- // they're reported "fully expanded" via SAX
- isInAttribute = !isEntityValue;
-
- // get value into strTmp
- strTmp = new StringBuffer ();
-
- // scan, allowing entity push/pop wherever ...
- // expanded entities can't terminate the literal!
- for (;;) {
- if (in != source && in.isEOF ()) {
- // we don't report end of parsed entities
- // within attributes (no SAX hooks)
- in = in.pop ();
- continue;
- }
- if ((c = getc ()) == quote && in == source)
- break;
-
- //
- // Basically the "reference in attribute value"
- // row of the chart in section 4.4 of the spec
- //
- if (c == '&') {
- String entityName = maybeGetName ();
-
- if (entityName != null) {
- // a named reference must be closed by ';'
- nextChar (';', "F-020", entityName);
-
- // 4.4 says: bypass these here ... we'll catch
- // forbidden refs to unparsed entities on use
- if (isEntityValue) {
- strTmp.append ('&');
- strTmp.append (entityName);
- strTmp.append (';');
- continue;
- }
- expandEntityInLiteral (entityName, entities, isEntityValue);
-
-
- // character references are always included immediately
- } else if ((c = getc ()) == '#') {
- int tmp = parseCharNumber ();
-
- // values above 0xffff do not fit in one char: they are
- // converted into charTmp, and tmp then holds the number
- // of chars to append
- if (tmp > 0xffff) {
- tmp = surrogatesToCharTmp (tmp);
- strTmp.append (charTmp [0]);
- if (tmp == 2)
- strTmp.append (charTmp [1]);
- } else
- strTmp.append ((char) tmp);
- } else
- fatal ("P-009");
- continue;
-
- }
-
- // expand parameter entities only within entity value literals
- if (c == '%' && isEntityValue) {
- String entityName = maybeGetName ();
-
- if (entityName != null) {
- nextChar (';', "F-021", entityName);
- // PE references inside a literal are only expanded while
- // in an external PE; anywhere else they are fatal
- if (inExternalPE)
- expandEntityInLiteral (entityName,
- params, isEntityValue);
- else
- fatal ("P-010", new Object [] { entityName });
- continue;
- } else
- fatal ("P-011");
- }
-
- // For attribute values ...
- if (!isEntityValue) {
- // 3.3.3 says whitespace normalizes to space...
- if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
- strTmp.append (' ');
- continue;
- }
-
- // "<" not legal in parsed literals ...
- if (c == '<')
- fatal ("P-012");
- }
-
- strTmp.append (c);
- }
-
- isInAttribute = false;
- // doLexicalPE = savedLexicalPE;
- }
-
- /**
-  * Does a SINGLE expansion of the named entity (often reparsed later)
-  * by pushing its content onto the input stack.
-  *
-  * <P> Every call counts against the entity expansion limit unless that
-  * limit is -1; reaching the limit is reported as fatal "P-086".
-  *
-  * @param name name of the entity to expand
-  * @param table table to look the name up in (general or parameter
-  *  entities)
-  * @param isEntityValue true when expanding inside an entity value
-  *  literal; external entities are rejected (fatal "P-013") otherwise
-  */
- private void expandEntityInLiteral (
-     String name,
-     SimpleHashtable table,
-     boolean isEntityValue
- ) throws SAXException, IOException
- {
-     Object entity = table.get (name);
-
-     // Enforce the constraint on the number of entity expansions.
-     // The counter is only advanced while a limit is in force.
-     if (entityExpansionLimit != -1) {
-         int expansionsSoFar = entityExpansionCount;
-
-         entityExpansionCount = expansionsSoFar + 1;
-         if (expansionsSoFar >= entityExpansionLimit) {
-             fatal ("P-086", new Object[] {new Integer(entityExpansionLimit)});
-         }
-     }
-
-     if (entity == null) {
-         //
-         // Note: much confusion about whether spec requires such
-         // errors to be fatal in many cases, but none about whether
-         // it allows "normal" errors to be unrecoverable!
-         //
-         String messageId = (table == params) ? "V-022" : "P-014";
-         fatal (messageId, new Object [] { name });
-
-     } else if (entity instanceof InternalEntity) {
-         //
-         // Note: if entity is a PE (value.isPE) there is an XML
-         // requirement that the content be "markupdecl", but that error
-         // is ignored here (as permitted by the XML spec).
-         //
-         InternalEntity internal = (InternalEntity) entity;
-         boolean standaloneViolation = supportValidation && isValidating
-             && isStandalone
-             && !internal.isFromInternalSubset;
-
-         if (standaloneViolation) {
-             error ("V-002", new Object [] { name });
-         }
-         pushReader (internal.buf, name, !internal.isPE);
-
-     } else if (entity instanceof ExternalEntity) {
-         if (!isEntityValue) { // must be a PE ...
-             fatal ("P-013", new Object [] { name });
-         }
-         // XXX if this returns false ...
-         pushReader ((ExternalEntity) entity);
-     }
- }
-
- // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
- // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>"
-
- // NOTE: XML spec should explicitly say that PE ref syntax is
- // ignored in PIs, comments, SystemLiterals, and Pubid Literal
- // values ... can't process the XML spec's own DTD without doing
- // that for comments.
-
- /**
-  * Reads a quoted string verbatim, without any reference processing.
-  * The text is also left in strTmp.
-  *
-  * @param type message id naming the kind of literal expected; used
-  *  only to build the "P-015" diagnostic when no quote is found
-  * @param extra argument substituted into that message, may be null
-  * @return the characters between the quotes
-  */
- private String getQuotedString (String type, String extra)
- throws IOException, SAXException
- {
-     // use in.getc to bypass PE processing
-     char quote = in.getc ();
-     boolean isQuote = (quote == '\'') || (quote == '"');
-
-     if (!isQuote) {
-         String expected =
-             messages.getMessage (locale, type, new Object [] { extra });
-         fatal ("P-015", new Object [] { expected });
-     }
-
-     strTmp = new StringBuffer ();
-     for (;;) {
-         char next = in.getc ();
-
-         if (next == quote) {
-             break;
-         }
-         strTmp.append (next);
-     }
-     return strTmp.toString ();
- }
-
-
- private String parsePublicId ()
- throws IOException, SAXException
- {
- // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
- // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
- String retval = getQuotedString ("F-033", null);
- for (int i = 0; i < retval.length (); i++) {
- char c = retval.charAt (i);
- if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
- && !(c >= 'A' && c <= 'Z')
- && !(c >= 'a' && c <= 'z'))
- fatal ("P-016", new Object [] { new Character (c) });
- }
- strTmp = new StringBuffer ();
- strTmp.append (retval);
- return normalize (false);
- }
-
- // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
- // handled by: InputEntity.parsedContent()
- /**
- * Parses a comment if the input continues with the comment opening
- * delimiter. When the lexical handler is not the nullHandler, the comment
- * text is buffered in strTmp and passed to lexicalHandler.comment().
- * doLexicalPE is cleared while the comment is read and restored afterwards.
- *
- * @param skipStart true to look for {@code "!--"} (the caller has already
- * taken the leading '<'), false to look for {@code "<!--"}
- * @return false if the delimiter was not found; true once the comment
- * has been read through its closing {@code "-->"}
- */
- private boolean maybeComment (boolean skipStart)
- throws IOException, SAXException
- {
- // [15] Comment ::= '<!--'
- // ( (Char - '-') | ('-' (Char - '-'))*
- // '-->'
- if (!in.peek (skipStart ? "!--" : "<!--", null))
- return false;
-
- boolean savedLexicalPE = doLexicalPE;
-
- doLexicalPE = false;
- boolean saveCommentText = lexicalHandler != nullHandler; // buffer text only if it will be reported
- if (saveCommentText) {
- strTmp = new StringBuffer ();
- }
-
- oneComment:
- for (;;) {
- try {
- // bypass PE expansion, but permit PEs
- // to complete ... valid docs won't care.
- for (;;) {
- int c = getc ();
- if (c == '-') {
- c = getc ();
- if (c != '-') {
- if (saveCommentText)
- strTmp.append ('-');
- ungetc (); // re-read the character after the single '-' on the next pass
- continue;
- }
- nextChar ('>', "F-022", null); // after "--" only '>' is acceptable
- break oneComment;
- }
- if (saveCommentText)
- strTmp.append ((char)c);
- }
- } catch (EndOfInputException e) {
- //
- // This is fatal EXCEPT when we're processing a PE...
- // in which case a validating processor reports an error.
- // External PEs are easy to detect; internal ones we
- // infer by being an internal entity outside an element.
- //
- if (inExternalPE || (!donePrologue && in.isInternal ())) {
- if (supportValidation && isValidating)
- error ("V-021", null);
- in = in.pop ();
- continue; // keep reading the same comment from the popped-to entity
- }
- fatal ("P-017");
- }
- }
- doLexicalPE = savedLexicalPE;
- if (saveCommentText) {
- // Convert string to array of chars
- int length = strTmp.length();
- char[] charArray = new char[length];
- if (length != 0) {
- // XXX Avoid calling getChars on zero-size array as a
- // workaround for a bug that occurs in at least JDK1.2.2
- // which has since been fixed in JDK1.3
- strTmp.getChars(0, length, charArray, 0);
- }
- lexicalHandler.comment(charArray, 0, length);
- }
- return true;
- }
-
- private boolean maybePI (boolean skipStart)
- throws IOException, SAXException
- {
- // [16] PI ::= '<?' PITarget
- // (S (Char* - (Char* '?>' Char*)))?
- // '?>'
- // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
- boolean savedLexicalPE = doLexicalPE;
-
- if (!in.peek (skipStart ? "?" : "<?", null))
- return false;
- doLexicalPE = false;
-
- String target = maybeGetName ();
-
- if (target == null)
- fatal ("P-018");
- if ("xml".equals (target))
- fatal ("P-019");
- if ("xml".equalsIgnoreCase (target))
- fatal ("P-020", new Object [] { target });
-
- if (maybeWhitespace ()) {
- strTmp = new StringBuffer ();
- try {
- for (;;) {
- // use in.getc to bypass PE processing
- char c = in.getc ();
- //Reached the end of PI.
- if (c == '?' && in.peekc ('>'))
- break;
- strTmp.append (c);
- }
- } catch (EndOfInputException e) {
- fatal ("P-021");
- }
- contentHandler.processingInstruction (target, strTmp.toString ());
- } else {
- if (!in.peek ("?>", null))
- fatal ("P-022");
- contentHandler.processingInstruction (target, "");
- }
-
- doLexicalPE = savedLexicalPE;
- return true;
- }
-
- // [18] CDSect ::= CDStart CData CDEnd
- // [19] CDStart ::= '<![CDATA['
- // [20] CData ::= (Char* - (Char* ']]>' Char*))
- // [21] CDEnd ::= ']]>'
- //
- // ... handled by InputEntity.unparsedContent()
-
-
- private void maybeXmlDecl ()
- throws IOException, SAXException
- {
- // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl?
- // SDDecl? S? '>'
-
- if (!in.isXmlDeclOrTextDeclPrefix()) {
- return;
- }
- // Consume '<?xml'
- peek("<?xml");
-
- readVersion (true, "1.0");
- readEncoding (false);
- readStandalone ();
- maybeWhitespace ();
- if (!peek ("?>")) {
- char c = getc ();
- fatal ("P-023", new Object []
- { Integer.toHexString (c), new Character (c) });
- }
- }
-
- // collapsing several rules together ...
- // simpler than attribute literals -- no reference parsing!
- private String maybeReadAttribute (String name, boolean must)
- throws IOException, SAXException
- {
- // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
- // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
- // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\"
- if (!maybeWhitespace ()) {
- if (!must)
- return null;
- fatal ("P-024", new Object [] { name });
- // NOTREACHED
- }
-
- if (!peek (name))
- if (must)
- fatal ("P-024", new Object [] { name });
- else {
- // To ensure that the whitespace is there so that when we
- // check for the next attribute we assure that the
- // whitespace still exists.
- ungetc ();
- return null;
- }
-
- // [25] Eq ::= S? '=' S?
- maybeWhitespace ();
- nextChar ('=', "F-023", null);
- maybeWhitespace ();
-
- return getQuotedString ("F-035", name);
- }
-
- private void readVersion (boolean must, String versionNum)
- throws IOException, SAXException
- {
- String value = maybeReadAttribute ("version", must);
-
- // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
-
- if (must && value == null)
- fatal ("P-025", new Object [] { versionNum });
- if (value != null) {
- int length = value.length ();
- for (int i = 0; i < length; i++) {
- char c = value.charAt (i);
- if (!( (c >= '0' && c <= '9')
- || c == '_' || c == '.'
- || (c >= 'a' && c <= 'z')
- || (c >= 'A' && c <= 'Z')
- || c == ':' || c == '-')
- )
- fatal ("P-026", new Object [] { value });
- }
- }
- if (value != null && !value.equals (versionNum))
- error ("P-027", new Object [] { versionNum, value });
- }
-
- private void maybeMisc (boolean eofOK)
- throws IOException, SAXException
- {
- // Misc*
- while (!eofOK || !in.isEOF ()) {
- // [27] Misc ::= Comment | PI | S
- if (maybeComment (false)
- || maybePI (false)
- || maybeWhitespace ())
- continue;
- else
- break;
- }
- }
-
- // common code used by most markup declarations
- // ... S (Q)Name ...
- private String getMarkupDeclname (String roleId, boolean qname)
- throws IOException, SAXException
- {
- String name;
-
- whitespace (roleId);
- name = maybeGetName ();
- if (name == null)
- fatal ("P-005", new Object []
- { messages.getMessage (locale, roleId) });
- return name;
- }
- /**
- * Parses the document type declaration, if present: the root element
- * name, an optional external ID, the optional internal subset, and then
- * the external subset when an external ID was given. startDTD and endDTD
- * are reported to the lexical handler. Afterwards the notations table is
- * swept: entries whose value is Boolean.TRUE or a String are reported
- * (when validating) and removed.
- *
- * @return false if the input does not continue with {@code "<!DOCTYPE"};
- * true after the declaration has been processed
- */
- private boolean maybeDoctypeDecl ()
- throws IOException, SAXException
- {
- // [28] doctypedecl ::= '<!DOCTYPE' S Name
- // (S ExternalID)?
- // S? ('[' (markupdecl|PEReference|S)* ']' S?)?
- // '>'
- if (!peek ("<!DOCTYPE")){
- return false;
- }
- else{
- if(disallowDoctypeDecl){
- fatal("P-085", new Object[] {SYSTEM_PROPERTY_DISALLOW_DOCTYPE_DECL} );
- }
- }
-
- ExternalEntity externalSubset = null;
-
- rootElementName = getMarkupDeclname ("F-014", true);
- if (maybeWhitespace ()
- && (externalSubset = maybeExternalID ()) != null) {
- lexicalHandler.startDTD(rootElementName, externalSubset.publicId,
- externalSubset.verbatimSystemId);
- maybeWhitespace ();
- } else {
- lexicalHandler.startDTD(rootElementName, null, null);
- }
- if (in.peekc ('[')) {
- for (;;) {
- //Pop PEs when they are done.
- if (in.isEOF () && !in.isDocument ()) {
- in = in.pop ();
- continue;
- }
- if (maybeMarkupDecl ()
- || maybePEReference ()
- || maybeWhitespace ()
- )
- continue;
- else if (peek ("<![")) // conditional-section syntax is rejected in the internal subset
- fatal ("P-028");
- else
- break;
- }
- nextChar (']', "F-024", null);
- maybeWhitespace ();
- }
- nextChar ('>', "F-025", null);
-
- // [30] extSubset ::= TextDecl? extSubsetDecl
- // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
- // | PEReference | S )*
- // ... same as [79] extPE, which is where the code is
-
- if (externalSubset != null) {
- externalSubset.name = "[dtd]"; // SAX2 ext specifies this name
- externalSubset.isPE = true;
- externalParameterEntity (externalSubset);
- }
-
- // params are no good to anyone starting now -- bye!
- params.clear ();
-
- lexicalHandler.endDTD();
-
- // make sure notations mentioned in attributes
- // and entities were declared ... those are validity
- // errors, but we must always clean up after them!
- Vector v = new Vector (); // names to drop; removal happens after the enumeration ends
-
- for (Enumeration e = notations.keys ();
- e.hasMoreElements ();
- ) {
- String name = (String) e.nextElement ();
- Object value = notations.get (name);
-
- if (value == Boolean.TRUE) {
- if (supportValidation && isValidating)
- error ("V-003", new Object [] { name });
- v.addElement (name);
- } else if (value instanceof String) {
- if (supportValidation && isValidating)
- error ("V-004", new Object [] { name });
- v.addElement (name);
- }
- }
- while (!v.isEmpty ()) {
- Object name = v.firstElement ();
- v.removeElement (name);
- notations.remove (name);
- }
-
- return true;
- }
-
- /**
-  * Tries each kind of markup declaration in turn, stopping at the first
-  * one that matches.
-  *
-  * @return true if an element, attribute-list, entity or notation
-  *         declaration, a processing instruction, or a comment was parsed
-  */
- private boolean maybeMarkupDecl ()
-         throws IOException, SAXException
- {
-     // [29] markupdecl ::= elementdecl | Attlistdecl
-     //                   | EntityDecl | NotationDecl | PI | Comment
-     if (maybeElementDecl ()) {
-         return true;
-     }
-     if (maybeAttlistDecl ()) {
-         return true;
-     }
-     if (maybeEntityDecl ()) {
-         return true;
-     }
-     if (maybeNotationDecl ()) {
-         return true;
-     }
-     if (maybePI (false)) {
-         return true;
-     }
-     return maybeComment (false);
- }
-
-
- private void readStandalone ()
- throws IOException, SAXException
- {
- String value = maybeReadAttribute ("standalone", false);
-
- // [32] SDDecl ::= ... "yes" or "no"
- if (value == null || "no".equals (value))
- return;
- if ("yes".equals (value)) {
- isStandalone = true;
- return;
- }
- fatal ("P-029", new Object [] { value });
- }
-
- private static final String XmlLang = "xml:lang";
-
- /**
-  * Tests whether a string matches the LanguageId production used for
-  * xml:lang values. Only the syntax is checked; whether the codes are
-  * registered is neither a well-formedness nor a validity constraint.
-  *
-  * <p>Every subcode must contain at least one letter, so values with an
-  * empty subcode such as "en--US" or "en-" are rejected.
-  *
-  * @param value the attribute value to test; must not be null
-  * @return true if the value is a syntactically legal language identifier
-  */
- private boolean isXmlLang (String value)
- {
-     // [33] LanguageId ::= Langcode ('-' Subcode)*
-     // [34] Langcode ::= ISO639Code | IanaCode | UserCode
-     // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
-     // [36] IanaCode ::= [iI] '-' SubCode
-     // [37] UserCode ::= [xX] '-' SubCode
-     // [38] SubCode ::= [a-zA-Z]+
-
-     final int length = value.length ();
-     if (length < 2)
-         return false;
-
-     final char first = value.charAt (0);
-     final char second = value.charAt (1);
-     final boolean firstIsLetter =
-         (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z');
-     final boolean secondIsLetter =
-         (second >= 'a' && second <= 'z') || (second >= 'A' && second <= 'Z');
-
-     // Index of the first '-' that introduces a subcode.
-     int pos;
-
-     if (second == '-') {
-         // IANA or user code: the '-' at index 1 starts its mandatory subcode
-         if (!(first == 'i' || first == 'I' || first == 'x' || first == 'X'))
-             return false;
-         pos = 1;
-     } else if (firstIsLetter && secondIsLetter) {
-         // two letter ISO code
-         pos = 2;
-     } else {
-         return false;
-     }
-
-     // Remaining text must be zero or more ('-' [a-zA-Z]+) groups.
-     while (pos < length) {
-         if (value.charAt (pos) != '-')
-             return false;
-
-         final int subcodeStart = ++pos;
-         while (pos < length) {
-             final char c = value.charAt (pos);
-             if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
-                 break;
-             pos++;
-         }
-
-         // A '-' followed by no letters is an empty subcode; the
-         // grammar requires one letter or more.
-         if (pos == subcodeStart)
-             return false;
-     }
-     return true;
- }
-
-
-
- //
- // CHAPTER 3: Logical Structures
- //
- /**
- * Parses one element if an element name follows the already-consumed
- * '<': the start tag with its attributes, then (unless the tag is an
- * empty-element tag) the content and the matching end tag. startElement
- * and endElement are reported to the content handler; with namespace
- * processing enabled a namespace context is pushed and popped around the
- * element and endPrefixMapping is reported for its declared prefixes.
- *
- * @param validator validator for the enclosing content, which is told
- * this element's name; when null, the element name is instead compared
- * with rootElementName if validating
- * @return false if no name could be read at the current position;
- * true after the complete element has been parsed and reported
- */
- private boolean maybeElement (ElementValidator validator)
- throws IOException, SAXException
- {
- // [39] element ::= EmptyElemTag | Stag content ETag
- // [40] STag ::= '<' Name (S Attribute)* S? '>'
-
- NameCacheEntry name;
- ElementDecl element;
- boolean haveAttributes = false;
- boolean hasContent = true;
- int startLine;
-
- // the leading "<" has already been consumed
- name = maybeGetNameCacheEntry ();
-
- // n.b. InputEntity guarantees 1+N char pushback always,
- // and maybeGetName won't use more than one to see if
- // it's instead "<?", "<!--", "<![CDATA[", or an error.
- if (name == null)
- return false;
-
- // XXX Test for namespace conformance here
- // if (namespaces) {
- // some code testing name.name
- // }
-
- // report validity errors ASAP
- if (validator != null)
- validator.consume (name.name);
-
- element = (ElementDecl) elements.get (name.name);
- if (supportValidation && isValidating) {
- if (element == null || element.contentType == null) {
- error ("V-005", new Object [] { name.name });
- // minimize repetitive diagnostics
- element = new ElementDecl (name.name);
- element.contentType = strANY;
- elements.put (name.name, element);
- }
- if (validator == null
- && rootElementName != null
- && !rootElementName.equals (name.name))
- error ("V-006", new Object [] { name.name, rootElementName });
- }
-
- // save the line number here so we can give better diagnostics
- // by identifying where the element started; WF errors may be
- // reported thousands of lines "late".
- startLine = in.getLineNumber ();
-
- // Invariant: attTmp and nsAttTmp are empty except briefly in this
- // method they are not empty iff haveAttributes is true
-
- // Track whether we saw whitespace before an attribute;
- // in some cases it's required, though superfluous
- boolean sawWhite = in.maybeWhitespace ();
-
- // These are exceptions from the first pass; they should be ignored
- // if there's a second pass, but reported otherwise. A second pass
- // occurs when a namespace declaration is found in the first pass.
- Vector exceptions = null;
-
- // SAX2 Namespace processing
- if (namespaces) {
- nsSupport.pushContext();
- seenNSDecl = false;
- }
-
- // Each pass through this loop reads
- // Name eq AttValue S?
- // Loop exits on ">", "/>", error, or when the elementAttributeLimit has been reached
- for (int attributeCount = 0 ; ; attributeCount++ ) {
- if (attributeCount > elementAttributeLimit ){
- fatal ("P-087", new Object[] {new Integer(elementAttributeLimit)});
- }
-
- if (in.peekc ('>'))
- break;
-
- // [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>'
- if (in.peekc ('/')) {
- hasContent = false;
- break;
- }
-
- //Need to have a whitespace between attributes.
- if (!sawWhite)
- fatal ("P-030");
-
- // [41] Attribute ::= Name Eq AttValue
-
- String attQName;
- AttributeDecl info;
- String value;
-
- attQName = maybeGetName ();
- // Need to do this as we have already consumed the
- // whitespace and didn't see the end tag.
- if (attQName == null)
- fatal ("P-031", new Object [] { new Character (getc ()) });
-
- if (attTmp.getValue (attQName) != null)
- fatal ("P-032", new Object [] { attQName });
-
- // [25] Eq ::= S? '=' S?
- in.maybeWhitespace ();
- nextChar ('=', "F-026", attQName);
- in.maybeWhitespace ();
-
- // We are not in the DTD => PEs are not recognized => we no
- // longer need to expand PEs => don't expand PEs in AttValue =>
- // doLexicalPE = false and call parseLiteral(isEntityValue =
- // false) both
- doLexicalPE = false;
- parseLiteral (false);
- // We are no longer in the DTD so we never need to expand PEs
-
- sawWhite = in.maybeWhitespace ();
-
- // normalize and check values right away.
-
- info = (element == null)
- ? null
- : (AttributeDecl) element.attributes.get (attQName);
- if (info == null) {
- if (supportValidation && isValidating)
- error ("V-007", new Object [] { attQName, name.name });
- value = strTmp.toString ();
- } else {
- if (!AttributeDecl.CDATA.equals (info.type)) {
- value = normalize (!info.isFromInternalSubset);
- if (supportValidation && isValidating)
- validateAttributeSyntax (info, value);
- } else
- value = strTmp.toString ();
- if (supportValidation && isValidating
- && info.isFixed
- && !value.equals (info.defaultValue))
- error ("V-008",
- new Object [] {attQName, name.name, info.defaultValue});
- }
-
- // assert(value != null)
-
- if (XmlLang.equals (attQName) && !isXmlLang (value))
- error ("P-033", new Object [] { value });
-
- String type = (info == null) ? AttributeDecl.CDATA : info.type;
- String defaultValue = (info == null) ? null : info.defaultValue;
-
- if (namespaces) {
- exceptions = processAttributeNS(attQName, type, value,
- defaultValue, true, false,
- exceptions);
- } else {
- // No namespaces case
- attTmp.addAttribute("", "", attQName, type, value,
- defaultValue, true);
- }
-
- haveAttributes = true;
- }
- if (element != null)
- attTmp.setIdAttributeName (element.id);
-
- // if we had ATTLIST decls, handle required & defaulted attributes
- // before telling next layer about this element
- if (element != null && element.attributes.size () != 0) {
- haveAttributes = defaultAttributes(element) || haveAttributes; // call comes first so defaulting always runs
- }
-
- // Ensure that this element's namespace declarations apply to all of
- // this element's attributes as well. If there was a Namespace
- // declaration, we have to make a second pass just to be safe -- this
- // will happen very rarely, possibly only once for each document.
- if (seenNSDecl) {
- // assert(namespaces == true)
- int length = attTmp.getLength();
- for (int i = 0; i < length; i++) {
- String attQName = attTmp.getQName(i);
- if (attQName.startsWith("xmlns")) {
- // Could be a namespace declaration
-
- if (attQName.length() == 5 || attQName.charAt(5) == ':') {
- // Default or non-default NS declaration
- continue;
- }
- }
-
- // assert(not a namespace declaration)
- String attName[] = processName(attQName, true, false);
- attTmp.setURI(i, attName[0]);
- attTmp.setLocalName(i, attName[1]);
- }
- } else if (exceptions != null && errHandler != null) {
- for (int i = 0; i < exceptions.size(); i++) {
- errHandler.error((SAXParseException)(exceptions.elementAt(i)));
- }
- }
-
- // OK, finally report the event.
- if (namespaces) {
- String[] parts = processName(name.name, false, false);
- contentHandler.startElement(parts[0], parts[1], parts[2], attTmp);
- } else {
- contentHandler.startElement("", "", name.name, attTmp);
- }
-
- // Clear temporaries only when necessary because this may be
- // expensive and a doc may have lots of elements w/o attributes
- if (haveAttributes) {
- attTmp.clear();
- if (supportValidation && isValidating && namespaces && !prefixes) {
- nsAttTmp.removeAllElements();
- }
- }
-
- // prepare to validate the content of this element.
- // in nonvalidating parsers, this accepts ANY content
- validator = newValidator (element); // from here on "validator" is this element's own
-
- if (hasContent) {
- content (element, false, validator);
-
- // [42] ETag ::= '</' Name S? '>'
- // ... content swallowed "</"
-
- if (!in.peek (name.name, name.chars))
- fatal ("P-034", new Object []
- { name.name, new Integer (startLine) });
- in.maybeWhitespace ();
- }
-
- nextChar ('>', "F-027", name.name);
- validator.done ();
-
- if (namespaces) {
- // Split the name. Unfortunately, we can't always reuse the
- // info from the startElement event above b/c this element may
- // have subelements and a global temporary is used.
- String[] parts = processName(name.name, false, false);
-
- // Report appropriate events...
- contentHandler.endElement(parts[0], parts[1], parts[2]);
- Enumeration prefixes = nsSupport.getDeclaredPrefixes(); // NOTE: this local hides the "prefixes" flag used earlier in the method
- while (prefixes.hasMoreElements()) {
- String prefix = (String)prefixes.nextElement();
- contentHandler.endPrefixMapping(prefix);
- }
- nsSupport.popContext();
- } else {
- contentHandler.endElement("", "", name.name);
- }
-
- return true;
- }
-
- /**
- * Process attributes for namespace support. This is mostly common
- * code that gets called from two places and was factored out. The
- * <code>isDefaulting</code> param specifies where the code is called
- * from.
- *
- * @param isDefaulting true iff we are processing this attribute from
- * the <code>defaultAttributes(...)</code> method
- *
- * The namespace processing code is derived from the SAX2 ParserAdapter
- * code. This code should be kept in sync with ParserAdapter bug
- * fixes.
- *
- * Note: Modifies <code>seenNSDecl</code> iff a xmlns attribute, ie a
- * namespace decl, was found. Modifies <code>attTmp</code> and
- * <code>nsAttTmp</code>.
- */
- private Vector processAttributeNS(String attQName, String type,
- String value, String defaultValue,
- boolean isSpecified, boolean isDefaulting,
- Vector exceptions)
- throws SAXException
- {
- // assert(namespaces == true)
-
- nonNamespace:
- if (attQName.startsWith("xmlns")) {
- // Could be a namespace declaration
-
- boolean defaultNSDecl = attQName.length() == 5;
- if (!defaultNSDecl && attQName.charAt(5) != ':') {
- // Not a namespace declaration
- break nonNamespace;
- }
-
- // Must be some kind of namespace declaration
- String prefix;
- if (defaultNSDecl) {
- // Default namespace, so use empty string as prefix
- prefix = "";
- } else {
- // Non-default namespace decl, extract the prefix
- prefix = attQName.substring(6);
- }
-
- if (!nsSupport.declarePrefix(prefix, value)) {
- error("P-083", new Object[] { prefix });
- }
- contentHandler.startPrefixMapping(prefix, value);
-
- // We may need to add this attribute to appropriate lists
- if (prefixes) {
- attTmp.addAttribute("", prefix, attQName.intern(),
- type, value, defaultValue, isSpecified);
- } else if (supportValidation && isValidating && !isDefaulting) {
- // Add this namespace attribute to a different list that
- // will be used to check for #REQUIRED attributes later.
- // Since "prefixes" is false, these are not reported to the
- // ContentHandler. This step is not needed during the
- // second pass of attribute processing where default values
- // are provided.
- nsAttTmp.addElement(attQName);
- }
- seenNSDecl = true;
- return exceptions;
- }
-
- // This isn't a namespace declaration.
- try {
- String attName[] = processName(attQName, true, true);
- attTmp.addAttribute(attName[0], attName[1], attName[2], type,
- value, defaultValue, isSpecified);
- } catch (SAXException e) {
- if (exceptions == null) {
- exceptions = new Vector();
- }
- exceptions.addElement(e);
- attTmp.addAttribute("", attQName, attQName, type, value,
- defaultValue, isSpecified);
- }
- return exceptions;
- }
-
- /**
- * Process a qualified (prefixed) name.
- *
- * <p>If the name has an undeclared prefix, use only the qname
- * and make an ErrorHandler.error callback in case the app is
- * interested.</p>
- *
- * @param qName The qualified (prefixed) name.
- * @param isAttribute true if this is an attribute name.
- * @return The name split into three parts.
- * @exception org.xml.sax.SAXException The client may throw
- * an exception if there is an error callback.
- */
- private String[] processName(String qName, boolean isAttribute,
- boolean useException)
- throws SAXException
- {
- // assert(namespaces == true)
- String parts[] = nsSupport.processName(qName, namePartsTmp,
- isAttribute);
- if (parts == null) {
- parts = new String[3];
- // SAX should use "" instead of null for parts 0 and 1 ???
- parts[0] = "";
- String localName = XmlNames.getLocalPart(qName);
- parts[1] = localName != null ? localName.intern() : "";
- parts[2] = qName.intern();
-
- String messageId = "P-084";
- Object[] parameters = new Object[] { qName };
- if (useException) {
- throw new SAXParseException(
- messages.getMessage(locale, messageId, parameters),
- locator);
- }
- error(messageId, parameters);
- }
- return parts;
- }
-
-
- /**
- * To validate, subclassers should create an object that can
- * accept valid streams of element names, text, and terminate.
- */
- // package private ... overriden in validating subclass
- ElementValidator newValidator (ElementDecl element)
- {
- return ElementValidator.ANY; // "ANY" content is OK
- }
-
-
- /**
- * To validate, subclassers should at this time make sure that
- * values are of the declared types:<UL>
- * <LI> ID and IDREF(S) values are Names
- * <LI> NMTOKEN(S) are Nmtokens
- * <LI> ENUMERATION values match one of the tokens
- * <LI> NOTATION values match a notation name
- * <LI> ENTITIY(IES) values match an unparsed external entity
- * </UL>
- *
- * <P> Separately, make sure IDREF values match some ID
- * provided in the document (in the afterRoot method).
- */
- // package private
- void validateAttributeSyntax (AttributeDecl attr, String value)
- throws SAXException
- {
- return;
- }
-
- /**
- * Provide default attributes for an element and check for #REQUIRED
- * attributes.
- *
- * Note: this method accesses <code>attTmp</code> and
- * <code>nsAttTmp</code>
- */
- private boolean defaultAttributes(ElementDecl element)
- throws SAXException
- {
- boolean didDefault = false;
-
- // Go through all declared attributes and:
- // 1) Default anything the document didn't provide.
- // 2) Check #REQUIRED values.
- for (Enumeration e = element.attributes.keys();
- e.hasMoreElements(); ) {
-
- // Declared attribute name
- String declAttName = (String)e.nextElement();
-
- if (attTmp.getValue(declAttName) != null) {
- // Attribute already has value so no defaulting necessary
- continue;
- }
-
- // If we get here, then declared attribute is not in the list
- // of attributes to be reported to ContentHandler.
-
- // Get more info on the declared attribute
- AttributeDecl info =
- (AttributeDecl)element.attributes.get(declAttName);
-
- // If this is a #REQUIRED attribute...
- if (supportValidation && isValidating && info.isRequired) {
- // Under certain conditions, check the auxiliary nsAttTmp
- // list for #REQUIRED attributes since these are not in the
- // list to be reported to the ContentHandler.
- if (namespaces && !prefixes) {
- if (nsAttTmp.contains(declAttName)) {
- // Namespace attribute is #REQUIRED and already has
- // a value
- continue;
- }
- }
- error("V-009", new Object [] { declAttName });
- }
-
- String defaultValue = info.defaultValue;
- if (defaultValue != null) {
- if (supportValidation && isValidating
- && isStandalone && !info.isFromInternalSubset)
- error ("V-010", new Object [] { declAttName });
-
- if (namespaces) {
- processAttributeNS(declAttName, info.type, defaultValue,
- defaultValue, false, true, null);
- } else {
- attTmp.addAttribute("", "", declAttName, info.type,
- defaultValue, defaultValue, false);
- }
- didDefault = true;
- }
- }
- return didDefault;
- }
-
- // parses content inside a given element (or parsed entity), optionally
- // allowing EOF (when expanding internal or external entities) and
- // optionally validating elements/#PCDATA that we see
- private void content (
- ElementDecl element,
- boolean allowEOF,
- ElementValidator validator
- ) throws IOException, SAXException
- {
- for (;;) {
- // [43] content ::= (element|CharData|Reference
- // |CDSect|PI|Comment)*
-
- // markup?
- if (in.peekc ('<')) {
- if (maybeElement (validator))
- continue;
-
- // Three cases: Error, and either EOF or ETag.
- // Here we check Etag as a common exit.
- if (in.peekc ('/'))
- return;
-
- // Less commonly, it's a comment, PI, CDATA ...
- if (maybeComment (true) || maybePI (true))
- continue;
-
- // ... CDATA are specially delimited characters; can be
- // #PCDATA or whitespace (the latter has validity issues).
- if (in.peek("![CDATA[", null)) {
- lexicalHandler.startCDATA();
- in.unparsedContent(contentHandler, validator,
- (element != null) && element.ignoreWhitespace,
- (isStandalone
- && supportValidation && isValidating
- && !element.isFromInternalSubset)
- ? "V-023"
- : null
- );
- lexicalHandler.endCDATA();
- continue;
- }
-
- // ... or a grammatical error (WF violation).
- char c = getc ();
-
- fatal ("P-079", new Object [] {
- Integer.toHexString (c), new Character (c) });
- // NOTREACHED
- }
-
- // characters? ... whitespace or #PCDATA
- if (element != null
- && element.ignoreWhitespace
- && in.ignorableWhitespace (contentHandler)) {
- // XXX prefer to report validity error before the
- // whitespace was reported ...
- if (supportValidation && isValidating
- && isStandalone && !element.isFromInternalSubset)
- error ("V-011", new Object [] { element.name });
- continue;
- }
- if (in.parsedContent (contentHandler, validator))
- continue;
-
- if (in.isEOF ())
- break;
-
- // else MUST be an entity reference
- if (!maybeReferenceInContent (element, validator))
- throw new InternalError ();
- }
- if (!allowEOF)
- fatal ("P-035");
- }
-
- private boolean maybeElementDecl ()
- throws IOException, SAXException
- {
- // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
- // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
- InputEntity start = peekDeclaration ("!ELEMENT");
-
- if (start == null)
- return false;
-
- // n.b. for content models where inter-element whitespace is
- // ignorable, we mark that fact here.
- String name = getMarkupDeclname ("F-015", true);
- ElementDecl element = (ElementDecl) elements.get (name);
- boolean declEffective = false;
-
- if (element != null) {
- if (element.contentType != null) {
- if (supportValidation && isValidating
- && element.contentType != null)
- error ("V-012", new Object [] { name });
- // don't override previous declaration
- element = new ElementDecl (name);
- } // else <!ATTLIST name ...> came first
- } else {
- element = new ElementDecl (name);
- if (!ignoreDeclarations) {
- elements.put (element.name, element);
- declEffective = true;
- }
- }
- element.isFromInternalSubset = !inExternalPE;
-
- whitespace ("F-000");
- if (peek (strEMPTY)) {
- element.contentType = strEMPTY;
- element.ignoreWhitespace = true;
- } else if (peek (strANY)) {
- element.contentType = strANY;
- element.ignoreWhitespace = false;
- } else
- element.contentType = getMixedOrChildren (element);
-
- maybeWhitespace ();
- char c = getc ();
- if (c != '>')
- fatal ("P-036", new Object [] { name, new Character (c) });
- if (supportValidation && isValidating && start != in)
- error ("V-013", null);
-
- if (declEffective) {
- declHandler.elementDecl(element.name, element.contentType);
- }
- return true;
- }
-
- // We're leaving the content model as a regular expression;
- // it's an efficient natural way to express such things, and
- // libraries often interpret them. No whitespace in the
- // model we store, though!
-
- private String getMixedOrChildren (ElementDecl element)
- throws IOException, SAXException
- {
- InputEntity start;
-
- // [47] children ::= (choice|seq) ('?'|'*'|'+')?
- strTmp = new StringBuffer ();
-
- nextChar ('(', "F-028", element.name);
- start = in;
- maybeWhitespace ();
- strTmp.append ('(');
-
- if (peek ("#PCDATA")) {
- strTmp.append ("#PCDATA");
- getMixed (element.name, start);
- element.ignoreWhitespace = false;
- } else {
- element.model = getcps (element.name, start);
- element.ignoreWhitespace = true;
- }
- return strTmp.toString ();
- }
-
- // package private -- overridden by validating subclass
- ContentModel newContentModel (String tag)
- {
- return null;
- }
-
- // package private -- overridden by validating subclass
- ContentModel newContentModel (char type, ContentModel next)
- {
- return null;
- }
- /**
- * Parses the remainder of a choice or sequence group of a children
- * content model, through its closing ')' and any frequency marker after
- * it. The whitespace-free text of the group is appended to strTmp, and
- * nested groups are handled by recursion.
- *
- * @param element name of the element being declared; reported with "V-014"
- * @param start the input entity that was current when this group's '('
- * was read; when validating, finishing in a different entity is
- * reported as "V-014"
- * @return the result of getFrequency() applied to the model built for
- * this group through newContentModel()
- */
- // '(' S? already consumed
- // matching ')' must be in "start" entity if validating
- private ContentModel getcps (
- String element,
- InputEntity start
- ) throws IOException, SAXException
- {
- // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
- // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
- // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
- boolean decided = false; // becomes true once '|' or ',' has been seen as this group's separator
- char type = 0;
- ContentModel retval, current, temp;
-
- retval = current = temp = null;
-
- do {
- String tag;
-
- tag = maybeGetName ();
- if (tag != null) {
- strTmp.append (tag);
- temp = getFrequency (newContentModel (tag));
- } else if (peek ("(")) {
- InputEntity next = in;
- strTmp.append ('(');
- maybeWhitespace ();
- temp = getFrequency (getcps (element, next));
- } else
- fatal ((type == 0) ? "P-039" :
- ((type == ',') ? "P-037" : "P-038"),
- new Object [] { new Character (getc ()) });
-
- maybeWhitespace ();
- if (decided) {
- char c = getc ();
-
- if (current != null) {
- current.next = newContentModel (type, temp);
- current = current.next;
- }
- if (c == type) {
- strTmp.append (type);
- maybeWhitespace ();
- continue;
- } else if (c == '\u0029') { // rparen
- ungetc (); // leave ')' for the loop condition to consume
- continue;
- } else {
- fatal ((type == 0) ? "P-041" : "P-040",
- new Object [] {
- new Character (c),
- new Character (type)
- });
- }
- } else {
- type = getc ();
- if (type == '|' || type == ',') {
- decided = true;
- retval = current = newContentModel (type, temp);
- } else {
- retval = current = temp;
- ungetc (); // not a separator; put it back
- continue;
- }
- strTmp.append (type);
- }
- maybeWhitespace ();
- } while (!peek (")"));
- if (supportValidation && isValidating && in != start)
- error ("V-014", new Object [] { element });
- strTmp.append (')');
- return getFrequency (retval);
- }
-
- private ContentModel getFrequency (ContentModel original)
- throws IOException, SAXException
- {
- char c = getc ();
-
- if (c == '?' || c == '+' || c == '*') {
- strTmp.append (c);
- if (original == null)
- return null;
- if (original.type == 0) { // foo* etc
- original.type = c;
- return original;
- }
- return newContentModel (c, original);
- } else {
- ungetc ();
- return original;
- }
- }
-
- // '(' S? '#PCDATA' already consumed
- // matching ')' must be in "start" entity if validating
- private void getMixed (String element, InputEntity start)
- throws IOException, SAXException
- {
- // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
- // | '(' S? '#PCDATA' S? ')'
- maybeWhitespace ();
- if (peek ("\u0029*") || peek ("\u0029")) {
- if (supportValidation && isValidating && in != start)
- error ("V-014", new Object [] { element });
- strTmp.append (')');
- return;
- }
-
- Vector v = null;
-
- if (supportValidation && isValidating)
- v = new Vector ();
-
- while (peek ("|")) {
- String name;
-
- strTmp.append ('|');
- maybeWhitespace ();
-
- name = maybeGetName ();
- if (name == null)
- fatal ("P-042", new Object []
- { element, Integer.toHexString (getc ()) });
- if (supportValidation && isValidating) {
- if (v.contains (name))
- error ("V-015", new Object [] { name });
- else
- v.addElement (name);
- }
- strTmp.append (name);
- maybeWhitespace ();
- }
-
- if (!peek ("\u0029*")) // right paren
- fatal ("P-043", new Object []
- { element, new Character (getc ()) });
- if (supportValidation && isValidating && in != start)
- error ("V-014", new Object [] { element });
- strTmp.append (')');
- }
-
- private boolean maybeAttlistDecl ()
- throws IOException, SAXException
- {
- // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
- InputEntity start = peekDeclaration ("!ATTLIST");
-
- if (start == null)
- return false;
-
- String name = getMarkupDeclname ("F-016", true);
- ElementDecl element = (ElementDecl) elements.get (name);
-
- if (element == null) {
- // not yet declared -- no problem.
- element = new ElementDecl (name);
- if (!ignoreDeclarations)
- elements.put (name, element);
- }
-
- maybeWhitespace ();
- while (!peek (">")) {
-
- // [53] AttDef ::= S Name S AttType S DefaultDecl
- // [54] AttType ::= StringType | TokenizedType | EnumeratedType
- name = maybeGetName ();
- if (name == null)
- fatal ("P-044", new Object [] { new Character (getc ()) });
- whitespace ("F-001");
-
- AttributeDecl a = new AttributeDecl (name);
- a.isFromInternalSubset = !inExternalPE;
-
- // Note: use the type constants from AttributeDecl
- // so that "==" may be used (faster)
-
- // [55] StringType ::= 'CDATA'
- if (peek (AttributeDecl.CDATA))
- a.type = AttributeDecl.CDATA;
-
- // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
- // | 'ENTITY' | 'ENTITIES'
- // | 'NMTOKEN' | 'NMTOKENS'
- // n.b. if "IDREFS" is there, both "ID" and "IDREF"
- // match peekahead ... so this order matters!
- else if (peek (AttributeDecl.IDREFS))
- a.type = AttributeDecl.IDREFS;
- else if (peek (AttributeDecl.IDREF))
- a.type = AttributeDecl.IDREF;
- else if (peek (AttributeDecl.ID)) {
- a.type = AttributeDecl.ID;
- if (element.id != null) {
- if (supportValidation && isValidating)
- error ("V-016", new Object [] { element.id });
- } else
- element.id = name;
- } else if (peek (AttributeDecl.ENTITY))
- a.type = AttributeDecl.ENTITY;
- else if (peek (AttributeDecl.ENTITIES))
- a.type = AttributeDecl.ENTITIES;
- else if (peek (AttributeDecl.NMTOKENS))
- a.type = AttributeDecl.NMTOKENS;
- else if (peek (AttributeDecl.NMTOKEN))
- a.type = AttributeDecl.NMTOKEN;
-
- // [57] EnumeratedType ::= NotationType | Enumeration
- // [58] NotationType ::= 'NOTATION' S '(' S? Name
- // (S? '|' S? Name)* S? ')'
- else if (peek (AttributeDecl.NOTATION)) {
- a.type = AttributeDecl.NOTATION;
- whitespace ("F-002");
- nextChar ('(', "F-029", null);
- maybeWhitespace ();
-
- Vector v = new Vector ();
- do {
- if ((name = maybeGetName ()) == null)
- fatal ("P-068");
- // permit deferred declarations
- if (supportValidation && isValidating
- && notations.get (name) == null)
- notations.put (name, name);
- v.addElement (name);
- maybeWhitespace ();
- if (peek ("|"))
- maybeWhitespace ();
- } while (!peek (")"));
- a.values = new String [v.size ()];
- for (int i = 0; i < v.size (); i++)
- a.values [i] = (String)v.elementAt (i);
-
- // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
- } else if (peek ("(")) {
- a.type = AttributeDecl.ENUMERATION;
- maybeWhitespace ();
-
- Vector v = new Vector ();
- do {
- name = getNmtoken ();
- v.addElement (name);
- maybeWhitespace ();
- if (peek ("|"))
- maybeWhitespace ();
- } while (!peek (")"));
- a.values = new String [v.size ()];
- for (int i = 0; i < v.size (); i++)
- a.values [i] = (String)v.elementAt (i);
- } else
- fatal ("P-045",
- new Object [] { name, new Character (getc ()) });
-
-
- // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
- // | (('#FIXED' S)? AttValue)
- whitespace ("F-003");
- if (peek ("#REQUIRED")) {
- a.valueDefault = AttributeDecl.REQUIRED;
- a.isRequired = true;
- } else if (peek ("#FIXED")) {
- if (supportValidation && isValidating
- && a.type == AttributeDecl.ID)
- error ("V-017", new Object [] { a.name });
- a.valueDefault = AttributeDecl.FIXED;
- a.isFixed = true;
- whitespace ("F-004");
-
- // Don't expand PEs in AttValue => doLexicalPE = false and
- // call parseLiteral(isEntityValue = false) both
- doLexicalPE = false;
- parseLiteral(false);
-
- // We are in DTD so set this back to true
- doLexicalPE = true;
-
- if (a.type != AttributeDecl.CDATA)
- a.defaultValue = normalize (false);
- else
- a.defaultValue = strTmp.toString ();
- if (a.type != AttributeDecl.CDATA)
- validateAttributeSyntax (a, a.defaultValue);
- } else if (peek ("#IMPLIED")) {
- a.valueDefault = AttributeDecl.IMPLIED;
- } else {
- if (supportValidation && isValidating
- && a.type == AttributeDecl.ID)
- error ("V-018", new Object [] { a.name });
- // By default a.valueDefault == null here
-
- // Don't expand PEs in AttValue => doLexicalPE = false and
- // call parseLiteral(isEntityValue = false) both
- doLexicalPE = false;
- parseLiteral(false);
-
- // We are in DTD so set this back to true
- doLexicalPE = true;
-
- if (a.type != AttributeDecl.CDATA)
- a.defaultValue = normalize (false);
- else
- a.defaultValue = strTmp.toString ();
- if (a.type != AttributeDecl.CDATA)
- validateAttributeSyntax (a, a.defaultValue);
- }
-
- if (XmlLang.equals (a.name)
- && a.defaultValue != null
- && !isXmlLang (a.defaultValue))
- error ("P-033", new Object [] { a.defaultValue });
-
- if (!ignoreDeclarations
- && element.attributes.get (a.name) == null) {
- element.attributes.put (a.name, a);
-
- // Report attribute declaration to SAX DeclHandler
- String saxType;
- if (a.type == AttributeDecl.ENUMERATION
- || a.type == AttributeDecl.NOTATION) {
- StringBuffer fullType = new StringBuffer();
-
- if (a.type == AttributeDecl.NOTATION) {
- fullType.append(a.type);
- fullType.append(" ");
- }
-
- if (a.values.length > 1) {
- fullType.append("(");
- }
- for (int i = 0; i < a.values.length; i++) {
- fullType.append(a.values[i]);
- if (i + 1 < a.values.length) {
- fullType.append("|");
- }
- }
- if (a.values.length > 1) {
- fullType.append(")");
- }
-
- saxType = fullType.toString();
- } else {
- saxType = a.type;
- }
- declHandler.attributeDecl(element.name, a.name, saxType,
- a.valueDefault, a.defaultValue);
- }
- maybeWhitespace ();
- }
- if (supportValidation && isValidating && start != in)
- error ("V-013", null);
- return true;
- }
-
- // used when parsing literal attribute values,
- // or public identifiers.
- //
- // input in strTmp
- private String normalize (boolean invalidIfNeeded)
- throws SAXException
- {
- // this can allocate an extra string...
-
- String s = strTmp.toString ();
- String s2 = s.trim ();
- boolean didStrip = false;
-
- if (s != s2) {
- s = s2;
- s2 = null;
- didStrip = true;
- }
- strTmp = new StringBuffer ();
- for (int i = 0; i < s.length (); i++) {
- char c = s.charAt (i);
- if (!XmlChars.isSpace (c)) {
- strTmp.append (c);
- continue;
- }
- strTmp.append (' ');
- while (++i < s.length () && XmlChars.isSpace (s.charAt (i)))
- didStrip = true;
- i--;
- }
- if (supportValidation && isValidating && isStandalone) {
- if (invalidIfNeeded && (s2 == null || didStrip))
- // XXX would like to tell the name of the attribute
- // which shouldn't have needed normalization
- error ("V-019", null);
- }
- if (didStrip)
- return strTmp.toString ();
- else
- return s;
- }
-
- private boolean maybeConditionalSect ()
- throws IOException, SAXException
- {
- // [61] conditionalSect ::= includeSect | ignoreSect
-
- if (!peek ("<!["))
- return false;
-
- String keyword;
- InputEntity start = in;
-
- maybeWhitespace ();
-
- if ((keyword = maybeGetName ()) == null)
- fatal ("P-046");
- maybeWhitespace ();
- nextChar ('[', "F-030", null);
-
- // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
- // extSubsetDecl ']]>'
- if ("INCLUDE".equals (keyword)) {
- for (;;) {
- while (in.isEOF () && in != start)
- in = in.pop ();
- if (in.isEOF ()) {
- if (supportValidation && isValidating)
- error ("V-020", null);
- in = in.pop ();
- }
- if (peek ("]]>"))
- break;
-
- doLexicalPE = false;
- if (maybeWhitespace ())
- continue;
- if (maybePEReference ())
- continue;
- doLexicalPE = true;
- if (maybeMarkupDecl () || maybeConditionalSect ())
- continue;
-
- fatal ("P-047");
- }
-
- // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
- // ignoreSectcontents ']]>'
- // [64] ignoreSectcontents ::= Ignore ('<!['
- // ignoreSectcontents ']]>' Ignore)*
- // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
- } else if ("IGNORE".equals (keyword)) {
- int nestlevel = 1;
- // ignoreSectcontents
- doLexicalPE = false;
- while (nestlevel > 0) {
- char c = getc (); // will pop input entities
- if (c == '<') {
- if (peek ("!["))
- nestlevel++;
- } else if (c == ']') {
- if (peek ("]>"))
- nestlevel--;
- } else
- continue;
- }
- } else
- fatal ("P-048", new Object [] { keyword });
- return true;
- }
-
-
- //
- // CHAPTER 4: Physical Structures
- //
-
- private boolean maybeReferenceInContent (
- ElementDecl element,
- ElementValidator validator
- ) throws IOException, SAXException
- {
- // [66] CharRef ::= ('' [0-9]+) | ('' [0-9a-fA-F]*) ';'
- // [67] Reference ::= EntityRef | CharRef
- // [68] EntityRef ::= '&' Name ';'
- if (!in.peekc ('&'))
- return false;
-
- if (!in.peekc ('#')) {
- String name = maybeGetName ();
- if (name == null)
- fatal ("P-009");
- nextChar (';', "F-020", name);
- expandEntityInContent (element, name, validator);
- return true;
- }
-
- validator.text ();
- contentHandler.characters (charTmp, 0,
- surrogatesToCharTmp (parseCharNumber ()));
- return true;
- }
-
- // parse decimal or hex numeric character reference
- private int parseCharNumber ()
- throws SAXException, IOException
- {
- char c;
- int retval = 0;
-
- // n.b. we ignore overflow ...
- if (getc () != 'x') {
- ungetc ();
- for (;;) {
- c = getc ();
- if (c >= '0' && c <= '9') {
- retval *= 10;
- retval += (c - '0');
- continue;
- }
- if (c == ';')
- return retval;
- fatal ("P-049");
- }
- } else for (;;) {
- c = getc ();
- if (c >= '0' && c <= '9') {
- retval <<= 4;
- retval += (c - '0');
- continue;
- }
- if (c >= 'a' && c <= 'f') {
- retval <<= 4;
- retval += 10 + (c - 'a');
- continue;
- }
- if (c >= 'A' && c <= 'F') {
- retval <<= 4;
- retval += 10 + (c - 'A');
- continue;
- }
- if (c == ';')
- return retval;
- fatal ("P-050");
- }
- }
-
- // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
- // though still subject to the 'Char' construct in XML
- private int surrogatesToCharTmp (int ucs4)
- throws SAXException
- {
- if (ucs4 <= 0xffff) {
- if (XmlChars.isChar (ucs4)) {
- charTmp [0] = (char) ucs4;
- return 1;
- }
- } else if (ucs4 <= 0x0010ffff) {
- // we represent these as UNICODE surrogate pairs
- ucs4 -= 0x10000;
- charTmp [0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
- charTmp [1] = (char) (0xdc00 | (ucs4 & 0x03ff));
- return 2;
- }
- fatal ("P-051", new Object [] { Integer.toHexString (ucs4) });
- // NOTREACHED
- return -1;
- }
-
- private void expandEntityInContent (
- ElementDecl element,
- String name,
- ElementValidator validator
- ) throws SAXException, IOException
- {
- Object entity = entities.get (name);
- InputEntity last = in;
-
- if (entity == null) {
- //
- // Note: much confusion about whether spec requires such
- // errors to be fatal in many cases, but none about whether
- // it allows "normal" errors to be unrecoverable!
- //
- fatal ("P-014", new Object [] { name });
- }
-
- //throw fatal error when entity expansion count reaches the limit set by application
- //if we don't want to have any costraint on number of entity that can be expanaded
- //set the DEFAULT_ENTITY_EXPANSION_LIMIT to -1.
- if(entityExpansionLimit != -1 && entityExpansionCount++ >= entityExpansionLimit){
- fatal ("P-086", new Object[] {new Integer(entityExpansionLimit)});
- };
-
- //only for SECURITY_DEBUG
- //System.out.println("Entity Expansion Count = " + entityExpansionCount + " :: Entity Name = " + name );
-
- if (entity instanceof InternalEntity) {
- InternalEntity e = (InternalEntity) entity;
-
- //
- // we need to expand both entities and markup here...
- //
- if (supportValidation && isValidating
- && isStandalone
- && !e.isFromInternalSubset)
- error ("V-002", new Object [] { name });
- pushReader (e.buf, name, true);
- content (element, true, validator);
- if (in != last && !in.isEOF ()) {
- while (in.isInternal ())
- in = in.pop ();
- fatal ("P-052", new Object [] { name });
- }
- lexicalHandler.endEntity(name);
- in = in.pop ();
- } else if (entity instanceof ExternalEntity) {
- ExternalEntity e = (ExternalEntity) entity;
- if (e.notation != null)
- fatal ("P-053", new Object [] { name });
-
- if (supportValidation && isValidating
- && isStandalone
- && !e.isFromInternalSubset)
- error ("V-002", new Object [] { name });
-
- externalParsedEntity (element, e, validator);
- } else
- throw new InternalError (name);
- }
-
- private boolean maybePEReference ()
- throws IOException, SAXException
- {
- // This is the SYNTACTIC version of this construct.
- // When processing external entities, there is also
- // a LEXICAL version; see getc() and doLexicalPE.
-
- // [69] PEReference ::= '%' Name ';'
- if (!in.peekc ('%'))
- return false;
-
- String name = maybeGetName ();
- Object entity;
-
- if (name == null)
- fatal ("P-011");
- nextChar (';', "F-021", name);
- entity = params.get (name);
-
- if (entity instanceof InternalEntity) {
- InternalEntity value = (InternalEntity) entity;
- pushReader (value.buf, name, false);
-
- } else if (entity instanceof ExternalEntity) {
- externalParameterEntity ((ExternalEntity)entity);
-
- } else if (entity == null) {
- //
- // NOTE: by treating undefined parameter entities as
- // nonfatal, we are assuming that the contradiction
- // between them being a WFC versus a VC is resolved in
- // favor of the latter. Further, we are assuming that
- // validating parsers should behave like nonvalidating
- // ones in such a case: ignoring further declarations.
- //
- ignoreDeclarations = true;
- if (supportValidation && isValidating)
- error ("V-022", new Object [] { name });
- else
- warning ("V-022", new Object [] { name });
- }
- return true;
- }
-
- private boolean maybeEntityDecl ()
- throws IOException, SAXException
- {
- // [70] EntityDecl ::= GEDecl | PEDecl
- // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
- // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>'
- // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
- // [74] PEDef ::= EntityValue | ExternalID
- //
- InputEntity start = peekDeclaration ("!ENTITY");
-
- if (start == null)
- return false;
-
- String entityName;
- SimpleHashtable defns;
- ExternalEntity externalId;
- boolean doStore;
-
- // PE expansion gets selectively turned off several places:
- // in ENTITY declarations (here), in comments, in PIs.
-
- // Here, we allow PE entities to be declared, and allows
- // literals to include PE refs without the added spaces
- // required with their expansion in markup decls.
-
- doLexicalPE = false;
- whitespace ("F-005");
- if (in.peekc ('%')) {
- whitespace ("F-006");
- defns = params;
- } else
- defns = entities;
-
- ungetc (); // leave some whitespace
- doLexicalPE = true;
- entityName = getMarkupDeclname ("F-017", false);
- whitespace ("F-007");
- externalId = maybeExternalID ();
-
- //
- // first definition sticks ... e.g. internal subset PEs are used
- // to override DTD defaults. It's also an "error" to incorrectly
- // redefine builtin internal entities, but since reporting such
- // errors is optional we only give warnings ("just in case") for
- // non-parameter entities.
- //
- doStore = (defns.get (entityName) == null);
- if (!doStore && defns == entities)
- warning ("P-054", new Object [] { entityName });
-
- // if we skipped a PE, ignore declarations since the
- // PE might have included an ovrriding declaration
- doStore &= !ignoreDeclarations;
-
- // internal entities
- if (externalId == null) {
- char value [];
- InternalEntity entity;
-
- doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd"
- parseLiteral (true);
- doLexicalPE = true;
- if (doStore) {
- value = new char [strTmp.length ()];
- if (value.length != 0)
- strTmp.getChars (0, value.length, value, 0);
- entity = new InternalEntity (entityName, value);
- entity.isPE = (defns == params);
- entity.isFromInternalSubset = !inExternalPE;
- defns.put (entityName, entity);
-
- // Report event
- if (defns == params) {
- entityName = "%" + entityName;
- }
- declHandler.internalEntityDecl(entityName, new String(value));
- }
-
- // external entities (including unparsed)
- } else {
- // [76] NDataDecl ::= S 'NDATA' S Name
- if (defns == entities && maybeWhitespace ()
- && peek ("NDATA")) {
- externalId.notation = getMarkupDeclname ("F-018", false);
-
- // flag undeclared notation for checking after
- // the DTD is fully processed
- if (supportValidation && isValidating
- && notations.get (externalId.notation) == null)
- notations.put (externalId.notation, Boolean.TRUE);
- }
- externalId.name = entityName;
- externalId.isPE = (defns == params);
- externalId.isFromInternalSubset = !inExternalPE;
- if (doStore) {
- defns.put (entityName, externalId);
- if (externalId.notation != null) {
- dtdHandler.unparsedEntityDecl (entityName,
- externalId.publicId, externalId.systemId,
- externalId.notation);
- } else {
- // Parsed external entity, either general or parameter
- if (defns == params) {
- entityName = "%" + entityName;
- }
- declHandler.externalEntityDecl(entityName,
- externalId.publicId, externalId.systemId);
- }
- }
- }
- maybeWhitespace ();
- nextChar ('>', "F-031", entityName);
- if (supportValidation && isValidating && start != in)
- error ("V-013", null);
- return true;
- }
-
- private ExternalEntity maybeExternalID ()
- throws IOException, SAXException
- {
- // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
- // | 'PUBLIC' S' PubidLiteral S Systemliteral
- String temp = null;
- ExternalEntity retval;
-
- if (peek ("PUBLIC")) {
- whitespace ("F-009");
- temp = parsePublicId ();
- } else if (!peek ("SYSTEM"))
- return null;
-
- retval = new ExternalEntity (in);
- retval.publicId = temp;
- whitespace ("F-008");
- retval.verbatimSystemId = getQuotedString("F-034", null);
- retval.systemId = resolveURI(retval.verbatimSystemId);
- return retval;
- }
-
- private String parseSystemId()
- throws IOException, SAXException
- {
- String uri = getQuotedString("F-034", null);
- return resolveURI(uri);
- }
-
- private String resolveURI(String uri)
- throws SAXException
- {
- int temp = uri.indexOf (':');
-
- // resolve relative URIs ... must do it here since
- // it's relative to the source file holding the URI!
-
- // "new java.net.URL (URL, string)" conforms to RFC 1630,
- // but we can't use that except when the URI is a URL.
- // The entity resolver is allowed to handle URIs that are
- // not URLs, so we pass URIs through with scheme intact
- if (temp == -1 || uri.indexOf ('/') < temp) {
- String baseURI;
-
- baseURI = in.getSystemId ();
- if (baseURI == null)
- fatal ("P-055", new Object [] { uri });
- if (uri.length () == 0)
- uri = ".";
- baseURI = baseURI.substring (0, baseURI.lastIndexOf ('/') + 1);
- if (uri.charAt (0) != '/')
- uri = baseURI + uri;
- else {
- // We have relative URI that begins with a '/'
-
- // Extract scheme including colon from baseURI
- String baseURIScheme;
- int colonIndex = baseURI.indexOf(':');
- if (colonIndex == -1) {
- // Base URI does not have a scheme so default to
- // "file:" scheme
- baseURIScheme = "file:";
- } else {
- baseURIScheme = baseURI.substring(0, colonIndex + 1);
- }
-
- uri = baseURIScheme + uri;
- }
-
- // letting other code map any "/xxx/../" or "/./" to "/",
- // since all URIs must handle it the same.
- }
- // check for fragment ID in URI
- if (uri.indexOf ('#') != -1)
- error ("P-056", new Object [] { uri });
- return uri;
- }
-
- private void maybeTextDecl ()
- throws IOException, SAXException
- {
- // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
-
- if (!in.isXmlDeclOrTextDeclPrefix()) {
- return;
- }
- // Consume '<?xml'
- peek("<?xml");
-
- readVersion (false, "1.0");
- readEncoding (true);
- maybeWhitespace ();
- if (!peek ("?>"))
- fatal ("P-057");
- }
-
- // returns true except in case of nonvalidating parser which
- // chose to ignore the entity.
-
- private boolean externalParsedEntity (
- ElementDecl element,
- ExternalEntity next,
- ElementValidator validator
- ) throws IOException, SAXException
- {
- // [78] ExtParsedEnt ::= TextDecl? content
-
- if (!pushReader (next)) {
- if (!isInAttribute) {
- lexicalHandler.endEntity(next.name);
- }
- return false;
- }
-
- maybeTextDecl ();
- content (element, true, validator);
- if (!in.isEOF ())
- fatal ("P-058", new Object [] { next.name });
- in = in.pop ();
- if (!isInAttribute) {
- lexicalHandler.endEntity(next.name);
- }
- return true;
- }
-
- private void externalParameterEntity (ExternalEntity next)
- throws IOException, SAXException
- {
- //
- // Reap the intended benefits of standalone declarations:
- // don't deal with external parameter entities, except to
- // validate the standalone declaration.
- //
- // XXX perhaps: also add an option to skip reading external
- // PEs when not validating, so this behaves like the parsers
- // in Gecko and IE5. Means setting ignoreDeclarations ...
- //
- if (isStandalone && fastStandalone)
- return;
-
- // n.b. "in external parameter entities" (and external
- // DTD subset, same grammar) parameter references can
- // occur "within" markup declarations ... expansions can
- // cross syntax rules. Flagged here; affects getc().
-
- // [79] ExtPE ::= TextDecl? extSubsetDecl
- // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
- // | PEReference | S )*
- InputEntity pe;
-
- inExternalPE = true;
-
- // Check for common case of file not found and throw a
- // SAXParseException
- try {
- // XXX if this returns false ...
- pushReader (next);
- } catch (IOException e) {
- fatal ("P-082", new Object [] { next.systemId }, e);
- }
-
- pe = in;
-
- // Check for common case of bad URL and throw a SAXParseException.
- // For bad URL case, JDK does not throw an exception when
- // URLConnection.getInputStream() is called but later when the app
- // tries to read from the stream in maybeTextDecl().
- try {
- maybeTextDecl ();
- } catch (IOException e) {
- // Pop invalid InputEntity so Locator info will be correct
- in = in.pop ();
- fatal ("P-082", new Object [] { next.systemId }, e);
- }
- while (!pe.isEOF ()) {
- // pop internal PEs (and whitespace before/after)
- if (in.isEOF ()) {
- in = in.pop ();
- continue;
- }
- doLexicalPE = false;
- if (maybeWhitespace ())
- continue;
- if (maybePEReference ())
- continue;
- doLexicalPE = true;
- if (maybeMarkupDecl () || maybeConditionalSect ())
- continue;
- break;
- }
- // if (in != pe) throw new InternalError ("who popped my PE?");
- if (!pe.isEOF ())
- fatal ("P-059", new Object [] { in.getName () });
- in = in.pop ();
- inExternalPE = !in.isDocument ();
- doLexicalPE = false;
- }
-
- private void readEncoding (boolean must)
- throws IOException, SAXException
- {
- // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
- String name = maybeReadAttribute ("encoding", must);
-
- if (name == null)
- return;
- for (int i = 0; i < name.length (); i++) {
- char c = name.charAt (i);
- if ((c >= 'A' && c <= 'Z')
- || (c >= 'a' && c <= 'z'))
- continue;
- if (i != 0
- && ((c >= '0' && c <= '9')
- || c == '-'
- || c == '_'
- || c == '.'
- ))
- continue;
- fatal ("P-060", new Object [] { new Character (c) });
- }
-
- //
- // This should be the encoding in use, and it's even an error for
- // it to be anything else (in certain cases that are impractical to
- // to test, and may even be insufficient). So, we do the best we
- // can, and warn if things look suspicious. Note that Java doesn't
- // uniformly expose the encodings, and that the names it uses
- // internally are nonstandard. Also, that the XML spec allows
- // such "errors" not to be reported at all.
- //
- String currentEncoding = in.getEncoding ();
-
- if (currentEncoding != null
- && !name.equalsIgnoreCase (currentEncoding))
- warning ("P-061", new Object [] { name, currentEncoding });
- }
-
- private boolean maybeNotationDecl ()
- throws IOException, SAXException
- {
- // [82] NotationDecl ::= '<!NOTATION' S Name S
- // (ExternalID | PublicID) S? '>'
- // [83] PublicID ::= 'PUBLIC' S PubidLiteral
- InputEntity start = peekDeclaration ("!NOTATION");
-
- if (start == null)
- return false;
-
- String name = getMarkupDeclname ("F-019", false);
- ExternalEntity entity = new ExternalEntity (in);
-
- whitespace ("F-011");
- if (peek ("PUBLIC")) {
- whitespace ("F-009");
- entity.publicId = parsePublicId ();
- if (maybeWhitespace ()) {
- if (!peek (">"))
- entity.systemId = parseSystemId ();
- else
- ungetc ();
- }
- } else if (peek ("SYSTEM")) {
- whitespace ("F-008");
- entity.systemId = parseSystemId ();
- } else
- fatal ("P-062");
- maybeWhitespace ();
- nextChar ('>', "F-032", name);
- if (supportValidation && isValidating && start != in)
- error ("V-013", null);
- if (entity.systemId != null && entity.systemId.indexOf ('#') != -1)
- error ("P-056", new Object [] { entity.systemId });
-
- Object value = notations.get (name);
- if (value != null && value instanceof ExternalEntity)
- warning ("P-063", new Object [] { name });
-
- // if we skipped a PE, ignore declarations since the
- // PE might have included an ovrriding declaration
- else if (!ignoreDeclarations) {
- notations.put (name, entity);
- dtdHandler.notationDecl (name, entity.publicId,
- entity.systemId);
- }
- return true;
- }
-
-
- ////////////////////////////////////////////////////////////////
- //
- // UTILITIES
- //
- ////////////////////////////////////////////////////////////////
-
- private char getc () throws IOException, SAXException
- {
- if (!(inExternalPE && doLexicalPE)) {
- char c = in.getc ();
- if (c == '%' && doLexicalPE)
- fatal ("P-080");
- return c;
- }
-
- //
- // External parameter entities get funky processing of '%param;'
- // references. It's not clearly defined in the XML spec; but it
- // boils down to having those refs be _lexical_ in most cases to
- // include partial syntax productions. It also needs selective
- // enabling; "<!ENTITY % foo ...>" must work, for example, and
- // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
- // if it's expanded in a literal, else "ab cd". PEs also do
- // not expand within comments or PIs, and external PEs are only
- // allowed to have markup decls (and so aren't handled lexically).
- //
- // This PE handling should be merged into maybeWhitespace, where
- // it can be dealt with more consistently.
- //
- // Also, there are some validity constraints in this area.
- //
- char c;
-
- while (in.isEOF ()) {
- if (in.isInternal () || (doLexicalPE && !in.isDocument ()))
- in = in.pop ();
- else {
- fatal ("P-064", new Object [] { in.getName () });
- }
- }
- if ((c = in.getc ()) == '%' && doLexicalPE) {
- // PE ref ::= '%' name ';'
- String name = maybeGetName ();
- Object entity;
-
- if (name == null)
- fatal ("P-011");
- nextChar (';', "F-021", name);
- entity = params.get (name);
-
- // push a magic "entity" before and after the
- // real one, so ungetc() behaves uniformly
- pushReader (" ".toCharArray (), null, false);
- if (entity instanceof InternalEntity)
- pushReader (((InternalEntity) entity).buf, name, false);
- else if (entity instanceof ExternalEntity)
- // PEs can't be unparsed!
- // XXX if this returns false ...
- pushReader ((ExternalEntity) entity);
- else if (entity == null)
- // see note in maybePEReference re making this be nonfatal.
- fatal ("V-022");
- else
- throw new InternalError ();
- pushReader (" ".toCharArray (), null, false);
- return in.getc ();
- }
- return c;
- }
-
- private void ungetc () // throws IOException, SAXException
- { in.ungetc (); }
-
- private boolean peek (String s) throws IOException, SAXException
- { return in.peek (s, null); }
-
- // Return the entity starting the specified declaration
- // (for validating declaration nesting) else null.
- private InputEntity peekDeclaration (String s)
- throws IOException, SAXException
- {
- InputEntity start;
-
- if (!in.peekc ('<'))
- return null;
- start = in;
- if (in.peek (s, null))
- return start;
- in.ungetc ();
- return null;
- }
-
- private void nextChar (char c, String location, String near)
- throws IOException, SAXException
- {
- while (in.isEOF () && !in.isDocument ())
- in = in.pop ();
- if (!in.peekc (c))
- fatal ("P-008", new Object []
- { new Character (c),
- messages.getMessage (locale, location),
- (near == null ? "" : ('"' + near + '"'))});
- }
-
-
-
- private void pushReader (char buf [], String name, boolean isGeneral)
- throws SAXException
- {
- if (isGeneral && !isInAttribute) {
- lexicalHandler.startEntity(name);
- }
-
- InputEntity r = InputEntity.getInputEntity (errHandler, locale);
- r.init (buf, name, in, !isGeneral);
- in = r;
- }
-
- // returns false if the external entity is being ignored ...
- // potentially possible in nonvalidating parsers, but not
- // currently supported. (See notes everywhere this is called;
- // both error handling, and reporting start/stop of entity
- // expansion, are issues! Also, SAX has no way to say "don't
- // read this entity".)
-
- private boolean pushReader (ExternalEntity next)
- throws SAXException, IOException
- {
- if (!next.isPE && !isInAttribute) {
- lexicalHandler.startEntity(next.name);
- }
-
- InputEntity r = InputEntity.getInputEntity (errHandler, locale);
- InputSource s = next.getInputSource (resolver);
-
- r.init (s, next.name, in, next.isPE);
- in = r;
- return true;
- }
-
-
- // error handling convenience routines
-
- private void warning (String messageId, Object parameters [])
- throws SAXException
- {
- SAXParseException x;
-
- x = new SAXParseException (
- messages.getMessage (locale, messageId, parameters),
- locator);
-
- // continuable, minor ... "this may matter to you..."
- errHandler.warning (x);
- }
-
- // package private ... normally returns.
- void error (String messageId, Object parameters [])
- throws SAXException
- {
- SAXParseException x = new SAXParseException (
- messages.getMessage (locale, messageId, parameters),
- locator);
-
- // continuable, major ... e.g. invalid document
- errHandler.error (x);
- }
-
- private void fatal (String message) throws SAXException
- {
- fatal (message, null, null);
- }
-
- private void fatal (String message, Object parameters [])
- throws SAXException
- {
- fatal (message, parameters, null);
- }
-
- private void fatal (String messageId, Object parameters [], Exception e)
- throws SAXException
- {
- SAXParseException x = new SAXParseException (
- messages.getMessage (locale, messageId, parameters),
- locator, e);
- errHandler.fatalError (x);
-
- // not continuable ... e.g. basic well-formedness errors
- throw x;
- }
-
-
- //
- // LOCATOR -- used for error reporting. The application calls us,
- // and we report where the current parsing event happened.
- //
- class DocLocator implements Locator {
-
-     // Every accessor delegates to the enclosing parser's current input
-     // entity "in".  While there is no such entity, identifiers are
-     // reported as null and positions as -1.
-
-     public String getPublicId ()
-     {
-         if (in != null)
-             return in.getPublicId ();
-         return null;
-     }
-
-     public String getSystemId ()
-     {
-         if (in != null)
-             return in.getSystemId ();
-         return null;
-     }
-
-     public int getLineNumber ()
-     {
-         if (in != null)
-             return in.getLineNumber ();
-         return -1;
-     }
-
-     public int getColumnNumber ()
-     {
-         if (in != null)
-             return in.getColumnNumber ();
-         return -1;
-     }
- }
-
-
- //
- // Map char arrays to strings ... cuts down both on memory and
- // CPU usage for element/attribute/other names that are reused.
- //
- // Documents typically repeat names a lot, so we more or less
- // intern all the strings within the document; since some strings
- // are repeated in multiple documents (e.g. stylesheets) we go
- // a bit further, and intern globally.
- //
- static class NameCache {
-     //
-     // Fixed number of buckets (a prime); the table is never grown.
-     // It should be a reasonable bit larger than needed for most XML
-     // files seen so far.  If it's too small, the penalty is just
-     // longer collision chains.
-     //
-     NameCacheEntry hashtable [] = new NameCacheEntry [541];
-
-     //
-     // Common case: only the interned 'symbol' string for the first
-     // "len" chars of "value" is wanted.
-     //
-     String lookup (char value [], int len)
-     {
-         NameCacheEntry found = lookupEntry (value, len);
-
-         return found.name;
-     }
-
-     //
-     // Returns the cache entry for the first "len" chars of "value",
-     // creating and chaining a new one if none matches.  The entry
-     // exposes the chars as well as the string, for callers that need
-     // to scan them.  (Mostly for element end tags.)
-     //
-     NameCacheEntry lookupEntry (char value [], int len)
-     {
-         // hash the chars, then fold the hash into a bucket number
-         int hash = 0;
-         int pos = 0;
-
-         while (pos < len) {
-             hash = 31 * hash + value [pos];
-             pos++;
-         }
-
-         int slot = (hash & 0x7fffffff) % hashtable.length;
-
-         // walk the bucket's chain looking for an existing entry ...
-         NameCacheEntry candidate = hashtable [slot];
-
-         while (candidate != null) {
-             if (candidate.matches (value, len))
-                 return candidate;
-             candidate = candidate.next;
-         }
-
-         // ... none found, so build one from a private copy of the chars
-         char copy [] = new char [len];
-
-         System.arraycopy (value, 0, copy, 0, len);
-
-         NameCacheEntry fresh = new NameCacheEntry ();
-
-         fresh.chars = copy;
-         //
-         // NOTE: JDK 1.1 has a fixed size string intern table,
-         // with non-GC'd entries.  It can panic here; that's a
-         // JDK problem, use 1.2 or later with many identifiers.
-         //
-         fresh.name = new String (copy).intern ();    // "global" intern
-
-         // new entries go to the front of the chain
-         fresh.next = hashtable [slot];
-         hashtable [slot] = fresh;
-         return fresh;
-     }
- }
-
- static class NameCacheEntry {
- String name;
- char chars [];
- NameCacheEntry next;
-
- boolean matches (char value [], int len)
- {
- if (chars.length != len)
- return false;
- for (int i = 0; i < len; i++)
- if (value [i] != chars [i])
- return false;
- return true;
- }
- }
-
- //
- // A combined handler class that does nothing
- //
- private static class NullHandler extends DefaultHandler
- implements LexicalHandler, DeclHandler
- {
- public void startDTD (String name, String publicId, String systemId) {}
- public void endDTD () {}
- public void startEntity (String name) {}
- public void endEntity (String name) {}
- public void startCDATA () {}
- public void endCDATA () {}
- public void comment (char ch[], int start, int length) {}
- public void elementDecl (String name, String model) {}
- public void attributeDecl (String eName, String aName, String type,
- String valueDefault, String value) {}
- public void internalEntityDecl (String name, String value) {}
- public void externalEntityDecl (String name, String publicId,
- String systemId) {}
- }
-
- //
- // Message catalog for diagnostics.
- //
- static final Catalog messages = new Catalog();
-
- static final class Catalog extends MessageCatalog {
- Catalog() {
- super(Parser2.class);
- }
- }
- }