- /*
- * $Id: ValidatingParser.java,v 1.1.1.1 2000/11/23 01:53:33 edwingo Exp $
- *
- * The Apache Software License, Version 1.1
- *
- *
- * Copyright (c) 2000 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Crimson" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation and was
- * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
- * http://www.sun.com. For more information on the Apache Software
- * Foundation, please see <http://www.apache.org/>.
- */
-
-
- package org.apache.crimson.parser;
-
- import java.util.Enumeration;
- import java.util.StringTokenizer;
- import java.util.Vector;
-
- import org.xml.sax.HandlerBase;
- import org.xml.sax.SAXException;
- import org.xml.sax.SAXParseException;
-
- import org.apache.crimson.util.XmlNames;
-
-
- /**
- * This parser tests XML documents against the validity constraints
- * specified in the XML 1.0 specification as it parses them. It
- * reports violations of those constraints using the standard SAX API.
- *
- * <P><em>This parser should be configured to use an <code>ErrorHandler</code>
- * that reject documents with validity errors, otherwise they will be accepted
- * despite errors.</em> The default error handling, as specified by SAX,
- * ignores all validity errors. The simplest way to have validity errors
- * have a useful effect is to pass a boolean <em>true</em> value to
- * the parser's constructor.
- *
- * <P> Note that most validity checks are performed during parsing by
- * the base class, for efficiency. They're disabled by default in
- * that class, and enabled by the constructor in this class.
- *
- * @author David Brownell
- * @version $Revision: 1.1.1.1 $
- */
- public class ValidatingParser extends Parser2
- {
- private SimpleHashtable ids = new SimpleHashtable ();
-
- /** Constructs a SAX parser object. */
- public ValidatingParser ()
- {
- setIsValidating (true);
- }
-
- /**
- * Constructs a SAX parser object, optionally assigning the error
- * handler to report exceptions on recoverable errors (which include
- * all validity errors) as well as fatal errors.
- *
- * @param rejectValidityErrors When true, the parser will use an
- * error handler which throws exceptions on recoverable errors.
- * Otherwise it uses the default SAX error handler, which ignores
- * such errors.
- */
- public ValidatingParser (boolean rejectValidityErrors)
- {
- this ();
- if (rejectValidityErrors)
- setErrorHandler (new HandlerBase () {
- public void error (SAXParseException x)
- throws SAXException
- { throw x; }
- });
- }
-
- // REMINDER: validation errors are not fatal, so code flow
- // must continue correctly if error() returns.
-
-
- // package private ... overrides base class method
- void afterRoot () throws SAXException
- {
- // Make sure all IDREFs match declared ID attributes. We scan
- // after the document element is parsed, since XML allows forward
- // references, and only now can we know if they're all resolved.
-
- for (Enumeration e = ids.keys ();
- e.hasMoreElements ();
- ) {
- String id = (String) e.nextElement ();
- Boolean value = (Boolean) ids.get (id);
- if (Boolean.FALSE == value)
- error ("V-024", new Object [] { id });
- }
- }
-
- // package private ... overrides base class method
- void afterDocument ()
- {
- ids.clear ();
- }
-
- // package private ... overrides base class method
- void validateAttributeSyntax (AttributeDecl attr, String value)
- throws SAXException
- {
- // ID, IDREF(S) ... values are Names
- if (AttributeDecl.ID == attr.type) {
- if (!XmlNames.isName (value))
- error ("V-025", new Object [] { value });
-
- Boolean b = (Boolean) ids.getNonInterned (value);
- if (b == null || b.equals (Boolean.FALSE))
- ids.put (value.intern (), Boolean.TRUE);
- else
- error ("V-026", new Object [] { value });
-
- } else if (AttributeDecl.IDREF == attr.type) {
- if (!XmlNames.isName (value))
- error ("V-027", new Object [] { value });
-
- Boolean b = (Boolean) ids.getNonInterned (value);
- if (b == null)
- ids.put (value.intern (), Boolean.FALSE);
-
- } else if (AttributeDecl.IDREFS == attr.type) {
- StringTokenizer tokenizer = new StringTokenizer (value);
- Boolean b;
- boolean sawValue = false;
-
- while (tokenizer.hasMoreTokens ()) {
- value = tokenizer.nextToken ();
- if (!XmlNames.isName (value))
- error ("V-027", new Object [] { value });
- b = (Boolean) ids.getNonInterned (value);
- if (b == null)
- ids.put (value.intern (), Boolean.FALSE);
- sawValue = true;
- }
- if (!sawValue)
- error ("V-039", null);
-
-
- // NMTOKEN(S) ... values are Nmtoken(s)
- } else if (AttributeDecl.NMTOKEN == attr.type) {
- if (!XmlNames.isNmtoken (value))
- error ("V-028", new Object [] { value });
-
- } else if (AttributeDecl.NMTOKENS == attr.type) {
- StringTokenizer tokenizer = new StringTokenizer (value);
- boolean sawValue = false;
-
- while (tokenizer.hasMoreTokens ()) {
- value = tokenizer.nextToken ();
- if (!XmlNames.isNmtoken (value))
- error ("V-028", new Object [] { value });
- sawValue = true;
- }
- if (!sawValue)
- error ("V-032", null);
-
- // ENUMERATION ... values match one of the tokens
- } else if (AttributeDecl.ENUMERATION == attr.type) {
- for (int i = 0; i < attr.values.length; i++)
- if (value.equals (attr.values [i]))
- return;
- error ("V-029", new Object [] { value });
-
- // NOTATION values match a notation name
- } else if (AttributeDecl.NOTATION == attr.type) {
- //
- // XXX XML 1.0 spec should probably list references to
- // externally defined notations in standalone docs as
- // validity errors. Ditto externally defined unparsed
- // entities; neither should show up in attributes, else
- // one needs to read the external declarations in order
- // to make sense of the document (exactly what tagging
- // a doc as "standalone" intends you won't need to do).
- //
- for (int i = 0; i < attr.values.length; i++)
- if (value.equals (attr.values [i]))
- return;
- error ("V-030", new Object [] { value });
-
- // ENTITY(IES) values match an unparsed entity(ies)
- } else if (AttributeDecl.ENTITY == attr.type) {
- // see note above re standalone
- if (!isUnparsedEntity (value))
- error ("V-031", new Object [] { value });
-
- } else if (AttributeDecl.ENTITIES == attr.type) {
- StringTokenizer tokenizer = new StringTokenizer (value);
- boolean sawValue = false;
-
- while (tokenizer.hasMoreTokens ()) {
- value = tokenizer.nextToken ();
- // see note above re standalone
- if (!isUnparsedEntity (value))
- error ("V-031", new Object [] { value });
- sawValue = true;
- }
- if (!sawValue)
- error ("V-040", null);
-
- } else if (AttributeDecl.CDATA != attr.type)
- throw new InternalError (attr.type);
- }
-
-
- // package private ... overrides base class method
- ContentModel newContentModel (String tag)
- {
- return new ContentModel (tag);
- }
-
- // package private ... overrides base class method
- ContentModel newContentModel (char type, ContentModel next)
- {
- return new ContentModel (type, next);
- }
-
- // package private ... overrides base class method
- ElementValidator newValidator (ElementDecl element)
- {
- if (element.validator != null)
- return element.validator;
- if (element.model != null)
- return new ChildrenValidator (element);
-
- //
- // most types of content model have very simple validation
- // algorithms; only "children" needs mutable state.
- //
- if (element.contentType == null || strANY == element.contentType)
- element.validator = ElementValidator.ANY;
- else if (strEMPTY == element.contentType)
- element.validator = EMPTY;
- else // (element.contentType.charAt (1) == '#')
- element.validator = new MixedValidator (element);
- return element.validator;
- }
-
- private final EmptyValidator EMPTY = new EmptyValidator ();
-
- // "EMPTY" model allows nothing
- class EmptyValidator extends ElementValidator
- {
- public void consume (String token) throws SAXException
- { error ("V-033", null); }
-
- public void text () throws SAXException
- { error ("V-033", null); }
- }
-
- // Mixed content models allow text with selected elements
- class MixedValidator extends ElementValidator
- {
- private ElementDecl element;
-
- MixedValidator (ElementDecl element)
- { this.element = element; }
-
- public void consume (String type) throws SAXException
- {
- String model = element.contentType;
-
- for (int index = 8; // skip "(#PCDATA|"
- (index = model.indexOf (type, index + 1)) >= 9;
- ) {
- char c;
-
- // allow this type name to suffix -- "|xxTYPE"
- if (model.charAt (index -1) != '|')
- continue;
- c = model.charAt (index + type.length ());
- if (c == '|' || c == ')')
- return;
- // allow this type name to prefix -- "|TYPExx"
- }
- error ("V-034", new Object [] { element.name, type, model });
- }
- }
-
- class ChildrenValidator extends ElementValidator
- {
- private ContentModelState state;
- private String name;
-
- ChildrenValidator (ElementDecl element)
- {
- state = new ContentModelState (element.model);
- name = element.name;
- }
-
- public void consume (String token) throws SAXException
- {
- if (state == null)
- error ("V-035", new Object [] { name, token });
- else try {
- state = state.advance (token);
- } catch (EndOfInputException e) {
- error ("V-036", new Object [] { name, token });
- }
- }
-
- public void text () throws SAXException
- {
- error ("V-037", new Object [] { name });
- }
-
- public void done () throws SAXException
- {
- if (state != null && !state.terminate ())
- error ("V-038", new Object [] { name });
- }
- }
-
- private boolean isUnparsedEntity (String name)
- {
- Object e = entities.getNonInterned (name);
- if (e == null || !(e instanceof ExternalEntity))
- return false;
- return ((ExternalEntity)e).notation != null;
- }
- }