1. /*
  2. * $Id: ValidatingParser.java,v 1.1.1.1 2000/11/23 01:53:33 edwingo Exp $
  3. *
  4. * The Apache Software License, Version 1.1
  5. *
  6. *
  7. * Copyright (c) 2000 The Apache Software Foundation. All rights
  8. * reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * 1. Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. *
  17. * 2. Redistributions in binary form must reproduce the above copyright
  18. * notice, this list of conditions and the following disclaimer in
  19. * the documentation and/or other materials provided with the
  20. * distribution.
  21. *
  22. * 3. The end-user documentation included with the redistribution,
  23. * if any, must include the following acknowledgment:
  24. * "This product includes software developed by the
  25. * Apache Software Foundation (http://www.apache.org/)."
  26. * Alternately, this acknowledgment may appear in the software itself,
  27. * if and wherever such third-party acknowledgments normally appear.
  28. *
  29. * 4. The names "Crimson" and "Apache Software Foundation" must
  30. * not be used to endorse or promote products derived from this
  31. * software without prior written permission. For written
  32. * permission, please contact apache@apache.org.
  33. *
  34. * 5. Products derived from this software may not be called "Apache",
  35. * nor may "Apache" appear in their name, without prior written
  36. * permission of the Apache Software Foundation.
  37. *
  38. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  39. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  40. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  41. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  42. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  43. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  44. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  45. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  46. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  47. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  48. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  49. * SUCH DAMAGE.
  50. * ====================================================================
  51. *
  52. * This software consists of voluntary contributions made by many
  53. * individuals on behalf of the Apache Software Foundation and was
  54. * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
  55. * http://www.sun.com. For more information on the Apache Software
  56. * Foundation, please see <http://www.apache.org/>.
  57. */
  58. package org.apache.crimson.parser;
  59. import java.util.Enumeration;
  60. import java.util.StringTokenizer;
  61. import java.util.Vector;
  62. import org.xml.sax.HandlerBase;
  63. import org.xml.sax.SAXException;
  64. import org.xml.sax.SAXParseException;
  65. import org.apache.crimson.util.XmlNames;
  66. /**
  67. * This parser tests XML documents against the validity constraints
  68. * specified in the XML 1.0 specification as it parses them. It
  69. * reports violations of those constraints using the standard SAX API.
  70. *
  71. * <P><em>This parser should be configured to use an <code>ErrorHandler</code>
  72. * that rejects documents with validity errors, otherwise they will be accepted
  73. * despite errors.</em> The default error handling, as specified by SAX,
  74. * ignores all validity errors. The simplest way to have validity errors
  75. * have a useful effect is to pass a boolean <em>true</em> value to
  76. * the parser's constructor.
  77. *
  78. * <P> Note that most validity checks are performed during parsing by
  79. * the base class, for efficiency. They're disabled by default in
  80. * that class, and enabled by the constructor in this class.
  81. *
  82. * @author David Brownell
  83. * @version $Revision: 1.1.1.1 $
  84. */
  85. public class ValidatingParser extends Parser2
  86. {
  87. private SimpleHashtable ids = new SimpleHashtable ();
  88. /** Constructs a SAX parser object. */
  89. public ValidatingParser ()
  90. {
  91. setIsValidating (true);
  92. }
  93. /**
  94. * Constructs a SAX parser object, optionally assigning the error
  95. * handler to report exceptions on recoverable errors (which include
  96. * all validity errors) as well as fatal errors.
  97. *
  98. * @param rejectValidityErrors When true, the parser will use an
  99. * error handler which throws exceptions on recoverable errors.
  100. * Otherwise it uses the default SAX error handler, which ignores
  101. * such errors.
  102. */
  103. public ValidatingParser (boolean rejectValidityErrors)
  104. {
  105. this ();
  106. if (rejectValidityErrors)
  107. setErrorHandler (new HandlerBase () {
  108. public void error (SAXParseException x)
  109. throws SAXException
  110. { throw x; }
  111. });
  112. }
  113. // REMINDER: validation errors are not fatal, so code flow
  114. // must continue correctly if error() returns.
  115. // package private ... overrides base class method
  116. void afterRoot () throws SAXException
  117. {
  118. // Make sure all IDREFs match declared ID attributes. We scan
  119. // after the document element is parsed, since XML allows forward
  120. // references, and only now can we know if they're all resolved.
  121. for (Enumeration e = ids.keys ();
  122. e.hasMoreElements ();
  123. ) {
  124. String id = (String) e.nextElement ();
  125. Boolean value = (Boolean) ids.get (id);
  126. if (Boolean.FALSE == value)
  127. error ("V-024", new Object [] { id });
  128. }
  129. }
  130. // package private ... overrides base class method
  131. void afterDocument ()
  132. {
  133. ids.clear ();
  134. }
  135. // package private ... overrides base class method
  136. void validateAttributeSyntax (AttributeDecl attr, String value)
  137. throws SAXException
  138. {
  139. // ID, IDREF(S) ... values are Names
  140. if (AttributeDecl.ID == attr.type) {
  141. if (!XmlNames.isName (value))
  142. error ("V-025", new Object [] { value });
  143. Boolean b = (Boolean) ids.getNonInterned (value);
  144. if (b == null || b.equals (Boolean.FALSE))
  145. ids.put (value.intern (), Boolean.TRUE);
  146. else
  147. error ("V-026", new Object [] { value });
  148. } else if (AttributeDecl.IDREF == attr.type) {
  149. if (!XmlNames.isName (value))
  150. error ("V-027", new Object [] { value });
  151. Boolean b = (Boolean) ids.getNonInterned (value);
  152. if (b == null)
  153. ids.put (value.intern (), Boolean.FALSE);
  154. } else if (AttributeDecl.IDREFS == attr.type) {
  155. StringTokenizer tokenizer = new StringTokenizer (value);
  156. Boolean b;
  157. boolean sawValue = false;
  158. while (tokenizer.hasMoreTokens ()) {
  159. value = tokenizer.nextToken ();
  160. if (!XmlNames.isName (value))
  161. error ("V-027", new Object [] { value });
  162. b = (Boolean) ids.getNonInterned (value);
  163. if (b == null)
  164. ids.put (value.intern (), Boolean.FALSE);
  165. sawValue = true;
  166. }
  167. if (!sawValue)
  168. error ("V-039", null);
  169. // NMTOKEN(S) ... values are Nmtoken(s)
  170. } else if (AttributeDecl.NMTOKEN == attr.type) {
  171. if (!XmlNames.isNmtoken (value))
  172. error ("V-028", new Object [] { value });
  173. } else if (AttributeDecl.NMTOKENS == attr.type) {
  174. StringTokenizer tokenizer = new StringTokenizer (value);
  175. boolean sawValue = false;
  176. while (tokenizer.hasMoreTokens ()) {
  177. value = tokenizer.nextToken ();
  178. if (!XmlNames.isNmtoken (value))
  179. error ("V-028", new Object [] { value });
  180. sawValue = true;
  181. }
  182. if (!sawValue)
  183. error ("V-032", null);
  184. // ENUMERATION ... values match one of the tokens
  185. } else if (AttributeDecl.ENUMERATION == attr.type) {
  186. for (int i = 0; i < attr.values.length; i++)
  187. if (value.equals (attr.values [i]))
  188. return;
  189. error ("V-029", new Object [] { value });
  190. // NOTATION values match a notation name
  191. } else if (AttributeDecl.NOTATION == attr.type) {
  192. //
  193. // XXX XML 1.0 spec should probably list references to
  194. // externally defined notations in standalone docs as
  195. // validity errors. Ditto externally defined unparsed
  196. // entities; neither should show up in attributes, else
  197. // one needs to read the external declarations in order
  198. // to make sense of the document (exactly what tagging
  199. // a doc as "standalone" intends you won't need to do).
  200. //
  201. for (int i = 0; i < attr.values.length; i++)
  202. if (value.equals (attr.values [i]))
  203. return;
  204. error ("V-030", new Object [] { value });
  205. // ENTITY(IES) values match an unparsed entity(ies)
  206. } else if (AttributeDecl.ENTITY == attr.type) {
  207. // see note above re standalone
  208. if (!isUnparsedEntity (value))
  209. error ("V-031", new Object [] { value });
  210. } else if (AttributeDecl.ENTITIES == attr.type) {
  211. StringTokenizer tokenizer = new StringTokenizer (value);
  212. boolean sawValue = false;
  213. while (tokenizer.hasMoreTokens ()) {
  214. value = tokenizer.nextToken ();
  215. // see note above re standalone
  216. if (!isUnparsedEntity (value))
  217. error ("V-031", new Object [] { value });
  218. sawValue = true;
  219. }
  220. if (!sawValue)
  221. error ("V-040", null);
  222. } else if (AttributeDecl.CDATA != attr.type)
  223. throw new InternalError (attr.type);
  224. }
  225. // package private ... overrides base class method
  226. ContentModel newContentModel (String tag)
  227. {
  228. return new ContentModel (tag);
  229. }
  230. // package private ... overrides base class method
  231. ContentModel newContentModel (char type, ContentModel next)
  232. {
  233. return new ContentModel (type, next);
  234. }
  235. // package private ... overrides base class method
  236. ElementValidator newValidator (ElementDecl element)
  237. {
  238. if (element.validator != null)
  239. return element.validator;
  240. if (element.model != null)
  241. return new ChildrenValidator (element);
  242. //
  243. // most types of content model have very simple validation
  244. // algorithms; only "children" needs mutable state.
  245. //
  246. if (element.contentType == null || strANY == element.contentType)
  247. element.validator = ElementValidator.ANY;
  248. else if (strEMPTY == element.contentType)
  249. element.validator = EMPTY;
  250. else // (element.contentType.charAt (1) == '#')
  251. element.validator = new MixedValidator (element);
  252. return element.validator;
  253. }
  254. private final EmptyValidator EMPTY = new EmptyValidator ();
  255. // "EMPTY" model allows nothing
  256. class EmptyValidator extends ElementValidator
  257. {
  258. public void consume (String token) throws SAXException
  259. { error ("V-033", null); }
  260. public void text () throws SAXException
  261. { error ("V-033", null); }
  262. }
  263. // Mixed content models allow text with selected elements
  264. class MixedValidator extends ElementValidator
  265. {
  266. private ElementDecl element;
  267. MixedValidator (ElementDecl element)
  268. { this.element = element; }
  269. public void consume (String type) throws SAXException
  270. {
  271. String model = element.contentType;
  272. for (int index = 8; // skip "(#PCDATA|"
  273. (index = model.indexOf (type, index + 1)) >= 9;
  274. ) {
  275. char c;
  276. // allow this type name to suffix -- "|xxTYPE"
  277. if (model.charAt (index -1) != '|')
  278. continue;
  279. c = model.charAt (index + type.length ());
  280. if (c == '|' || c == ')')
  281. return;
  282. // allow this type name to prefix -- "|TYPExx"
  283. }
  284. error ("V-034", new Object [] { element.name, type, model });
  285. }
  286. }
  287. class ChildrenValidator extends ElementValidator
  288. {
  289. private ContentModelState state;
  290. private String name;
  291. ChildrenValidator (ElementDecl element)
  292. {
  293. state = new ContentModelState (element.model);
  294. name = element.name;
  295. }
  296. public void consume (String token) throws SAXException
  297. {
  298. if (state == null)
  299. error ("V-035", new Object [] { name, token });
  300. else try {
  301. state = state.advance (token);
  302. } catch (EndOfInputException e) {
  303. error ("V-036", new Object [] { name, token });
  304. }
  305. }
  306. public void text () throws SAXException
  307. {
  308. error ("V-037", new Object [] { name });
  309. }
  310. public void done () throws SAXException
  311. {
  312. if (state != null && !state.terminate ())
  313. error ("V-038", new Object [] { name });
  314. }
  315. }
  316. private boolean isUnparsedEntity (String name)
  317. {
  318. Object e = entities.getNonInterned (name);
  319. if (e == null || !(e instanceof ExternalEntity))
  320. return false;
  321. return ((ExternalEntity)e).notation != null;
  322. }
  323. }