1. /*
  2. * $Id: ContentModel.java,v 1.1.1.1 2000/11/23 01:53:33 edwingo Exp $
  3. *
  4. * The Apache Software License, Version 1.1
  5. *
  6. *
  7. * Copyright (c) 2000 The Apache Software Foundation. All rights
  8. * reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * 1. Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. *
  17. * 2. Redistributions in binary form must reproduce the above copyright
  18. * notice, this list of conditions and the following disclaimer in
  19. * the documentation and/or other materials provided with the
  20. * distribution.
  21. *
  22. * 3. The end-user documentation included with the redistribution,
  23. * if any, must include the following acknowledgment:
  24. * "This product includes software developed by the
  25. * Apache Software Foundation (http://www.apache.org/)."
  26. * Alternately, this acknowledgment may appear in the software itself,
  27. * if and wherever such third-party acknowledgments normally appear.
  28. *
  29. * 4. The names "Crimson" and "Apache Software Foundation" must
  30. * not be used to endorse or promote products derived from this
  31. * software without prior written permission. For written
  32. * permission, please contact apache@apache.org.
  33. *
  34. * 5. Products derived from this software may not be called "Apache",
  35. * nor may "Apache" appear in their name, without prior written
  36. * permission of the Apache Software Foundation.
  37. *
  38. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  39. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  40. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  41. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  42. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  43. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  44. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  45. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  46. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  47. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  48. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  49. * SUCH DAMAGE.
  50. * ====================================================================
  51. *
  52. * This software consists of voluntary contributions made by many
  53. * individuals on behalf of the Apache Software Foundation and was
  54. * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
  55. * http://www.sun.com. For more information on the Apache Software
  56. * Foundation, please see <http://www.apache.org/>.
  57. */
  58. package org.apache.crimson.parser;
  59. import java.util.Vector;
  60. import java.util.Enumeration;
  61. import java.io.*;
  62. /**
  63. * A representation of a "children" content model. These are basically a
  64. * regular expression; other content models are simpler. There is an
  65. * SGML compatibility restriction on DTDs that such content models be
  66. * deterministic, which in this sense just means that backtracking isn't
  67. * needed to validate against it.
  68. *
  69. * <P> At the moment, for expediency, nondeterministic models are neither
  70. * tested for nor are they handled reasonably. This could be done after
  71. * each element's content model is fully parsed.
  72. *
  73. * <P> The most efficient way to do this would be to compile each content
  74. * model pattern into a deterministic finite automaton (no stack) and
  75. * just walk the DFA's graph ... but for now, these aren't compiled.
  76. *
  77. * @author Arthur van Hoff
  78. * @author David Brownell
  79. * @version $Revision: 1.1.1.1 $
  80. */
  81. final class ContentModel
  82. {
  83. /**
  84. * Type. Either '*', '?', '+'; or connectives ',', '|'; or
  85. * zero for content that's an element.
  86. */
  87. public char type;
  88. /**
  89. * The content. Either an Element name, or a ContentModel.
  90. */
  91. public Object content;
  92. /**
  93. * The next content model (in a ',' or '|' connective expression).
  94. * "next" has a list of connectives of the same type.
  95. */
  96. public ContentModel next;
  97. //
  98. // Cache mapping element names --> TRUE or FALSE based on whether
  99. // they can be 'first' in this content model or not. NOTE: it'd
  100. // be nice to have a lower cost cache, e.g. numbering elements and
  101. // using byte arrays.
  102. //
  103. private SimpleHashtable cache = new SimpleHashtable ();
  104. /**
  105. * Create a content model for an element.
  106. */
  107. public ContentModel (String element) {
  108. this.type = 0;
  109. this.content = element;
  110. }
  111. /**
  112. * Create a content model of a particular type.
  113. * Normally used to specify a frequency, or to start a connective.
  114. */
  115. public ContentModel (char type, ContentModel content) {
  116. this.type = type;
  117. this.content = content;
  118. }
  119. /**
  120. * Return true if the content model could
  121. * match an empty input stream.
  122. */
  123. public boolean empty () {
  124. // if it matters, this could cache as a simple boolean!
  125. switch (type) {
  126. case '*':
  127. case '?':
  128. return true;
  129. case '+':
  130. case 0:
  131. return false;
  132. case '|':
  133. if (content instanceof ContentModel
  134. && ((ContentModel)content).empty ()) {
  135. return true;
  136. }
  137. for (ContentModel m = (ContentModel)next;
  138. m != null;
  139. m = m.next) {
  140. if (m.empty ())
  141. return true;
  142. }
  143. return false;
  144. case ',':
  145. if (content instanceof ContentModel) {
  146. if (!((ContentModel)content).empty ()) {
  147. return false;
  148. }
  149. } else {
  150. return false;
  151. }
  152. for (ContentModel m = (ContentModel)next;
  153. m != null;
  154. m = m.next) {
  155. if (!m.empty ())
  156. return false;
  157. }
  158. return true;
  159. default:
  160. throw new InternalError ();
  161. }
  162. }
  163. /**
  164. * Return true if the token could potentially be the
  165. * first token in the input stream.
  166. */
  167. public boolean first (String token) {
  168. Boolean b = (Boolean) cache.get (token);
  169. boolean retval;
  170. if (b != null)
  171. return b.booleanValue ();
  172. // if we had no cached result, compute it
  173. switch (type) {
  174. case '*':
  175. case '?':
  176. case '+':
  177. case 0:
  178. if (content instanceof String)
  179. retval = (content == token);
  180. else
  181. retval = ((ContentModel)content).first (token);
  182. break;
  183. case ',':
  184. if (content instanceof String)
  185. retval = (content == token);
  186. else if (((ContentModel)content).first (token))
  187. retval = true;
  188. else if (!((ContentModel)content).empty ())
  189. retval = false;
  190. else if (next != null)
  191. retval = ((ContentModel)next).first (token);
  192. else
  193. retval = false;
  194. break;
  195. case '|':
  196. if (content instanceof String && content == token)
  197. retval = true;
  198. else if (((ContentModel)content).first (token))
  199. retval = true;
  200. else if (next != null)
  201. retval = ((ContentModel)next).first (token);
  202. else
  203. retval = false;
  204. break;
  205. default:
  206. throw new InternalError ();
  207. }
  208. // store the result, so we can be faster next time
  209. if (retval)
  210. cache.put (token, Boolean.TRUE);
  211. else
  212. cache.put (token, Boolean.FALSE);
  213. return retval;
  214. }
  215. /**
  216. * Convert to a string (for debugging).
  217. *
  218. public String toString () {
  219. return toString (true);
  220. }
  221. private String contentString ()
  222. {
  223. if (content instanceof ContentModel)
  224. return ((ContentModel)content).toString (false);
  225. else
  226. return (String) content;
  227. }
  228. private String toString (boolean isOuter)
  229. {
  230. String temp = contentString ();
  231. switch (type) {
  232. case '*':
  233. case '?':
  234. case '+':
  235. if (isOuter && temp.charAt (0) != '(')
  236. return "(" + temp + type + ")";
  237. else
  238. return temp + type;
  239. case 0:
  240. if (isOuter)
  241. return "(" + temp + ")";
  242. else
  243. return temp;
  244. case ',':
  245. case '|':
  246. if (next == null)
  247. return temp;
  248. for (ContentModel m = next; m != null; m = m.next)
  249. temp += type + m.contentString ();
  250. return "(" + temp + ")";
  251. default:
  252. throw new InternalError ("foo");
  253. }
  254. }
  255. /**/
  256. }