1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2003 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl.xs.models;
  58. import com.sun.org.apache.xerces.internal.xni.QName;
  59. import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
  60. import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet;
  61. import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler;
  62. import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
  63. import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
  64. import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
  65. import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl;
  66. import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException;
  67. import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints;
  68. import java.util.Vector;
  69. /**
  70. * DFAContentModel is the implementation of XSCMValidator that does
  71. * all of the non-trivial element content validation. This class does
  72. * the conversion from the regular expression to the DFA that
  73. * it then uses in its validation algorithm.
  74. *
  75. * @author Neil Graham, IBM
  76. * @version $Id: XSDFACM.java,v 1.10 2003/04/30 20:24:49 sandygao Exp $
  77. */
  78. public class XSDFACM
  79. implements XSCMValidator {
  80. //
  81. // Constants
  82. //
  83. private static final boolean DEBUG = false;
  84. // special strings
  85. // debugging
  86. /** Set to true to debug content model validation. */
  87. private static final boolean DEBUG_VALIDATE_CONTENT = false;
  88. //
  89. // Data
  90. //
  91. /**
  92. * This is the map of unique input symbol elements to indices into
  93. * each state's per-input symbol transition table entry. This is part
  94. * of the built DFA information that must be kept around to do the
  95. * actual validation. Note tat since either XSElementDecl or XSParticleDecl object
  96. * can live here, we've got to use an Object.
  97. */
  98. private Object fElemMap[] = null;
  99. /**
  100. * This is a map of whether the element map contains information
  101. * related to ANY models.
  102. */
  103. private int fElemMapType[] = null;
  104. /**
  105. * id of the unique input symbol
  106. */
  107. private int fElemMapId[] = null;
  108. /** The element map size. */
  109. private int fElemMapSize = 0;
  110. /**
  111. * This is an array of booleans, one per state (there are
  112. * fTransTableSize states in the DFA) that indicates whether that
  113. * state is a final state.
  114. */
  115. private boolean fFinalStateFlags[] = null;
  116. /**
  117. * The list of follow positions for each NFA position (i.e. for each
  118. * non-epsilon leaf node.) This is only used during the building of
  119. * the DFA, and is let go afterwards.
  120. */
  121. private CMStateSet fFollowList[] = null;
  122. /**
  123. * This is the head node of our intermediate representation. It is
  124. * only non-null during the building of the DFA (just so that it
  125. * does not have to be passed all around.) Once the DFA is built,
  126. * this is no longer required so its nulled out.
  127. */
  128. private CMNode fHeadNode = null;
  129. /**
  130. * The count of leaf nodes. This is an important number that set some
  131. * limits on the sizes of data structures in the DFA process.
  132. */
  133. private int fLeafCount = 0;
  134. /**
  135. * An array of non-epsilon leaf nodes, which is used during the DFA
  136. * build operation, then dropped.
  137. */
  138. private XSCMLeaf fLeafList[] = null;
  139. /** Array mapping ANY types to the leaf list. */
  140. private int fLeafListType[] = null;
  141. /**
  142. * This is the transition table that is the main by product of all
  143. * of the effort here. It is an array of arrays of ints. The first
  144. * dimension is the number of states we end up with in the DFA. The
  145. * second dimensions is the number of unique elements in the content
  146. * model (fElemMapSize). Each entry in the second dimension indicates
  147. * the new state given that input for the first dimension's start
  148. * state.
  149. * <p>
  150. * The fElemMap array handles mapping from element indexes to
  151. * positions in the second dimension of the transition table.
  152. */
  153. private int fTransTable[][] = null;
  154. /**
  155. * The number of valid entries in the transition table, and in the other
  156. * related tables such as fFinalStateFlags.
  157. */
  158. private int fTransTableSize = 0;
  159. // temp variables
  160. //
  161. // Constructors
  162. //
  163. /**
  164. * Constructs a DFA content model.
  165. *
  166. * @param symbolTable The symbol table.
  167. * @param syntaxTree The syntax tree of the content model.
  168. * @param leafCount The number of leaves.
  169. *
  170. * @exception RuntimeException Thrown if DFA can't be built.
  171. */
  172. public XSDFACM(CMNode syntaxTree, int leafCount) {
  173. // Store away our index and pools in members
  174. fLeafCount = leafCount;
  175. //
  176. // Create some string pool indexes that represent the names of some
  177. // magical nodes in the syntax tree.
  178. // (already done in static initialization...
  179. //
  180. //
  181. // Ok, so lets grind through the building of the DFA. This method
  182. // handles the high level logic of the algorithm, but it uses a
  183. // number of helper classes to do its thing.
  184. //
  185. // In order to avoid having hundreds of references to the error and
  186. // string handlers around, this guy and all of his helper classes
  187. // just throw a simple exception and we then pass it along.
  188. //
  189. if(DEBUG_VALIDATE_CONTENT) {
  190. XSDFACM.time -= System.currentTimeMillis();
  191. }
  192. buildDFA(syntaxTree);
  193. if(DEBUG_VALIDATE_CONTENT) {
  194. XSDFACM.time += System.currentTimeMillis();
  195. System.out.println("DFA build: " + XSDFACM.time + "ms");
  196. }
  197. }
  198. private static long time = 0;
  199. //
  200. // XSCMValidator methods
  201. //
  202. /**
  203. * check whether the given state is one of the final states
  204. *
  205. * @param state the state to check
  206. *
  207. * @return whether it's a final state
  208. */
  209. public boolean isFinalState (int state) {
  210. return (state < 0)? false :
  211. fFinalStateFlags[state];
  212. }
  213. /**
  214. * one transition only
  215. *
  216. * @param curElem The current element's QName
  217. * @param stateStack stack to store the previous state
  218. * @param curPos the current position of the stack
  219. *
  220. * @return null if transition is invalid; otherwise the Object corresponding to the
  221. * XSElementDecl or XSWildcardDecl identified. Also, the
  222. * state array will be modified to include the new state; this so that the validator can
  223. * store it away.
  224. *
  225. * @exception RuntimeException thrown on error
  226. */
  227. public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) {
  228. int curState = state[0];
  229. if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) {
  230. // there was an error last time; so just go find correct Object in fElemmMap.
  231. // ... after resetting state[0].
  232. if(curState == XSCMValidator.FIRST_ERROR)
  233. state[0] = XSCMValidator.SUBSEQUENT_ERROR;
  234. return findMatchingDecl(curElem, subGroupHandler);
  235. }
  236. int nextState = 0;
  237. int elemIndex = 0;
  238. Object matchingDecl = null;
  239. for (; elemIndex < fElemMapSize; elemIndex++) {
  240. nextState = fTransTable[curState][elemIndex];
  241. if (nextState == -1)
  242. continue;
  243. int type = fElemMapType[elemIndex] ;
  244. if (type == XSParticleDecl.PARTICLE_ELEMENT) {
  245. matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
  246. if (matchingDecl != null) {
  247. break;
  248. }
  249. }
  250. else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
  251. if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
  252. matchingDecl = fElemMap[elemIndex];
  253. break;
  254. }
  255. }
  256. }
  257. // if we still can't find a match, set the state to first_error
  258. // and return null
  259. if (elemIndex == fElemMapSize) {
  260. state[1] = state[0];
  261. state[0] = XSCMValidator.FIRST_ERROR;
  262. return findMatchingDecl(curElem, subGroupHandler);
  263. }
  264. state[0] = nextState;
  265. return matchingDecl;
  266. } // oneTransition(QName, int[], SubstitutionGroupHandler): Object
  267. Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) {
  268. Object matchingDecl = null;
  269. for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
  270. int type = fElemMapType[elemIndex] ;
  271. if (type == XSParticleDecl.PARTICLE_ELEMENT) {
  272. matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]);
  273. if (matchingDecl != null) {
  274. return matchingDecl;
  275. }
  276. }
  277. else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
  278. if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri))
  279. return fElemMap[elemIndex];
  280. }
  281. }
  282. return null;
  283. }
  284. // This method returns the start states of the content model.
  285. public int[] startContentModel() {
  286. int[] val = new int[2];
  287. val[0] = 0;
  288. return val;
  289. } // startContentModel():int[]
  290. // this method returns whether the last state was a valid final state
  291. public boolean endContentModel(int[] state) {
  292. return fFinalStateFlags[state[0]];
  293. } // endContentModel(int[]): boolean
  294. // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc.,
  295. // but we can put it back later.
  296. //
  297. // Private methods
  298. //
  299. /**
  300. * Builds the internal DFA transition table from the given syntax tree.
  301. *
  302. * @param syntaxTree The syntax tree.
  303. *
  304. * @exception RuntimeException Thrown if DFA cannot be built.
  305. */
  306. private void buildDFA(CMNode syntaxTree) {
  307. //
  308. // The first step we need to take is to rewrite the content model
  309. // using our CMNode objects, and in the process get rid of any
  310. // repetition short cuts, converting them into '*' style repetitions
  311. // or getting rid of repetitions altogether.
  312. //
  313. // The conversions done are:
  314. //
  315. // x+ -> (x|x*)
  316. // x? -> (x|epsilon)
  317. //
  318. // This is a relatively complex scenario. What is happening is that
  319. // we create a top level binary node of which the special EOC value
  320. // is set as the right side node. The the left side is set to the
  321. // rewritten syntax tree. The source is the original content model
  322. // info from the decl pool. The rewrite is done by buildSyntaxTree()
  323. // which recurses the decl pool's content of the element and builds
  324. // a new tree in the process.
  325. //
  326. // Note that, during this operation, we set each non-epsilon leaf
  327. // node's DFA state position and count the number of such leafs, which
  328. // is left in the fLeafCount member.
  329. //
  330. // The nodeTmp object is passed in just as a temp node to use during
  331. // the recursion. Otherwise, we'd have to create a new node on every
  332. // level of recursion, which would be piggy in Java (as is everything
  333. // for that matter.)
  334. //
  335. /* MODIFIED (Jan, 2001)
  336. *
  337. * Use following rules.
  338. * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
  339. * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
  340. *
  341. * The same computation of follow as x* is applied to x+
  342. *
  343. * The modification drastically reduces computation time of
  344. * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
  345. */
  346. //
  347. // And handle specially the EOC node, which also must be numbered
  348. // and counted as a non-epsilon leaf node. It could not be handled
  349. // in the above tree build because it was created before all that
  350. // started. We save the EOC position since its used during the DFA
  351. // building loop.
  352. //
  353. int EOCPos = fLeafCount;
  354. XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++);
  355. fHeadNode = new XSCMBinOp(
  356. XSModelGroupImpl.MODELGROUP_SEQUENCE,
  357. syntaxTree,
  358. nodeEOC
  359. );
  360. //
  361. // Ok, so now we have to iterate the new tree and do a little more
  362. // work now that we know the leaf count. One thing we need to do is
  363. // to calculate the first and last position sets of each node. This
  364. // is cached away in each of the nodes.
  365. //
  366. // Along the way we also set the leaf count in each node as the
  367. // maximum state count. They must know this in order to create their
  368. // first/last pos sets.
  369. //
  370. // We also need to build an array of references to the non-epsilon
  371. // leaf nodes. Since we iterate it in the same way as before, this
  372. // will put them in the array according to their position values.
  373. //
  374. fLeafList = new XSCMLeaf[fLeafCount];
  375. fLeafListType = new int[fLeafCount];
  376. postTreeBuildInit(fHeadNode);
  377. //
  378. // And, moving onward... We now need to build the follow position
  379. // sets for all the nodes. So we allocate an array of state sets,
  380. // one for each leaf node (i.e. each DFA position.)
  381. //
  382. fFollowList = new CMStateSet[fLeafCount];
  383. for (int index = 0; index < fLeafCount; index++)
  384. fFollowList[index] = new CMStateSet(fLeafCount);
  385. calcFollowList(fHeadNode);
  386. //
  387. // And finally the big push... Now we build the DFA using all the
  388. // states and the tree we've built up. First we set up the various
  389. // data structures we are going to use while we do this.
  390. //
  391. // First of all we need an array of unique element names in our
  392. // content model. For each transition table entry, we need a set of
  393. // contiguous indices to represent the transitions for a particular
  394. // input element. So we need to a zero based range of indexes that
  395. // map to element types. This element map provides that mapping.
  396. //
  397. fElemMap = new Object[fLeafCount];
  398. fElemMapType = new int[fLeafCount];
  399. fElemMapId = new int[fLeafCount];
  400. fElemMapSize = 0;
  401. for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
  402. // optimization from Henry Zongaro:
  403. //fElemMap[outIndex] = new Object ();
  404. fElemMap[outIndex] = null;
  405. int inIndex = 0;
  406. final int id = fLeafList[outIndex].getParticleId();
  407. for (; inIndex < fElemMapSize; inIndex++) {
  408. if (id == fElemMapId[inIndex])
  409. break;
  410. }
  411. // If it was not in the list, then add it, if not the EOC node
  412. if (inIndex == fElemMapSize) {
  413. fElemMap[fElemMapSize] = fLeafList[outIndex].getLeaf();
  414. fElemMapType[fElemMapSize] = fLeafListType[outIndex];
  415. fElemMapId[fElemMapSize] = id;
  416. fElemMapSize++;
  417. }
  418. }
  419. // the last entry in the element map must be the EOC element.
  420. // remove it from the map.
  421. if (DEBUG) {
  422. if (fElemMapId[fElemMapSize-1] != -1)
  423. System.err.println("interal error in DFA: last element is not EOC.");
  424. }
  425. fElemMapSize--;
  426. /***
  427. * Optimization(Jan, 2001); We sort fLeafList according to
  428. * elemIndex which is *uniquely* associated to each leaf.
  429. * We are *assuming* that each element appears in at least one leaf.
  430. **/
  431. int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
  432. int fSortCount = 0;
  433. for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
  434. final int id = fElemMapId[elemIndex];
  435. for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
  436. if (id == fLeafList[leafIndex].getParticleId())
  437. fLeafSorter[fSortCount++] = leafIndex;
  438. }
  439. fLeafSorter[fSortCount++] = -1;
  440. }
  441. /* Optimization(Jan, 2001) */
  442. //
  443. // Next lets create some arrays, some that hold transient
  444. // information during the DFA build and some that are permament.
  445. // These are kind of sticky since we cannot know how big they will
  446. // get, but we don't want to use any Java collections because of
  447. // performance.
  448. //
  449. // Basically they will probably be about fLeafCount*2 on average,
  450. // but can be as large as 2^(fLeafCount*2), worst case. So we start
  451. // with fLeafCount*4 as a middle ground. This will be very unlikely
  452. // to ever have to expand, though it if does, the overhead will be
  453. // somewhat ugly.
  454. //
  455. int curArraySize = fLeafCount * 4;
  456. CMStateSet[] statesToDo = new CMStateSet[curArraySize];
  457. fFinalStateFlags = new boolean[curArraySize];
  458. fTransTable = new int[curArraySize][];
  459. //
  460. // Ok we start with the initial set as the first pos set of the
  461. // head node (which is the seq node that holds the content model
  462. // and the EOC node.)
  463. //
  464. CMStateSet setT = fHeadNode.firstPos();
  465. //
  466. // Init our two state flags. Basically the unmarked state counter
  467. // is always chasing the current state counter. When it catches up,
  468. // that means we made a pass through that did not add any new states
  469. // to the lists, at which time we are done. We could have used a
  470. // expanding array of flags which we used to mark off states as we
  471. // complete them, but this is easier though less readable maybe.
  472. //
  473. int unmarkedState = 0;
  474. int curState = 0;
  475. //
  476. // Init the first transition table entry, and put the initial state
  477. // into the states to do list, then bump the current state.
  478. //
  479. fTransTable[curState] = makeDefStateList();
  480. statesToDo[curState] = setT;
  481. curState++;
  482. /* Optimization(Jan, 2001); This is faster for
  483. * a large content model such as, "(t001+|t002+|.... |t500+)".
  484. */
  485. java.util.Hashtable stateTable = new java.util.Hashtable();
  486. /* Optimization(Jan, 2001) */
  487. //
  488. // Ok, almost done with the algorithm... We now enter the
  489. // loop where we go until the states done counter catches up with
  490. // the states to do counter.
  491. //
  492. while (unmarkedState < curState) {
  493. //
  494. // Get the first unmarked state out of the list of states to do.
  495. // And get the associated transition table entry.
  496. //
  497. setT = statesToDo[unmarkedState];
  498. int[] transEntry = fTransTable[unmarkedState];
  499. // Mark this one final if it contains the EOC state
  500. fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);
  501. // Bump up the unmarked state count, marking this state done
  502. unmarkedState++;
  503. // Loop through each possible input symbol in the element map
  504. CMStateSet newSet = null;
  505. /* Optimization(Jan, 2001) */
  506. int sorterIndex = 0;
  507. /* Optimization(Jan, 2001) */
  508. for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
  509. //
  510. // Build up a set of states which is the union of all of
  511. // the follow sets of DFA positions that are in the current
  512. // state. If we gave away the new set last time through then
  513. // create a new one. Otherwise, zero out the existing one.
  514. //
  515. if (newSet == null)
  516. newSet = new CMStateSet(fLeafCount);
  517. else
  518. newSet.zeroBits();
  519. /* Optimization(Jan, 2001) */
  520. int leafIndex = fLeafSorter[sorterIndex++];
  521. while (leafIndex != -1) {
  522. // If this leaf index (DFA position) is in the current set...
  523. if (setT.getBit(leafIndex)) {
  524. //
  525. // If this leaf is the current input symbol, then we
  526. // want to add its follow list to the set of states to
  527. // transition to from the current state.
  528. //
  529. newSet.union(fFollowList[leafIndex]);
  530. }
  531. leafIndex = fLeafSorter[sorterIndex++];
  532. }
  533. /* Optimization(Jan, 2001) */
  534. //
  535. // If this new set is not empty, then see if its in the list
  536. // of states to do. If not, then add it.
  537. //
  538. if (!newSet.isEmpty()) {
  539. //
  540. // Search the 'states to do' list to see if this new
  541. // state set is already in there.
  542. //
  543. /* Optimization(Jan, 2001) */
  544. Integer stateObj = (Integer)stateTable.get(newSet);
  545. int stateIndex = (stateObj == null ? curState : stateObj.intValue());
  546. /* Optimization(Jan, 2001) */
  547. // If we did not find it, then add it
  548. if (stateIndex == curState) {
  549. //
  550. // Put this new state into the states to do and init
  551. // a new entry at the same index in the transition
  552. // table.
  553. //
  554. statesToDo[curState] = newSet;
  555. fTransTable[curState] = makeDefStateList();
  556. /* Optimization(Jan, 2001) */
  557. stateTable.put(newSet, new Integer(curState));
  558. /* Optimization(Jan, 2001) */
  559. // We now have a new state to do so bump the count
  560. curState++;
  561. //
  562. // Null out the new set to indicate we adopted it.
  563. // This will cause the creation of a new set on the
  564. // next time around the loop.
  565. //
  566. newSet = null;
  567. }
  568. //
  569. // Now set this state in the transition table's entry
  570. // for this element (using its index), with the DFA
  571. // state we will move to from the current state when we
  572. // see this input element.
  573. //
  574. transEntry[elemIndex] = stateIndex;
  575. // Expand the arrays if we're full
  576. if (curState == curArraySize) {
  577. //
  578. // Yikes, we overflowed the initial array size, so
  579. // we've got to expand all of these arrays. So adjust
  580. // up the size by 50% and allocate new arrays.
  581. //
  582. final int newSize = (int)(curArraySize * 1.5);
  583. CMStateSet[] newToDo = new CMStateSet[newSize];
  584. boolean[] newFinalFlags = new boolean[newSize];
  585. int[][] newTransTable = new int[newSize][];
  586. // Copy over all of the existing content
  587. for (int expIndex = 0; expIndex < curArraySize; expIndex++) {
  588. newToDo[expIndex] = statesToDo[expIndex];
  589. newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
  590. newTransTable[expIndex] = fTransTable[expIndex];
  591. }
  592. // Store the new array size
  593. curArraySize = newSize;
  594. statesToDo = newToDo;
  595. fFinalStateFlags = newFinalFlags;
  596. fTransTable = newTransTable;
  597. }
  598. }
  599. }
  600. }
  601. //
  602. // And now we can say bye bye to the temp representation since we've
  603. // built the DFA.
  604. //
  605. if (DEBUG_VALIDATE_CONTENT)
  606. dumpTree(fHeadNode, 0);
  607. fHeadNode = null;
  608. fLeafList = null;
  609. fFollowList = null;
  610. fLeafListType = null;
  611. fElemMapId = null;
  612. }
  613. /**
  614. * Calculates the follow list of the current node.
  615. *
  616. * @param nodeCur The curent node.
  617. *
  618. * @exception RuntimeException Thrown if follow list cannot be calculated.
  619. */
  620. private void calcFollowList(CMNode nodeCur) {
  621. // Recurse as required
  622. if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) {
  623. // Recurse only
  624. calcFollowList(((XSCMBinOp)nodeCur).getLeft());
  625. calcFollowList(((XSCMBinOp)nodeCur).getRight());
  626. }
  627. else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
  628. // Recurse first
  629. calcFollowList(((XSCMBinOp)nodeCur).getLeft());
  630. calcFollowList(((XSCMBinOp)nodeCur).getRight());
  631. //
  632. // Now handle our level. We use our left child's last pos
  633. // set and our right child's first pos set, so go ahead and
  634. // get them ahead of time.
  635. //
  636. final CMStateSet last = ((XSCMBinOp)nodeCur).getLeft().lastPos();
  637. final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos();
  638. //
  639. // Now, for every position which is in our left child's last set
  640. // add all of the states in our right child's first set to the
  641. // follow set for that position.
  642. //
  643. for (int index = 0; index < fLeafCount; index++) {
  644. if (last.getBit(index))
  645. fFollowList[index].union(first);
  646. }
  647. }
  648. else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE
  649. || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) {
  650. // Recurse first
  651. calcFollowList(((XSCMUniOp)nodeCur).getChild());
  652. //
  653. // Now handle our level. We use our own first and last position
  654. // sets, so get them up front.
  655. //
  656. final CMStateSet first = nodeCur.firstPos();
  657. final CMStateSet last = nodeCur.lastPos();
  658. //
  659. // For every position which is in our last position set, add all
  660. // of our first position states to the follow set for that
  661. // position.
  662. //
  663. for (int index = 0; index < fLeafCount; index++) {
  664. if (last.getBit(index))
  665. fFollowList[index].union(first);
  666. }
  667. }
  668. else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
  669. // Recurse only
  670. calcFollowList(((XSCMUniOp)nodeCur).getChild());
  671. }
  672. }
  673. /**
  674. * Dumps the tree of the current node to standard output.
  675. *
  676. * @param nodeCur The current node.
  677. * @param level The maximum levels to output.
  678. *
  679. * @exception RuntimeException Thrown on error.
  680. */
  681. private void dumpTree(CMNode nodeCur, int level) {
  682. for (int index = 0; index < level; index++)
  683. System.out.print(" ");
  684. int type = nodeCur.type();
  685. switch(type ) {
  686. case XSModelGroupImpl.MODELGROUP_CHOICE:
  687. case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
  688. if (type == XSModelGroupImpl.MODELGROUP_CHOICE)
  689. System.out.print("Choice Node ");
  690. else
  691. System.out.print("Seq Node ");
  692. if (nodeCur.isNullable())
  693. System.out.print("Nullable ");
  694. System.out.print("firstPos=");
  695. System.out.print(nodeCur.firstPos().toString());
  696. System.out.print(" lastPos=");
  697. System.out.println(nodeCur.lastPos().toString());
  698. dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1);
  699. dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1);
  700. break;
  701. }
  702. case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
  703. case XSParticleDecl.PARTICLE_ONE_OR_MORE:
  704. case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
  705. System.out.print("Rep Node ");
  706. if (nodeCur.isNullable())
  707. System.out.print("Nullable ");
  708. System.out.print("firstPos=");
  709. System.out.print(nodeCur.firstPos().toString());
  710. System.out.print(" lastPos=");
  711. System.out.println(nodeCur.lastPos().toString());
  712. dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1);
  713. break;
  714. }
  715. case XSParticleDecl.PARTICLE_ELEMENT: {
  716. System.out.print
  717. (
  718. "Leaf: (pos="
  719. + ((XSCMLeaf)nodeCur).getPosition()
  720. + "), "
  721. + "(elemIndex="
  722. + ((XSCMLeaf)nodeCur).getLeaf()
  723. + ") "
  724. );
  725. if (nodeCur.isNullable())
  726. System.out.print(" Nullable ");
  727. System.out.print("firstPos=");
  728. System.out.print(nodeCur.firstPos().toString());
  729. System.out.print(" lastPos=");
  730. System.out.println(nodeCur.lastPos().toString());
  731. break;
  732. }
  733. case XSParticleDecl.PARTICLE_WILDCARD:
  734. System.out.print("Any Node: ");
  735. System.out.print("firstPos=");
  736. System.out.print(nodeCur.firstPos().toString());
  737. System.out.print(" lastPos=");
  738. System.out.println(nodeCur.lastPos().toString());
  739. break;
  740. default: {
  741. throw new RuntimeException("ImplementationMessages.VAL_NIICM");
  742. }
  743. }
  744. }
  745. /**
  746. * -1 is used to represent bad transitions in the transition table
  747. * entry for each state. So each entry is initialized to an all -1
  748. * array. This method creates a new entry and initializes it.
  749. */
  750. private int[] makeDefStateList()
  751. {
  752. int[] retArray = new int[fElemMapSize];
  753. for (int index = 0; index < fElemMapSize; index++)
  754. retArray[index] = -1;
  755. return retArray;
  756. }
  757. /** Post tree build initialization. */
  758. private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException {
  759. // Set the maximum states on this node
  760. nodeCur.setMaxStates(fLeafCount);
  761. XSCMLeaf leaf = null;
  762. int pos = 0;
  763. // Recurse as required
  764. if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) {
  765. leaf = (XSCMLeaf)nodeCur;
  766. pos = leaf.getPosition();
  767. fLeafList[pos] = leaf;
  768. fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD;
  769. }
  770. else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) ||
  771. (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) {
  772. postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft());
  773. postTreeBuildInit(((XSCMBinOp)nodeCur).getRight());
  774. }
  775. else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
  776. nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
  777. nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
  778. postTreeBuildInit(((XSCMUniOp)nodeCur).getChild());
  779. }
  780. else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) {
  781. // Put this node in the leaf list at the current index if its
  782. // a non-epsilon leaf.
  783. leaf = (XSCMLeaf)nodeCur;
  784. pos = leaf.getPosition();
  785. fLeafList[pos] = leaf;
  786. fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT;
  787. }
  788. else {
  789. throw new RuntimeException("ImplementationMessages.VAL_NIICM");
  790. }
  791. }
  792. /**
  793. * check whether this content violates UPA constraint.
  794. *
  795. * @param errors to hold the UPA errors
  796. * @return true if this content model contains other or list wildcard
  797. */
  798. public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException {
  799. // Unique Particle Attribution
  800. // store the conflict results between any two elements in fElemMap
  801. // 0: not compared; -1: no conflict; 1: conflict
  802. // initialize the conflict table (all 0 initially)
  803. byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize];
  804. // for each state, check whether it has overlap transitions
  805. for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) {
  806. for (int j = 0; j < fElemMapSize; j++) {
  807. for (int k = j+1; k < fElemMapSize; k++) {
  808. if (fTransTable[i][j] != -1 &&
  809. fTransTable[i][k] != -1) {
  810. if (conflictTable[j][k] == 0) {
  811. conflictTable[j][k] = XSConstraints.overlapUPA
  812. (fElemMap[j],fElemMap[k],
  813. subGroupHandler) ?
  814. (byte)1 : (byte)-1;
  815. }
  816. }
  817. }
  818. }
  819. }
  820. // report all errors
  821. for (int i = 0; i < fElemMapSize; i++) {
  822. for (int j = 0; j < fElemMapSize; j++) {
  823. if (conflictTable[i][j] == 1) {
  824. //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(),
  825. // fElemMap[j].toString()});
  826. // REVISIT: do we want to report all errors? or just one?
  827. throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(),
  828. fElemMap[j].toString()});
  829. }
  830. }
  831. }
  832. // if there is a other or list wildcard, we need to check this CM
  833. // again, if this grammar is cached.
  834. for (int i = 0; i < fElemMapSize; i++) {
  835. if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) {
  836. XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i];
  837. if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST ||
  838. wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) {
  839. return true;
  840. }
  841. }
  842. }
  843. return false;
  844. }
  845. /**
  846. * Check which elements are valid to appear at this point. This method also
  847. * works if the state is in error, in which case it returns what should
  848. * have been seen.
  849. *
  850. * @param state the current state
  851. * @return a Vector whose entries are instances of
  852. * either XSWildcardDecl or XSElementDecl.
  853. */
  854. public Vector whatCanGoHere(int[] state) {
  855. int curState = state[0];
  856. if (curState < 0)
  857. curState = state[1];
  858. Vector ret = new Vector();
  859. for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
  860. if (fTransTable[curState][elemIndex] != -1)
  861. ret.addElement(fElemMap[elemIndex]);
  862. }
  863. return ret;
  864. }
  865. } // class DFAContentModel