1. package com.sun.java_cup.internal.runtime;
  2. import java.util.Stack;
  3. /** This class implements a skeleton table driven LR parser. In general,
  4. * LR parsers are a form of bottom up shift-reduce parsers. Shift-reduce
  5. * parsers act by shifting input onto a parse stack until the Symbols
  6. * matching the right hand side of a production appear on the top of the
  7. * stack. Once this occurs, a reduce is performed. This involves removing
  8. * the Symbols corresponding to the right hand side of the production
  9. * (the so called "handle") and replacing them with the non-terminal from
  10. * the left hand side of the production. <p>
  11. *
  12. * To control the decision of whether to shift or reduce at any given point,
  13. * the parser uses a state machine (the "viable prefix recognition machine"
  14. * built by the parser generator). The current state of the machine is placed
  15. * on top of the parse stack (stored as part of a Symbol object representing
  16. * a terminal or non terminal). The parse action table is consulted
  17. * (using the current state and the current lookahead Symbol as indexes) to
  18. * determine whether to shift or to reduce. When the parser shifts, it
  19. * changes to a new state by pushing a new Symbol (containing a new state)
  20. * onto the stack. When the parser reduces, it pops the handle (right hand
  21. * side of a production) off the stack. This leaves the parser in the state
  22. * it was in before any of those Symbols were matched. Next the reduce-goto
  23. * table is consulted (using the new state and current lookahead Symbol as
  24. * indexes) to determine a new state to go to. The parser then shifts to
  25. * this goto state by pushing the left hand side Symbol of the production
  26. * (also containing the new state) onto the stack.<p>
  27. *
  28. * This class actually provides four LR parsers. The methods parse() and
  29. * debug_parse() provide two versions of the main parser (the only difference
  30. * being that debug_parse() emits debugging trace messages as it parses).
  31. * In addition to these main parsers, the error recovery mechanism uses two
  32. * more. One of these is used to simulate "parsing ahead" in the input
  33. * without carrying out actions (to verify that a potential error recovery
  34. * has worked), and the other is used to parse through buffered "parse ahead"
  35. * input in order to execute all actions and re-synchronize the actual parser
  36. * configuration.<p>
  37. *
  38. * This is an abstract class which is normally filled out by a subclass
  39. * generated by the JavaCup parser generator. In addition to supplying
  40. * the actual parse tables, generated code also supplies methods which
  41. * invoke various pieces of user supplied code, provide access to certain
  42. * special Symbols (e.g., EOF and error), etc. Specifically, the following
  43. * abstract methods are normally supplied by generated code:
  44. * <dl compact>
  45. * <dt> short[][] production_table()
  46. * <dd> Provides a reference to the production table (indicating the index of
  47. * the left hand side non terminal and the length of the right hand side
  48. * for each production in the grammar).
  49. * <dt> short[][] action_table()
  50. * <dd> Provides a reference to the parse action table.
  51. * <dt> short[][] reduce_table()
  52. * <dd> Provides a reference to the reduce-goto table.
  53. * <dt> int start_state()
  54. * <dd> Indicates the index of the start state.
  55. * <dt> int start_production()
  56. * <dd> Indicates the index of the starting production.
  57. * <dt> int EOF_sym()
  58. * <dd> Indicates the index of the EOF Symbol.
  59. * <dt> int error_sym()
  60. * <dd> Indicates the index of the error Symbol.
  61. * <dt> Symbol do_action()
  62. * <dd> Executes a piece of user supplied action code. This always comes at
  63. * the point of a reduce in the parse, so this code also allocates and
  64. * fills in the left hand side non terminal Symbol object that is to be
  65. * pushed onto the stack for the reduce.
  66. * <dt> void init_actions()
  67. * <dd> Code to initialize a special object that encapsulates user supplied
  68. * actions (this object is used by do_action() to actually carry out the
  69. * actions).
  70. * </dl>
  71. *
  72. * In addition to these routines that <i>must</i> be supplied by the
  73. * generated subclass there are also a series of routines that <i>may</i>
  74. * be supplied. These include:
  75. * <dl>
  76. * <dt> Symbol scan()
  77. * <dd> Used to get the next input Symbol from the scanner.
  78. * <dt> Scanner getScanner()
  79. * <dd> Used to provide a scanner for the default implementation of
  80. * scan().
  81. * <dt> int error_sync_size()
  82. * <dd> This determines how many Symbols past the point of an error
  83. * must be parsed without error in order to consider a recovery to
  84. * be valid. This defaults to 3. Values less than 2 are not
  85. * recommended.
  86. * <dt> void report_error(String message, Object info)
  87. * <dd> This method is called to report an error. The default implementation
  88. * simply prints a message to System.err and where the error occurred.
  89. * This method is often replaced in order to provide a more sophisticated
  90. * error reporting mechanism.
  91. * <dt> void report_fatal_error(String message, Object info)
  92. * <dd> This method is called when a fatal error that cannot be recovered from
  93. * is encountered. In the default implementation, it calls
  94. * report_error() to emit a message, then throws an exception.
  95. * <dt> void syntax_error(Symbol cur_token)
  96. * <dd> This method is called as soon as syntax error is detected (but
  97. * before recovery is attempted). In the default implementation it
  98. * invokes: report_error("Syntax error", null);
  99. * <dt> void unrecovered_syntax_error(Symbol cur_token)
  100. * <dd> This method is called if syntax error recovery fails. In the default
  101. * implementation it invokes:<br>
  102. * report_fatal_error("Couldn't repair and continue parse", null);
  103. * </dl>
  104. *
  105. * @see com.sun.java_cup.internal.runtime.Symbol
  106. * @see com.sun.java_cup.internal.runtime.Symbol
  107. * @see com.sun.java_cup.internal.runtime.virtual_parse_stack
  108. * @version last updated: 7/3/96
  109. * @author Frank Flannery
  110. */
  111. public abstract class lr_parser {
  112. /*-----------------------------------------------------------*/
  113. /*--- Constructor(s) ----------------------------------------*/
  114. /*-----------------------------------------------------------*/
  115. /** Simple constructor. */
  116. public lr_parser()
  117. {
  118. /* nothing to do here */
  119. }
  120. /** Constructor that sets the default scanner. [CSA/davidm] */
  121. public lr_parser(Scanner s) {
  122. this(); /* in case default constructor someday does something */
  123. setScanner(s);
  124. }
  125. /*-----------------------------------------------------------*/
  126. /*--- (Access to) Static (Class) Variables ------------------*/
  127. /*-----------------------------------------------------------*/
  128. /** The default number of Symbols after an error we much match to consider
  129. * it recovered from.
  130. */
  131. protected final static int _error_sync_size = 3;
  132. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  133. /** The number of Symbols after an error we much match to consider it
  134. * recovered from.
  135. */
  136. protected int error_sync_size() {return _error_sync_size; }
  137. /*-----------------------------------------------------------*/
  138. /*--- (Access to) Instance Variables ------------------------*/
  139. /*-----------------------------------------------------------*/
  140. /** Table of production information (supplied by generated subclass).
  141. * This table contains one entry per production and is indexed by
  142. * the negative-encoded values (reduce actions) in the action_table.
  143. * Each entry has two parts, the index of the non-terminal on the
  144. * left hand side of the production, and the number of Symbols
  145. * on the right hand side.
  146. */
  147. public abstract short[][] production_table();
  148. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  149. /** The action table (supplied by generated subclass). This table is
  150. * indexed by state and terminal number indicating what action is to
  151. * be taken when the parser is in the given state (i.e., the given state
  152. * is on top of the stack) and the given terminal is next on the input.
  153. * States are indexed using the first dimension, however, the entries for
  154. * a given state are compacted and stored in adjacent index, value pairs
  155. * which are searched for rather than accessed directly (see get_action()).
  156. * The actions stored in the table will be either shifts, reduces, or
  157. * errors. Shifts are encoded as positive values (one greater than the
  158. * state shifted to). Reduces are encoded as negative values (one less
  159. * than the production reduced by). Error entries are denoted by zero.
  160. *
  161. * @see com.sun.java_cup.internal.runtime.lr_parser#get_action
  162. */
  163. public abstract short[][] action_table();
  164. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  165. /** The reduce-goto table (supplied by generated subclass). This
  166. * table is indexed by state and non-terminal number and contains
  167. * state numbers. States are indexed using the first dimension, however,
  168. * the entries for a given state are compacted and stored in adjacent
  169. * index, value pairs which are searched for rather than accessed
  170. * directly (see get_reduce()). When a reduce occurs, the handle
  171. * (corresponding to the RHS of the matched production) is popped off
  172. * the stack. The new top of stack indicates a state. This table is
  173. * then indexed by that state and the LHS of the reducing production to
  174. * indicate where to "shift" to.
  175. *
  176. * @see com.sun.java_cup.internal.runtime.lr_parser#get_reduce
  177. */
  178. public abstract short[][] reduce_table();
  179. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  180. /** The index of the start state (supplied by generated subclass). */
  181. public abstract int start_state();
  182. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  183. /** The index of the start production (supplied by generated subclass). */
  184. public abstract int start_production();
  185. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  186. /** The index of the end of file terminal Symbol (supplied by generated
  187. * subclass).
  188. */
  189. public abstract int EOF_sym();
  190. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  191. /** The index of the special error Symbol (supplied by generated subclass). */
  192. public abstract int error_sym();
  193. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  194. /** Internal flag to indicate when parser should quit. */
  195. protected boolean _done_parsing = false;
  196. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  197. /** This method is called to indicate that the parser should quit. This is
  198. * normally called by an accept action, but can be used to cancel parsing
  199. * early in other circumstances if desired.
  200. */
  201. public void done_parsing()
  202. {
  203. _done_parsing = true;
  204. }
  205. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  206. /* Global parse state shared by parse(), error recovery, and
  207. * debugging routines */
  208. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  209. /** Indication of the index for top of stack (for use by actions). */
  210. protected int tos;
  211. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  212. /** The current lookahead Symbol. */
  213. protected Symbol cur_token;
  214. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  215. /** The parse stack itself. */
  216. protected Stack stack = new Stack();
  217. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  218. /** Direct reference to the production table. */
  219. protected short[][] production_tab;
  220. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  221. /** Direct reference to the action table. */
  222. protected short[][] action_tab;
  223. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  224. /** Direct reference to the reduce-goto table. */
  225. protected short[][] reduce_tab;
  226. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  227. /** This is the scanner object used by the default implementation
  228. * of scan() to get Symbols. To avoid name conflicts with existing
  229. * code, this field is private. [CSA/davidm] */
  230. private Scanner _scanner;
  231. /**
  232. * Simple accessor method to set the default scanner.
  233. */
  234. public void setScanner(Scanner s) { _scanner = s; }
  235. /**
  236. * Simple accessor method to get the default scanner.
  237. */
  238. public Scanner getScanner() { return _scanner; }
  239. /*-----------------------------------------------------------*/
  240. /*--- General Methods ---------------------------------------*/
  241. /*-----------------------------------------------------------*/
  242. /** Perform a bit of user supplied action code (supplied by generated
  243. * subclass). Actions are indexed by an internal action number assigned
  244. * at parser generation time.
  245. *
  246. * @param act_num the internal index of the action to be performed.
  247. * @param parser the parser object we are acting for.
  248. * @param stack the parse stack of that object.
  249. * @param top the index of the top element of the parse stack.
  250. */
  251. public abstract Symbol do_action(
  252. int act_num,
  253. lr_parser parser,
  254. Stack stack,
  255. int top)
  256. throws java.lang.Exception;
  257. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  258. /** User code for initialization inside the parser. Typically this
  259. * initializes the scanner. This is called before the parser requests
  260. * the first Symbol. Here this is just a placeholder for subclasses that
  261. * might need this and we perform no action. This method is normally
  262. * overridden by the generated code using this contents of the "init with"
  263. * clause as its body.
  264. */
  265. public void user_init() throws java.lang.Exception { }
  266. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  267. /** Initialize the action object. This is called before the parser does
  268. * any parse actions. This is filled in by generated code to create
  269. * an object that encapsulates all action code.
  270. */
  271. protected abstract void init_actions() throws java.lang.Exception;
  272. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  273. /** Get the next Symbol from the input (supplied by generated subclass).
  274. * Once end of file has been reached, all subsequent calls to scan
  275. * should return an EOF Symbol (which is Symbol number 0). By default
  276. * this method returns getScanner().next_token(); this implementation
  277. * can be overriden by the generated parser using the code declared in
  278. * the "scan with" clause. Do not recycle objects; every call to
  279. * scan() should return a fresh object.
  280. */
  281. public Symbol scan() throws java.lang.Exception {
  282. return getScanner().next_token();
  283. }
  284. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  285. /** Report a fatal error. This method takes a message string and an
  286. * additional object (to be used by specializations implemented in
  287. * subclasses). Here in the base class a very simple implementation
  288. * is provided which reports the error then throws an exception.
  289. *
  290. * @param message an error message.
  291. * @param info an extra object reserved for use by specialized subclasses.
  292. */
  293. public void report_fatal_error(
  294. String message,
  295. Object info)
  296. throws java.lang.Exception
  297. {
  298. /* stop parsing (not really necessary since we throw an exception, but) */
  299. done_parsing();
  300. /* use the normal error message reporting to put out the message */
  301. report_error(message, info);
  302. /* throw an exception */
  303. throw new Exception("Can't recover from previous error(s)");
  304. }
  305. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  306. /** Report a non fatal error (or warning). This method takes a message
  307. * string and an additional object (to be used by specializations
  308. * implemented in subclasses). Here in the base class a very simple
  309. * implementation is provided which simply prints the message to
  310. * System.err.
  311. *
  312. * @param message an error message.
  313. * @param info an extra object reserved for use by specialized subclasses.
  314. */
  315. public void report_error(String message, Object info)
  316. {
  317. System.err.print(message);
  318. if (info instanceof Symbol)
  319. if (((Symbol)info).left != -1)
  320. System.err.println(" at character " + ((Symbol)info).left +
  321. " of input");
  322. else System.err.println("");
  323. else System.err.println("");
  324. }
  325. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  326. /** This method is called when a syntax error has been detected and recovery
  327. * is about to be invoked. Here in the base class we just emit a
  328. * "Syntax error" error message.
  329. *
  330. * @param cur_token the current lookahead Symbol.
  331. */
  332. public void syntax_error(Symbol cur_token)
  333. {
  334. report_error("Syntax error", cur_token);
  335. }
  336. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  337. /** This method is called if it is determined that syntax error recovery
  338. * has been unsuccessful. Here in the base class we report a fatal error.
  339. *
  340. * @param cur_token the current lookahead Symbol.
  341. */
  342. public void unrecovered_syntax_error(Symbol cur_token)
  343. throws java.lang.Exception
  344. {
  345. report_fatal_error("Couldn't repair and continue parse", cur_token);
  346. }
  347. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  348. /** Fetch an action from the action table. The table is broken up into
  349. * rows, one per state (rows are indexed directly by state number).
  350. * Within each row, a list of index, value pairs are given (as sequential
  351. * entries in the table), and the list is terminated by a default entry
  352. * (denoted with a Symbol index of -1). To find the proper entry in a row
  353. * we do a linear or binary search (depending on the size of the row).
  354. *
  355. * @param state the state index of the action being accessed.
  356. * @param sym the Symbol index of the action being accessed.
  357. */
  358. protected final short get_action(int state, int sym)
  359. {
  360. short tag;
  361. int first, last, probe;
  362. short[] row = action_tab[state];
  363. /* linear search if we are < 10 entries */
  364. if (row.length < 20)
  365. for (probe = 0; probe < row.length; probe++)
  366. {
  367. /* is this entry labeled with our Symbol or the default? */
  368. tag = row[probe++];
  369. if (tag == sym || tag == -1)
  370. {
  371. /* return the next entry */
  372. return row[probe];
  373. }
  374. }
  375. /* otherwise binary search */
  376. else
  377. {
  378. first = 0;
  379. last = (row.length-1)/2 - 1; /* leave out trailing default entry */
  380. while (first <= last)
  381. {
  382. probe = (first+last)/2;
  383. if (sym == row[probe*2])
  384. return row[probe*2+1];
  385. else if (sym > row[probe*2])
  386. first = probe+1;
  387. else
  388. last = probe-1;
  389. }
  390. /* not found, use the default at the end */
  391. return row[row.length-1];
  392. }
  393. /* shouldn't happened, but if we run off the end we return the
  394. default (error == 0) */
  395. return 0;
  396. }
  397. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  398. /** Fetch a state from the reduce-goto table. The table is broken up into
  399. * rows, one per state (rows are indexed directly by state number).
  400. * Within each row, a list of index, value pairs are given (as sequential
  401. * entries in the table), and the list is terminated by a default entry
  402. * (denoted with a Symbol index of -1). To find the proper entry in a row
  403. * we do a linear search.
  404. *
  405. * @param state the state index of the entry being accessed.
  406. * @param sym the Symbol index of the entry being accessed.
  407. */
  408. protected final short get_reduce(int state, int sym)
  409. {
  410. short tag;
  411. short[] row = reduce_tab[state];
  412. /* if we have a null row we go with the default */
  413. if (row == null)
  414. return -1;
  415. for (int probe = 0; probe < row.length; probe++)
  416. {
  417. /* is this entry labeled with our Symbol or the default? */
  418. tag = row[probe++];
  419. if (tag == sym || tag == -1)
  420. {
  421. /* return the next entry */
  422. return row[probe];
  423. }
  424. }
  425. /* if we run off the end we return the default (error == -1) */
  426. return -1;
  427. }
  428. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  429. /** This method provides the main parsing routine. It returns only when
  430. * done_parsing() has been called (typically because the parser has
  431. * accepted, or a fatal error has been reported). See the header
  432. * documentation for the class regarding how shift/reduce parsers operate
  433. * and how the various tables are used.
  434. */
  435. public Symbol parse() throws java.lang.Exception
  436. {
  437. /* the current action code */
  438. int act;
  439. /* the Symbol/stack element returned by a reduce */
  440. Symbol lhs_sym = null;
  441. /* information about production being reduced with */
  442. short handle_size, lhs_sym_num;
  443. /* set up direct reference to tables to drive the parser */
  444. production_tab = production_table();
  445. action_tab = action_table();
  446. reduce_tab = reduce_table();
  447. /* initialize the action encapsulation object */
  448. init_actions();
  449. /* do user initialization */
  450. user_init();
  451. /* get the first token */
  452. cur_token = scan();
  453. /* push dummy Symbol with start state to get us underway */
  454. stack.removeAllElements();
  455. stack.push(new Symbol(0, start_state()));
  456. tos = 0;
  457. /* continue until we are told to stop */
  458. for (_done_parsing = false; !_done_parsing; )
  459. {
  460. /* Check current token for freshness. */
  461. if (cur_token.used_by_parser)
  462. throw new Error("Symbol recycling detected (fix your scanner).");
  463. /* current state is always on the top of the stack */
  464. /* look up action out of the current state with the current input */
  465. act = get_action(((Symbol)stack.peek()).parse_state, cur_token.sym);
  466. /* decode the action -- > 0 encodes shift */
  467. if (act > 0)
  468. {
  469. /* shift to the encoded state by pushing it on the stack */
  470. cur_token.parse_state = act-1;
  471. cur_token.used_by_parser = true;
  472. stack.push(cur_token);
  473. tos++;
  474. /* advance to the next Symbol */
  475. cur_token = scan();
  476. }
  477. /* if its less than zero, then it encodes a reduce action */
  478. else if (act < 0)
  479. {
  480. /* perform the action for the reduce */
  481. lhs_sym = do_action((-act)-1, this, stack, tos);
  482. /* look up information about the production */
  483. lhs_sym_num = production_tab[(-act)-1][0];
  484. handle_size = production_tab[(-act)-1][1];
  485. /* pop the handle off the stack */
  486. for (int i = 0; i < handle_size; i++)
  487. {
  488. stack.pop();
  489. tos--;
  490. }
  491. /* look up the state to go to from the one popped back to */
  492. act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
  493. /* shift to that state */
  494. lhs_sym.parse_state = act;
  495. lhs_sym.used_by_parser = true;
  496. stack.push(lhs_sym);
  497. tos++;
  498. }
  499. /* finally if the entry is zero, we have an error */
  500. else if (act == 0)
  501. {
  502. /* call user syntax error reporting routine */
  503. syntax_error(cur_token);
  504. /* try to error recover */
  505. if (!error_recovery(false))
  506. {
  507. /* if that fails give up with a fatal syntax error */
  508. unrecovered_syntax_error(cur_token);
  509. /* just in case that wasn't fatal enough, end parse */
  510. done_parsing();
  511. } else {
  512. lhs_sym = (Symbol)stack.peek();
  513. }
  514. }
  515. }
  516. return lhs_sym;
  517. }
  518. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  519. /** Write a debugging message to System.err for the debugging version
  520. * of the parser.
  521. *
  522. * @param mess the text of the debugging message.
  523. */
  524. public void debug_message(String mess)
  525. {
  526. System.err.println(mess);
  527. }
  528. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  529. /** Dump the parse stack for debugging purposes. */
  530. public void dump_stack()
  531. {
  532. if (stack == null)
  533. {
  534. debug_message("# Stack dump requested, but stack is null");
  535. return;
  536. }
  537. debug_message("============ Parse Stack Dump ============");
  538. /* dump the stack */
  539. for (int i=0; i<stack.size(); i++)
  540. {
  541. debug_message("Symbol: " + ((Symbol)stack.elementAt(i)).sym +
  542. " State: " + ((Symbol)stack.elementAt(i)).parse_state);
  543. }
  544. debug_message("==========================================");
  545. }
  546. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  547. /** Do debug output for a reduce.
  548. *
  549. * @param prod_num the production we are reducing with.
  550. * @param nt_num the index of the LHS non terminal.
  551. * @param rhs_size the size of the RHS.
  552. */
  553. public void debug_reduce(int prod_num, int nt_num, int rhs_size)
  554. {
  555. debug_message("# Reduce with prod #" + prod_num + " [NT=" + nt_num +
  556. ", " + "SZ=" + rhs_size + "]");
  557. }
  558. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  559. /** Do debug output for shift.
  560. *
  561. * @param shift_tkn the Symbol being shifted onto the stack.
  562. */
  563. public void debug_shift(Symbol shift_tkn)
  564. {
  565. debug_message("# Shift under term #" + shift_tkn.sym +
  566. " to state #" + shift_tkn.parse_state);
  567. }
  568. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  569. /** Do debug output for stack state. [CSA]
  570. */
  571. public void debug_stack() {
  572. StringBuffer sb=new StringBuffer("## STACK:");
  573. for (int i=0; i<stack.size(); i++) {
  574. Symbol s = (Symbol) stack.elementAt(i);
  575. sb.append(" <state "+s.parse_state+", sym "+s.sym+">");
  576. if ((i%3)==2 || (i==(stack.size()-1))) {
  577. debug_message(sb.toString());
  578. sb = new StringBuffer(" ");
  579. }
  580. }
  581. }
  582. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  583. /** Perform a parse with debugging output. This does exactly the
  584. * same things as parse(), except that it calls debug_shift() and
  585. * debug_reduce() when shift and reduce moves are taken by the parser
  586. * and produces various other debugging messages.
  587. */
  588. public Symbol debug_parse()
  589. throws java.lang.Exception
  590. {
  591. /* the current action code */
  592. int act;
  593. /* the Symbol/stack element returned by a reduce */
  594. Symbol lhs_sym = null;
  595. /* information about production being reduced with */
  596. short handle_size, lhs_sym_num;
  597. /* set up direct reference to tables to drive the parser */
  598. production_tab = production_table();
  599. action_tab = action_table();
  600. reduce_tab = reduce_table();
  601. debug_message("# Initializing parser");
  602. /* initialize the action encapsulation object */
  603. init_actions();
  604. /* do user initialization */
  605. user_init();
  606. /* the current Symbol */
  607. cur_token = scan();
  608. debug_message("# Current Symbol is #" + cur_token.sym);
  609. /* push dummy Symbol with start state to get us underway */
  610. stack.removeAllElements();
  611. stack.push(new Symbol(0, start_state()));
  612. tos = 0;
  613. /* continue until we are told to stop */
  614. for (_done_parsing = false; !_done_parsing; )
  615. {
  616. /* Check current token for freshness. */
  617. if (cur_token.used_by_parser)
  618. throw new Error("Symbol recycling detected (fix your scanner).");
  619. /* current state is always on the top of the stack */
  620. //debug_stack();
  621. /* look up action out of the current state with the current input */
  622. act = get_action(((Symbol)stack.peek()).parse_state, cur_token.sym);
  623. /* decode the action -- > 0 encodes shift */
  624. if (act > 0)
  625. {
  626. /* shift to the encoded state by pushing it on the stack */
  627. cur_token.parse_state = act-1;
  628. cur_token.used_by_parser = true;
  629. debug_shift(cur_token);
  630. stack.push(cur_token);
  631. tos++;
  632. /* advance to the next Symbol */
  633. cur_token = scan();
  634. debug_message("# Current token is " + cur_token);
  635. }
  636. /* if its less than zero, then it encodes a reduce action */
  637. else if (act < 0)
  638. {
  639. /* perform the action for the reduce */
  640. lhs_sym = do_action((-act)-1, this, stack, tos);
  641. /* look up information about the production */
  642. lhs_sym_num = production_tab[(-act)-1][0];
  643. handle_size = production_tab[(-act)-1][1];
  644. debug_reduce((-act)-1, lhs_sym_num, handle_size);
  645. /* pop the handle off the stack */
  646. for (int i = 0; i < handle_size; i++)
  647. {
  648. stack.pop();
  649. tos--;
  650. }
  651. /* look up the state to go to from the one popped back to */
  652. act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
  653. debug_message("# Reduce rule: top state " +
  654. ((Symbol)stack.peek()).parse_state +
  655. ", lhs sym " + lhs_sym_num + " -> state " + act);
  656. /* shift to that state */
  657. lhs_sym.parse_state = act;
  658. lhs_sym.used_by_parser = true;
  659. stack.push(lhs_sym);
  660. tos++;
  661. debug_message("# Goto state #" + act);
  662. }
  663. /* finally if the entry is zero, we have an error */
  664. else if (act == 0)
  665. {
  666. /* call user syntax error reporting routine */
  667. syntax_error(cur_token);
  668. /* try to error recover */
  669. if (!error_recovery(true))
  670. {
  671. /* if that fails give up with a fatal syntax error */
  672. unrecovered_syntax_error(cur_token);
  673. /* just in case that wasn't fatal enough, end parse */
  674. done_parsing();
  675. } else {
  676. lhs_sym = (Symbol)stack.peek();
  677. }
  678. }
  679. }
  680. return lhs_sym;
  681. }
  682. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  683. /* Error recovery code */
  684. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  685. /** Attempt to recover from a syntax error. This returns false if recovery
  686. * fails, true if it succeeds. Recovery happens in 4 steps. First we
  687. * pop the parse stack down to a point at which we have a shift out
  688. * of the top-most state on the error Symbol. This represents the
  689. * initial error recovery configuration. If no such configuration is
  690. * found, then we fail. Next a small number of "lookahead" or "parse
  691. * ahead" Symbols are read into a buffer. The size of this buffer is
  692. * determined by error_sync_size() and determines how many Symbols beyond
  693. * the error must be matched to consider the recovery a success. Next,
  694. * we begin to discard Symbols in attempt to get past the point of error
  695. * to a point where we can continue parsing. After each Symbol, we attempt
  696. * to "parse ahead" though the buffered lookahead Symbols. The "parse ahead"
  697. * process simulates that actual parse, but does not modify the real
  698. * parser's configuration, nor execute any actions. If we can parse all
  699. * the stored Symbols without error, then the recovery is considered a
  700. * success. Once a successful recovery point is determined, we do an
  701. * actual parse over the stored input -- modifying the real parse
  702. * configuration and executing all actions. Finally, we return the the
  703. * normal parser to continue with the overall parse.
  704. *
  705. * @param debug should we produce debugging messages as we parse.
  706. */
  707. protected boolean error_recovery(boolean debug)
  708. throws java.lang.Exception
  709. {
  710. if (debug) debug_message("# Attempting error recovery");
  711. /* first pop the stack back into a state that can shift on error and
  712. do that shift (if that fails, we fail) */
  713. if (!find_recovery_config(debug))
  714. {
  715. if (debug) debug_message("# Error recovery fails");
  716. return false;
  717. }
  718. /* read ahead to create lookahead we can parse multiple times */
  719. read_lookahead();
  720. /* repeatedly try to parse forward until we make it the required dist */
  721. for (;;)
  722. {
  723. /* try to parse forward, if it makes it, bail out of loop */
  724. if (debug) debug_message("# Trying to parse ahead");
  725. if (try_parse_ahead(debug))
  726. {
  727. break;
  728. }
  729. /* if we are now at EOF, we have failed */
  730. if (lookahead[0].sym == EOF_sym())
  731. {
  732. if (debug) debug_message("# Error recovery fails at EOF");
  733. return false;
  734. }
  735. /* otherwise, we consume another Symbol and try again */
  736. if (debug)
  737. debug_message("# Consuming Symbol #" + cur_err_token().sym);
  738. restart_lookahead();
  739. }
  740. /* we have consumed to a point where we can parse forward */
  741. if (debug) debug_message("# Parse-ahead ok, going back to normal parse");
  742. /* do the real parse (including actions) across the lookahead */
  743. parse_lookahead(debug);
  744. /* we have success */
  745. return true;
  746. }
  747. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  748. /** Determine if we can shift under the special error Symbol out of the
  749. * state currently on the top of the (real) parse stack.
  750. */
  751. protected boolean shift_under_error()
  752. {
  753. /* is there a shift under error Symbol */
  754. return get_action(((Symbol)stack.peek()).parse_state, error_sym()) > 0;
  755. }
  756. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  757. /** Put the (real) parse stack into error recovery configuration by
  758. * popping the stack down to a state that can shift on the special
  759. * error Symbol, then doing the shift. If no suitable state exists on
  760. * the stack we return false
  761. *
  762. * @param debug should we produce debugging messages as we parse.
  763. */
  764. protected boolean find_recovery_config(boolean debug)
  765. {
  766. Symbol error_token;
  767. int act;
  768. if (debug) debug_message("# Finding recovery state on stack");
  769. /* Remember the right-position of the top symbol on the stack */
  770. int right_pos = ((Symbol)stack.peek()).right;
  771. int left_pos = ((Symbol)stack.peek()).left;
  772. /* pop down until we can shift under error Symbol */
  773. while (!shift_under_error())
  774. {
  775. /* pop the stack */
  776. if (debug)
  777. debug_message("# Pop stack by one, state was # " +
  778. ((Symbol)stack.peek()).parse_state);
  779. left_pos = ((Symbol)stack.pop()).left;
  780. tos--;
  781. /* if we have hit bottom, we fail */
  782. if (stack.empty())
  783. {
  784. if (debug) debug_message("# No recovery state found on stack");
  785. return false;
  786. }
  787. }
  788. /* state on top of the stack can shift under error, find the shift */
  789. act = get_action(((Symbol)stack.peek()).parse_state, error_sym());
  790. if (debug)
  791. {
  792. debug_message("# Recover state found (#" +
  793. ((Symbol)stack.peek()).parse_state + ")");
  794. debug_message("# Shifting on error to state #" + (act-1));
  795. }
  796. /* build and shift a special error Symbol */
  797. error_token = new Symbol(error_sym(), left_pos, right_pos);
  798. error_token.parse_state = act-1;
  799. error_token.used_by_parser = true;
  800. stack.push(error_token);
  801. tos++;
  802. return true;
  803. }
  804. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  805. /** Lookahead Symbols used for attempting error recovery "parse aheads". */
  806. protected Symbol lookahead[];
  807. /** Position in lookahead input buffer used for "parse ahead". */
  808. protected int lookahead_pos;
  809. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  810. /** Read from input to establish our buffer of "parse ahead" lookahead
  811. * Symbols.
  812. */
  813. protected void read_lookahead() throws java.lang.Exception
  814. {
  815. /* create the lookahead array */
  816. lookahead = new Symbol[error_sync_size()];
  817. /* fill in the array */
  818. for (int i = 0; i < error_sync_size(); i++)
  819. {
  820. lookahead[i] = cur_token;
  821. cur_token = scan();
  822. }
  823. /* start at the beginning */
  824. lookahead_pos = 0;
  825. }
  826. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  827. /** Return the current lookahead in our error "parse ahead" buffer. */
  828. protected Symbol cur_err_token() { return lookahead[lookahead_pos]; }
  829. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  830. /** Advance to next "parse ahead" input Symbol. Return true if we have
  831. * input to advance to, false otherwise.
  832. */
  833. protected boolean advance_lookahead()
  834. {
  835. /* advance the input location */
  836. lookahead_pos++;
  837. /* return true if we didn't go off the end */
  838. return lookahead_pos < error_sync_size();
  839. }
  840. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  841. /** Reset the parse ahead input to one Symbol past where we started error
  842. * recovery (this consumes one new Symbol from the real input).
  843. */
  844. protected void restart_lookahead() throws java.lang.Exception
  845. {
  846. /* move all the existing input over */
  847. for (int i = 1; i < error_sync_size(); i++)
  848. lookahead[i-1] = lookahead[i];
  849. /* read a new Symbol into the last spot */
  850. cur_token = scan();
  851. lookahead[error_sync_size()-1] = cur_token;
  852. /* reset our internal position marker */
  853. lookahead_pos = 0;
  854. }
  855. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  856. /** Do a simulated parse forward (a "parse ahead") from the current
  857. * stack configuration using stored lookahead input and a virtual parse
  858. * stack. Return true if we make it all the way through the stored
  859. * lookahead input without error. This basically simulates the action of
  860. * parse() using only our saved "parse ahead" input, and not executing any
  861. * actions.
  862. *
  863. * @param debug should we produce debugging messages as we parse.
  864. */
  865. protected boolean try_parse_ahead(boolean debug)
  866. throws java.lang.Exception
  867. {
  868. int act;
  869. short lhs, rhs_size;
  870. /* create a virtual stack from the real parse stack */
  871. virtual_parse_stack vstack = new virtual_parse_stack(stack);
  872. /* parse until we fail or get past the lookahead input */
  873. for (;;)
  874. {
  875. /* look up the action from the current state (on top of stack) */
  876. act = get_action(vstack.top(), cur_err_token().sym);
  877. /* if its an error, we fail */
  878. if (act == 0) return false;
  879. /* > 0 encodes a shift */
  880. if (act > 0)
  881. {
  882. /* push the new state on the stack */
  883. vstack.push(act-1);
  884. if (debug) debug_message("# Parse-ahead shifts Symbol #" +
  885. cur_err_token().sym + " into state #" + (act-1));
  886. /* advance simulated input, if we run off the end, we are done */
  887. if (!advance_lookahead()) return true;
  888. }
  889. /* < 0 encodes a reduce */
  890. else
  891. {
  892. /* if this is a reduce with the start production we are done */
  893. if ((-act)-1 == start_production())
  894. {
  895. if (debug) debug_message("# Parse-ahead accepts");
  896. return true;
  897. }
  898. /* get the lhs Symbol and the rhs size */
  899. lhs = production_tab[(-act)-1][0];
  900. rhs_size = production_tab[(-act)-1][1];
  901. /* pop handle off the stack */
  902. for (int i = 0; i < rhs_size; i++)
  903. vstack.pop();
  904. if (debug)
  905. debug_message("# Parse-ahead reduces: handle size = " +
  906. rhs_size + " lhs = #" + lhs + " from state #" + vstack.top());
  907. /* look up goto and push it onto the stack */
  908. vstack.push(get_reduce(vstack.top(), lhs));
  909. if (debug)
  910. debug_message("# Goto state #" + vstack.top());
  911. }
  912. }
  913. }
  914. /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
  915. /** Parse forward using stored lookahead Symbols. In this case we have
  916. * already verified that parsing will make it through the stored lookahead
  917. * Symbols and we are now getting back to the point at which we can hand
  918. * control back to the normal parser. Consequently, this version of the
  919. * parser performs all actions and modifies the real parse configuration.
  920. * This returns once we have consumed all the stored input or we accept.
  921. *
  922. * @param debug should we produce debugging messages as we parse.
  923. */
  924. protected void parse_lookahead(boolean debug)
  925. throws java.lang.Exception
  926. {
  927. /* the current action code */
  928. int act;
  929. /* the Symbol/stack element returned by a reduce */
  930. Symbol lhs_sym = null;
  931. /* information about production being reduced with */
  932. short handle_size, lhs_sym_num;
  933. /* restart the saved input at the beginning */
  934. lookahead_pos = 0;
  935. if (debug)
  936. {
  937. debug_message("# Reparsing saved input with actions");
  938. debug_message("# Current Symbol is #" + cur_err_token().sym);
  939. debug_message("# Current state is #" +
  940. ((Symbol)stack.peek()).parse_state);
  941. }
  942. /* continue until we accept or have read all lookahead input */
  943. while(!_done_parsing)
  944. {
  945. /* current state is always on the top of the stack */
  946. /* look up action out of the current state with the current input */
  947. act =
  948. get_action(((Symbol)stack.peek()).parse_state, cur_err_token().sym);
  949. /* decode the action -- > 0 encodes shift */
  950. if (act > 0)
  951. {
  952. /* shift to the encoded state by pushing it on the stack */
  953. cur_err_token().parse_state = act-1;
  954. cur_err_token().used_by_parser = true;
  955. if (debug) debug_shift(cur_err_token());
  956. stack.push(cur_err_token());
  957. tos++;
  958. /* advance to the next Symbol, if there is none, we are done */
  959. if (!advance_lookahead())
  960. {
  961. if (debug) debug_message("# Completed reparse");
  962. /* scan next Symbol so we can continue parse */
  963. // BUGFIX by Chris Harris <ckharris@ucsd.edu>:
  964. // correct a one-off error by commenting out
  965. // this next line.
  966. /*cur_token = scan();*/
  967. /* go back to normal parser */
  968. return;
  969. }
  970. if (debug)
  971. debug_message("# Current Symbol is #" + cur_err_token().sym);
  972. }
  973. /* if its less than zero, then it encodes a reduce action */
  974. else if (act < 0)
  975. {
  976. /* perform the action for the reduce */
  977. lhs_sym = do_action((-act)-1, this, stack, tos);
  978. /* look up information about the production */
  979. lhs_sym_num = production_tab[(-act)-1][0];
  980. handle_size = production_tab[(-act)-1][1];
  981. if (debug) debug_reduce((-act)-1, lhs_sym_num, handle_size);
  982. /* pop the handle off the stack */
  983. for (int i = 0; i < handle_size; i++)
  984. {
  985. stack.pop();
  986. tos--;
  987. }
  988. /* look up the state to go to from the one popped back to */
  989. act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
  990. /* shift to that state */
  991. lhs_sym.parse_state = act;
  992. lhs_sym.used_by_parser = true;
  993. stack.push(lhs_sym);
  994. tos++;
  995. if (debug) debug_message("# Goto state #" + act);
  996. }
  997. /* finally if the entry is zero, we have an error
  998. (shouldn't happen here, but...)*/
  999. else if (act == 0)
  1000. {
  1001. report_fatal_error("Syntax error", lhs_sym);
  1002. return;
  1003. }
  1004. }
  1005. }
  1006. /*-----------------------------------------------------------*/
  1007. /** Utility function: unpacks parse tables from strings */
  1008. protected static short[][] unpackFromStrings(String[] sa)
  1009. {
  1010. // Concatanate initialization strings.
  1011. StringBuffer sb = new StringBuffer(sa[0]);
  1012. for (int i=1; i<sa.length; i++)
  1013. sb.append(sa[i]);
  1014. int n=0; // location in initialization string
  1015. int size1 = (((int)sb.charAt(n))<<16) | ((int)sb.charAt(n+1)); n+=2;
  1016. short[][] result = new short[size1][];
  1017. for (int i=0; i<size1; i++) {
  1018. int size2 = (((int)sb.charAt(n))<<16) | ((int)sb.charAt(n+1)); n+=2;
  1019. result[i] = new short[size2];
  1020. for (int j=0; j<size2; j++)
  1021. result[i][j] = (short) (sb.charAt(n++)-2);
  1022. }
  1023. return result;
  1024. }
  1025. }