1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xalan" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, Lotus
  53. * Development Corporation., http://www.lotus.com. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package org.apache.xpath.compiler;
  58. import java.util.Vector;
  59. import org.apache.xml.utils.PrefixResolver;
  60. import org.apache.xpath.res.XPATHErrorResources;
  61. /**
  62. * This class is in charge of lexical processing of the XPath
  63. * expression into tokens.
  64. */
  65. class Lexer
  66. {
  /**
   * The compiler that owns this lexer; tokens are added to its token queue.
   */
  70. private Compiler m_compiler;
  71. /**
  72. * The prefix resolver to map prefixes to namespaces in the XPath.
  73. */
  74. PrefixResolver m_namespaceContext;
  75. /**
  76. * The XPath processor object.
  77. */
  78. XPathParser m_processor;
  /**
   * This value is added to each m_patternMap entry that is a 'target'
   * (right-most top-level element name).
   */
  83. static final int TARGETEXTRA = 10000;
  84. /**
  85. * Ignore this, it is going away.
  86. * This holds a map to the m_tokenQueue that tells where the top-level elements are.
  87. * It is used for pattern matching so the m_tokenQueue can be walked backwards.
  88. * Each element that is a 'target', (right-most top level element name) has
  89. * TARGETEXTRA added to it.
  90. *
  91. */
  92. private int m_patternMap[] = new int[100];
  93. /**
  94. * Ignore this, it is going away.
  95. * The number of elements that m_patternMap maps;
  96. */
  97. private int m_patternMapSize;
  /**
   * Construct a lexer wired to its compiler, namespace resolver and parser.
   *
   * @param compiler The owning compiler for this lexer.
   * @param resolver The prefix resolver used to map qualified-name prefixes
   *                 to namespace URIs.
   * @param xpathProcessor The parser that is processing strings to opcodes.
   */
  Lexer(Compiler compiler, PrefixResolver resolver,
        XPathParser xpathProcessor)
  {
    // The three assignments are independent of one another.
    this.m_processor = xpathProcessor;
    this.m_namespaceContext = resolver;
    this.m_compiler = compiler;
  }
  113. /**
  114. * Walk through the expression and build a token queue, and a map of the top-level
  115. * elements.
  116. * @param pat XSLT Expression.
  117. *
  118. * @throws javax.xml.transform.TransformerException
  119. */
  120. void tokenize(String pat) throws javax.xml.transform.TransformerException
  121. {
  122. tokenize(pat, null);
  123. }
  /**
   * Walk through the expression character by character, adding tokens to the
   * compiler's token queue and recording top-level element positions in
   * m_patternMap.
   *
   * Name and number characters are accumulated as a "pending" token
   * (tracked by startSubstring) and are queued when a delimiter, quote,
   * whitespace or the end of the string is reached. Delimiter characters are
   * queued as one-character tokens, "::" is queued as a single token, and
   * quoted literals are queued including their surrounding quotes.
   *
   * @param pat The expression string to tokenize.
   * @param targetStrings Vector to hold Strings, may be null. When non-null,
   *        a target string is recorded at each top-level '|' and once more at
   *        the end of a non-empty expression.
   *
   * @throws javax.xml.transform.TransformerException declared because
   *         mapNSTokens declares it; presumably also raised by
   *         m_processor.error (not visible here).
   */
  void tokenize(String pat, Vector targetStrings)
          throws javax.xml.transform.TransformerException
  {

    m_compiler.m_currentPattern = pat;
    m_patternMapSize = 0;

    // This needs to grow too.
    m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);

    int nChars = pat.length();

    // Index in pat where the pending (not yet queued) token began, or -1
    // when there is no pending token.
    int startSubstring = -1;

    // Index of the most recent ':' seen inside the pending token, or -1.
    int posOfNSSep = -1;

    // Passed to mapPatternElemPos; set back to true after a top-level '|'.
    boolean isStartOfPat = true;

    // Set when '@' is seen; cleared when the following token is queued.
    boolean isAttrName = false;

    // True while every character of the pending token has been a digit.
    // Only consulted to decide how '-' is treated.
    boolean isNum = false;

    // Nesting of '(' and '[' so we can know if the given element should be
    // counted inside the m_patternMap.
    int nesting = 0;

    // char[] chars = pat.toCharArray();
    for (int i = 0; i < nChars; i++)
    {
      char c = pat.charAt(i);

      switch (c)
      {
      case '\"' :
      {
        // Queue any pending token before starting the literal.
        if (startSubstring != -1)
        {
          isNum = false;
          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
          isAttrName = false;

          if (-1 != posOfNSSep)
          {
            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
          }
          else
          {
            addToTokenQueue(pat.substring(startSubstring, i));
          }
        }

        startSubstring = i;

        // Empty-bodied loop: advance i to the closing quote or to nChars.
        for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);

        if (c == '\"' && i < nChars)
        {
          // The literal is queued with both quote characters included.
          addToTokenQueue(pat.substring(startSubstring, i + 1));

          startSubstring = -1;
        }
        else
        {
          // NOTE(review): startSubstring is left pointing at the opening
          // quote here; this only matters if error() returns -- confirm.
          m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
                            null);  //"misquoted literal... expected double quote!");
        }
      }
      break;
      case '\'' :
        // Same handling as the double-quote case, for single quotes.
        if (startSubstring != -1)
        {
          isNum = false;
          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
          isAttrName = false;

          if (-1 != posOfNSSep)
          {
            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
          }
          else
          {
            addToTokenQueue(pat.substring(startSubstring, i));
          }
        }

        startSubstring = i;

        // Empty-bodied loop: advance i to the closing quote or to nChars.
        for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);

        if (c == '\'' && i < nChars)
        {
          addToTokenQueue(pat.substring(startSubstring, i + 1));

          startSubstring = -1;
        }
        else
        {
          m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
                            null);  //"misquoted literal... expected single quote!");
        }
        break;
      case 0x0A :
      case 0x0D :
      case ' ' :
      case '\t' :
        // Whitespace ends the pending token but is not queued itself.
        if (startSubstring != -1)
        {
          isNum = false;
          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
          isAttrName = false;

          if (-1 != posOfNSSep)
          {
            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
          }
          else
          {
            addToTokenQueue(pat.substring(startSubstring, i));
          }

          startSubstring = -1;
        }
        break;
      case '@' :
        // Remembered so that mapPatternElemPos records the position of the
        // '@' token (one before the name) for the following name.
        isAttrName = true;

      // fall-through on purpose
      case '-' :
        if ('-' == c)
        {
          // A '-' inside a pending non-numeric token is kept as part of
          // that token: break without queueing anything.
          if (!(isNum || (startSubstring == -1)))
          {
            break;
          }

          isNum = false;
        }

      // fall-through on purpose
      case '(' :
      case '[' :
      case ')' :
      case ']' :
      case '|' :
      case '/' :
      case '*' :
      case '+' :
      case '=' :
      case ',' :
      case '\\' :  // Unused at the moment
      case '^' :  // Unused at the moment
      case '!' :  // Unused at the moment
      case '$' :
      case '<' :
      case '>' :
        if (startSubstring != -1)
        {
          // The delimiter ends the pending token; queue it first.
          isNum = false;
          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
          isAttrName = false;

          if (-1 != posOfNSSep)
          {
            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
          }
          else
          {
            addToTokenQueue(pat.substring(startSubstring, i));
          }

          startSubstring = -1;
        }
        else if (('/' == c) && isStartOfPat)
        {
          // A '/' at the start of a pattern gets its own map entry.
          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
        }
        else if ('*' == c)
        {
          // A '*' with no pending token gets its own map entry.
          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
          isAttrName = false;
        }

        if (0 == nesting)
        {
          if ('|' == c)
          {
            // Top-level '|': record the target of the alternative that just
            // ended, then treat what follows as the start of a new pattern.
            if (null != targetStrings)
            {
              recordTokenString(targetStrings);
            }

            isStartOfPat = true;
          }
        }

        if ((')' == c) || (']' == c))
        {
          nesting--;
        }
        else if (('(' == c) || ('[' == c))
        {
          nesting++;
        }

        // The delimiter itself is queued as a one-character token.
        addToTokenQueue(pat.substring(i, i + 1));
        break;
      case ':' :
        // A ':' at index 0 skips this block and is handled by the default
        // case without being recorded in posOfNSSep.
        if (i>0)
        {
          if (posOfNSSep == (i - 1))
          {
            // Second of two adjacent colons: queue any text before the first
            // colon, then queue "::" as a single token.
            if (startSubstring != -1)
            {
              if (startSubstring < (i - 1))
                addToTokenQueue(pat.substring(startSubstring, i - 1));
            }

            isNum = false;
            isAttrName = false;
            startSubstring = -1;
            posOfNSSep = -1;

            addToTokenQueue(pat.substring(i - 1, i + 1));

            break;
          }
          else
          {
            // Possible prefix separator; remembered until the token is queued.
            posOfNSSep = i;
          }
        }

      // fall through on purpose
      default :
        if (-1 == startSubstring)
        {
          // First character of a new pending token.
          startSubstring = i;
          isNum = Character.isDigit(c);
        }
        else if (isNum)
        {
          // Stays numeric only while every character is a digit.
          isNum = Character.isDigit(c);
        }
      }
    }

    // Queue whatever token is still pending at the end of the string.
    if (startSubstring != -1)
    {
      isNum = false;
      isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);

      // Unlike the in-loop cases, this also goes through mapNSTokens when
      // there is no ':' but the resolver reports handlesNullPrefixes().
      if ((-1 != posOfNSSep) ||
         ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
      {
        posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
      }
      else
      {
        addToTokenQueue(pat.substring(startSubstring, nChars));
      }
    }

    if (0 == m_compiler.getTokenQueueSize())
    {
      m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
    }
    else if (null != targetStrings)
    {
      // Record the target of the last (or only) alternative.
      recordTokenString(targetStrings);
    }

    // recordTokenString moves the parser's queue mark; reset it to the start.
    m_processor.m_queueMark = 0;
  }
  365. /**
  366. * Record the current position on the token queue as long as
  367. * this is a top-level element. Must be called before the
  368. * next token is added to the m_tokenQueue.
  369. *
  370. * @param nesting The nesting count for the pattern element.
  371. * @param isStart true if this is the start of a pattern.
  372. * @param isAttrName true if we have determined that this is an attribute name.
  373. *
  374. * @return true if this is the start of a pattern.
  375. */
  376. private boolean mapPatternElemPos(int nesting, boolean isStart,
  377. boolean isAttrName)
  378. {
  379. if (0 == nesting)
  380. {
  381. if(m_patternMapSize >= m_patternMap.length)
  382. {
  383. int patternMap[] = m_patternMap;
  384. int len = m_patternMap.length;
  385. m_patternMap = new int[m_patternMapSize + 100];
  386. System.arraycopy(patternMap, 0, m_patternMap, 0, len);
  387. }
  388. if (!isStart)
  389. {
  390. m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
  391. }
  392. m_patternMap[m_patternMapSize] =
  393. (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
  394. m_patternMapSize++;
  395. isStart = false;
  396. }
  397. return isStart;
  398. }
  399. /**
  400. * Given a map pos, return the corresponding token queue pos.
  401. *
  402. * @param i The index in the m_patternMap.
  403. *
  404. * @return the token queue position.
  405. */
  406. private int getTokenQueuePosFromMap(int i)
  407. {
  408. int pos = m_patternMap[i];
  409. return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
  410. }
  411. /**
  412. * Reset token queue mark and m_token to a
  413. * given position.
  414. * @param mark The new position.
  415. */
  416. private final void resetTokenMark(int mark)
  417. {
  418. int qsz = m_compiler.getTokenQueueSize();
  419. m_processor.m_queueMark = (mark > 0)
  420. ? ((mark <= qsz) ? mark - 1 : mark) : 0;
  421. if (m_processor.m_queueMark < qsz)
  422. {
  423. m_processor.m_token =
  424. (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
  425. m_processor.m_tokenChar = m_processor.m_token.charAt(0);
  426. }
  427. else
  428. {
  429. m_processor.m_token = null;
  430. m_processor.m_tokenChar = 0;
  431. }
  432. }
  433. /**
  434. * Given a string, return the corresponding keyword token.
  435. *
  436. * @param key The keyword.
  437. *
  438. * @return An opcode value.
  439. */
  440. final int getKeywordToken(String key)
  441. {
  442. int tok;
  443. try
  444. {
  445. Integer itok = (Integer) Keywords.m_keywords.get(key);
  446. tok = (null != itok) ? itok.intValue() : 0;
  447. }
  448. catch (NullPointerException npe)
  449. {
  450. tok = 0;
  451. }
  452. catch (ClassCastException cce)
  453. {
  454. tok = 0;
  455. }
  456. return tok;
  457. }
  458. /**
  459. * Record the current token in the passed vector.
  460. *
  461. * @param targetStrings Vector of string.
  462. */
  463. private void recordTokenString(Vector targetStrings)
  464. {
  465. int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
  466. resetTokenMark(tokPos + 1);
  467. if (m_processor.lookahead('(', 1))
  468. {
  469. int tok = getKeywordToken(m_processor.m_token);
  470. switch (tok)
  471. {
  472. case OpCodes.NODETYPE_COMMENT :
  473. targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
  474. break;
  475. case OpCodes.NODETYPE_TEXT :
  476. targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
  477. break;
  478. case OpCodes.NODETYPE_NODE :
  479. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  480. break;
  481. case OpCodes.NODETYPE_ROOT :
  482. targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
  483. break;
  484. case OpCodes.NODETYPE_ANYELEMENT :
  485. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  486. break;
  487. case OpCodes.NODETYPE_PI :
  488. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  489. break;
  490. default :
  491. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  492. }
  493. }
  494. else
  495. {
  496. if (m_processor.tokenIs('@'))
  497. {
  498. tokPos++;
  499. resetTokenMark(tokPos + 1);
  500. }
  501. if (m_processor.lookahead(':', 1))
  502. {
  503. tokPos += 2;
  504. }
  505. targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
  506. }
  507. }
  /**
   * Add a token to the compiler's token queue.
   *
   * @param s The token text to add.
   */
  private final void addToTokenQueue(String s)
  {
    m_compiler.getTokenQueue().addElement(s);
  }
  518. /**
  519. * When a seperator token is found, see if there's a element name or
  520. * the like to map.
  521. *
  522. * @param pat The XPath name string.
  523. * @param startSubstring The start of the name string.
  524. * @param posOfNSSep The position of the namespace seperator (':').
  525. * @param posOfScan The end of the name index.
  526. *
  527. * @throws javax.xml.transform.TransformerException
  528. *
  529. * @return -1 always.
  530. */
  531. private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
  532. int posOfScan)
  533. throws javax.xml.transform.TransformerException
  534. {
  535. String prefix = "";
  536. if ((startSubstring >= 0) && (posOfNSSep >= 0))
  537. {
  538. prefix = pat.substring(startSubstring, posOfNSSep);
  539. }
  540. String uName;
  541. if ((null != m_namespaceContext) &&!prefix.equals("*")
  542. &&!prefix.equals("xmlns"))
  543. {
  544. try
  545. {
  546. if (prefix.length() > 0)
  547. uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
  548. prefix);
  549. else
  550. {
  551. // Assume last was wildcard. This is not legal according
  552. // to the draft. Set the below to true to make namespace
  553. // wildcards work.
  554. if (false)
  555. {
  556. addToTokenQueue(":");
  557. String s = pat.substring(posOfNSSep + 1, posOfScan);
  558. if (s.length() > 0)
  559. addToTokenQueue(s);
  560. return -1;
  561. }
  562. else
  563. {
  564. uName =
  565. ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
  566. prefix);
  567. }
  568. }
  569. }
  570. catch (ClassCastException cce)
  571. {
  572. uName = m_namespaceContext.getNamespaceForPrefix(prefix);
  573. }
  574. }
  575. else
  576. {
  577. uName = prefix;
  578. }
  579. if ((null != uName) && (uName.length() > 0))
  580. {
  581. addToTokenQueue(uName);
  582. addToTokenQueue(":");
  583. String s = pat.substring(posOfNSSep + 1, posOfScan);
  584. if (s.length() > 0)
  585. addToTokenQueue(s);
  586. }
  587. else
  588. {
  589. // error("Could not locate namespace for prefix: "+prefix);
  590. m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
  591. new String[] {prefix}); //"Prefix must resolve to a namespace: {0}";
  592. /*** Old code commented out 10-Jan-2001
  593. addToTokenQueue(prefix);
  594. addToTokenQueue(":");
  595. String s = pat.substring(posOfNSSep + 1, posOfScan);
  596. if (s.length() > 0)
  597. addToTokenQueue(s);
  598. ***/
  599. }
  600. return -1;
  601. }
  602. }