1. /*
  2. * Copyright 1999-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: Lexer.java,v 1.14 2004/02/17 04:32:49 minchau Exp $
  18. */
  19. package com.sun.org.apache.xpath.internal.compiler;
  20. import java.util.Vector;
  21. import com.sun.org.apache.xml.internal.utils.PrefixResolver;
  22. import com.sun.org.apache.xpath.internal.res.XPATHErrorResources;
  23. /**
  24. * This class is in charge of lexical processing of the XPath
  25. * expression into tokens.
  26. */
  27. class Lexer
  28. {
  29. /**
  30. * The target XPath.
  31. */
  32. private Compiler m_compiler;
  33. /**
  34. * The prefix resolver to map prefixes to namespaces in the XPath.
  35. */
  36. PrefixResolver m_namespaceContext;
  37. /**
  38. * The XPath processor object.
  39. */
  40. XPathParser m_processor;
  41. /**
  42. * This value is added to each element name in the TARGETEXTRA
  43. * that is a 'target' (right-most top-level element name).
  44. */
  45. static final int TARGETEXTRA = 10000;
  46. /**
  47. * Ignore this, it is going away.
  48. * This holds a map to the m_tokenQueue that tells where the top-level elements are.
  49. * It is used for pattern matching so the m_tokenQueue can be walked backwards.
  50. * Each element that is a 'target', (right-most top level element name) has
  51. * TARGETEXTRA added to it.
  52. *
  53. */
  54. private int m_patternMap[] = new int[100];
  55. /**
  56. * Ignore this, it is going away.
  57. * The number of elements that m_patternMap maps;
  58. */
  59. private int m_patternMapSize;
  60. /**
  61. * Create a Lexer object.
  62. *
  63. * @param compiler The owning compiler for this lexer.
  64. * @param resolver The prefix resolver for mapping qualified name prefixes
  65. * to namespace URIs.
  66. * @param xpathProcessor The parser that is processing strings to opcodes.
  67. */
  68. Lexer(Compiler compiler, PrefixResolver resolver,
  69. XPathParser xpathProcessor)
  70. {
  71. m_compiler = compiler;
  72. m_namespaceContext = resolver;
  73. m_processor = xpathProcessor;
  74. }
  75. /**
  76. * Walk through the expression and build a token queue, and a map of the top-level
  77. * elements.
  78. * @param pat XSLT Expression.
  79. *
  80. * @throws javax.xml.transform.TransformerException
  81. */
  82. void tokenize(String pat) throws javax.xml.transform.TransformerException
  83. {
  84. tokenize(pat, null);
  85. }
  86. /**
  87. * Walk through the expression and build a token queue, and a map of the top-level
  88. * elements.
  89. * @param pat XSLT Expression.
  90. * @param targetStrings Vector to hold Strings, may be null.
  91. *
  92. * @throws javax.xml.transform.TransformerException
  93. */
  94. void tokenize(String pat, Vector targetStrings)
  95. throws javax.xml.transform.TransformerException
  96. {
  97. m_compiler.m_currentPattern = pat;
  98. m_patternMapSize = 0;
  99. // This needs to grow too.
  100. m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);
  101. int nChars = pat.length();
  102. int startSubstring = -1;
  103. int posOfNSSep = -1;
  104. boolean isStartOfPat = true;
  105. boolean isAttrName = false;
  106. boolean isNum = false;
  107. // Nesting of '[' so we can know if the given element should be
  108. // counted inside the m_patternMap.
  109. int nesting = 0;
  110. // char[] chars = pat.toCharArray();
  111. for (int i = 0; i < nChars; i++)
  112. {
  113. char c = pat.charAt(i);
  114. switch (c)
  115. {
  116. case '\"' :
  117. {
  118. if (startSubstring != -1)
  119. {
  120. isNum = false;
  121. isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
  122. isAttrName = false;
  123. if (-1 != posOfNSSep)
  124. {
  125. posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
  126. }
  127. else
  128. {
  129. addToTokenQueue(pat.substring(startSubstring, i));
  130. }
  131. }
  132. startSubstring = i;
  133. for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
  134. if (c == '\"' && i < nChars)
  135. {
  136. addToTokenQueue(pat.substring(startSubstring, i + 1));
  137. startSubstring = -1;
  138. }
  139. else
  140. {
  141. m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
  142. null); //"misquoted literal... expected double quote!");
  143. }
  144. }
  145. break;
  146. case '\'' :
  147. if (startSubstring != -1)
  148. {
  149. isNum = false;
  150. isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
  151. isAttrName = false;
  152. if (-1 != posOfNSSep)
  153. {
  154. posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
  155. }
  156. else
  157. {
  158. addToTokenQueue(pat.substring(startSubstring, i));
  159. }
  160. }
  161. startSubstring = i;
  162. for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
  163. if (c == '\'' && i < nChars)
  164. {
  165. addToTokenQueue(pat.substring(startSubstring, i + 1));
  166. startSubstring = -1;
  167. }
  168. else
  169. {
  170. m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
  171. null); //"misquoted literal... expected single quote!");
  172. }
  173. break;
  174. case 0x0A :
  175. case 0x0D :
  176. case ' ' :
  177. case '\t' :
  178. if (startSubstring != -1)
  179. {
  180. isNum = false;
  181. isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
  182. isAttrName = false;
  183. if (-1 != posOfNSSep)
  184. {
  185. posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
  186. }
  187. else
  188. {
  189. addToTokenQueue(pat.substring(startSubstring, i));
  190. }
  191. startSubstring = -1;
  192. }
  193. break;
  194. case '@' :
  195. isAttrName = true;
  196. // fall-through on purpose
  197. case '-' :
  198. if ('-' == c)
  199. {
  200. if (!(isNum || (startSubstring == -1)))
  201. {
  202. break;
  203. }
  204. isNum = false;
  205. }
  206. // fall-through on purpose
  207. case '(' :
  208. case '[' :
  209. case ')' :
  210. case ']' :
  211. case '|' :
  212. case '/' :
  213. case '*' :
  214. case '+' :
  215. case '=' :
  216. case ',' :
  217. case '\\' : // Unused at the moment
  218. case '^' : // Unused at the moment
  219. case '!' : // Unused at the moment
  220. case '$' :
  221. case '<' :
  222. case '>' :
  223. if (startSubstring != -1)
  224. {
  225. isNum = false;
  226. isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
  227. isAttrName = false;
  228. if (-1 != posOfNSSep)
  229. {
  230. posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
  231. }
  232. else
  233. {
  234. addToTokenQueue(pat.substring(startSubstring, i));
  235. }
  236. startSubstring = -1;
  237. }
  238. else if (('/' == c) && isStartOfPat)
  239. {
  240. isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
  241. }
  242. else if ('*' == c)
  243. {
  244. isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
  245. isAttrName = false;
  246. }
  247. if (0 == nesting)
  248. {
  249. if ('|' == c)
  250. {
  251. if (null != targetStrings)
  252. {
  253. recordTokenString(targetStrings);
  254. }
  255. isStartOfPat = true;
  256. }
  257. }
  258. if ((')' == c) || (']' == c))
  259. {
  260. nesting--;
  261. }
  262. else if (('(' == c) || ('[' == c))
  263. {
  264. nesting++;
  265. }
  266. addToTokenQueue(pat.substring(i, i + 1));
  267. break;
  268. case ':' :
  269. if (i>0)
  270. {
  271. if (posOfNSSep == (i - 1))
  272. {
  273. if (startSubstring != -1)
  274. {
  275. if (startSubstring < (i - 1))
  276. addToTokenQueue(pat.substring(startSubstring, i - 1));
  277. }
  278. isNum = false;
  279. isAttrName = false;
  280. startSubstring = -1;
  281. posOfNSSep = -1;
  282. addToTokenQueue(pat.substring(i - 1, i + 1));
  283. break;
  284. }
  285. else
  286. {
  287. posOfNSSep = i;
  288. }
  289. }
  290. // fall through on purpose
  291. default :
  292. if (-1 == startSubstring)
  293. {
  294. startSubstring = i;
  295. isNum = Character.isDigit(c);
  296. }
  297. else if (isNum)
  298. {
  299. isNum = Character.isDigit(c);
  300. }
  301. }
  302. }
  303. if (startSubstring != -1)
  304. {
  305. isNum = false;
  306. isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
  307. if ((-1 != posOfNSSep) ||
  308. ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
  309. {
  310. posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
  311. }
  312. else
  313. {
  314. addToTokenQueue(pat.substring(startSubstring, nChars));
  315. }
  316. }
  317. if (0 == m_compiler.getTokenQueueSize())
  318. {
  319. m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!");
  320. }
  321. else if (null != targetStrings)
  322. {
  323. recordTokenString(targetStrings);
  324. }
  325. m_processor.m_queueMark = 0;
  326. }
  327. /**
  328. * Record the current position on the token queue as long as
  329. * this is a top-level element. Must be called before the
  330. * next token is added to the m_tokenQueue.
  331. *
  332. * @param nesting The nesting count for the pattern element.
  333. * @param isStart true if this is the start of a pattern.
  334. * @param isAttrName true if we have determined that this is an attribute name.
  335. *
  336. * @return true if this is the start of a pattern.
  337. */
  338. private boolean mapPatternElemPos(int nesting, boolean isStart,
  339. boolean isAttrName)
  340. {
  341. if (0 == nesting)
  342. {
  343. if(m_patternMapSize >= m_patternMap.length)
  344. {
  345. int patternMap[] = m_patternMap;
  346. int len = m_patternMap.length;
  347. m_patternMap = new int[m_patternMapSize + 100];
  348. System.arraycopy(patternMap, 0, m_patternMap, 0, len);
  349. }
  350. if (!isStart)
  351. {
  352. m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
  353. }
  354. m_patternMap[m_patternMapSize] =
  355. (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
  356. m_patternMapSize++;
  357. isStart = false;
  358. }
  359. return isStart;
  360. }
  361. /**
  362. * Given a map pos, return the corresponding token queue pos.
  363. *
  364. * @param i The index in the m_patternMap.
  365. *
  366. * @return the token queue position.
  367. */
  368. private int getTokenQueuePosFromMap(int i)
  369. {
  370. int pos = m_patternMap[i];
  371. return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
  372. }
  373. /**
  374. * Reset token queue mark and m_token to a
  375. * given position.
  376. * @param mark The new position.
  377. */
  378. private final void resetTokenMark(int mark)
  379. {
  380. int qsz = m_compiler.getTokenQueueSize();
  381. m_processor.m_queueMark = (mark > 0)
  382. ? ((mark <= qsz) ? mark - 1 : mark) : 0;
  383. if (m_processor.m_queueMark < qsz)
  384. {
  385. m_processor.m_token =
  386. (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
  387. m_processor.m_tokenChar = m_processor.m_token.charAt(0);
  388. }
  389. else
  390. {
  391. m_processor.m_token = null;
  392. m_processor.m_tokenChar = 0;
  393. }
  394. }
  395. /**
  396. * Given a string, return the corresponding keyword token.
  397. *
  398. * @param key The keyword.
  399. *
  400. * @return An opcode value.
  401. */
  402. final int getKeywordToken(String key)
  403. {
  404. int tok;
  405. try
  406. {
  407. Integer itok = (Integer) Keywords.m_keywords.get(key);
  408. tok = (null != itok) ? itok.intValue() : 0;
  409. }
  410. catch (NullPointerException npe)
  411. {
  412. tok = 0;
  413. }
  414. catch (ClassCastException cce)
  415. {
  416. tok = 0;
  417. }
  418. return tok;
  419. }
  420. /**
  421. * Record the current token in the passed vector.
  422. *
  423. * @param targetStrings Vector of string.
  424. */
  425. private void recordTokenString(Vector targetStrings)
  426. {
  427. int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
  428. resetTokenMark(tokPos + 1);
  429. if (m_processor.lookahead('(', 1))
  430. {
  431. int tok = getKeywordToken(m_processor.m_token);
  432. switch (tok)
  433. {
  434. case OpCodes.NODETYPE_COMMENT :
  435. targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
  436. break;
  437. case OpCodes.NODETYPE_TEXT :
  438. targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
  439. break;
  440. case OpCodes.NODETYPE_NODE :
  441. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  442. break;
  443. case OpCodes.NODETYPE_ROOT :
  444. targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
  445. break;
  446. case OpCodes.NODETYPE_ANYELEMENT :
  447. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  448. break;
  449. case OpCodes.NODETYPE_PI :
  450. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  451. break;
  452. default :
  453. targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
  454. }
  455. }
  456. else
  457. {
  458. if (m_processor.tokenIs('@'))
  459. {
  460. tokPos++;
  461. resetTokenMark(tokPos + 1);
  462. }
  463. if (m_processor.lookahead(':', 1))
  464. {
  465. tokPos += 2;
  466. }
  467. targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
  468. }
  469. }
  470. /**
  471. * Add a token to the token queue.
  472. *
  473. *
  474. * @param s The token.
  475. */
  476. private final void addToTokenQueue(String s)
  477. {
  478. m_compiler.getTokenQueue().addElement(s);
  479. }
  480. /**
  481. * When a seperator token is found, see if there's a element name or
  482. * the like to map.
  483. *
  484. * @param pat The XPath name string.
  485. * @param startSubstring The start of the name string.
  486. * @param posOfNSSep The position of the namespace seperator (':').
  487. * @param posOfScan The end of the name index.
  488. *
  489. * @throws javax.xml.transform.TransformerException
  490. *
  491. * @return -1 always.
  492. */
  493. private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
  494. int posOfScan)
  495. throws javax.xml.transform.TransformerException
  496. {
  497. String prefix = "";
  498. if ((startSubstring >= 0) && (posOfNSSep >= 0))
  499. {
  500. prefix = pat.substring(startSubstring, posOfNSSep);
  501. }
  502. String uName;
  503. if ((null != m_namespaceContext) &&!prefix.equals("*")
  504. &&!prefix.equals("xmlns"))
  505. {
  506. try
  507. {
  508. if (prefix.length() > 0)
  509. uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
  510. prefix);
  511. else
  512. {
  513. // Assume last was wildcard. This is not legal according
  514. // to the draft. Set the below to true to make namespace
  515. // wildcards work.
  516. if (false)
  517. {
  518. addToTokenQueue(":");
  519. String s = pat.substring(posOfNSSep + 1, posOfScan);
  520. if (s.length() > 0)
  521. addToTokenQueue(s);
  522. return -1;
  523. }
  524. else
  525. {
  526. uName =
  527. ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
  528. prefix);
  529. }
  530. }
  531. }
  532. catch (ClassCastException cce)
  533. {
  534. uName = m_namespaceContext.getNamespaceForPrefix(prefix);
  535. }
  536. }
  537. else
  538. {
  539. uName = prefix;
  540. }
  541. if ((null != uName) && (uName.length() > 0))
  542. {
  543. addToTokenQueue(uName);
  544. addToTokenQueue(":");
  545. String s = pat.substring(posOfNSSep + 1, posOfScan);
  546. if (s.length() > 0)
  547. addToTokenQueue(s);
  548. }
  549. else
  550. {
  551. // error("Could not locate namespace for prefix: "+prefix);
  552. m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
  553. new String[] {prefix}); //"Prefix must resolve to a namespace: {0}";
  554. /*** Old code commented out 10-Jan-2001
  555. addToTokenQueue(prefix);
  556. addToTokenQueue(":");
  557. String s = pat.substring(posOfNSSep + 1, posOfScan);
  558. if (s.length() > 0)
  559. addToTokenQueue(s);
  560. ***/
  561. }
  562. return -1;
  563. }
  564. }