1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl.xpath.regex;
  58. import java.text.CharacterIterator;
  59. /**
  60. * @version $Id: REUtil.java,v 1.7 2002/11/20 00:49:47 twl Exp $
  61. */
  62. public final class REUtil {
  /** Prevents instantiation; every member of this class is static. */
  private REUtil() {
    // Intentionally empty.
  }
  65. static final int composeFromSurrogates(int high, int low) {
  66. return 0x10000 + ((high-0xd800)<<10) + low-0xdc00;
  67. }
  68. static final boolean isLowSurrogate(int ch) {
  69. return (ch & 0xfc00) == 0xdc00;
  70. }
  71. static final boolean isHighSurrogate(int ch) {
  72. return (ch & 0xfc00) == 0xd800;
  73. }
  74. static final String decomposeToSurrogates(int ch) {
  75. char[] chs = new char[2];
  76. ch -= 0x10000;
  77. chs[0] = (char)((ch>>10)+0xd800);
  78. chs[1] = (char)((ch&0x3ff)+0xdc00);
  79. return new String(chs);
  80. }
  81. static final String substring(CharacterIterator iterator, int begin, int end) {
  82. char[] src = new char[end-begin];
  83. for (int i = 0; i < src.length; i ++)
  84. src[i] = iterator.setIndex(i+begin);
  85. return new String(src);
  86. }
  87. // ================================================================
  88. static final int getOptionValue(int ch) {
  89. int ret = 0;
  90. switch (ch) {
  91. case 'i':
  92. ret = RegularExpression.IGNORE_CASE;
  93. break;
  94. case 'm':
  95. ret = RegularExpression.MULTIPLE_LINES;
  96. break;
  97. case 's':
  98. ret = RegularExpression.SINGLE_LINE;
  99. break;
  100. case 'x':
  101. ret = RegularExpression.EXTENDED_COMMENT;
  102. break;
  103. case 'u':
  104. ret = RegularExpression.USE_UNICODE_CATEGORY;
  105. break;
  106. case 'w':
  107. ret = RegularExpression.UNICODE_WORD_BOUNDARY;
  108. break;
  109. case 'F':
  110. ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
  111. break;
  112. case 'H':
  113. ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
  114. break;
  115. case 'X':
  116. ret = RegularExpression.XMLSCHEMA_MODE;
  117. break;
  118. case ',':
  119. ret = RegularExpression.SPECIAL_COMMA;
  120. break;
  121. default:
  122. }
  123. return ret;
  124. }
  125. static final int parseOptions(String opts) throws ParseException {
  126. if (opts == null) return 0;
  127. int options = 0;
  128. for (int i = 0; i < opts.length(); i ++) {
  129. int v = getOptionValue(opts.charAt(i));
  130. if (v == 0)
  131. throw new ParseException("Unknown Option: "+opts.substring(i), -1);
  132. options |= v;
  133. }
  134. return options;
  135. }
  136. static final String createOptionString(int options) {
  137. StringBuffer sb = new StringBuffer(9);
  138. if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
  139. sb.append((char)'F');
  140. if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
  141. sb.append((char)'H');
  142. if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
  143. sb.append((char)'X');
  144. if ((options & RegularExpression.IGNORE_CASE) != 0)
  145. sb.append((char)'i');
  146. if ((options & RegularExpression.MULTIPLE_LINES) != 0)
  147. sb.append((char)'m');
  148. if ((options & RegularExpression.SINGLE_LINE) != 0)
  149. sb.append((char)'s');
  150. if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
  151. sb.append((char)'u');
  152. if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
  153. sb.append((char)'w');
  154. if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
  155. sb.append((char)'x');
  156. if ((options & RegularExpression.SPECIAL_COMMA) != 0)
  157. sb.append((char)',');
  158. return sb.toString().intern();
  159. }
  160. // ================================================================
  161. static String stripExtendedComment(String regex) {
  162. int len = regex.length();
  163. StringBuffer buffer = new StringBuffer(len);
  164. int offset = 0;
  165. while (offset < len) {
  166. int ch = regex.charAt(offset++);
  167. // Skips a white space.
  168. if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
  169. continue;
  170. if (ch == '#') { // Skips chracters between '#' and a line end.
  171. while (offset < len) {
  172. ch = regex.charAt(offset++);
  173. if (ch == '\r' || ch == '\n')
  174. break;
  175. }
  176. continue;
  177. }
  178. int next; // Strips an escaped white space.
  179. if (ch == '\\' && offset < len) {
  180. if ((next = regex.charAt(offset)) == '#'
  181. || next == '\t' || next == '\n' || next == '\f'
  182. || next == '\r' || next == ' ') {
  183. buffer.append((char)next);
  184. offset ++;
  185. } else { // Other escaped character.
  186. buffer.append((char)'\\');
  187. buffer.append((char)next);
  188. offset ++;
  189. }
  190. } else // As is.
  191. buffer.append((char)ch);
  192. }
  193. return buffer.toString();
  194. }
  195. // ================================================================
  196. /**
  197. * Sample entry.
  198. * <div>Usage: <KBD>com.sun.org.apache.xerces.internal.utils.regex.REUtil <regex> <string></KBD></div>
  199. */
  200. public static void main(String[] argv) {
  201. String pattern = null;
  202. try {
  203. String options = "";
  204. String target = null;
  205. if( argv.length == 0 ) {
  206. System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
  207. System.exit( 0 );
  208. }
  209. for (int i = 0; i < argv.length; i ++) {
  210. if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
  211. if (pattern == null)
  212. pattern = argv[i];
  213. else if (target == null)
  214. target = argv[i];
  215. else
  216. System.err.println("Unnecessary: "+argv[i]);
  217. } else if (argv[i].equals("-i")) {
  218. options += "i";
  219. } else if (argv[i].equals("-m")) {
  220. options += "m";
  221. } else if (argv[i].equals("-s")) {
  222. options += "s";
  223. } else if (argv[i].equals("-u")) {
  224. options += "u";
  225. } else if (argv[i].equals("-w")) {
  226. options += "w";
  227. } else if (argv[i].equals("-X")) {
  228. options += "X";
  229. } else {
  230. System.err.println("Unknown option: "+argv[i]);
  231. }
  232. }
  233. RegularExpression reg = new RegularExpression(pattern, options);
  234. System.out.println("RegularExpression: "+reg);
  235. Match match = new Match();
  236. reg.matches(target, match);
  237. for (int i = 0; i < match.getNumberOfGroups(); i ++) {
  238. if (i == 0 ) System.out.print("Matched range for the whole pattern: ");
  239. else System.out.print("["+i+"]: ");
  240. if (match.getBeginning(i) < 0)
  241. System.out.println("-1");
  242. else {
  243. System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", ");
  244. System.out.println("\""+match.getCapturedText(i)+"\"");
  245. }
  246. }
  247. } catch (ParseException pe) {
  248. if (pattern == null) {
  249. pe.printStackTrace();
  250. } else {
  251. System.err.println("com.sun.org.apache.xerces.internal.utils.regex.ParseException: "+pe.getMessage());
  252. String indent = " ";
  253. System.err.println(indent+pattern);
  254. int loc = pe.getLocation();
  255. if (loc >= 0) {
  256. System.err.print(indent);
  257. for (int i = 0; i < loc; i ++) System.err.print("-");
  258. System.err.println("^");
  259. }
  260. }
  261. } catch (Exception e) {
  262. e.printStackTrace();
  263. }
  264. }
  265. static final int CACHESIZE = 20;
  266. static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];
  267. /**
  268. * Creates a RegularExpression instance.
  269. * This method caches created instances.
  270. *
  271. * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String)
  272. */
  273. public static RegularExpression createRegex(String pattern, String options)
  274. throws ParseException {
  275. RegularExpression re = null;
  276. int intOptions = REUtil.parseOptions(options);
  277. synchronized (REUtil.regexCache) {
  278. int i;
  279. for (i = 0; i < REUtil.CACHESIZE; i ++) {
  280. RegularExpression cached = REUtil.regexCache[i];
  281. if (cached == null) {
  282. i = -1;
  283. break;
  284. }
  285. if (cached.equals(pattern, intOptions)) {
  286. re = cached;
  287. break;
  288. }
  289. }
  290. if (re != null) {
  291. if (i != 0) {
  292. System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i);
  293. REUtil.regexCache[0] = re;
  294. }
  295. } else {
  296. re = new RegularExpression(pattern, options);
  297. System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1);
  298. REUtil.regexCache[0] = re;
  299. }
  300. }
  301. return re;
  302. }
  303. /**
  304. *
  305. * @see RegularExpression#matches(java.lang.String)
  306. */
  307. public static boolean matches(String regex, String target) throws ParseException {
  308. return REUtil.createRegex(regex, null).matches(target);
  309. }
  310. /**
  311. *
  312. * @see RegularExpression#matches(java.lang.String)
  313. */
  314. public static boolean matches(String regex, String options, String target) throws ParseException {
  315. return REUtil.createRegex(regex, options).matches(target);
  316. }
  317. // ================================================================
  318. /**
  319. *
  320. */
  321. public static String quoteMeta(String literal) {
  322. int len = literal.length();
  323. StringBuffer buffer = null;
  324. for (int i = 0; i < len; i ++) {
  325. int ch = literal.charAt(i);
  326. if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
  327. if (buffer == null) {
  328. buffer = new StringBuffer(i+(len-i)*2);
  329. if (i > 0) buffer.append(literal.substring(0, i));
  330. }
  331. buffer.append((char)'\\');
  332. buffer.append((char)ch);
  333. } else if (buffer != null)
  334. buffer.append((char)ch);
  335. }
  336. return buffer != null ? buffer.toString() : literal;
  337. }
  338. // ================================================================
  339. static void dumpString(String v) {
  340. for (int i = 0; i < v.length(); i ++) {
  341. System.out.print(Integer.toHexString(v.charAt(i)));
  342. System.out.print(" ");
  343. }
  344. System.out.println();
  345. }
  346. }