1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl.xpath.regex;
  58. import java.text.CharacterIterator;
  59. /**
  60. * Boyer-Moore searcher.
  61. *
  62. * @version $Id: BMPattern.java,v 1.3 2002/08/09 15:18:17 neilg Exp $
  63. */
  64. public class BMPattern {
  65. char[] pattern;
  66. int[] shiftTable;
  67. boolean ignoreCase;
  68. public BMPattern(String pat, boolean ignoreCase) {
  69. this(pat, 256, ignoreCase);
  70. }
  71. public BMPattern(String pat, int tableSize, boolean ignoreCase) {
  72. this.pattern = pat.toCharArray();
  73. this.shiftTable = new int[tableSize];
  74. this.ignoreCase = ignoreCase;
  75. int length = pattern.length;
  76. for (int i = 0; i < this.shiftTable.length; i ++)
  77. this.shiftTable[i] = length;
  78. for (int i = 0; i < length; i ++) {
  79. char ch = this.pattern[i];
  80. int diff = length-i-1;
  81. int index = ch % this.shiftTable.length;
  82. if (diff < this.shiftTable[index])
  83. this.shiftTable[index] = diff;
  84. if (this.ignoreCase) {
  85. ch = Character.toUpperCase(ch);
  86. index = ch % this.shiftTable.length;
  87. if (diff < this.shiftTable[index])
  88. this.shiftTable[index] = diff;
  89. ch = Character.toLowerCase(ch);
  90. index = ch % this.shiftTable.length;
  91. if (diff < this.shiftTable[index])
  92. this.shiftTable[index] = diff;
  93. }
  94. }
  95. }
  96. /**
  97. *
  98. * @return -1 if <var>iterator</var> does not contain this pattern.
  99. */
  100. public int matches(CharacterIterator iterator, int start, int limit) {
  101. if (this.ignoreCase) return this.matchesIgnoreCase(iterator, start, limit);
  102. int plength = this.pattern.length;
  103. if (plength == 0) return start;
  104. int index = start+plength;
  105. while (index <= limit) {
  106. int pindex = plength;
  107. int nindex = index+1;
  108. char ch;
  109. do {
  110. if ((ch = iterator.setIndex(--index)) != this.pattern[--pindex])
  111. break;
  112. if (pindex == 0)
  113. return index;
  114. } while (pindex > 0);
  115. index += this.shiftTable[ch % this.shiftTable.length]+1;
  116. if (index < nindex) index = nindex;
  117. }
  118. return -1;
  119. }
  120. /**
  121. *
  122. * @return -1 if <var>str</var> does not contain this pattern.
  123. */
  124. public int matches(String str, int start, int limit) {
  125. if (this.ignoreCase) return this.matchesIgnoreCase(str, start, limit);
  126. int plength = this.pattern.length;
  127. if (plength == 0) return start;
  128. int index = start+plength;
  129. while (index <= limit) {
  130. //System.err.println("Starts at "+index);
  131. int pindex = plength;
  132. int nindex = index+1;
  133. char ch;
  134. do {
  135. if ((ch = str.charAt(--index)) != this.pattern[--pindex])
  136. break;
  137. if (pindex == 0)
  138. return index;
  139. } while (pindex > 0);
  140. index += this.shiftTable[ch % this.shiftTable.length]+1;
  141. if (index < nindex) index = nindex;
  142. }
  143. return -1;
  144. }
  145. /**
  146. *
  147. * @return -1 if <var>chars</char> does not contain this pattern.
  148. */
  149. public int matches(char[] chars, int start, int limit) {
  150. if (this.ignoreCase) return this.matchesIgnoreCase(chars, start, limit);
  151. int plength = this.pattern.length;
  152. if (plength == 0) return start;
  153. int index = start+plength;
  154. while (index <= limit) {
  155. //System.err.println("Starts at "+index);
  156. int pindex = plength;
  157. int nindex = index+1;
  158. char ch;
  159. do {
  160. if ((ch = chars[--index]) != this.pattern[--pindex])
  161. break;
  162. if (pindex == 0)
  163. return index;
  164. } while (pindex > 0);
  165. index += this.shiftTable[ch % this.shiftTable.length]+1;
  166. if (index < nindex) index = nindex;
  167. }
  168. return -1;
  169. }
  170. int matchesIgnoreCase(CharacterIterator iterator, int start, int limit) {
  171. int plength = this.pattern.length;
  172. if (plength == 0) return start;
  173. int index = start+plength;
  174. while (index <= limit) {
  175. int pindex = plength;
  176. int nindex = index+1;
  177. char ch;
  178. do {
  179. char ch1 = ch = iterator.setIndex(--index);
  180. char ch2 = this.pattern[--pindex];
  181. if (ch1 != ch2) {
  182. ch1 = Character.toUpperCase(ch1);
  183. ch2 = Character.toUpperCase(ch2);
  184. if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
  185. break;
  186. }
  187. if (pindex == 0)
  188. return index;
  189. } while (pindex > 0);
  190. index += this.shiftTable[ch % this.shiftTable.length]+1;
  191. if (index < nindex) index = nindex;
  192. }
  193. return -1;
  194. }
  195. int matchesIgnoreCase(String text, int start, int limit) {
  196. int plength = this.pattern.length;
  197. if (plength == 0) return start;
  198. int index = start+plength;
  199. while (index <= limit) {
  200. int pindex = plength;
  201. int nindex = index+1;
  202. char ch;
  203. do {
  204. char ch1 = ch = text.charAt(--index);
  205. char ch2 = this.pattern[--pindex];
  206. if (ch1 != ch2) {
  207. ch1 = Character.toUpperCase(ch1);
  208. ch2 = Character.toUpperCase(ch2);
  209. if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
  210. break;
  211. }
  212. if (pindex == 0)
  213. return index;
  214. } while (pindex > 0);
  215. index += this.shiftTable[ch % this.shiftTable.length]+1;
  216. if (index < nindex) index = nindex;
  217. }
  218. return -1;
  219. }
  220. int matchesIgnoreCase(char[] chars, int start, int limit) {
  221. int plength = this.pattern.length;
  222. if (plength == 0) return start;
  223. int index = start+plength;
  224. while (index <= limit) {
  225. int pindex = plength;
  226. int nindex = index+1;
  227. char ch;
  228. do {
  229. char ch1 = ch = chars[--index];
  230. char ch2 = this.pattern[--pindex];
  231. if (ch1 != ch2) {
  232. ch1 = Character.toUpperCase(ch1);
  233. ch2 = Character.toUpperCase(ch2);
  234. if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
  235. break;
  236. }
  237. if (pindex == 0)
  238. return index;
  239. } while (pindex > 0);
  240. index += this.shiftTable[ch % this.shiftTable.length]+1;
  241. if (index < nindex) index = nindex;
  242. }
  243. return -1;
  244. }
  245. /*
  246. public static void main(String[] argv) {
  247. try {
  248. int[] shiftTable = new int[256];
  249. initializeBoyerMoore(argv[0], shiftTable, true);
  250. int o = -1;
  251. CharacterIterator ite = new java.text.StringCharacterIterator(argv[1]);
  252. long start = System.currentTimeMillis();
  253. //for (int i = 0; i < 10000; i ++)
  254. o = searchIgnoreCasesWithBoyerMoore(ite, 0, argv[0], shiftTable);
  255. start = System.currentTimeMillis()-start;
  256. System.out.println("Result: "+o+", Elapsed: "+start);
  257. } catch (Exception ex) {
  258. ex.printStackTrace();
  259. }
  260. }*/
  261. }