1. package com.sun.org.apache.regexp.internal;
  2. /*
  3. * ====================================================================
  4. *
  5. * The Apache Software License, Version 1.1
  6. *
  7. * Copyright (c) 1999 The Apache Software Foundation. All rights
  8. * reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * 1. Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. *
  17. * 2. Redistributions in binary form must reproduce the above copyright
  18. * notice, this list of conditions and the following disclaimer in
  19. * the documentation and/or other materials provided with the
  20. * distribution.
  21. *
  22. * 3. The end-user documentation included with the redistribution, if
  23. * any, must include the following acknowlegement:
  24. * "This product includes software developed by the
  25. * Apache Software Foundation (http://www.apache.org/)."
  26. * Alternately, this acknowlegement may appear in the software itself,
  27. * if and wherever such third-party acknowlegements normally appear.
  28. *
  29. * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
  30. * Foundation" must not be used to endorse or promote products derived
  31. * from this software without prior written permission. For written
  32. * permission, please contact apache@apache.org.
  33. *
  34. * 5. Products derived from this software may not be called "Apache"
  35. * nor may "Apache" appear in their names without prior written
  36. * permission of the Apache Group.
  37. *
  38. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  39. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  40. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  41. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  42. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  43. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  44. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  45. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  46. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  47. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  48. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  49. * SUCH DAMAGE.
  50. * ====================================================================
  51. *
  52. * This software consists of voluntary contributions made by many
  53. * individuals on behalf of the Apache Software Foundation. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. *
  57. */
  58. import com.sun.org.apache.regexp.internal.RE;
  59. import java.util.Hashtable;
  60. /**
  61. * A class that holds compiled regular expressions. This is exposed mainly
  62. * for use by the recompile utility (which helps you produce precompiled
  63. * REProgram objects). You should not otherwise need to work directly with
  64. * this class.
  65. *
  66. * @see RE
  67. * @see RECompiler
  68. *
  69. * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
  70. * @version $Id: REProgram.java,v 1.1 2000/04/27 01:22:33 jon Exp $
  71. */
  72. public class REProgram
  73. {
  74. static final int OPT_HASBACKREFS = 1;
  75. char[] instruction; // The compiled regular expression 'program'
  76. int lenInstruction; // The amount of the instruction buffer in use
  77. char[] prefix; // Prefix string optimization
  78. int flags; // Optimization flags (REProgram.OPT_*)
  79. /**
  80. * Constructs a program object from a character array
  81. * @param instruction Character array with RE opcode instructions in it
  82. */
  83. public REProgram(char[] instruction)
  84. {
  85. this(instruction, instruction.length);
  86. }
  87. /**
  88. * Constructs a program object from a character array
  89. * @param instruction Character array with RE opcode instructions in it
  90. * @param lenInstruction Amount of instruction array in use
  91. */
  92. public REProgram(char[] instruction, int lenInstruction)
  93. {
  94. setInstructions(instruction, lenInstruction);
  95. }
  96. /**
  97. * Returns a copy of the current regular expression program in a character
  98. * array that is exactly the right length to hold the program. If there is
  99. * no program compiled yet, getInstructions() will return null.
  100. * @return A copy of the current compiled RE program
  101. */
  102. public char[] getInstructions()
  103. {
  104. // Ensure program has been compiled!
  105. if (lenInstruction != 0)
  106. {
  107. // Return copy of program
  108. char[] ret = new char[lenInstruction];
  109. System.arraycopy(instruction, 0, ret, 0, lenInstruction);
  110. return ret;
  111. }
  112. return null;
  113. }
  114. /**
  115. * Sets a new regular expression program to run. It is this method which
  116. * performs any special compile-time search optimizations. Currently only
  117. * two optimizations are in place - one which checks for backreferences
  118. * (so that they can be lazily allocated) and another which attempts to
  119. * find an prefix anchor string so that substantial amounts of input can
  120. * potentially be skipped without running the actual program.
  121. * @param instruction Program instruction buffer
  122. * @param lenInstruction Length of instruction buffer in use
  123. */
  124. public void setInstructions(char[] instruction, int lenInstruction)
  125. {
  126. // Save reference to instruction array
  127. this.instruction = instruction;
  128. this.lenInstruction = lenInstruction;
  129. // Initialize other program-related variables
  130. flags = 0;
  131. prefix = null;
  132. // Try various compile-time optimizations if there's a program
  133. if (instruction != null && lenInstruction != 0)
  134. {
  135. // If the first node is a branch
  136. if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
  137. {
  138. // to the end node
  139. int next = instruction[0 + RE.offsetNext];
  140. if (instruction[next + RE.offsetOpcode] == RE.OP_END)
  141. {
  142. // and the branch starts with an atom
  143. if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
  144. {
  145. // then get that atom as an prefix because there's no other choice
  146. int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
  147. prefix = new char[lenAtom];
  148. System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
  149. }
  150. }
  151. }
  152. BackrefScanLoop:
  153. // Check for backreferences
  154. for (int i = 0; i < lenInstruction; i += RE.nodeSize)
  155. {
  156. switch (instruction[i + RE.offsetOpcode])
  157. {
  158. case RE.OP_ANYOF:
  159. i += (instruction[i + RE.offsetOpdata] * 2);
  160. break;
  161. case RE.OP_ATOM:
  162. i += instruction[i + RE.offsetOpdata];
  163. break;
  164. case RE.OP_BACKREF:
  165. flags |= OPT_HASBACKREFS;
  166. break BackrefScanLoop;
  167. }
  168. }
  169. }
  170. }
  171. }