- package com.sun.org.apache.regexp.internal;
-
- /*
- * ====================================================================
- *
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 1999 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution, if
- * any, must include the following acknowlegement:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowlegement may appear in the software itself,
- * if and wherever such third-party acknowlegements normally appear.
- *
- * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
- * Foundation" must not be used to endorse or promote products derived
- * from this software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache"
- * nor may "Apache" appear in their names without prior written
- * permission of the Apache Group.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- *
- */
-
- import com.sun.org.apache.regexp.internal.RE;
- import java.util.Hashtable;
-
- /**
- * A class that holds compiled regular expressions. This is exposed mainly
- * for use by the recompile utility (which helps you produce precompiled
- * REProgram objects). You should not otherwise need to work directly with
- * this class.
- *
- * @see RE
- * @see RECompiler
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @version $Id: REProgram.java,v 1.1 2000/04/27 01:22:33 jon Exp $
- */
- public class REProgram
- {
- static final int OPT_HASBACKREFS = 1;
-
- char[] instruction; // The compiled regular expression 'program'
- int lenInstruction; // The amount of the instruction buffer in use
- char[] prefix; // Prefix string optimization
- int flags; // Optimization flags (REProgram.OPT_*)
-
- /**
- * Constructs a program object from a character array
- * @param instruction Character array with RE opcode instructions in it
- */
- public REProgram(char[] instruction)
- {
- this(instruction, instruction.length);
- }
-
- /**
- * Constructs a program object from a character array
- * @param instruction Character array with RE opcode instructions in it
- * @param lenInstruction Amount of instruction array in use
- */
- public REProgram(char[] instruction, int lenInstruction)
- {
- setInstructions(instruction, lenInstruction);
- }
-
- /**
- * Returns a copy of the current regular expression program in a character
- * array that is exactly the right length to hold the program. If there is
- * no program compiled yet, getInstructions() will return null.
- * @return A copy of the current compiled RE program
- */
- public char[] getInstructions()
- {
- // Ensure program has been compiled!
- if (lenInstruction != 0)
- {
- // Return copy of program
- char[] ret = new char[lenInstruction];
- System.arraycopy(instruction, 0, ret, 0, lenInstruction);
- return ret;
- }
- return null;
- }
-
- /**
- * Sets a new regular expression program to run. It is this method which
- * performs any special compile-time search optimizations. Currently only
- * two optimizations are in place - one which checks for backreferences
- * (so that they can be lazily allocated) and another which attempts to
- * find an prefix anchor string so that substantial amounts of input can
- * potentially be skipped without running the actual program.
- * @param instruction Program instruction buffer
- * @param lenInstruction Length of instruction buffer in use
- */
- public void setInstructions(char[] instruction, int lenInstruction)
- {
- // Save reference to instruction array
- this.instruction = instruction;
- this.lenInstruction = lenInstruction;
-
- // Initialize other program-related variables
- flags = 0;
- prefix = null;
-
- // Try various compile-time optimizations if there's a program
- if (instruction != null && lenInstruction != 0)
- {
- // If the first node is a branch
- if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
- {
- // to the end node
- int next = instruction[0 + RE.offsetNext];
- if (instruction[next + RE.offsetOpcode] == RE.OP_END)
- {
- // and the branch starts with an atom
- if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
- {
- // then get that atom as an prefix because there's no other choice
- int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
- prefix = new char[lenAtom];
- System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
- }
- }
- }
-
- BackrefScanLoop:
-
- // Check for backreferences
- for (int i = 0; i < lenInstruction; i += RE.nodeSize)
- {
- switch (instruction[i + RE.offsetOpcode])
- {
- case RE.OP_ANYOF:
- i += (instruction[i + RE.offsetOpdata] * 2);
- break;
-
- case RE.OP_ATOM:
- i += instruction[i + RE.offsetOpdata];
- break;
-
- case RE.OP_BACKREF:
- flags |= OPT_HASBACKREFS;
- break BackrefScanLoop;
- }
- }
- }
- }
- }