1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Copyright 1999-2004 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.regexp.internal; 22 23 import java.io.Serializable; 24 25 /** 26 * A class that holds compiled regular expressions. This is exposed mainly 27 * for use by the recompile utility (which helps you produce precompiled 28 * REProgram objects). You should not otherwise need to work directly with 29 * this class. 30 * 31 * @see RE 32 * @see RECompiler 33 * 34 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 35 */ 36 public class REProgram implements Serializable 37 { 38 static final int OPT_HASBACKREFS = 1; 39 40 char[] instruction; // The compiled regular expression 'program' 41 int lenInstruction; // The amount of the instruction buffer in use 42 char[] prefix; // Prefix string optimization 43 int flags; // Optimization flags (REProgram.OPT_*) 44 int maxParens = -1; 45 46 /** 47 * Constructs a program object from a character array 48 * @param instruction Character array with RE opcode instructions in it 49 */ 50 public REProgram(char[] instruction) 51 { 52 this(instruction, instruction.length); 53 } 54 55 /** 56 * Constructs a program object from a character array 57 * @param parens Count of parens in the program 58 * @param instruction Character array with RE opcode instructions in it 59 */ 60 public REProgram(int parens, char[] instruction) 61 { 62 this(instruction, instruction.length); 63 this.maxParens = parens; 64 } 65 66 /** 67 * Constructs a program object from a character array 68 * @param instruction Character array with RE opcode instructions in it 69 * @param lenInstruction Amount of instruction array in use 70 */ 71 public REProgram(char[] instruction, int lenInstruction) 72 { 73 setInstructions(instruction, lenInstruction); 74 } 75 76 /** 77 * Returns a copy of the current regular expression program in a character 78 * array that is exactly the right length to hold the program. If there is 79 * no program compiled yet, getInstructions() will return null. 80 * @return A copy of the current compiled RE program 81 */ 82 public char[] getInstructions() 83 { 84 // Ensure program has been compiled! 85 if (lenInstruction != 0) 86 { 87 // Return copy of program 88 char[] ret = new char[lenInstruction]; 89 System.arraycopy(instruction, 0, ret, 0, lenInstruction); 90 return ret; 91 } 92 return null; 93 } 94 95 /** 96 * Sets a new regular expression program to run. It is this method which 97 * performs any special compile-time search optimizations. Currently only 98 * two optimizations are in place - one which checks for backreferences 99 * (so that they can be lazily allocated) and another which attempts to 100 * find an prefix anchor string so that substantial amounts of input can 101 * potentially be skipped without running the actual program. 102 * @param instruction Program instruction buffer 103 * @param lenInstruction Length of instruction buffer in use 104 */ 105 public void setInstructions(char[] instruction, int lenInstruction) 106 { 107 // Save reference to instruction array 108 this.instruction = instruction; 109 this.lenInstruction = lenInstruction; 110 111 // Initialize other program-related variables 112 flags = 0; 113 prefix = null; 114 115 // Try various compile-time optimizations if there's a program 116 if (instruction != null && lenInstruction != 0) 117 { 118 // If the first node is a branch 119 if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) 120 { 121 // to the end node 122 int next = instruction[0 + RE.offsetNext]; 123 if (instruction[next + RE.offsetOpcode] == RE.OP_END) 124 { 125 // and the branch starts with an atom 126 if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM) 127 { 128 // then get that atom as an prefix because there's no other choice 129 int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata]; 130 prefix = new char[lenAtom]; 131 System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom); 132 } 133 } 134 } 135 136 BackrefScanLoop: 137 138 // Check for backreferences 139 for (int i = 0; i < lenInstruction; i += RE.nodeSize) 140 { 141 switch (instruction[i + RE.offsetOpcode]) 142 { 143 case RE.OP_ANYOF: 144 i += (instruction[i + RE.offsetOpdata] * 2); 145 break; 146 147 case RE.OP_ATOM: 148 i += instruction[i + RE.offsetOpdata]; 149 break; 150 151 case RE.OP_BACKREF: 152 flags |= OPT_HASBACKREFS; 153 break BackrefScanLoop; 154 } 155 } 156 } 157 } 158 }