1 /*
2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3 * @LastModified: Oct 2017
4 */
5 /*
6 * Licensed to the Apache Software Foundation (ASF) under one or more
7 * contributor license agreements. See the NOTICE file distributed with
8 * this work for additional information regarding copyright ownership.
9 * The ASF licenses this file to You under the Apache License, Version 2.0
10 * (the "License"); you may not use this file except in compliance with
11 * the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22 package com.sun.org.apache.bcel.internal.util;
23
24 import com.sun.org.apache.bcel.internal.Const;
25 import com.sun.org.apache.bcel.internal.generic.ClassGenException;
26 import com.sun.org.apache.bcel.internal.generic.InstructionHandle;
27 import com.sun.org.apache.bcel.internal.generic.InstructionList;
28 import java.util.ArrayList;
29 import java.util.HashMap;
30 import java.util.Iterator;
31 import java.util.List;
32 import java.util.Locale;
33 import java.util.Map;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36
37 /**
38 * InstructionFinder is a tool to search for given instructions patterns, i.e.,
39 * match sequences of instructions in an instruction list via regular
40 * expressions. This can be used, e.g., in order to implement a peep hole
41 * optimizer that looks for code patterns and replaces them with faster
42 * equivalents.
43 *
44 * <p>
45 * This class internally uses the java.util.regex
46 * package to search for regular expressions.
47 *
48 * A typical application would look like this:
49 *
50 * <pre>
51 *
52 *
53 * InstructionFinder f = new InstructionFinder(il);
54 * String pat = "IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)";
55 *
56 * for (Iterator i = f.search(pat, constraint); i.hasNext(); ) {
57 * InstructionHandle[] match = (InstructionHandle[])i.next();
58 * ...
59 * il.delete(match[1], match[5]);
60 * ...
61 * }
62 *
63 *
64 * </pre>
65 *
66 * @version $Id: InstructionFinder.java 1749603 2016-06-21 20:50:19Z ggregory $
67 * @see com.sun.org.apache.bcel.internal.generic.Instruction
68 * @see InstructionList
69 */
70 public class InstructionFinder {
71
72 private static final int OFFSET = 32767; // char + OFFSET is outside of LATIN-1
73 private static final int NO_OPCODES = 256; // Potential number, some are not used
74 private static final Map<String, String> map = new HashMap<>();
75 private final InstructionList il;
76 private String il_string; // instruction list as string
77 private InstructionHandle[] handles; // map instruction
78
79
80 // list to array
81 /**
82 * @param il
83 * instruction list to search for given patterns
84 */
85 public InstructionFinder(final InstructionList il) {
86 this.il = il;
87 reread();
88 }
89
90
91 /**
92 * Reread the instruction list, e.g., after you've altered the list upon a
93 * match.
94 */
95 public final void reread() {
96 final int size = il.getLength();
97 final char[] buf = new char[size]; // Create a string with length equal to il length
98 handles = il.getInstructionHandles();
99 // Map opcodes to characters
100 for (int i = 0; i < size; i++) {
101 buf[i] = makeChar(handles[i].getInstruction().getOpcode());
102 }
103 il_string = new String(buf);
104 }
105
106
107 /**
108 * Map symbolic instruction names like "getfield" to a single character.
109 *
110 * @param pattern
111 * instruction pattern in lower case
112 * @return encoded string for a pattern such as "BranchInstruction".
113 */
114 private static String mapName( final String pattern ) {
115 final String result = map.get(pattern);
116 if (result != null) {
117 return result;
118 }
119 for (short i = 0; i < NO_OPCODES; i++) {
120 if (pattern.equals(Const.getOpcodeName(i))) {
121 return "" + makeChar(i);
122 }
123 }
124 throw new RuntimeException("Instruction unknown: " + pattern);
125 }
126
127
128 /**
129 * Replace symbolic names of instructions with the appropiate character and
130 * remove all white space from string. Meta characters such as +, * are
131 * ignored.
132 *
133 * @param pattern
134 * The pattern to compile
135 * @return translated regular expression string
136 */
137 private static String compilePattern( final String pattern ) {
138 //Bug: BCEL-77 - Instructions are assumed to be english, to avoid odd Locale issues
139 final String lower = pattern.toLowerCase(Locale.ENGLISH);
140 final StringBuilder buf = new StringBuilder();
141 final int size = pattern.length();
142 for (int i = 0; i < size; i++) {
143 char ch = lower.charAt(i);
144 if (Character.isLetterOrDigit(ch)) {
145 final StringBuilder name = new StringBuilder();
146 while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) {
147 name.append(ch);
148 if (++i < size) {
149 ch = lower.charAt(i);
150 } else {
151 break;
152 }
153 }
154 i--;
155 buf.append(mapName(name.toString()));
156 } else if (!Character.isWhitespace(ch)) {
157 buf.append(ch);
158 }
159 }
160 return buf.toString();
161 }
162
163
164 /**
165 * @return the matched piece of code as an array of instruction (handles)
166 */
167 private InstructionHandle[] getMatch( final int matched_from, final int match_length ) {
168 final InstructionHandle[] match = new InstructionHandle[match_length];
169 System.arraycopy(handles, matched_from, match, 0, match_length);
170 return match;
171 }
172
173
174 /**
175 * Search for the given pattern in the instruction list. You can search for
176 * any valid opcode via its symbolic name, e.g. "istore". You can also use a
177 * super class or an interface name to match a whole set of instructions, e.g.
178 * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all
179 * "istore_x" instructions. Additional aliases are "if" for "ifxx", "if_icmp"
180 * for "if_icmpxx", "if_acmp" for "if_acmpxx".
181 *
182 * Consecutive instruction names must be separated by white space which will
183 * be removed during the compilation of the pattern.
184 *
185 * For the rest the usual pattern matching rules for regular expressions
186 * apply.
187 * <P>
188 * Example pattern:
189 *
190 * <pre>
191 * search("BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*");
192 * </pre>
193 *
194 * <p>
195 * If you alter the instruction list upon a match such that other matching
196 * areas are affected, you should call reread() to update the finder and call
197 * search() again, because the matches are cached.
198 *
199 * @param pattern
200 * the instruction pattern to search for, where case is ignored
201 * @param from
202 * where to start the search in the instruction list
203 * @param constraint
204 * optional CodeConstraint to check the found code pattern for
205 * user-defined constraints
206 * @return iterator of matches where e.nextElement() returns an array of
207 * instruction handles describing the matched area
208 */
209 public final Iterator<InstructionHandle[]> search( final String pattern,
210 final InstructionHandle from, final CodeConstraint constraint ) {
211 final String search = compilePattern(pattern);
212 int start = -1;
213 for (int i = 0; i < handles.length; i++) {
214 if (handles[i] == from) {
215 start = i; // Where to start search from (index)
216 break;
217 }
218 }
219 if (start == -1) {
220 throw new ClassGenException("Instruction handle " + from
221 + " not found in instruction list.");
222 }
223 final Pattern regex = Pattern.compile(search);
224 final List<InstructionHandle[]> matches = new ArrayList<>();
225 final Matcher matcher = regex.matcher(il_string);
226 while (start < il_string.length() && matcher.find(start)) {
227 final int startExpr = matcher.start();
228 final int endExpr = matcher.end();
229 final int lenExpr = endExpr - startExpr;
230 final InstructionHandle[] match = getMatch(startExpr, lenExpr);
231 if ((constraint == null) || constraint.checkCode(match)) {
232 matches.add(match);
233 }
234 start = endExpr;
235 }
236 return matches.iterator();
237 }
238
239
240 /**
241 * Start search beginning from the start of the given instruction list.
242 *
243 * @param pattern
244 * the instruction pattern to search for, where case is ignored
245 * @return iterator of matches where e.nextElement() returns an array of
246 * instruction handles describing the matched area
247 */
248 public final Iterator<InstructionHandle[]> search( final String pattern ) {
249 return search(pattern, il.getStart(), null);
250 }
251
252
253 /**
254 * Start search beginning from `from'.
255 *
256 * @param pattern
257 * the instruction pattern to search for, where case is ignored
258 * @param from
259 * where to start the search in the instruction list
260 * @return iterator of matches where e.nextElement() returns an array of
261 * instruction handles describing the matched area
262 */
263 public final Iterator<InstructionHandle[]> search( final String pattern,
264 final InstructionHandle from ) {
265 return search(pattern, from, null);
266 }
267
268
269 /**
270 * Start search beginning from the start of the given instruction list. Check
271 * found matches with the constraint object.
272 *
273 * @param pattern
274 * the instruction pattern to search for, case is ignored
275 * @param constraint
276 * constraints to be checked on matching code
277 * @return instruction handle or `null' if the match failed
278 */
279 public final Iterator<InstructionHandle[]> search( final String pattern,
280 final CodeConstraint constraint ) {
281 return search(pattern, il.getStart(), constraint);
282 }
283
284
285 /**
286 * Convert opcode number to char.
287 */
288 private static char makeChar( final short opcode ) {
289 return (char) (opcode + OFFSET);
290 }
291
292
293 /**
294 * @return the inquired instruction list
295 */
296 public final InstructionList getInstructionList() {
297 return il;
298 }
299
300 /**
301 * Code patterns found may be checked using an additional user-defined
302 * constraint object whether they really match the needed criterion. I.e.,
303 * check constraints that can not expressed with regular expressions.
304 *
305 */
306 public interface CodeConstraint {
307
308 /**
309 * @param match
310 * array of instructions matching the requested pattern
311 * @return true if the matched area is really useful
312 */
313 boolean checkCode( InstructionHandle[] match );
314 }
315
316 // Initialize pattern map
317 static {
318 map.put("arithmeticinstruction","(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
319 map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial|invokedynamic)");
320 map.put("arrayinstruction", "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
321 map.put("gotoinstruction", "(goto|goto_w)");
322 map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
323 map.put("localvariableinstruction","(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
324 map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
325 map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)");
326 map.put("cpinstruction", "(ldc2_w|invokeinterface|invokedynamic|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
327 map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
328 map.put("branchinstruction", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
329 map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)");
330 map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)");
331 map.put("select", "(tableswitch|lookupswitch)");
332 map.put("ifinstruction", "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
333 map.put("jsrinstruction", "(jsr|jsr_w)");
334 map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)");
335 map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)");
336 map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)");
337 map.put("typedinstruction", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
338 map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
339 map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)");
340 map.put("indexedinstruction", "(lload|lstore|fload|ldc2_w|invokeinterface|invokedynamic|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
341 map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
342 map.put("stackproducer", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|invokedynamic|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
343 map.put("stackconsumer", "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
344 map.put("exceptionthrower","(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|invokedynamic|ldc|invokestatic|daload)");
345 map.put("loadclass", "(multianewarray|invokeinterface|invokedynamic|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
346 map.put("instructiontargeter", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
347 // Some aliases
348 map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
349 map.put("if_acmp", "(if_acmpeq|if_acmpne)");
350 map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
351 // Precompile some aliases first
352 map.put("iconst", precompile(Const.ICONST_0, Const.ICONST_5, Const.ICONST_M1));
353 map.put("lconst", new String(new char[] { '(', makeChar(Const.LCONST_0), '|', makeChar(Const.LCONST_1), ')' }));
354 map.put("dconst", new String(new char[] { '(', makeChar(Const.DCONST_0), '|', makeChar(Const.DCONST_1), ')' }));
355 map.put("fconst", new String(new char[] { '(', makeChar(Const.FCONST_0), '|', makeChar(Const.FCONST_1), '|', makeChar(Const.FCONST_2), ')' }));
356 map.put("lload", precompile(Const.LLOAD_0, Const.LLOAD_3, Const.LLOAD));
357 map.put("iload", precompile(Const.ILOAD_0, Const.ILOAD_3, Const.ILOAD));
358 map.put("dload", precompile(Const.DLOAD_0, Const.DLOAD_3, Const.DLOAD));
359 map.put("fload", precompile(Const.FLOAD_0, Const.FLOAD_3, Const.FLOAD));
360 map.put("aload", precompile(Const.ALOAD_0, Const.ALOAD_3, Const.ALOAD));
361 map.put("lstore", precompile(Const.LSTORE_0, Const.LSTORE_3, Const.LSTORE));
362 map.put("istore", precompile(Const.ISTORE_0, Const.ISTORE_3, Const.ISTORE));
363 map.put("dstore", precompile(Const.DSTORE_0, Const.DSTORE_3, Const.DSTORE));
364 map.put("fstore", precompile(Const.FSTORE_0, Const.FSTORE_3, Const.FSTORE));
365 map.put("astore", precompile(Const.ASTORE_0, Const.ASTORE_3, Const.ASTORE));
366 // Compile strings
367 for (final Map.Entry<String, String> entry : map.entrySet()) {
368 final String key = entry.getKey();
369 final String value = entry.getValue();
370 final char ch = value.charAt(1); // Omit already precompiled patterns
371 if (ch < OFFSET) {
372 map.put(key, compilePattern(value)); // precompile all patterns
373 }
374 }
375 // Add instruction alias to match anything
376 final StringBuilder buf = new StringBuilder("(");
377 for (short i = 0; i < NO_OPCODES; i++) {
378 if (Const.getNoOfOperands(i) != Const.UNDEFINED) { // Not an invalid opcode
379 buf.append(makeChar(i));
380 if (i < NO_OPCODES - 1) {
381 buf.append('|');
382 }
383 }
384 }
385 buf.append(')');
386 map.put("instruction", buf.toString());
387 }
388
389
390 private static String precompile( final short from, final short to, final short extra ) {
391 final StringBuilder buf = new StringBuilder("(");
392 for (short i = from; i <= to; i++) {
393 buf.append(makeChar(i));
394 buf.append('|');
395 }
396 buf.append(makeChar(extra));
397 buf.append(")");
398 return buf.toString();
399 }
400
401
402 /*
403 * Internal debugging routines.
404 */
405 // private static final String pattern2string( String pattern ) {
406 // return pattern2string(pattern, true);
407 // }
408
409
410 // private static final String pattern2string( String pattern, boolean make_string ) {
411 // StringBuffer buf = new StringBuffer();
412 // for (int i = 0; i < pattern.length(); i++) {
413 // char ch = pattern.charAt(i);
414 // if (ch >= OFFSET) {
415 // if (make_string) {
416 // buf.append(Constants.getOpcodeName(ch - OFFSET));
417 // } else {
418 // buf.append((ch - OFFSET));
419 // }
420 // } else {
421 // buf.append(ch);
422 // }
423 // }
424 // return buf.toString();
425 // }
426 }
--- EOF ---