1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * @LastModified: Oct 2017
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.bcel.internal.util;
  23 
  24 import com.sun.org.apache.bcel.internal.Const;
  25 import com.sun.org.apache.bcel.internal.generic.ClassGenException;
  26 import com.sun.org.apache.bcel.internal.generic.InstructionHandle;
  27 import com.sun.org.apache.bcel.internal.generic.InstructionList;
  28 import java.util.ArrayList;
  29 import java.util.HashMap;
  30 import java.util.Iterator;
  31 import java.util.List;
  32 import java.util.Locale;
  33 import java.util.Map;
  34 import java.util.regex.Matcher;
  35 import java.util.regex.Pattern;
  36 
  37 /**
  38  * InstructionFinder is a tool to search for given instructions patterns, i.e.,
  39  * match sequences of instructions in an instruction list via regular
  40  * expressions. This can be used, e.g., in order to implement a peep hole
  41  * optimizer that looks for code patterns and replaces them with faster
  42  * equivalents.
  43  *
  44  * <p>
  45  * This class internally uses the java.util.regex
  46  * package to search for regular expressions.
  47  *
  48  * A typical application would look like this:
  49  *
  50  * <pre>
  51  *
  52  *
  53  *   InstructionFinder f   = new InstructionFinder(il);
  54  *   String            pat = &quot;IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)&quot;;
  55  *
  56  *   for (Iterator i = f.search(pat, constraint); i.hasNext(); ) {
  57  *   InstructionHandle[] match = (InstructionHandle[])i.next();
  58  *   ...
  59  *   il.delete(match[1], match[5]);
  60  *   ...
  61  *   }
  62  *
  63  *
  64  * </pre>
  65  *
  66  * @version $Id: InstructionFinder.java 1749603 2016-06-21 20:50:19Z ggregory $
  67  * @see com.sun.org.apache.bcel.internal.generic.Instruction
  68  * @see InstructionList
  69  */
  70 public class InstructionFinder {
  71 
  72     private static final int OFFSET = 32767; // char + OFFSET is outside of LATIN-1
  73     private static final int NO_OPCODES = 256; // Potential number, some are not used
  74     private static final Map<String, String> map = new HashMap<>();
  75     private final InstructionList il;
  76     private String il_string; // instruction list as string
  77     private InstructionHandle[] handles; // map instruction
  78 
  79 
  80     // list to array
  81     /**
  82      * @param il
  83      *          instruction list to search for given patterns
  84      */
  85     public InstructionFinder(final InstructionList il) {
  86         this.il = il;
  87         reread();
  88     }
  89 
  90 
  91     /**
  92      * Reread the instruction list, e.g., after you've altered the list upon a
  93      * match.
  94      */
  95     public final void reread() {
  96         final int size = il.getLength();
  97         final char[] buf = new char[size]; // Create a string with length equal to il length
  98         handles = il.getInstructionHandles();
  99         // Map opcodes to characters
 100         for (int i = 0; i < size; i++) {
 101             buf[i] = makeChar(handles[i].getInstruction().getOpcode());
 102         }
 103         il_string = new String(buf);
 104     }
 105 
 106 
 107     /**
 108      * Map symbolic instruction names like "getfield" to a single character.
 109      *
 110      * @param pattern
 111      *          instruction pattern in lower case
 112      * @return encoded string for a pattern such as "BranchInstruction".
 113      */
 114     private static String mapName( final String pattern ) {
 115         final String result = map.get(pattern);
 116         if (result != null) {
 117             return result;
 118         }
 119         for (short i = 0; i < NO_OPCODES; i++) {
 120             if (pattern.equals(Const.getOpcodeName(i))) {
 121                 return "" + makeChar(i);
 122             }
 123         }
 124         throw new RuntimeException("Instruction unknown: " + pattern);
 125     }
 126 
 127 
 128     /**
 129      * Replace symbolic names of instructions with the appropiate character and
 130      * remove all white space from string. Meta characters such as +, * are
 131      * ignored.
 132      *
 133      * @param pattern
 134      *          The pattern to compile
 135      * @return translated regular expression string
 136      */
 137     private static String compilePattern( final String pattern ) {
 138         //Bug: BCEL-77 - Instructions are assumed to be english, to avoid odd Locale issues
 139         final String lower = pattern.toLowerCase(Locale.ENGLISH);
 140         final StringBuilder buf = new StringBuilder();
 141         final int size = pattern.length();
 142         for (int i = 0; i < size; i++) {
 143             char ch = lower.charAt(i);
 144             if (Character.isLetterOrDigit(ch)) {
 145                 final StringBuilder name = new StringBuilder();
 146                 while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) {
 147                     name.append(ch);
 148                     if (++i < size) {
 149                         ch = lower.charAt(i);
 150                     } else {
 151                         break;
 152                     }
 153                 }
 154                 i--;
 155                 buf.append(mapName(name.toString()));
 156             } else if (!Character.isWhitespace(ch)) {
 157                 buf.append(ch);
 158             }
 159         }
 160         return buf.toString();
 161     }
 162 
 163 
 164     /**
 165      * @return the matched piece of code as an array of instruction (handles)
 166      */
 167     private InstructionHandle[] getMatch( final int matched_from, final int match_length ) {
 168         final InstructionHandle[] match = new InstructionHandle[match_length];
 169         System.arraycopy(handles, matched_from, match, 0, match_length);
 170         return match;
 171     }
 172 
 173 
 174     /**
 175      * Search for the given pattern in the instruction list. You can search for
 176      * any valid opcode via its symbolic name, e.g. "istore". You can also use a
 177      * super class or an interface name to match a whole set of instructions, e.g.
 178      * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all
 179      * "istore_x" instructions. Additional aliases are "if" for "ifxx", "if_icmp"
 180      * for "if_icmpxx", "if_acmp" for "if_acmpxx".
 181      *
 182      * Consecutive instruction names must be separated by white space which will
 183      * be removed during the compilation of the pattern.
 184      *
 185      * For the rest the usual pattern matching rules for regular expressions
 186      * apply.
 187      * <P>
 188      * Example pattern:
 189      *
 190      * <pre>
 191      * search(&quot;BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*&quot;);
 192      * </pre>
 193      *
 194      * <p>
 195      * If you alter the instruction list upon a match such that other matching
 196      * areas are affected, you should call reread() to update the finder and call
 197      * search() again, because the matches are cached.
 198      *
 199      * @param pattern
 200      *          the instruction pattern to search for, where case is ignored
 201      * @param from
 202      *          where to start the search in the instruction list
 203      * @param constraint
 204      *          optional CodeConstraint to check the found code pattern for
 205      *          user-defined constraints
 206      * @return iterator of matches where e.nextElement() returns an array of
 207      *         instruction handles describing the matched area
 208      */
 209     public final Iterator<InstructionHandle[]> search( final String pattern,
 210             final InstructionHandle from, final CodeConstraint constraint ) {
 211         final String search = compilePattern(pattern);
 212         int start = -1;
 213         for (int i = 0; i < handles.length; i++) {
 214             if (handles[i] == from) {
 215                 start = i; // Where to start search from (index)
 216                 break;
 217             }
 218         }
 219         if (start == -1) {
 220             throw new ClassGenException("Instruction handle " + from
 221                     + " not found in instruction list.");
 222         }
 223         final Pattern regex = Pattern.compile(search);
 224         final List<InstructionHandle[]> matches = new ArrayList<>();
 225         final Matcher matcher = regex.matcher(il_string);
 226         while (start < il_string.length() && matcher.find(start)) {
 227             final int startExpr = matcher.start();
 228             final int endExpr = matcher.end();
 229             final int lenExpr = endExpr - startExpr;
 230             final InstructionHandle[] match = getMatch(startExpr, lenExpr);
 231             if ((constraint == null) || constraint.checkCode(match)) {
 232                 matches.add(match);
 233             }
 234             start = endExpr;
 235         }
 236         return matches.iterator();
 237     }
 238 
 239 
 240     /**
 241      * Start search beginning from the start of the given instruction list.
 242      *
 243      * @param pattern
 244      *          the instruction pattern to search for, where case is ignored
 245      * @return iterator of matches where e.nextElement() returns an array of
 246      *         instruction handles describing the matched area
 247      */
 248     public final Iterator<InstructionHandle[]> search( final String pattern ) {
 249         return search(pattern, il.getStart(), null);
 250     }
 251 
 252 
 253     /**
 254      * Start search beginning from `from'.
 255      *
 256      * @param pattern
 257      *          the instruction pattern to search for, where case is ignored
 258      * @param from
 259      *          where to start the search in the instruction list
 260      * @return iterator of matches where e.nextElement() returns an array of
 261      *         instruction handles describing the matched area
 262      */
 263     public final Iterator<InstructionHandle[]> search( final String pattern,
 264             final InstructionHandle from ) {
 265         return search(pattern, from, null);
 266     }
 267 
 268 
 269     /**
 270      * Start search beginning from the start of the given instruction list. Check
 271      * found matches with the constraint object.
 272      *
 273      * @param pattern
 274      *          the instruction pattern to search for, case is ignored
 275      * @param constraint
 276      *          constraints to be checked on matching code
 277      * @return instruction handle or `null' if the match failed
 278      */
 279     public final Iterator<InstructionHandle[]> search( final String pattern,
 280             final CodeConstraint constraint ) {
 281         return search(pattern, il.getStart(), constraint);
 282     }
 283 
 284 
 285     /**
 286      * Convert opcode number to char.
 287      */
 288     private static char makeChar( final short opcode ) {
 289         return (char) (opcode + OFFSET);
 290     }
 291 
 292 
 293     /**
 294      * @return the inquired instruction list
 295      */
 296     public final InstructionList getInstructionList() {
 297         return il;
 298     }
 299 
 300     /**
 301      * Code patterns found may be checked using an additional user-defined
 302      * constraint object whether they really match the needed criterion. I.e.,
 303      * check constraints that can not expressed with regular expressions.
 304      *
 305      */
 306     public interface CodeConstraint {
 307 
 308         /**
 309          * @param match
 310          *          array of instructions matching the requested pattern
 311          * @return true if the matched area is really useful
 312          */
 313         boolean checkCode( InstructionHandle[] match );
 314     }
 315 
 316     // Initialize pattern map
 317     static {
 318         map.put("arithmeticinstruction","(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
 319         map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial|invokedynamic)");
 320         map.put("arrayinstruction", "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
 321         map.put("gotoinstruction", "(goto|goto_w)");
 322         map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
 323         map.put("localvariableinstruction","(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
 324         map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
 325         map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)");
 326         map.put("cpinstruction", "(ldc2_w|invokeinterface|invokedynamic|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
 327         map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
 328         map.put("branchinstruction", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
 329         map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)");
 330         map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)");
 331         map.put("select", "(tableswitch|lookupswitch)");
 332         map.put("ifinstruction", "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
 333         map.put("jsrinstruction", "(jsr|jsr_w)");
 334         map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)");
 335         map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)");
 336         map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)");
 337         map.put("typedinstruction", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
 338         map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
 339         map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)");
 340         map.put("indexedinstruction", "(lload|lstore|fload|ldc2_w|invokeinterface|invokedynamic|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
 341         map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
 342         map.put("stackproducer", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|invokedynamic|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
 343         map.put("stackconsumer", "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
 344         map.put("exceptionthrower","(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|invokedynamic|ldc|invokestatic|daload)");
 345         map.put("loadclass", "(multianewarray|invokeinterface|invokedynamic|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
 346         map.put("instructiontargeter", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
 347         // Some aliases
 348         map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
 349         map.put("if_acmp", "(if_acmpeq|if_acmpne)");
 350         map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
 351         // Precompile some aliases first
 352         map.put("iconst", precompile(Const.ICONST_0, Const.ICONST_5, Const.ICONST_M1));
 353         map.put("lconst", new String(new char[] { '(', makeChar(Const.LCONST_0), '|', makeChar(Const.LCONST_1), ')' }));
 354         map.put("dconst", new String(new char[] { '(', makeChar(Const.DCONST_0), '|', makeChar(Const.DCONST_1), ')' }));
 355         map.put("fconst", new String(new char[] { '(', makeChar(Const.FCONST_0), '|', makeChar(Const.FCONST_1), '|', makeChar(Const.FCONST_2), ')' }));
 356         map.put("lload", precompile(Const.LLOAD_0, Const.LLOAD_3, Const.LLOAD));
 357         map.put("iload", precompile(Const.ILOAD_0, Const.ILOAD_3, Const.ILOAD));
 358         map.put("dload", precompile(Const.DLOAD_0, Const.DLOAD_3, Const.DLOAD));
 359         map.put("fload", precompile(Const.FLOAD_0, Const.FLOAD_3, Const.FLOAD));
 360         map.put("aload", precompile(Const.ALOAD_0, Const.ALOAD_3, Const.ALOAD));
 361         map.put("lstore", precompile(Const.LSTORE_0, Const.LSTORE_3, Const.LSTORE));
 362         map.put("istore", precompile(Const.ISTORE_0, Const.ISTORE_3, Const.ISTORE));
 363         map.put("dstore", precompile(Const.DSTORE_0, Const.DSTORE_3, Const.DSTORE));
 364         map.put("fstore", precompile(Const.FSTORE_0, Const.FSTORE_3, Const.FSTORE));
 365         map.put("astore", precompile(Const.ASTORE_0, Const.ASTORE_3, Const.ASTORE));
 366         // Compile strings
 367         for (final Map.Entry<String, String> entry : map.entrySet()) {
 368             final String key = entry.getKey();
 369             final String value = entry.getValue();
 370             final char ch = value.charAt(1); // Omit already precompiled patterns
 371             if (ch < OFFSET) {
 372                 map.put(key, compilePattern(value)); // precompile all patterns
 373             }
 374         }
 375         // Add instruction alias to match anything
 376         final StringBuilder buf = new StringBuilder("(");
 377         for (short i = 0; i < NO_OPCODES; i++) {
 378             if (Const.getNoOfOperands(i) != Const.UNDEFINED) { // Not an invalid opcode
 379                 buf.append(makeChar(i));
 380                 if (i < NO_OPCODES - 1) {
 381                     buf.append('|');
 382                 }
 383             }
 384         }
 385         buf.append(')');
 386         map.put("instruction", buf.toString());
 387     }
 388 
 389 
 390     private static String precompile( final short from, final short to, final short extra ) {
 391         final StringBuilder buf = new StringBuilder("(");
 392         for (short i = from; i <= to; i++) {
 393             buf.append(makeChar(i));
 394             buf.append('|');
 395         }
 396         buf.append(makeChar(extra));
 397         buf.append(")");
 398         return buf.toString();
 399     }
 400 
 401 
 402     /*
 403      * Internal debugging routines.
 404      */
 405 //    private static final String pattern2string( String pattern ) {
 406 //        return pattern2string(pattern, true);
 407 //    }
 408 
 409 
 410 //    private static final String pattern2string( String pattern, boolean make_string ) {
 411 //        StringBuffer buf = new StringBuffer();
 412 //        for (int i = 0; i < pattern.length(); i++) {
 413 //            char ch = pattern.charAt(i);
 414 //            if (ch >= OFFSET) {
 415 //                if (make_string) {
 416 //                    buf.append(Constants.getOpcodeName(ch - OFFSET));
 417 //                } else {
 418 //                    buf.append((ch - OFFSET));
 419 //                }
 420 //            } else {
 421 //                buf.append(ch);
 422 //            }
 423 //        }
 424 //        return buf.toString();
 425 //    }
 426 }