< prev index next >

src/hotspot/share/opto/compile.cpp

Print this page
rev 54960 : 8213084: Rework and enhance Print[Opto]Assembly output
Reviewed-by: kvn, thartmann
   1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


 696 #endif
 697 {
 698   C = this;
 699 #ifndef PRODUCT
 700   if (_printer != NULL) {
 701     _printer->set_compile(this);
 702   }
 703 #endif
 704   CompileWrapper cw(this);
 705 
 706   if (CITimeVerbose) {
 707     tty->print(" ");
 708     target->holder()->name()->print();
 709     tty->print(".");
 710     target->print_short_name();
 711     tty->print("  ");
 712   }
 713   TraceTime t1("Total compilation time", &_t_totalCompilation, CITime, CITimeVerbose);
 714   TraceTime t2(NULL, &_t_methodCompilation, CITime, false);
 715 
 716 #ifndef PRODUCT
 717   bool print_opto_assembly = directive->PrintOptoAssemblyOption;
 718   if (!print_opto_assembly) {



 719     bool print_assembly = directive->PrintAssemblyOption;
 720     if (print_assembly && !Disassembler::can_decode()) {
 721       tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
 722       print_opto_assembly = true;
 723     }
 724   }
 725   set_print_assembly(print_opto_assembly);
 726   set_parsed_irreducible_loop(false);
 727 
 728   if (directive->ReplayInlineOption) {
 729     _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
 730   }
 731 #endif
 732   set_print_inlining(directive->PrintInliningOption || PrintOptoInlining);
 733   set_print_intrinsics(directive->PrintIntrinsicsOption);
 734   set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
 735 
 736   if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
 737     // Make sure the method being compiled gets its own MDO,
 738     // so we can at least track the decompile_count().
 739     // Need MDO to record RTM code generation state.
 740     method()->ensure_method_data();
 741   }
 742 
 743   Init(::AliasLevel);
 744 
 745 


1010     _print_inlining_output(NULL),
1011     _replay_inline_data(NULL),
1012     _java_calls(0),
1013     _inner_loops(0),
1014     _interpreter_frame_size(0),
1015     _node_bundling_limit(0),
1016     _node_bundling_base(NULL),
1017     _code_buffer("Compile::Fill_buffer"),
1018 #ifndef PRODUCT
1019     _in_dump_cnt(0),
1020 #endif
1021     _allowed_reasons(0) {
1022   C = this;
1023 
1024   TraceTime t1(NULL, &_t_totalCompilation, CITime, false);
1025   TraceTime t2(NULL, &_t_stubCompilation, CITime, false);
1026 
1027 #ifndef PRODUCT
1028   set_print_assembly(PrintFrameConverterAssembly);
1029   set_parsed_irreducible_loop(false);


1030 #endif
1031   set_has_irreducible_loop(false); // no loops
1032 
1033   CompileWrapper cw(this);
1034   Init(/*AliasLevel=*/ 0);
1035   init_tf((*generator)());
1036 
1037   {
1038     // The following is a dummy for the sake of GraphKit::gen_stub
1039     Unique_Node_List for_igvn(comp_arena());
1040     set_for_igvn(&for_igvn);  // not used, but some GraphKit guys push on this
1041     PhaseGVN gvn(Thread::current()->resource_area(),255);
1042     set_initial_gvn(&gvn);    // not significant, but GraphKit guys use it pervasively
1043     gvn.transform_no_reclaim(top());
1044 
1045     GraphKit kit;
1046     kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
1047   }
1048 
1049   NOT_PRODUCT( verify_graph_edges(); )


2535     TracePhase tp("postalloc_expand", &timers[_t_postalloc_expand]);
2536     cfg.postalloc_expand(_regalloc);
2537   }
2538 
2539   // Convert Nodes to instruction bits in a buffer
2540   {
2541     TraceTime tp("output", &timers[_t_output], CITime);
2542     Output();
2543   }
2544 
2545   print_method(PHASE_FINAL_CODE);
2546 
2547   // He's dead, Jim.
2548   _cfg     = (PhaseCFG*)((intptr_t)0xdeadbeef);
2549   _regalloc = (PhaseChaitin*)((intptr_t)0xdeadbeef);
2550 }
2551 
2552 
2553 //------------------------------dump_asm---------------------------------------
2554 // Dump formatted assembly
2555 #ifndef PRODUCT
2556 void Compile::dump_asm(int *pcs, uint pc_limit) {













2557   bool cut_short = false;
2558   tty->print_cr("#");
2559   tty->print("#  ");  _tf->dump();  tty->cr();
2560   tty->print_cr("#");
2561 
2562   // For all blocks
2563   int pc = 0x0;                 // Program counter
2564   char starts_bundle = ' ';
2565   _regalloc->dump_frame();
2566 
2567   Node *n = NULL;
2568   for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
2569     if (VMThread::should_terminate()) {
2570       cut_short = true;
2571       break;
2572     }
2573     Block* block = _cfg->get_block(i);
2574     if (block->is_connector() && !Verbose) {
2575       continue;
2576     }
2577     n = block->head();
2578     if (pcs && n->_idx < pc_limit) {
2579       tty->print("%3.3x   ", pcs[n->_idx]);
2580     } else {
2581       tty->print("      ");
2582     }
2583     block->dump_head(_cfg);

2584     if (block->is_connector()) {
2585       tty->print_cr("        # Empty connector block");

2586     } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
2587       tty->print_cr("        # Block is sole successor of call");

2588     }
2589 
2590     // For all instructions
2591     Node *delay = NULL;
2592     for (uint j = 0; j < block->number_of_nodes(); j++) {
2593       if (VMThread::should_terminate()) {
2594         cut_short = true;
2595         break;
2596       }
2597       n = block->get_node(j);
2598       if (valid_bundle_info(n)) {
2599         Bundle* bundle = node_bundling(n);
2600         if (bundle->used_in_unconditional_delay()) {
2601           delay = n;
2602           continue;
2603         }
2604         if (bundle->starts_bundle()) {
2605           starts_bundle = '+';
2606         }
2607       }
2608 
2609       if (WizardMode) {
2610         n->dump();
2611       }
2612 
2613       if( !n->is_Region() &&    // Dont print in the Assembly
2614           !n->is_Phi() &&       // a few noisely useless nodes
2615           !n->is_Proj() &&
2616           !n->is_MachTemp() &&
2617           !n->is_SafePointScalarObject() &&
2618           !n->is_Catch() &&     // Would be nice to print exception table targets
2619           !n->is_MergeMem() &&  // Not very interesting
2620           !n->is_top() &&       // Debug info table constants
2621           !(n->is_Con() && !n->is_Mach())// Debug info table constants
2622           ) {
2623         if (pcs && n->_idx < pc_limit)
2624           tty->print("%3.3x", pcs[n->_idx]);
2625         else
2626           tty->print("   ");
2627         tty->print(" %c ", starts_bundle);


2628         starts_bundle = ' ';
2629         tty->print("\t");
2630         n->format(_regalloc, tty);
2631         tty->cr();
2632       }
2633 
2634       // If we have an instruction with a delay slot, and have seen a delay,
2635       // then back up and print it
2636       if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
2637         assert(delay != NULL, "no unconditional delay instruction");

2638         if (WizardMode) delay->dump();
2639 
2640         if (node_bundling(delay)->starts_bundle())
2641           starts_bundle = '+';
2642         if (pcs && n->_idx < pc_limit)
2643           tty->print("%3.3x", pcs[n->_idx]);
2644         else
2645           tty->print("   ");
2646         tty->print(" %c ", starts_bundle);


2647         starts_bundle = ' ';
2648         tty->print("\t");
2649         delay->format(_regalloc, tty);
2650         tty->cr();
2651         delay = NULL;
2652       }
2653 
2654       // Dump the exception table as well
2655       if( n->is_Catch() && (Verbose || WizardMode) ) {
2656         // Print the exception table for this offset
2657         _handler_table.print_subtable_for(pc);
2658       }

2659     }
2660 
2661     if (pcs && n->_idx < pc_limit)
2662       tty->print_cr("%3.3x", pcs[n->_idx]);
2663     else
2664       tty->cr();
2665 
2666     assert(cut_short || delay == NULL, "no unconditional delay branch");
2667 
2668   } // End of per-block dump
2669   tty->cr();
2670 
2671   if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
2672 }
2673 #endif
2674 
2675 //------------------------------Final_Reshape_Counts---------------------------
2676 // This class defines counters to help identify when a method
2677 // may/must be executed using hardware with only 24-bit precision.
2678 struct Final_Reshape_Counts : public StackObj {
2679   int  _call_count;             // count non-inlined 'common' calls
2680   int  _float_count;            // count float ops requiring 24-bit precision
2681   int  _double_count;           // count double ops requiring more precision
2682   int  _java_call_count;        // count non-inlined 'java' calls
2683   int  _inner_loop_count;       // count loops which need alignment
2684   VectorSet _visited;           // Visitation flags
2685   Node_List _tests;             // Set of IfNodes & PCTableNodes
2686 
2687   Final_Reshape_Counts() :
2688     _call_count(0), _float_count(0), _double_count(0),
2689     _java_call_count(0), _inner_loop_count(0),
2690     _visited( Thread::current()->resource_area() ) { }
2691 


   1 /*
   2  * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


 696 #endif
 697 {
 698   C = this;
 699 #ifndef PRODUCT
 700   if (_printer != NULL) {
 701     _printer->set_compile(this);
 702   }
 703 #endif
 704   CompileWrapper cw(this);
 705 
 706   if (CITimeVerbose) {
 707     tty->print(" ");
 708     target->holder()->name()->print();
 709     tty->print(".");
 710     target->print_short_name();
 711     tty->print("  ");
 712   }
 713   TraceTime t1("Total compilation time", &_t_totalCompilation, CITime, CITimeVerbose);
 714   TraceTime t2(NULL, &_t_methodCompilation, CITime, false);
 715 
 716 #if defined(SUPPORT_ASSEMBLY) || defined(SUPPORT_ABSTRACT_ASSEMBLY)
 717   bool print_opto_assembly = directive->PrintOptoAssemblyOption;
 718   // We can always print a disassembly, either abstract (hex dump) or
 719   // with the help of a suitable hsdis library. Thus, we should not
 720   // couple print_assembly and print_opto_assembly controls.
 721   // But: always print opto and regular assembly on compile command 'print'.
 722   bool print_assembly = directive->PrintAssemblyOption;
 723   set_print_assembly(print_opto_assembly || print_assembly);
 724 #else
 725   set_print_assembly(false); // must initialize.
 726 #endif
 727 
 728 #ifndef PRODUCT
 729   set_parsed_irreducible_loop(false);
 730 
 731   if (directive->ReplayInlineOption) {
 732     _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
 733   }
 734 #endif
 735   set_print_inlining(directive->PrintInliningOption || PrintOptoInlining);
 736   set_print_intrinsics(directive->PrintIntrinsicsOption);
 737   set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
 738 
 739   if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
 740     // Make sure the method being compiled gets its own MDO,
 741     // so we can at least track the decompile_count().
 742     // Need MDO to record RTM code generation state.
 743     method()->ensure_method_data();
 744   }
 745 
 746   Init(::AliasLevel);
 747 
 748 


1013     _print_inlining_output(NULL),
1014     _replay_inline_data(NULL),
1015     _java_calls(0),
1016     _inner_loops(0),
1017     _interpreter_frame_size(0),
1018     _node_bundling_limit(0),
1019     _node_bundling_base(NULL),
1020     _code_buffer("Compile::Fill_buffer"),
1021 #ifndef PRODUCT
1022     _in_dump_cnt(0),
1023 #endif
1024     _allowed_reasons(0) {
1025   C = this;
1026 
1027   TraceTime t1(NULL, &_t_totalCompilation, CITime, false);
1028   TraceTime t2(NULL, &_t_stubCompilation, CITime, false);
1029 
1030 #ifndef PRODUCT
1031   set_print_assembly(PrintFrameConverterAssembly);
1032   set_parsed_irreducible_loop(false);
1033 #else
1034   set_print_assembly(false); // Must initialize.
1035 #endif
1036   set_has_irreducible_loop(false); // no loops
1037 
1038   CompileWrapper cw(this);
1039   Init(/*AliasLevel=*/ 0);
1040   init_tf((*generator)());
1041 
1042   {
1043     // The following is a dummy for the sake of GraphKit::gen_stub
1044     Unique_Node_List for_igvn(comp_arena());
1045     set_for_igvn(&for_igvn);  // not used, but some GraphKit guys push on this
1046     PhaseGVN gvn(Thread::current()->resource_area(),255);
1047     set_initial_gvn(&gvn);    // not significant, but GraphKit guys use it pervasively
1048     gvn.transform_no_reclaim(top());
1049 
1050     GraphKit kit;
1051     kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
1052   }
1053 
1054   NOT_PRODUCT( verify_graph_edges(); )


2540     TracePhase tp("postalloc_expand", &timers[_t_postalloc_expand]);
2541     cfg.postalloc_expand(_regalloc);
2542   }
2543 
2544   // Convert Nodes to instruction bits in a buffer
2545   {
2546     TraceTime tp("output", &timers[_t_output], CITime);
2547     Output();
2548   }
2549 
2550   print_method(PHASE_FINAL_CODE);
2551 
2552   // He's dead, Jim.
2553   _cfg     = (PhaseCFG*)((intptr_t)0xdeadbeef);
2554   _regalloc = (PhaseChaitin*)((intptr_t)0xdeadbeef);
2555 }
2556 
2557 
2558 //------------------------------dump_asm_on------------------------------------
2559 // Dump formatted assembly
2560 #if defined(SUPPORT_OPTO_ASSEMBLY)
     // Write a formatted listing of the scheduled code to 'st': a header with
     // the type function (_tf), then every block of _cfg with its nodes.
     //
     //   st       - stream receiving the listing.
     //   pcs      - optional table of code offsets indexed by node _idx. May be
     //              NULL, in which case no offsets are printed.
     //   pc_limit - number of entries scanned in pcs; an offset is only looked
     //              up for nodes with _idx < pc_limit.
     //
     // The listing is abandoned, with a closing note, once
     // VMThread::should_terminate() reports true.
2561 void Compile::dump_asm_on(outputStream* st, int* pcs, uint pc_limit) {
2562 
2563   int pc_digits = 3; // #chars required for pc
2564   int sb_chars  = 3; // #chars for "start bundle" indicator
2565   int tab_size  = 8;
       // Size the offset column for the largest offset present in pcs.
2566   if (pcs != NULL) {
2567     int max_pc = 0;
2568     for (uint i = 0; i < pc_limit; i++) {
2569       max_pc = (max_pc < pcs[i]) ? pcs[i] : max_pc;
2570     }
         // 3 hex digits below 0x1000, 4 below 0x10000, 6 below 0x1000000, else 8.
2571     pc_digits  = ((max_pc < 4096) ? 3 : ((max_pc < 65536) ? 4 : ((max_pc < 65536*256) ? 6 : 8))); // #chars required for pc
2572   }
       // Offset column plus bundle marker, rounded up to a multiple of tab_size.
       // Block heads and instruction text are filled to this column.
2573   int prefix_len = ((pc_digits + sb_chars + tab_size - 1)/tab_size)*tab_size;
2574 
2575   bool cut_short = false;
2576   st->print_cr("#");
2577   st->print("#  ");  _tf->dump_on(st);  st->cr();
2578   st->print_cr("#");
2579 
2580   // For all blocks
2581   int pc = 0x0;                 // Program counter
2582   char starts_bundle = ' ';
       // NOTE(review): dump_frame() is called without a stream argument, so its
       // output presumably does not go to 'st' - confirm.
2583   _regalloc->dump_frame();
2584 
2585   Node *n = NULL;
2586   for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
2587     if (VMThread::should_terminate()) {
2588       cut_short = true;
2589       break;
2590     }
2591     Block* block = _cfg->get_block(i);
         // Connector blocks are only listed in Verbose mode.
2592     if (block->is_connector() && !Verbose) {
2593       continue;
2594     }
2595     n = block->head();
2596     if ((pcs != NULL) && (n->_idx < pc_limit)) {
2597       pc = pcs[n->_idx];
2598       st->print("%*.*x", pc_digits, pc_digits, pc);

2599     }
2600     st->fill_to(prefix_len);
2601     block->dump_head(_cfg, st);
2602     if (block->is_connector()) {
2603       st->fill_to(prefix_len);
2604       st->print_cr("# Empty connector block");
2605     } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
2606       st->fill_to(prefix_len);
2607       st->print_cr("# Block is sole successor of call");
2608     }
2609 
2610     // For all instructions
         // 'delay' holds a node flagged used_in_unconditional_delay() until the
         // node flagged use_unconditional_delay() has been printed.
2611     Node *delay = NULL;
2612     for (uint j = 0; j < block->number_of_nodes(); j++) {
2613       if (VMThread::should_terminate()) {
2614         cut_short = true;
2615         break;
2616       }
2617       n = block->get_node(j);
2618       if (valid_bundle_info(n)) {
2619         Bundle* bundle = node_bundling(n);
2620         if (bundle->used_in_unconditional_delay()) {
2621           delay = n;
2622           continue;
2623         }
2624         if (bundle->starts_bundle()) {
2625           starts_bundle = '+';
2626         }
2627       }
2628 
           // NOTE(review): dump() takes no stream here, so this extra output
           // presumably does not go to 'st' - confirm.
2629       if (WizardMode) {
2630         n->dump();
2631       }
2632 
2633       if( !n->is_Region() &&    // Don't print in the Assembly
2634           !n->is_Phi() &&       // a few noisily useless nodes
2635           !n->is_Proj() &&
2636           !n->is_MachTemp() &&
2637           !n->is_SafePointScalarObject() &&
2638           !n->is_Catch() &&     // Would be nice to print exception table targets
2639           !n->is_MergeMem() &&  // Not very interesting
2640           !n->is_top() &&       // Debug info table constants
2641           !(n->is_Con() && !n->is_Mach())// Debug info table constants
2642           ) {
             // Line layout: <offset or padding> ' ' <bundle marker> ' ', filled to
             // prefix_len, then the formatted node.
2643         if ((pcs != NULL) && (n->_idx < pc_limit)) {
2644           pc = pcs[n->_idx];
2645           st->print("%*.*x", pc_digits, pc_digits, pc);
2646         } else {
2647           st->fill_to(pc_digits);
2648         }
2649         st->print(" %c ", starts_bundle);
2650         starts_bundle = ' ';
2651         st->fill_to(prefix_len);
2652         n->format(_regalloc, st);
2653         st->cr();
2654       }
2655 
2656       // If we have an instruction with a delay slot, and have seen a delay,
2657       // then back up and print it
2658       if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
2659         // Coverity finding - Explicit null dereferenced.
2660         guarantee(delay != NULL, "no unconditional delay instruction");
2661         if (WizardMode) delay->dump();
2662 
2663         if (node_bundling(delay)->starts_bundle())
2664           starts_bundle = '+';
             // The offset printed for the delay node is looked up with n->_idx,
             // i.e. the node that uses the delay slot, not with delay->_idx.
2665         if ((pcs != NULL) && (n->_idx < pc_limit)) {
2666           pc = pcs[n->_idx];
2667           st->print("%*.*x", pc_digits, pc_digits, pc);
2668         } else {
2669           st->fill_to(pc_digits);
2670         }
2671         st->print(" %c ", starts_bundle);
2672         starts_bundle = ' ';
2673         st->fill_to(prefix_len);
2674         delay->format(_regalloc, st);
2675         st->cr();
2676         delay = NULL;
2677       }
2678 
2679       // Dump the exception table as well
2680       if( n->is_Catch() && (Verbose || WizardMode) ) {
2681         // Print the exception table for this offset
             // 'pc' is the last offset assigned above; Catch nodes themselves never
             // assign it because they are excluded from the printing branch.
2682         _handler_table.print_subtable_for(pc);
2683       }
2684       st->bol(); // Make sure we start on a new line
2685     }
2686     st->cr(); // one empty line between blocks





2687     assert(cut_short || delay == NULL, "no unconditional delay branch");

2688   } // End of per-block dump

2689 
2690   if (cut_short)  st->print_cr("*** disassembly is cut short ***");
2691 }
2692 #endif
2693 
2694 //------------------------------Final_Reshape_Counts---------------------------
2695 // This class defines counters to help identify when a method
2696 // may/must be executed using hardware with only 24-bit precision.
2697 struct Final_Reshape_Counts : public StackObj {
2698   int  _call_count;             // count non-inlined 'common' calls
2699   int  _float_count;            // count float ops requiring 24-bit precision
2700   int  _double_count;           // count double ops requiring more precision
2701   int  _java_call_count;        // count non-inlined 'java' calls
2702   int  _inner_loop_count;       // count loops which need alignment
2703   VectorSet _visited;           // Visitation flags
2704   Node_List _tests;             // Set of IfNodes & PCTableNodes
2705 
2706   Final_Reshape_Counts() :
2707     _call_count(0), _float_count(0), _double_count(0),
2708     _java_call_count(0), _inner_loop_count(0),
2709     _visited( Thread::current()->resource_area() ) { }
2710 


< prev index next >