jdk Sdiff src/hotspot/cpu/s390

src/hotspot/cpu/s390/s390.ad

rev 54542 : 8213084: Rework and enhance Print[Opto]Assembly output
Reviewed-by:

   1 //
   2 // Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2017, SAP SE. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //

1358     // The ic_miss_stub will handle the null pointer exception.
1359     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1360     __ z_br(R1_ic_miss_stub_addr);
1361     __ bind(valid);
1362   }
1363 
1364   // Check whether this method is the proper implementation for the class of
1365   // the receiver (ic miss check).
1366   {
1367     Label valid;
1368     // Compare cached class against klass from receiver.
1369     // This also does an implicit null check!
1370     __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1371     __ z_bre(valid);
1372     // The inline cache points to the wrong method. Call the
1373     // ic_miss_stub to find the proper method.
1374     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1375     __ z_br(R1_ic_miss_stub_addr);
1376     __ bind(valid);
1377   }
1378 
1379 }
1380 
1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
       // Returns the size of this node. No constant is hard-coded here; the
       // value is whatever the generic MachNode::size() reports for ra_.
1382   // Determine size dynamically.
1383   return MachNode::size(ra_);
1384 }
1385 
1386 //=============================================================================
1387 
1388 %} // interrupt source section
1389 
1390 source_hpp %{ // Header information of the source block.
1391 
1392 class HandlerImpl {
1393  public:
1394 
1395   static int emit_exception_handler(CodeBuffer &cbuf);
1396   static int emit_deopt_handler(CodeBuffer& cbuf);
1397 
1398   static uint size_exception_handler() {

4684 // See cOop encoding classes for elaborate comment.
4685 
4686 // Moved here because it is needed in expand rules for encode.
4687 // Long negation.
4688 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
       // Long negation written as (0 - src): matches SubL whose left operand is
       // the constant zero and emits one z_lcgr from src into dst.
       // It is also instantiated directly from the encodeP expand rules.
4689   match(Set dst (SubL zero src));
4690   effect(KILL cr);  // The condition code is declared as clobbered.
4691   size(4);
4692   format %{ "NEG     $dst, $src\t # long" %}
4693   ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4694   ins_pipe(pipe_class_dummy);
4695 %}
4696 
4697 // Load Compressed Pointer
4698 
4699 // Load narrow oop
4700 instruct loadN(iRegN dst, memory mem) %{
       // Narrow-oop load: matches LoadN and places the loaded value in dst.
       // Encoding is delegated to z_form_rt_mem_opt, with LLGF_ZOPC supplied
       // for both opcode slots.
4701   match(Set dst (LoadN mem));
4702   ins_cost(MEMORY_REF_COST);
4703   size(Z_DISP3_SIZE);
4704   format %{ "LoadN  $dst,$mem\t# (cOop)" %}
4705   opcode(LLGF_ZOPC, LLGF_ZOPC);
4706   ins_encode(z_form_rt_mem_opt(dst, mem));
4707   ins_pipe(pipe_class_dummy);
4708 %}
4709 
4710 // Load narrow Klass Pointer
4711 instruct loadNKlass(iRegN dst, memory mem) %{
       // Narrow-klass load: matches LoadNKlass and places the loaded value in
       // dst. Uses the same encoding class and opcode (LLGF_ZOPC) as loadN.
4712   match(Set dst (LoadNKlass mem));
4713   ins_cost(MEMORY_REF_COST);
4714   size(Z_DISP3_SIZE);
4715   format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %}
4716   opcode(LLGF_ZOPC, LLGF_ZOPC);
4717   ins_encode(z_form_rt_mem_opt(dst, mem));
4718   ins_pipe(pipe_class_dummy);
4719 %}
4720 
4721 // Load constant Compressed Pointer
4722 
4723 instruct loadConN(iRegN dst, immN src) %{
       // Materializes a narrow-oop constant in dst. The rule declares a fixed
       // size of 6 bytes.
4724   match(Set dst src);
4725   ins_cost(DEFAULT_COST);
4726   size(6);
4727   format %{ "loadConN    $dst,$src\t # (cOop)" %}
4728   ins_encode %{
           // Look up the address literal for the oop constant, register its
           // relocation spec, then emit the load of the narrow value.
4729     AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
4730     __ relocate(cOop.rspec(), 1);
4731     __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4732   %}
4733   ins_pipe(pipe_class_dummy);
4734 %}
4735

4748   match(Set dst src);
4749   ins_cost(DEFAULT_COST);
4750   size(6);
4751   format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4752   ins_encode %{
4753     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4754     __ relocate(NKlass.rspec(), 1);
4755     __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4756   %}
4757   ins_pipe(pipe_class_dummy);
4758 %}
4759 
4760 // Load and Decode Compressed Pointer
4761 // optimized variants for Unscaled cOops
4762 
4763 instruct decodeLoadN(iRegP dst, memory mem) %{
       // Fused load + decode of a narrow oop for the case of a NULL base and a
       // zero shift, where the loaded value needs no further transformation.
       // NOTE: the predicate starts with `false &&`, so this rule is currently
       // disabled and can never be selected by the matcher.
4764   match(Set dst (DecodeN (LoadN mem)));
4765   predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0));
4766   ins_cost(MEMORY_REF_COST);
4767   size(Z_DISP3_SIZE);
4768   format %{ "DecodeLoadN  $dst,$mem\t# (cOop Load+Decode)" %}
4769   opcode(LLGF_ZOPC, LLGF_ZOPC);
4770   ins_encode(z_form_rt_mem_opt(dst, mem));
4771   ins_pipe(pipe_class_dummy);
4772 %}
4773 
4774 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
       // Fused load + decode of a narrow klass pointer for a NULL klass base
       // and a zero klass shift.
       // NOTE: the predicate starts with `false &&`, so this rule is currently
       // disabled and can never be selected by the matcher.
4775   match(Set dst (DecodeNKlass (LoadNKlass mem)));
4776   predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0));
4777   ins_cost(MEMORY_REF_COST);
4778   size(Z_DISP3_SIZE);
4779   format %{ "DecodeLoadNKlass  $dst,$mem\t# (load/decode NKlass)" %}
4780   opcode(LLGF_ZOPC, LLGF_ZOPC);
4781   ins_encode(z_form_rt_mem_opt(dst, mem));
4782   ins_pipe(pipe_class_dummy);
4783 %}
4784 
4785 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
       // Decode of a constant narrow klass: instead of decoding at run time,
       // the Klass* value taken from the metadata address literal is loaded
       // directly with load_const. The rule declares a fixed size of 12 bytes.
4786   match(Set dst (DecodeNKlass src));
4787   ins_cost(3 * DEFAULT_COST);
4788   size(12);
4789   format %{ "DecodeLoadConNKlass  $dst,$src\t # decode(cKlass)" %}
4790   ins_encode %{
4791     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
           // Register the metadata relocation before the constant is emitted.
4792     __ relocate(NKlass.rspec(), 1);
4793     __ load_const($dst$$Register, (Klass*)NKlass.value());
4794   %}
4795   ins_pipe(pipe_class_dummy);
4796 %}
4797 
4798 // Decode Compressed Pointer
4799 
4800 // General decoder
4801 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
       // General narrow-oop decoder. Selected when the narrow-oop base is NULL
       // or when ExpandLoadingBaseDecode is off; otherwise decodeN_Ex (which
       // has the complementary predicate) applies.
       // The third oop_decoder argument is `true` here and `false` in the _NN
       // variant - presumably the "may be null" flag; confirm against
       // MacroAssembler::oop_decoder.
4802   match(Set dst (DecodeN src));
4803   effect(KILL cr);
4804   predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode);
4805   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4806   // TODO: s390 port size(VARIABLE_SIZE);
4807   format %{ "decodeN  $dst,$src\t# (decode cOop)" %}
4808   ins_encode %{  __ oop_decoder($dst$$Register, $src$$Register, true); %}
4809   ins_pipe(pipe_class_dummy);
4810 %}
4811 
4812 // General Klass decoder
4813 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
       // General narrow-klass decoder: matches DecodeNKlass on a register
       // operand and delegates to decode_klass_not_null. No predicate and no
       // fixed size are declared for this rule.
4814   match(Set dst (DecodeNKlass src));
4815   effect(KILL cr);
4816   ins_cost(3 * DEFAULT_COST);
4817   format %{ "decode_klass $dst,$src" %}
4818   ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4819   ins_pipe(pipe_class_dummy);
4820 %}
4821 
4822 // General decoder
4823 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
       // Narrow-oop decoder for results whose type is NotNull or Constant.
       // Same match rule as decodeN, but cheaper (no BRANCH_COST term) and it
       // passes `false` as the third oop_decoder argument.
       // Only applies when the narrow-oop base is NULL or
       // ExpandLoadingBaseDecode_NN is off.
4824   match(Set dst (DecodeN src));
4825   effect(KILL cr);
4826   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4827              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4828             (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN));
4829   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4830   // TODO: s390 port size(VARIABLE_SIZE);
4831   format %{ "decodeN  $dst,$src\t# (decode cOop NN)" %}
4832   ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4833   ins_pipe(pipe_class_dummy);
4834 %}
4835 
4836   instruct loadBase(iRegL dst, immL baseImm) %{
         // Helper rule that loads the heap base constant baseImm into dst via
         // get_oop_base. It has no match rule and predicate(false), so the
         // matcher never selects it; it is only instantiated from expand
         // blocks (e.g. decodeN_Ex).
4837     effect(DEF dst, USE baseImm);
4838     predicate(false);
4839     format %{ "llihl    $dst=$baseImm \t// load heap base" %}
4840     ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4841     ins_pipe(pipe_class_dummy);
4842   %}
4843 
4844   // Decoder for heapbased mode peeling off loading the base.
4845   instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
         // Decode step of the expanded heap-based decode: the base is passed
         // in as a register operand instead of being loaded here.
         // predicate(false) keeps the matcher from selecting it directly; it
         // is instantiated by the decodeN_Ex expand block.
4846     match(Set dst (DecodeN src base));
4847     // Note: Effect TEMP dst was used with the intention to get
4848     // different regs for dst and base, but this has caused ADLC to
4849     // generate wrong code. Oop_decoder generates additional lgr when
4850     // dst==base.
4851     effect(KILL cr);
4852     predicate(false);
4853     // TODO: s390 port size(VARIABLE_SIZE);
4854     format %{ "decodeN  $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4855     ins_encode %{
           // The last argument is the pow2 offset derived from the narrow-oop
           // base by get_oop_base_pow2_offset.
4856       __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4857                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4858     %}
4859     ins_pipe(pipe_class_dummy);
4860   %}
4861 
4862   // Decoder for heapbased mode peeling off loading the base.
4863   instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
         // Counterpart of decodeN_base that passes `false` as the third
         // oop_decoder argument (the format string shows no zero check).
         // predicate(false): only instantiated from an expand block.
4864     match(Set dst (DecodeN src base));
4865     effect(KILL cr);
4866     predicate(false);
4867     // TODO: s390 port size(VARIABLE_SIZE);
4868     format %{ "decodeN  $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4869     ins_encode %{
4870       __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4871                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4872     %}
4873     ins_pipe(pipe_class_dummy);
4874   %}
4875 
4876 // Decoder for heapbased mode peeling off loading the base.
4877 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
       // Expanding variant of the narrow-oop decode. Selected when the
       // narrow-oop base is non-NULL and ExpandLoadingBaseDecode is on (the
       // complement of decodeN's predicate). It emits no code itself but
       // expands into two nodes: loadBase followed by decodeN_base.
4878   match(Set dst (DecodeN src));
4879   predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode);
4880   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4881   // TODO: s390 port size(VARIABLE_SIZE);
4882   expand %{
         // baseImm carries the narrow-oop base as a long constant; base is
         // the temporary register that receives it.
4883     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4884     iRegL base;
4885     loadBase(base, baseImm);
4886     decodeN_base(dst, src, base, cr);
4887   %}
4888 %}

4898   expand %{
4899     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4900     iRegL base;
4901     loadBase(base, baseImm);
4902     decodeN_NN_base(dst, src, base, cr);
4903   %}
4904 %}
4905 
4906 //  Encode Compressed Pointer
4907 
4908 // General encoder
4909 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
       // General oop encoder for values whose type is not known to be
       // NotNull. Applies when the narrow-oop base is zero, the base is
       // disjoint, or ExpandLoadingBaseEncode is off.
       // oop_encoder is called with `true` as its third argument,
       // Z_R1_scratch as the base register, -1 as the offset, and the result
       // of all_outs_are_Stores(this) - see MacroAssembler::oop_encoder for
       // the exact meaning of these arguments.
4910   match(Set dst (EncodeP src));
4911   effect(KILL cr);
4912   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4913             (Universe::narrow_oop_base() == 0 ||
4914              Universe::narrow_oop_base_disjoint() ||
4915              !ExpandLoadingBaseEncode));
4916   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4917   // TODO: s390 port size(VARIABLE_SIZE);
4918   format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
4919   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4920   ins_pipe(pipe_class_dummy);
4921 %}
4922 
4923 // General class encoder
4924 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
       // General klass encoder: matches EncodePKlass and delegates to
       // encode_klass_not_null. No predicate, cost or fixed size is declared.
4925   match(Set dst (EncodePKlass src));
4926   effect(KILL cr);
4927   format %{ "encode_klass $dst,$src" %}
4928   ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4929   ins_pipe(pipe_class_dummy);
4930 %}
4931 
4932 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
       // Oop encoder for values whose type is NotNull. Same match rule and
       // cost as encodeP; differs in the NotNull type test, in checking
       // ExpandLoadingBaseEncode_NN instead of ExpandLoadingBaseEncode, and
       // in passing `false` as the third oop_encoder argument.
4933   match(Set dst (EncodeP src));
4934   effect(KILL cr);
4935   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4936             (Universe::narrow_oop_base() == 0 ||
4937              Universe::narrow_oop_base_disjoint() ||
4938              !ExpandLoadingBaseEncode_NN));
4939   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4940   // TODO: s390 port size(VARIABLE_SIZE);
4941   format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
4942   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4943   ins_pipe(pipe_class_dummy);
4944 %}
4945 
4946   // Encoder for heapbased mode peeling off loading the base.
4947   instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
         // Encode step that receives the base in a register operand.
         // predicate(false) keeps the matcher from selecting it directly.
         // dst is declared TEMP_DEF and also appears as an input in the match
         // rule.
4948     match(Set dst (EncodeP src (Binary base dst)));
4949     effect(TEMP_DEF dst);
4950     predicate(false);
4951     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4952     // TODO: s390 port size(VARIABLE_SIZE);
4953     format %{ "encodeP  $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %}
4954     ins_encode %{
           // The offset is the negated pow2 offset of the shifted narrow-oop
           // base (base >> narrow_oop_shift).
4955       jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4956         (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift());
4957       __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4958     %}
4959     ins_pipe(pipe_class_dummy);
4960   %}
4961 
4962   // Encoder for heapbased mode peeling off loading the base.
4963   instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
         // Encode step that receives both the base register and the pow2
         // offset constant as operands, and passes `false` as the third
         // oop_encoder argument. predicate(false): it is only instantiated
         // from an expand block, which supplies base and pow2_offset.
4964     match(Set dst (EncodeP src base));
4965     effect(USE pow2_offset);
4966     predicate(false);
4967     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4968     // TODO: s390 port size(VARIABLE_SIZE);
4969     format %{ "encodeP  $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %}
4970     ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4971     ins_pipe(pipe_class_dummy);
4972   %}
4973 
4974 // Encoder for heapbased mode peeling off loading the base.
4975 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4976   match(Set dst (EncodeP src));
4977   effect(KILL cr);
4978   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4979             (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode));
4980   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4981   // TODO: s390 port size(VARIABLE_SIZE);
4982   expand %{
4983     immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %}
4984     immL_0 zero %{ (0) %}
4985     flagsReg ccr;
4986     iRegL base;
4987     iRegL negBase;
4988     loadBase(base, baseImm);
4989     negL_reg_reg(negBase, zero, base, ccr);

5002   expand %{
5003     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
5004     immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %}
5005     immL_0 zero %{ 0 %}
5006     flagsReg ccr;
5007     iRegL base;
5008     iRegL negBase;
5009     loadBase(base, baseImm);
5010     negL_reg_reg(negBase, zero, base, ccr);
5011     encodeP_NN_base(dst, src, negBase, pow2_offset);
5012   %}
5013 %}
5014 
5015 //  Store Compressed Pointer
5016 
5017 // Store Compressed Pointer
5018 instruct storeN(memory mem, iRegN_P2N src) %{
       // Stores a narrow oop from src to mem. Two opcodes are supplied
       // (STY_ZOPC and ST_ZOPC); z_form_rt_mem_opt is handed both -
       // presumably it picks the shorter form when the displacement allows;
       // confirm against the encoding class.
5019   match(Set mem (StoreN mem src));
5020   ins_cost(MEMORY_REF_COST);
5021   size(Z_DISP_SIZE);
5022   format %{ "ST      $src,$mem\t# (cOop)" %}
5023   opcode(STY_ZOPC, ST_ZOPC);
5024   ins_encode(z_form_rt_mem_opt(src, mem));
5025   ins_pipe(pipe_class_dummy);
5026 %}
5027 
5028 // Store Compressed Klass pointer
5029 instruct storeNKlass(memory mem, iRegN src) %{
       // Stores a narrow klass pointer from src to mem. Same opcodes and
       // encoding class as storeN; the source operand class is iRegN rather
       // than iRegN_P2N.
5030   match(Set mem (StoreNKlass mem src));
5031   ins_cost(MEMORY_REF_COST);
5032   size(Z_DISP_SIZE);
5033   format %{ "ST      $src,$mem\t# (cKlass)" %}
5034   opcode(STY_ZOPC, ST_ZOPC);
5035   ins_encode(z_form_rt_mem_opt(src, mem));
5036   ins_pipe(pipe_class_dummy);
5037 %}
5038 
5039 // Compare Compressed Pointers
5040 
5041 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
       // Register-register compare of two narrow oops: matches CmpN and sets
       // cr using CLR_ZOPC through the z_rrform encoding class (2 bytes).
5042   match(Set cr (CmpN src1 src2));
5043   ins_cost(DEFAULT_COST);
5044   size(2);
5045   format %{ "CLR     $src1,$src2\t# (cOop)" %}
5046   opcode(CLR_ZOPC);
5047   ins_encode(z_rrform(src1, src2));
5048   ins_pipe(pipe_class_dummy);
5049 %}
5050 
5051 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
       // Compares a narrow oop register against a narrow-oop constant and
       // sets cr. The rule declares a fixed size of 6 bytes.
5052   match(Set cr (CmpN src1 src2));
5053   ins_cost(DEFAULT_COST);
5054   size(6);
5055   format %{ "CLFI    $src1,$src2\t# (cOop) compare immediate narrow" %}
5056   ins_encode %{
5057     AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
           // Register the oop relocation before the compare is emitted.
5058     __ relocate(cOop.rspec(), 1);
5059     __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5060   %}
5061   ins_pipe(pipe_class_dummy);
5062 %}
5063 
5064 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
       // Compares a narrow klass register against a narrow-klass constant and
       // sets cr. The match rule is CmpN with an immNKlass operand; the
       // constant goes through the metadata (not oop) relocation path.
5065   match(Set cr (CmpN src1 src2));
5066   ins_cost(DEFAULT_COST);
5067   size(6);
5068   format %{ "CLFI    $src1,$src2\t# (NKlass) compare immediate narrow" %}
5069   ins_encode %{
5070     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
5071     __ relocate(NKlass.rspec(), 1);
5072     __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5073   %}
5074   ins_pipe(pipe_class_dummy);
5075 %}
5076 
5077 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
       // Compares a narrow oop against the constant zero. No compare
       // instruction is needed: LTR of src1 onto itself sets the condition
       // code from the register value. src2 is only used for matching and is
       // not encoded, so the format string prints the operands that are
       // really emitted (src1, src1).
5078   match(Set cr (CmpN src1 src2));
5079   ins_cost(DEFAULT_COST);
5080   size(2);
5081   format %{ "LTR     $src1,$src1\t# (cOop) LTR because comparing against zero" %}
5082   opcode(LTR_ZOPC);
5083   ins_encode(z_rrform(src1, src1));
5084   ins_pipe(pipe_class_dummy);
5085 %}
5086 
5087 
5088 //----------MemBar Instructions-----------------------------------------------
5089 
5090 // Memory barrier flavors
5091 
5092 instruct membar_acquire() %{
       // Acquire barrier: one rule serves both the MemBarAcquire and the
       // LoadFence ideal nodes. size(0) declares that no code bytes are
       // produced, so z_acquire() is expected to emit nothing - confirm
       // against the assembler if that helper ever changes.
5093   match(MemBarAcquire);
5094   match(LoadFence);
5095   ins_cost(4*MEMORY_REF_COST);
5096   size(0);
5097   format %{ "MEMBAR-acquire" %}
5098   ins_encode %{ __ z_acquire(); %}
5099   ins_pipe(pipe_class_dummy);
5100 %}
5101

6756       __ z_lghi(Z_R0_scratch, divisor);
6757       __ z_lgr($dst$$Register->successor(), $src1$$Register);
6758       __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch);  // Instruction kills tmp.
6759     } else {
6760       __ clear_reg($dst$$Register, true, false);
6761     }
6762   %}
6763   ins_pipe(pipe_class_dummy);
6764 %}
6765 
6766 // SHIFT
6767 
6768 // Shift left logical
6769 
6770 // Register Shift Left variable
6771 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
       // Int shift-left by a variable count. The count is copied to
       // Z_R1_scratch and masked to the int shift range before SLLG uses it
       // as the shift-amount register, so nbits itself is left unmodified.
       // size(14) covers the three emitted instructions.
6772   match(Set dst (LShiftI src nbits));
6773   effect(KILL cr); // R1 is killed, too.
6774   ins_cost(3 * DEFAULT_COST);
6775   size(14);
6776   format %{ "SLL     $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
6777   ins_encode %{
6778     __ z_lgr(Z_R1_scratch, $nbits$$Register);
           // Keep only the low bits of the count (mask is BitsPerJavaInteger-1).
6779     __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
6780     __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
6781   %}
6782   ins_pipe(pipe_class_dummy);
6783 %}
6784 
6785 // Register Shift Left Immediate
6786 // Constant shift count is masked in ideal graph already.
6787 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
       // Int shift-left by a constant count, emitted as a single SLLG with
       // Z_R0 given as the shift-amount register operand, so only the
       // immediate count is supplied. No KILL of the condition code is
       // declared for this rule.
6788   match(Set dst (LShiftI src nbits));
6789   size(6);
6790   format %{ "SLL     $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
6791   ins_encode %{
6792     int Nbit = $nbits$$constant;
           // The constant is expected to be pre-masked; the assert checks
           // that, and the mask is applied again for product builds.
6793     assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6794     __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6795   %}
6796   ins_pipe(pipe_class_dummy);
6797 %}
6798 
6799 // Register Shift Left Immediate by 1bit
6800 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
       // Int shift-left by exactly one bit, done as an address computation:
       // LA with displacement 0 and src as both index and base yields
       // src + src. Only enabled when PreferLAoverADD is set; the low cost
       // makes it preferred over sllI_reg_imm in that case.
6801   match(Set dst (LShiftI src nbits));
6802   predicate(PreferLAoverADD);
6803   ins_cost(DEFAULT_COST_LOW);
6804   size(4);
6805   format %{ "LA      $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6806   ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6807   ins_pipe(pipe_class_dummy);
6808 %}
6809 
6810 // Register Shift Left Long

7086   %}
7087   ins_pipe(pipe_class_dummy);
7088 %}
7089 
7090 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
       // Overflow check for (op1 - op2) with a long constant. Only the
       // condition code is produced (DEF cr): the subtraction is carried out
       // in the scratch registers so that op1 is not modified.
7091   match(Set cr (OverflowSubL op1 op2));
7092   effect(DEF cr, USE op1, USE op2);
7093   // TODO: s390 port size(VARIABLE_SIZE);
7094   format %{ "SGR     $op1,$op2\t # overflow check long" %}
7095   ins_encode %{
           // R1 scratch holds the constant, R0 scratch holds a copy of op1.
7096     __ load_const_optimized(Z_R1_scratch, $op2$$constant);
7097     __ z_lgr(Z_R0_scratch, $op1$$Register);
7098     __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7099   %}
7100   ins_pipe(pipe_class_dummy);
7101 %}
7102 
7103 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
       // Overflow check for int negation, matched as OverflowSubI with a
       // constant-zero left operand. Z_R0_scratch is cleared and op2 is
       // subtracted from it (SR); only the resulting condition code is used,
       // and op2 is left unmodified.
7104   match(Set cr (OverflowSubI zero op2));
7105   effect(DEF cr, USE op2);
7106   format %{ "NEG    $op2\t# overflow check int" %}
7107   ins_encode %{
7108     __ clear_reg(Z_R0_scratch, false, false);
7109     __ z_sr(Z_R0_scratch, $op2$$Register);
7110   %}
7111   ins_pipe(pipe_class_dummy);
7112 %}
7113 
7114 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
       // Overflow check for long negation, matched as OverflowSubL with a
       // constant-zero left operand. Long counterpart of overflowNegI_rReg:
       // clear_reg is called with `true` as its second argument and the
       // subtraction uses SGR instead of SR.
7115   match(Set cr (OverflowSubL zero op2));
7116   effect(DEF cr, USE op2);
7117   format %{ "NEGG    $op2\t# overflow check long" %}
7118   ins_encode %{
7119     __ clear_reg(Z_R0_scratch, true, false);
7120     __ z_sgr(Z_R0_scratch, $op2$$Register);
7121   %}
7122   ins_pipe(pipe_class_dummy);
7123 %}
7124 
7125 // No intrinsics for multiplication, since there is no easy way
7126 // to check for overflow.
7127 
7128 
7129 //----------Floating Point Arithmetic Instructions-----------------------------
7130 
7131 //  ADD
7132 
7133 //  Add float single precision
7134 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7135   match(Set dst (AddF dst src));
7136   effect(KILL cr);
7137   ins_cost(ALU_REG_COST);

9152 // Direct Branch.
9153 instruct branchFar(label labl) %{
       // Unconditional branch (Goto) in its long form: encoded through
       // z_enc_brul with a declared size of 6 bytes.
9154   match(Goto);
9155   effect(USE labl);
9156   ins_cost(BRANCH_COST);
9157   size(6);
9158   format %{ "BRUL   $labl" %}
9159   ins_encode(z_enc_brul(labl));
9160   ins_pipe(pipe_class_dummy);
9161   // This is not a short variant of a branch, but the long variant.
9162   ins_short_branch(0);
9163 %}
9164 
9165 // Conditional Near Branch
9166 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
       // Short (4-byte) conditional branch on an already computed condition
       // code. Shares its match rule with branchConFar and is marked as the
       // short variant via ins_short_branch(1).
9167   // Same match rule as `branchConFar'.
9168   match(If cmp cr);
9169   effect(USE lbl);
9170   ins_cost(BRANCH_COST);
9171   size(4);
9172   format %{ "branch_con_short,$cmp   $cr, $lbl" %}
9173   ins_encode(z_enc_branch_con_short(cmp, lbl));
9174   ins_pipe(pipe_class_dummy);
9175   // If set to 1 this indicates that the current instruction is a
9176   // short variant of a long branch. This avoids using this
9177   // instruction in first-pass matching. It will then only be used in
9178   // the `Shorten_branches' pass.
9179   ins_short_branch(1);
9180 %}
9181 
9182 // This is for cases when the z/Architecture conditional branch instruction
9183 // does not reach far enough. So we emit a far branch here, which is
9184 // more expensive.
9185 //
9186 // Conditional Far Branch
9187 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
       // Long (6-byte) conditional branch on an already computed condition
       // code. This is the variant used in first-pass matching; branchCon is
       // its short replacement.
9188   // Same match rule as `branchCon'.
9189   match(If cmp cr);
9190   effect(USE cr, USE lbl);
9191   // Make more expensive to prefer compare_and_branch over separate instructions.
9192   ins_cost(2 * BRANCH_COST);
9193   size(6);
9194   format %{ "branch_con_far,$cmp   $cr, $lbl" %}
9195   ins_encode(z_enc_branch_con_far(cmp, lbl));
9196   ins_pipe(pipe_class_dummy);
9197   // This is not a short variant of a branch, but the long variant.
9198   ins_short_branch(0);
9199 %}
9200 
9201 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
       // Short (4-byte) conditional branch closing a counted loop
       // (CountedLoopEnd). Uses the same encoding class as branchCon and is
       // likewise marked as a short variant.
9202   match(CountedLoopEnd cmp cr);
9203   effect(USE labl);
9204   ins_cost(BRANCH_COST);
9205   size(4);
9206   format %{ "branch_con_short,$cmp   $labl\t # counted loop end" %}
9207   ins_encode(z_enc_branch_con_short(cmp, labl));
9208   ins_pipe(pipe_class_dummy);
9209   // If set to 1 this indicates that the current instruction is a
9210   // short variant of a long branch. This avoids using this
9211   // instruction in first-pass matching. It will then only be used in
9212   // the `Shorten_branches' pass.
9213   ins_short_branch(1);
9214 %}

9743 instruct CallLeafNoFPDirect(method meth) %{
       // Call to a runtime leaf routine (CallLeafNoFP), encoded through
       // z_enc_java_to_runtime_call. The rule declares one constant
       // (ins_num_consts(1)) and an alignment of 2; no fixed size is declared.
9744   match(CallLeafNoFP);
9745   effect(USE meth);
9746   ins_cost(CALL_COST);
9747   // TODO: s390 port size(VARIABLE_SIZE);
9748   ins_num_consts(1);
9749   format %{ "CALL,runtime leaf nofp $meth" %}
9750   ins_encode( z_enc_java_to_runtime_call(meth) );
9751   ins_pipe(pipe_class_dummy);
9752   ins_alignment(2);
9753 %}
9754 
9755 // Tail Call; Jump from runtime stub to Java code.
9756 // Also known as an 'interprocedural jump'.
9757 // Target of jump will eventually return to caller.
9758 // TailJump below removes the return address.
9759 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
       // Tail call: a plain 2-byte register branch (BR) to jump_target.
       // method_oop is not referenced by the encoding; it appears in the
       // match rule (and format) only, which pins it to the
       // inline_cache_regP operand class.
9760   match(TailCall jump_target method_oop);
9761   ins_cost(CALL_COST);
9762   size(2);
9763   format %{ "Jmp     $jump_target\t# $method_oop holds method oop" %}
9764   ins_encode %{ __ z_br($jump_target$$Register); %}
9765   ins_pipe(pipe_class_dummy);
9766 %}
9767 
9768 // Return Instruction
9769 instruct Ret() %{
       // Method return: a 2-byte register branch (BR) through Z_R14, which
       // the format string identifies as the link register.
9770   match(Return);
9771   size(2);
9772   format %{ "BR(Z_R14) // branch to link register" %}
9773   ins_encode %{ __ z_br(Z_R14); %}
9774   ins_pipe(pipe_class_dummy);
9775 %}
9776 
9777 // Tail Jump; remove the return address; jump to target.
9778 // TailCall above leaves the return address around.
9779 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9780 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9781 // "restore" before this instruction (in Epilogue), we need to materialize it
9782 // in %i0.
9783 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{

10751 instruct loadV8(iRegL dst, memory mem) %{
        // Vector load restricted to vectors of exactly 8 bytes (see the
        // predicate): the vector is held in a long register and loaded with
        // LG_ZOPC through z_form_rt_mem_opt.
10752   match(Set dst (LoadVector mem));
10753   predicate(n->as_LoadVector()->memory_size() == 8);
10754   ins_cost(MEMORY_REF_COST);
10755   // TODO: s390 port size(VARIABLE_SIZE);
10756   format %{ "LG      $dst,$mem\t # L(packed8B)" %}
10757   opcode(LG_ZOPC, LG_ZOPC);
10758   ins_encode(z_form_rt_mem_opt(dst, mem));
10759   ins_pipe(pipe_class_dummy);
10760 %}
10761 
10762 //----------POPULATION COUNT RULES--------------------------------------------
10763 
10764 // Byte reverse
10765 
10766 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
        // Byte-reverses an int (ReverseBytesI) using LRVR_ZOPC through the
        // z_rreform encoding class; declared size is 4 bytes. Only enabled
        // when UseByteReverseInstruction is set.
10767   match(Set dst (ReverseBytesI src));
10768   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10769   ins_cost(DEFAULT_COST);
10770   size(4);
10771   format %{ "LRVR    $dst,$src\t# byte reverse int" %}
10772   opcode(LRVR_ZOPC);
10773   ins_encode(z_rreform(dst, src));
10774   ins_pipe(pipe_class_dummy);
10775 %}
10776 
10777 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
        // Byte-reverses a long (ReverseBytesL) using LRVGR_ZOPC through the
        // z_rreform encoding class. Only enabled when
        // UseByteReverseInstruction is set.
        // The size is declared as 4 bytes, consistent with bytes_reverse_int,
        // which uses the same encoding class with a sibling opcode.
10778   match(Set dst (ReverseBytesL src));
10779   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10780   ins_cost(DEFAULT_COST);
10781   size(4);
10782   format %{ "LRVGR   $dst,$src\t# byte reverse long" %}
10783   opcode(LRVGR_ZOPC);
10784   ins_encode(z_rreform(dst, src));
10785   ins_pipe(pipe_class_dummy);
10786 %}
10787 
10788 // Leading zeroes
10789 
10790 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10791 // returns the bit position of the leftmost 1 in the 64bit source register.
10792 // As the bits are numbered from left to right (0..63), the returned
10793 // position index is equivalent to the number of leading zeroes.
10794 // If no 1-bit is found (i.e. the register contains zero), the instruction
10795 // returns position 64. That's exactly what we need.
10796 
10797 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
        // Counts leading zeroes of an int with FLOGR. dst is taken from the
        // even-register class and tmp from the odd-register class; tmp is
        // not referenced by the encoding and is only declared as killed.
        // size(14) covers the three emitted instructions.
10798   match(Set dst (CountLeadingZerosI src));
10799   effect(KILL tmp, KILL cr);
10800   ins_cost(3 * DEFAULT_COST);
10801   size(14);
10802   format %{ "SLLG    $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
10803             "IILH    $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
10804             "FLOGR   $dst,$dst"
10805          %}
10806   ins_encode %{
10807     // Performance experiments indicate that "FLOGR" is using some kind of
10808     // iteration to find the leftmost "1" bit.
10809     //
10810     // The prior implementation zero-extended the 32-bit argument to 64 bit,
10811     // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10812     // We could gain measurable speedup in micro benchmark:
10813     //
10814     //               leading   trailing
10815     //   z10:   int     2.04       1.68
10816     //         long     1.00       1.02
10817     //   z196:  int     0.99       1.23
10818     //         long     1.00       1.11
10819     //
10820     // By shifting the argument into the high-word instead of zero-extending it.
10821     // The add'l branch on condition (taken for a zero argument, very infrequent,
10822     // good prediction) is well compensated for by the savings.
10823     //
10824     // We leave the previous implementation in for some time in the future when
10825     // the "FLOGR" instruction may become less iterative.
            //
            // NOTE(review): the sequence emitted below contains no branch and
            // no alternative implementation; the two remarks above appear to
            // describe an earlier revision of this code.
10826 
10827     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10828     __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10829     __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
10830     __ z_flogr($dst$$Register, $dst$$Register);
10831   %}
10832   ins_pipe(pipe_class_dummy);
10833 %}
10834 
10835 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
        // Counts leading zeroes of a long with a single FLOGR (4 bytes).
        // dst is taken from the even-register class and tmp from the
        // odd-register class; tmp is not referenced by the encoding and is
        // only declared as killed.
        // The format string is a single line, so it carries no trailing
        // "\n\t" continuation.
10836   match(Set dst (CountLeadingZerosL src));
10837   effect(KILL tmp, KILL cr);
10838   ins_cost(DEFAULT_COST);
10839   size(4);
10840   format %{ "FLOGR   $dst,$src \t# count leading zeros (long)" %}
10841   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10842   ins_pipe(pipe_class_dummy);
10843 %}
10844 
10845 // trailing zeroes
10846 
10847 // We transform the trailing zeroes problem to a leading zeroes problem
10848 // such that we can use the FLOGR instruction to our advantage.
10849 
10850 // With
10851 //   tmp1 = src - 1
10852 // we flip all trailing zeroes to ones and the rightmost one to zero.
10853 // All other bits remain unchanged.
10854 // With the complement
10855 //   tmp2 = ~src
10856 // we get all ones in the trailing zeroes positions. Thus,
10857 //   tmp3 = tmp1 & tmp2
10858 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10859 // Now we can apply FLOGR and get 64-(trailing zeroes).
10860 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10861   match(Set dst (CountTrailingZerosI src));
10862   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10863   ins_cost(8 * DEFAULT_COST);
10864   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10865   format %{ "LLGFR   $dst,$src  \t# clear upper 32 bits (we are dealing with int)\n\t"
10866             "LCGFR   $tmp,$src  \t# load 2's complement (32->64 bit)\n\t"
10867             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10868             "AGHI    $tmp,-1    \t# tmp2 = -src-1 = ~src\n\t"
10869             "NGR     $dst,$tmp  \t# tmp3 = tmp1&tmp2\n\t"
10870             "FLOGR   $dst,$dst  \t# count trailing zeros (int)\n\t"
10871             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10872             "LCR     $dst,$dst  \t# res = -tmp4"
10873          %}
10874   ins_encode %{
10875     Register Rdst = $dst$$Register;
10876     Register Rsrc = $src$$Register;
10877     // Rtmp only needed for for zero-argument shortcut. With kill effect in
10878     // match rule Rsrc = roddReg would be possible, saving one register.
10879     Register Rtmp = $tmp$$Register;
10880 
10881     assert_different_registers(Rdst, Rsrc, Rtmp);
10882 
10883     // Algorithm:
10884     // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10885     //   All other bits in the result are zero.
10886     // - Find the "leftmost one" bit position in the single-bit result from previous step.
10887     // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10888 
10889     // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10890     Label done;
10891     __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10892     __ z_lcgfr(Rtmp, Rsrc);

10898                                        // into upper half of reg. Not relevant with sllg below.
10899     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10900     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10901                                        // Depends on CC set by ahi above.
10902                                        // Taken very infrequently, good prediction, no BHT entry.
10903                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10904                                        // after SLLG Rdst == 0(64bit)).
10905     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10906     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10907     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10908     __ bind(done);
10909   %}
10910   ins_pipe(pipe_class_dummy);
10911 %}
10912 
10913 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
        // Count trailing zeros of a long.
        // Emitted sequence:
        //   dst = -src; dst &= src   -> only the rightmost set bit of src remains
        //   dst -= 1                 -> ones exactly in the trailing-zero positions
        //   FLOGR dst,dst            -> 64 - (number of trailing zeros)
        //   dst -= 64; dst = -dst    -> number of trailing zeros
        // tmp is the odd register paired with dst and is overwritten by FLOGR.
10914   match(Set dst (CountTrailingZerosL src));
10915   effect(TEMP_DEF dst, KILL tmp, KILL cr);
10916   ins_cost(8 * DEFAULT_COST);
10917   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
        // One line per emitted instruction; every line but the last ends in "\n\t".
10918   format %{ "LCGR    $dst,$src  \t# preserve src\n\t"
10919             "NGR     $dst,$src  \t#\n\t"
10920             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10921             "FLOGR   $dst,$dst  \t# count trailing zeros (long), kill $tmp\n\t"
10922             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10923             "LCGFR   $dst,$dst  \t#"
10924          %}
10925   ins_encode %{
10926     Register Rdst = $dst$$Register;
10927     Register Rsrc = $src$$Register;
10928     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10929 
10930     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10931     __ z_lcgr(Rdst, Rsrc);
10932     __ z_ngr(Rdst, Rsrc);
10933     __ add2reg(Rdst,   -1);
10934     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10935     __ add2reg(Rdst,  -64);
10936     __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10937   %}
10938   ins_pipe(pipe_class_dummy);
10939 %}
10940 
10941 
10942 // bit count
10943 
10944 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
        // Population count of an int.
        // Only selected when UsePopCountInstruction is set and the CPU reports
        // POPCNT support. z_popcnt is followed by two shift/add steps that fold
        // the per-byte counts of the low word into the lowest byte, which is
        // then zero-extended into dst.
10945   match(Set dst (PopCountI src));
10946   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10947   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10948   ins_cost(DEFAULT_COST);
10949   size(24);
10950   format %{ "POPCNT  $dst,$src\t# pop count int" %}
10951   ins_encode %{
10952     Register Rdst = $dst$$Register;
10953     Register Rsrc = $src$$Register;
10954     Register Rtmp = $tmp$$Register;
10955 
10956     // Prefer compile-time assertion over run-time SIGILL.
10957     assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10958     assert_different_registers(Rdst, Rtmp);
10959 
10960     // Version 2: shows 10%(z196) improvement over original.
10961     __ z_popcnt(Rdst, Rsrc);
10962     __ z_srlg(Rtmp, Rdst, 16); // calc  byte4+byte6 and byte5+byte7
10963     __ z_alr(Rdst, Rtmp);      //   into byte6 and byte7
10964     __ z_srlg(Rtmp, Rdst,  8); // calc (byte4+byte6) + (byte5+byte7)
10965     __ z_alr(Rdst, Rtmp);      //   into byte7
10966     __ z_llgcr(Rdst, Rdst);    // zero-extend sum
10967   %}
10968   ins_pipe(pipe_class_dummy);
10969 %}
10970 
10971 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
        // Population count of a long.
        // Only selected when UsePopCountInstruction is set and the CPU reports
        // POPCNT support. After z_popcnt the partial counts are accumulated
        // towards the most significant byte (ahhlr, then shift-left/add by 16
        // and by 8); the final srlg by 56 moves that byte down into the result.
10972   match(Set dst (PopCountL src));
10973   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10974   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10975   ins_cost(DEFAULT_COST);
10976   // TODO: s390 port size(FIXED_SIZE);
10977   format %{ "POPCNT  $dst,$src\t# pop count long" %}
10978   ins_encode %{
10979     Register Rdst = $dst$$Register;
10980     Register Rsrc = $src$$Register;
10981     Register Rtmp = $tmp$$Register;
10982 
10983     // Prefer compile-time assertion over run-time SIGILL.
10984     assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
10985     assert_different_registers(Rdst, Rtmp);
10986 
10987     // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
10988     __ z_popcnt(Rdst, Rsrc);
10989     __ z_ahhlr(Rdst, Rdst, Rdst);
10990     __ z_sllg(Rtmp, Rdst, 16);
10991     __ z_algr(Rdst, Rtmp);
10992     __ z_sllg(Rtmp, Rdst,  8);
10993     __ z_algr(Rdst, Rtmp);
10994     __ z_srlg(Rdst, Rdst, 56);
10995   %}
10996   ins_pipe(pipe_class_dummy);
10997 %}

   1 //
   2 // Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2017, 2019 SAP SE. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //

1358     // The ic_miss_stub will handle the null pointer exception.
1359     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1360     __ z_br(R1_ic_miss_stub_addr);
1361     __ bind(valid);
1362   }
1363 
1364   // Check whether this method is the proper implementation for the class of
1365   // the receiver (ic miss check).
1366   {
1367     Label valid;
1368     // Compare cached class against klass from receiver.
1369     // This also does an implicit null check!
1370     __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1371     __ z_bre(valid);
1372     // The inline cache points to the wrong method. Call the
1373     // ic_miss_stub to find the proper method.
1374     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1375     __ z_br(R1_ic_miss_stub_addr);
1376     __ bind(valid);
1377   }

1378 }
1379 
1380 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1381   // Determine size dynamically: delegate to the generic MachNode::size() computation.
1382   return MachNode::size(ra_);
1383 }
1384 
1385 //=============================================================================
1386 
1387 %} // interrupt source section
1388 
1389 source_hpp %{ // Header information of the source block.
1390 
1391 class HandlerImpl {
1392  public:
1393 
1394   static int emit_exception_handler(CodeBuffer &cbuf);
1395   static int emit_deopt_handler(CodeBuffer& cbuf);
1396 
1397   static uint size_exception_handler() {

4683 // See cOop encoding classes for elaborate comment.
4684 
4685 // Moved here because it is needed in expand rules for encode.
4686 // Long negation.
4687 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
       // Matches (SubL zero src) with a zero immediate, i.e. long negation,
       // and emits a single z_lcgr. The condition code is declared KILLed.
4688   match(Set dst (SubL zero src));
4689   effect(KILL cr);
4690   size(4);
4691   format %{ "NEG     $dst, $src\t # long" %}
4692   ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4693   ins_pipe(pipe_class_dummy);
4694 %}
4695 
4696 // Load Compressed Pointer
4697 
4698 // Load narrow oop
4699 instruct loadN(iRegN dst, memory mem) %{
       // Matches (LoadN mem) and loads the narrow oop into dst.
       // Encoded through z_form_rt_mem_opt with LLGF_ZOPC in both opcode slots.
       // NOTE(review): the two slots are presumably the long- and
       // short-displacement forms chosen by the encoder - confirm.
4700   match(Set dst (LoadN mem));
4701   ins_cost(MEMORY_REF_COST);
4702   size(Z_DISP3_SIZE);
4703   format %{ "LoadN   $dst,$mem\t # (cOop)" %}
4704   opcode(LLGF_ZOPC, LLGF_ZOPC);
4705   ins_encode(z_form_rt_mem_opt(dst, mem));
4706   ins_pipe(pipe_class_dummy);
4707 %}
4708 
4709 // Load narrow Klass Pointer
4710 instruct loadNKlass(iRegN dst, memory mem) %{
       // Matches (LoadNKlass mem) and loads the narrow klass pointer into dst.
       // Uses the same encoding as loadN: z_form_rt_mem_opt with LLGF_ZOPC.
4711   match(Set dst (LoadNKlass mem));
4712   ins_cost(MEMORY_REF_COST);
4713   size(Z_DISP3_SIZE);
4714   format %{ "LoadNKlass $dst,$mem\t # (klass cOop)" %}
4715   opcode(LLGF_ZOPC, LLGF_ZOPC);
4716   ins_encode(z_form_rt_mem_opt(dst, mem));
4717   ins_pipe(pipe_class_dummy);
4718 %}
4719 
4720 // Load constant Compressed Pointer
4721 
4722 instruct loadConN(iRegN dst, immN src) %{
       // Materializes a narrow-oop constant in dst.
       // The constant is wrapped in an AddressLiteral, its relocation spec is
       // recorded, and load_narrow_oop emits the load of the narrow value.
       // NOTE(review): meaning of the second relocate() argument (1) is not
       // visible here - confirm against the assembler.
4723   match(Set dst src);
4724   ins_cost(DEFAULT_COST);
4725   size(6);
4726   format %{ "loadConN    $dst,$src\t # (cOop)" %}
4727   ins_encode %{
4728     AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
4729     __ relocate(cOop.rspec(), 1);
4730     __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4731   %}
4732   ins_pipe(pipe_class_dummy);
4733 %}
4734

4747   match(Set dst src);
4748   ins_cost(DEFAULT_COST);
4749   size(6);
4750   format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4751   ins_encode %{
4752     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4753     __ relocate(NKlass.rspec(), 1);
4754     __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4755   %}
4756   ins_pipe(pipe_class_dummy);
4757 %}
4758 
4759 // Load and Decode Compressed Pointer
4760 // optimized variants for Unscaled cOops
4761 
4762 instruct decodeLoadN(iRegP dst, memory mem) %{
       // Combined load + decode of a narrow oop for the unscaled case
       // (narrow oop base NULL and shift 0).
       // The predicate starts with "false &&", so this rule never matches.
4763   match(Set dst (DecodeN (LoadN mem)));
4764   predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0));
4765   ins_cost(MEMORY_REF_COST);
4766   size(Z_DISP3_SIZE);
4767   format %{ "DecodeLoadN  $dst,$mem\t # (cOop Load+Decode)" %}
4768   opcode(LLGF_ZOPC, LLGF_ZOPC);
4769   ins_encode(z_form_rt_mem_opt(dst, mem));
4770   ins_pipe(pipe_class_dummy);
4771 %}
4772 
4773 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
       // Combined load + decode of a narrow klass pointer for the unscaled case
       // (narrow klass base NULL and shift 0).
       // The predicate starts with "false &&", so this rule never matches.
4774   match(Set dst (DecodeNKlass (LoadNKlass mem)));
4775   predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0));
4776   ins_cost(MEMORY_REF_COST);
4777   size(Z_DISP3_SIZE);
4778   format %{ "DecodeLoadNKlass  $dst,$mem\t # (load/decode NKlass)" %}
4779   opcode(LLGF_ZOPC, LLGF_ZOPC);
4780   ins_encode(z_form_rt_mem_opt(dst, mem));
4781   ins_pipe(pipe_class_dummy);
4782 %}
4783 
4784 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
       // Decode of a constant narrow klass: instead of decoding at run time,
       // the Klass* taken from the metadata AddressLiteral is loaded directly
       // with load_const, after recording its relocation.
4785   match(Set dst (DecodeNKlass src));
4786   ins_cost(3 * DEFAULT_COST);
4787   size(12);
4788   format %{ "DecodeLoadConNKlass  $dst,$src\t # decode(cKlass)" %}
4789   ins_encode %{
4790     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4791     __ relocate(NKlass.rspec(), 1);
4792     __ load_const($dst$$Register, (Klass*)NKlass.value());
4793   %}
4794   ins_pipe(pipe_class_dummy);
4795 %}
4796 
4797 // Decode Compressed Pointer
4798 
4799 // General decoder
4800 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
       // Decodes a narrow oop via MacroAssembler::oop_decoder.
       // Selected when the narrow oop base is NULL or ExpandLoadingBaseDecode
       // is off; the complementary condition is handled by decodeN_Ex.
       // NOTE(review): the third oop_decoder argument (true) presumably means
       // "source may be null" - confirm against the assembler.
4801   match(Set dst (DecodeN src));
4802   effect(KILL cr);
4803   predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode);
4804   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4805   // TODO: s390 port size(VARIABLE_SIZE);
4806   format %{ "decodeN  $dst,$src\t # (decode cOop)" %}
4807   ins_encode %{  __ oop_decoder($dst$$Register, $src$$Register, true); %}
4808   ins_pipe(pipe_class_dummy);
4809 %}
4810 
4811 // General Klass decoder
4812 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
       // Decodes a narrow klass pointer held in a register by calling
       // decode_klass_not_null. No predicate; the condition code is KILLed.
4813   match(Set dst (DecodeNKlass src));
4814   effect(KILL cr);
4815   ins_cost(3 * DEFAULT_COST);
4816   format %{ "decode_klass $dst,$src" %}
4817   ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4818   ins_pipe(pipe_class_dummy);
4819 %}
4820 
4821 // General decoder
4822 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
       // Cheaper decode variant for narrow oops whose type is NotNull or
       // Constant. Selected when additionally the narrow oop base is NULL or
       // ExpandLoadingBaseDecode_NN is off.
       // Calls oop_decoder with its third argument false (decodeN passes true).
4823   match(Set dst (DecodeN src));
4824   effect(KILL cr);
4825   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4826              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4827             (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN));
4828   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4829   // TODO: s390 port size(VARIABLE_SIZE);
4830   format %{ "decodeN  $dst,$src\t # (decode cOop NN)" %}
4831   ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4832   ins_pipe(pipe_class_dummy);
4833 %}
4834 
4835   instruct loadBase(iRegL dst, immL baseImm) %{
         // Helper rule without a match clause: loads the heap base constant
         // baseImm into dst via get_oop_base. predicate(false) keeps the
         // matcher from selecting it; it is instantiated from expand blocks
         // (e.g. decodeN_Ex).
4836     effect(DEF dst, USE baseImm);
4837     predicate(false);
4838     format %{ "llihl    $dst=$baseImm \t// load heap base" %}
4839     ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4840     ins_pipe(pipe_class_dummy);
4841   %}
4842 
4843   // Decoder for heapbased mode peeling off loading the base.
4844   instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
         // Decode step used by the decodeN_Ex expansion: the heap base is
         // supplied in a register instead of being loaded by the decoder.
         // predicate(false): never selected by the matcher directly.
4845     match(Set dst (DecodeN src base));
4846     // Note: Effect TEMP dst was used with the intention to get
4847     // different regs for dst and base, but this has caused ADLC to
4848     // generate wrong code. Oop_decoder generates additional lgr when
4849     // dst==base.
4850     effect(KILL cr);
4851     predicate(false);
4852     // TODO: s390 port size(VARIABLE_SIZE);
4853     format %{ "decodeN  $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4854     ins_encode %{
           // The pow2 offset passed along is derived from the narrow oop base.
4855       __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4856                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4857     %}
4858     ins_pipe(pipe_class_dummy);
4859   %}
4860 
4861   // Decoder for heapbased mode peeling off loading the base.
4862   instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
         // Same as decodeN_base, but calls oop_decoder with its third
         // argument false; the format string accordingly shows no null check.
         // predicate(false): only reachable through an expand block.
4863     match(Set dst (DecodeN src base));
4864     effect(KILL cr);
4865     predicate(false);
4866     // TODO: s390 port size(VARIABLE_SIZE);
4867     format %{ "decodeN  $dst = ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4868     ins_encode %{
4869       __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4870                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4871     %}
4872     ins_pipe(pipe_class_dummy);
4873   %}
4874 
4875 // Decoder for heapbased mode peeling off loading the base.
4876 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
       // Heap-based decode split into two nodes: loadBase materializes the
       // narrow oop base in a fresh register, decodeN_base does the decode.
       // Selected when the base is non-NULL and ExpandLoadingBaseDecode is on,
       // i.e. exactly when decodeN's predicate is false.
4877   match(Set dst (DecodeN src));
4878   predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode);
4879   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4880   // TODO: s390 port size(VARIABLE_SIZE);
4881   expand %{
4882     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4883     iRegL base;
4884     loadBase(base, baseImm);
4885     decodeN_base(dst, src, base, cr);
4886   %}
4887 %}

4897   expand %{
4898     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4899     iRegL base;
4900     loadBase(base, baseImm);
4901     decodeN_NN_base(dst, src, base, cr);
4902   %}
4903 %}
4904 
4905 //  Encode Compressed Pointer
4906 
4907 // General encoder
4908 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
       // Encodes an oop whose type is not known to be NotNull.
       // Selected when the narrow oop base is 0 or disjoint, or when
       // ExpandLoadingBaseEncode is off.
       // oop_encoder is given Z_R1_scratch, -1, and the result of
       // all_outs_are_Stores(this) as extra arguments.
       // NOTE(review): Z_R1_scratch is used here without being listed in
       // effect() - presumably treated as a free scratch register; confirm.
4909   match(Set dst (EncodeP src));
4910   effect(KILL cr);
4911   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4912             (Universe::narrow_oop_base() == 0 ||
4913              Universe::narrow_oop_base_disjoint() ||
4914              !ExpandLoadingBaseEncode));
4915   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4916   // TODO: s390 port size(VARIABLE_SIZE);
4917   format %{ "encodeP  $dst,$src\t # (encode cOop)" %}
4918   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4919   ins_pipe(pipe_class_dummy);
4920 %}
4921 
4922 // General class encoder
4923 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
       // Encodes a klass pointer into its narrow form by calling
       // encode_klass_not_null. No predicate and no explicit ins_cost/size.
4924   match(Set dst (EncodePKlass src));
4925   effect(KILL cr);
4926   format %{ "encode_klass $dst,$src" %}
4927   ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4928   ins_pipe(pipe_class_dummy);
4929 %}
4930 
4931 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
       // Encode variant for oops whose type is NotNull.
       // Selected when the narrow oop base is 0 or disjoint, or when
       // ExpandLoadingBaseEncode_NN is off.
       // Differs from encodeP only in the predicate and in passing false as
       // the third oop_encoder argument.
4932   match(Set dst (EncodeP src));
4933   effect(KILL cr);
4934   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4935             (Universe::narrow_oop_base() == 0 ||
4936              Universe::narrow_oop_base_disjoint() ||
4937              !ExpandLoadingBaseEncode_NN));
4938   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4939   // TODO: s390 port size(VARIABLE_SIZE);
4940   format %{ "encodeP  $dst,$src\t # (encode cOop)" %}
4941   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4942   ins_pipe(pipe_class_dummy);
4943 %}
4944 
4945   // Encoder for heapbased mode peeling off loading the base.
4946   instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
         // Encode step that takes the base in a register instead of loading
         // it itself. predicate(false): never selected by the matcher
         // directly. dst is declared TEMP_DEF.
4947     match(Set dst (EncodeP src (Binary base dst)));
4948     effect(TEMP_DEF dst);
4949     predicate(false);
4950     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4951     // TODO: s390 port size(VARIABLE_SIZE);
4952     format %{ "encodeP  $dst = ($src>>3) +$base + pow2_offset\t # (encode cOop)" %}
4953     ins_encode %{
           // Offset: negated pow2 offset of the already shifted narrow oop base.
4954       jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4955         (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift());
4956       __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4957     %}
4958     ins_pipe(pipe_class_dummy);
4959   %}
4960 
4961   // Encoder for heapbased mode peeling off loading the base.
4962   instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
         // Encode step taking both the base register and the pow2 offset as
         // operands; calls oop_encoder with its third argument false.
         // predicate(false): only instantiated from expand blocks, which pass
         // the negated base (negBase) and a precomputed pow2_offset.
4963     match(Set dst (EncodeP src base));
4964     effect(USE pow2_offset);
4965     predicate(false);
4966     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4967     // TODO: s390 port size(VARIABLE_SIZE);
4968     format %{ "encodeP  $dst = ($src>>3) +$base + $pow2_offset\t # (encode cOop)" %}
4969     ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4970     ins_pipe(pipe_class_dummy);
4971   %}
4972 
4973 // Encoder for heapbased mode peeling off loading the base.
4974 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4975   match(Set dst (EncodeP src));
4976   effect(KILL cr);
4977   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4978             (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode));
4979   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4980   // TODO: s390 port size(VARIABLE_SIZE);
4981   expand %{
4982     immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %}
4983     immL_0 zero %{ (0) %}
4984     flagsReg ccr;
4985     iRegL base;
4986     iRegL negBase;
4987     loadBase(base, baseImm);
4988     negL_reg_reg(negBase, zero, base, ccr);

5001   expand %{
5002     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
5003     immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %}
5004     immL_0 zero %{ 0 %}
5005     flagsReg ccr;
5006     iRegL base;
5007     iRegL negBase;
5008     loadBase(base, baseImm);
5009     negL_reg_reg(negBase, zero, base, ccr);
5010     encodeP_NN_base(dst, src, negBase, pow2_offset);
5011   %}
5012 %}
5013 
5014 //  Store Compressed Pointer
5015 
5016 // Store Compressed Pointer
5017 instruct storeN(memory mem, iRegN_P2N src) %{
       // Matches (StoreN mem src) and stores the narrow oop from src.
       // Encoded through z_form_rt_mem_opt with the opcode pair STY_ZOPC / ST_ZOPC.
       // NOTE(review): presumably the encoder picks ST when the displacement
       // fits the short form and STY otherwise - confirm.
5018   match(Set mem (StoreN mem src));
5019   ins_cost(MEMORY_REF_COST);
5020   size(Z_DISP_SIZE);
5021   format %{ "ST      $src,$mem\t # (cOop)" %}
5022   opcode(STY_ZOPC, ST_ZOPC);
5023   ins_encode(z_form_rt_mem_opt(src, mem));
5024   ins_pipe(pipe_class_dummy);
5025 %}
5026 
5027 // Store Compressed Klass pointer
5028 instruct storeNKlass(memory mem, iRegN src) %{
       // Matches (StoreNKlass mem src) and stores the narrow klass pointer.
       // Same encoding as storeN; the source operand class here is iRegN.
5029   match(Set mem (StoreNKlass mem src));
5030   ins_cost(MEMORY_REF_COST);
5031   size(Z_DISP_SIZE);
5032   format %{ "ST      $src,$mem\t # (cKlass)" %}
5033   opcode(STY_ZOPC, ST_ZOPC);
5034   ins_encode(z_form_rt_mem_opt(src, mem));
5035   ins_pipe(pipe_class_dummy);
5036 %}
5037 
5038 // Compare Compressed Pointers
5039 
5040 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
       // Compares two narrow oops held in registers and sets cr.
       // Encoded as a register-register instruction (z_rrform) with CLR_ZOPC.
5041   match(Set cr (CmpN src1 src2));
5042   ins_cost(DEFAULT_COST);
5043   size(2);
5044   format %{ "CLR     $src1,$src2\t # (cOop)" %}
5045   opcode(CLR_ZOPC);
5046   ins_encode(z_rrform(src1, src2));
5047   ins_pipe(pipe_class_dummy);
5048 %}
5049 
5050 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
       // Compares a narrow oop register against a narrow-oop constant.
       // The constant's relocation is recorded before
       // compare_immediate_narrow_oop emits the compare.
5051   match(Set cr (CmpN src1 src2));
5052   ins_cost(DEFAULT_COST);
5053   size(6);
5054   format %{ "CLFI    $src1,$src2\t # (cOop) compare immediate narrow" %}
5055   ins_encode %{
5056     AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
5057     __ relocate(cOop.rspec(), 1);
5058     __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5059   %}
5060   ins_pipe(pipe_class_dummy);
5061 %}
5062 
5063 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
       // Compares a narrow klass register against a narrow-klass constant.
       // The metadata relocation is recorded before
       // compare_immediate_narrow_klass emits the compare.
5064   match(Set cr (CmpN src1 src2));
5065   ins_cost(DEFAULT_COST);
5066   size(6);
5067   format %{ "CLFI    $src1,$src2\t # (NKlass) compare immediate narrow" %}
5068   ins_encode %{
5069     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
5070     __ relocate(NKlass.rspec(), 1);
5071     __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5072   %}
5073   ins_pipe(pipe_class_dummy);
5074 %}
5075 
5076 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
       // Compares a narrow oop against the zero constant.
       // Encoded as LTR with src1 as both operands; src2 only takes part in
       // matching and in the format string, not in the encoding.
5077   match(Set cr (CmpN src1 src2));
5078   ins_cost(DEFAULT_COST);
5079   size(2);
5080   format %{ "LTR     $src1,$src2\t # (cOop) LTR because comparing against zero" %}
5081   opcode(LTR_ZOPC);
5082   ins_encode(z_rrform(src1, src1));
5083   ins_pipe(pipe_class_dummy);
5084 %}
5085 
5086 
5087 //----------MemBar Instructions-----------------------------------------------
5088 
5089 // Memory barrier flavors
5090 
5091 instruct membar_acquire() %{
       // Matches both MemBarAcquire and LoadFence.
       // Declared with size(0) although it calls z_acquire().
       // NOTE(review): z_acquire() is therefore expected to emit no
       // instruction bytes - confirm against the assembler.
5092   match(MemBarAcquire);
5093   match(LoadFence);
5094   ins_cost(4*MEMORY_REF_COST);
5095   size(0);
5096   format %{ "MEMBAR-acquire" %}
5097   ins_encode %{ __ z_acquire(); %}
5098   ins_pipe(pipe_class_dummy);
5099 %}
5100

6755       __ z_lghi(Z_R0_scratch, divisor);
6756       __ z_lgr($dst$$Register->successor(), $src1$$Register);
6757       __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch);  // Instruction kills tmp.
6758     } else {
6759       __ clear_reg($dst$$Register, true, false);
6760     }
6761   %}
6762   ins_pipe(pipe_class_dummy);
6763 %}
6764 
6765 // SHIFT
6766 
6767 // Shift left logical
6768 
6769 // Register Shift Left variable
6770 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
       // Int shift-left by a variable count.
       // The count is copied to Z_R1_scratch and masked to
       // BitsPerJavaInteger-1 before the 64-bit SLLG, so nbits itself is left
       // unchanged.
6771   match(Set dst (LShiftI src nbits));
6772   effect(KILL cr); // R1 is killed, too.
6773   ins_cost(3 * DEFAULT_COST);
       // size() must equal the combined length of the three instructions below.
6774   size(14);
6775   format %{ "SLL     $dst,$src,[$nbits] & 31\t # use RISC-like SLLG also for int" %}
6776   ins_encode %{
6777     __ z_lgr(Z_R1_scratch, $nbits$$Register);
6778     __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
6779     __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
6780   %}
6781   ins_pipe(pipe_class_dummy);
6782 %}
6783 
6784 // Register Shift Left Immediate
6785 // Constant shift count is masked in ideal graph already.
6786 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
       // Int shift-left by a constant count, emitted as a single SLLG.
       // The assert checks that the constant is already masked to
       // BitsPerJavaInteger-1; the mask is applied again when emitting.
6787   match(Set dst (LShiftI src nbits));
6788   size(6);
6789   format %{ "SLL     $dst,$src,$nbits\t # use RISC-like SLLG also for int" %}
6790   ins_encode %{
6791     int Nbit = $nbits$$constant;
6792     assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6793     __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6794   %}
6795   ins_pipe(pipe_class_dummy);
6796 %}
6797 
6798 // Register Shift Left Immediate by 1bit
6799 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
       // Int shift-left by exactly one bit, expressed as src + src through LA
       // with src as both base and index register.
       // Only selected when PreferLAoverADD is set; DEFAULT_COST_LOW makes it
       // win over the general immediate-shift rule.
6800   match(Set dst (LShiftI src nbits));
6801   predicate(PreferLAoverADD);
6802   ins_cost(DEFAULT_COST_LOW);
6803   size(4);
6804   format %{ "LA      $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6805   ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6806   ins_pipe(pipe_class_dummy);
6807 %}
6808 
6809 // Register Shift Left Long

7085   %}
7086   ins_pipe(pipe_class_dummy);
7087 %}
7088 
7089 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
       // Overflow check for long subtraction with a constant right operand.
       // The subtraction is carried out in Z_R0_scratch / Z_R1_scratch so that
       // op1 is preserved; only the condition code (DEF cr) is the result.
7090   match(Set cr (OverflowSubL op1 op2));
7091   effect(DEF cr, USE op1, USE op2);
7092   // TODO: s390 port size(VARIABLE_SIZE);
7093   format %{ "SGR     $op1,$op2\t # overflow check long" %}
7094   ins_encode %{
7095     __ load_const_optimized(Z_R1_scratch, $op2$$constant);
7096     __ z_lgr(Z_R0_scratch, $op1$$Register);
7097     __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7098   %}
7099   ins_pipe(pipe_class_dummy);
7100 %}
7101 
7102 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
       // Overflow check for int negation, matched as (OverflowSubI zero op2).
       // Z_R0_scratch is cleared and op2 subtracted from it, leaving op2
       // unchanged; only the condition code (DEF cr) is the result.
7103   match(Set cr (OverflowSubI zero op2));
7104   effect(DEF cr, USE op2);
7105   format %{ "NEG    $op2\t # overflow check int" %}
7106   ins_encode %{
7107     __ clear_reg(Z_R0_scratch, false, false);
7108     __ z_sr(Z_R0_scratch, $op2$$Register);
7109   %}
7110   ins_pipe(pipe_class_dummy);
7111 %}
7112 
7113 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
       // Overflow check for long negation, matched as (OverflowSubL zero op2).
       // Long counterpart of overflowNegI_rReg: clear_reg is called with true
       // as its second argument and the subtraction uses z_sgr.
7114   match(Set cr (OverflowSubL zero op2));
7115   effect(DEF cr, USE op2);
7116   format %{ "NEGG    $op2\t # overflow check long" %}
7117   ins_encode %{
7118     __ clear_reg(Z_R0_scratch, true, false);
7119     __ z_sgr(Z_R0_scratch, $op2$$Register);
7120   %}
7121   ins_pipe(pipe_class_dummy);
7122 %}
7123 
7124 // No intrinsics for multiplication, since there is no easy way
7125 // to check for overflow.
7126 
7127 
7128 //----------Floating Point Arithmetic Instructions-----------------------------
7129 
7130 //  ADD
7131 
7132 //  Add float single precision
7133 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7134   match(Set dst (AddF dst src));
7135   effect(KILL cr);
7136   ins_cost(ALU_REG_COST);

9151 // Direct Branch.
9152 instruct branchFar(label labl) %{
       // Unconditional branch for Goto, encoded via z_enc_brul (6 bytes).
9153   match(Goto);
9154   effect(USE labl);
9155   ins_cost(BRANCH_COST);
9156   size(6);
9157   format %{ "BRUL   $labl" %}
9158   ins_encode(z_enc_brul(labl));
9159   ins_pipe(pipe_class_dummy);
9160   // This is not a short variant of a branch, but the long variant.
9161   ins_short_branch(0);
9162 %}
9163 
9164 // Conditional Near Branch
9165 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
       // Short (4-byte) conditional branch on the condition code in cr.
9166   // Same match rule as `branchConFar'.
9167   match(If cmp cr);
9168   effect(USE lbl);
9169   ins_cost(BRANCH_COST);
9170   size(4);
9171   format %{ "branch_con_short,$cmp   $lbl" %}
9172   ins_encode(z_enc_branch_con_short(cmp, lbl));
9173   ins_pipe(pipe_class_dummy);
9174   // If set to 1 this indicates that the current instruction is a
9175   // short variant of a long branch. This avoids using this
9176   // instruction in first-pass matching. It will then only be used in
9177   // the `Shorten_branches' pass.
9178   ins_short_branch(1);
9179 %}
9180 
9181 // This is for cases when the z/Architecture conditional branch instruction
9182 // does not reach far enough. So we emit a far branch here, which is
9183 // more expensive.
9184 //
9185 // Conditional Far Branch
9186 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
       // Long (6-byte) conditional branch on the condition code in cr.
9187   // Same match rule as `branchCon'.
9188   match(If cmp cr);
9189   effect(USE cr, USE lbl);
9190   // Make more expensive to prefer compare_and_branch over separate instructions.
9191   ins_cost(2 * BRANCH_COST);
9192   size(6);
9193   format %{ "branch_con_far,$cmp   $lbl" %}
9194   ins_encode(z_enc_branch_con_far(cmp, lbl));
9195   ins_pipe(pipe_class_dummy);
9196   // This is not a short variant of a branch, but the long variant.
9197   ins_short_branch(0);
9198 %}
9199 
9200 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
       // Short (4-byte) conditional branch closing a counted loop.
       // Uses the same encoding class as branchCon (z_enc_branch_con_short).
9201   match(CountedLoopEnd cmp cr);
9202   effect(USE labl);
9203   ins_cost(BRANCH_COST);
9204   size(4);
9205   format %{ "branch_con_short,$cmp   $labl\t # counted loop end" %}
9206   ins_encode(z_enc_branch_con_short(cmp, labl));
9207   ins_pipe(pipe_class_dummy);
9208   // If set to 1 this indicates that the current instruction is a
9209   // short variant of a long branch. This avoids using this
9210   // instruction in first-pass matching. It will then only be used in
9211   // the `Shorten_branches' pass.
9212   ins_short_branch(1);
9213 %}

9742 instruct CallLeafNoFPDirect(method meth) %{
       // Call of a runtime leaf routine (CallLeafNoFP), encoded via
       // z_enc_java_to_runtime_call. Declares one constant (ins_num_consts)
       // and an alignment of 2 (ins_alignment).
9743   match(CallLeafNoFP);
9744   effect(USE meth);
9745   ins_cost(CALL_COST);
9746   // TODO: s390 port size(VARIABLE_SIZE);
9747   ins_num_consts(1);
9748   format %{ "CALL,runtime leaf nofp $meth" %}
9749   ins_encode( z_enc_java_to_runtime_call(meth) );
9750   ins_pipe(pipe_class_dummy);
9751   ins_alignment(2);
9752 %}
9753 
9754 // Tail Call; Jump from runtime stub to Java code.
9755 // Also known as an 'interprocedural jump'.
9756 // Target of jump will eventually return to caller.
9757 // TailJump below removes the return address.
9758 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
       // Tail call: a plain register branch to jump_target.
       // method_oop is not referenced by the encoding; as an operand of class
       // inline_cache_regP it only constrains where that value lives.
9759   match(TailCall jump_target method_oop);
9760   ins_cost(CALL_COST);
9761   size(2);
9762   format %{ "Jmp     $jump_target\t # $method_oop holds method oop" %}
9763   ins_encode %{ __ z_br($jump_target$$Register); %}
9764   ins_pipe(pipe_class_dummy);
9765 %}
9766 
9767 // Return Instruction
9768 instruct Ret() %{
       // Method return: a 2-byte register branch to Z_R14, which the format
       // string describes as the link register.
9769   match(Return);
9770   size(2);
9771   format %{ "BR(Z_R14) // branch to link register" %}
9772   ins_encode %{ __ z_br(Z_R14); %}
9773   ins_pipe(pipe_class_dummy);
9774 %}
9775 
9776 // Tail Jump; remove the return address; jump to target.
9777 // TailCall above leaves the return address around.
9778 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9779 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9780 // "restore" before this instruction (in Epilogue), we need to materialize it
9781 // in %i0.
9782 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{

10750 instruct loadV8(iRegL dst, memory mem) %{
        // Vector load of exactly 8 bytes (see predicate) into a long register,
        // encoded through z_form_rt_mem_opt with LG_ZOPC.
10751   match(Set dst (LoadVector mem));
10752   predicate(n->as_LoadVector()->memory_size() == 8);
10753   ins_cost(MEMORY_REF_COST);
10754   // TODO: s390 port size(VARIABLE_SIZE);
10755   format %{ "LG      $dst,$mem\t # L(packed8B)" %}
10756   opcode(LG_ZOPC, LG_ZOPC);
10757   ins_encode(z_form_rt_mem_opt(dst, mem));
10758   ins_pipe(pipe_class_dummy);
10759 %}
10760 
10761 //----------POPULATION COUNT RULES--------------------------------------------
10762 
10763 // Byte reverse
10764 
10765 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
10766   match(Set dst (ReverseBytesI src));  // Rule for the int byte-reversal ideal node.
10767   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10768   ins_cost(DEFAULT_COST);
10769   size(4);  // Declared size of the single instruction emitted for LRVR_ZOPC.
10770   format %{ "LRVR    $dst,$src\t # byte reverse int" %}
10771   opcode(LRVR_ZOPC);
10772   ins_encode(z_rreform(dst, src));  // Code emission is delegated to the z_rreform enc_class (defined outside this section).
10773   ins_pipe(pipe_class_dummy);
10774 %}
10775 
10776 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
10777   match(Set dst (ReverseBytesL src));  // Rule for the long byte-reversal ideal node.
10778   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10779   ins_cost(DEFAULT_COST);
10780   // TODO: s390 port size(FIXED_SIZE);  -- unlike bytes_reverse_int, no size() is declared for this rule.
10781   format %{ "LRVGR   $dst,$src\t # byte reverse long" %}
10782   opcode(LRVGR_ZOPC);
10783   ins_encode(z_rreform(dst, src));  // Code emission is delegated to the z_rreform enc_class (defined outside this section).
10784   ins_pipe(pipe_class_dummy);
10785 %}
10786 
10787 // Leading zeroes
10788 
10789 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10790 // returns the bit position of the leftmost 1 in the 64bit source register.
10791 // As the bits are numbered from left to right (0..63), the returned
10792 // position index is equivalent to the number of leading zeroes.
10793 // If no 1-bit is found (i.e. the register contains zero), the instruction
10794 // returns position 64. That's exactly what we need.
10795 
10796 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10797   match(Set dst (CountLeadingZerosI src));
10798   effect(KILL tmp, KILL cr);  // tmp is never named in the encoding; it and the condition code are only declared as killed.
10799   ins_cost(3 * DEFAULT_COST);
10800   size(14);  // Declared size of the SLLG/IILH/FLOGR sequence emitted below.
10801   format %{ "SLLG    $dst,$src,32\t # no need to always count 32 zeroes first\n\t"
10802             "IILH    $dst,0x8000 \t # insert \"stop bit\" to force result 32 for zero src.\n\t"
10803             "FLOGR   $dst,$dst"
10804          %}
10805   ins_encode %{
10806     // Performance experiments indicate that "FLOGR" is using some kind of
10807     // iteration to find the leftmost "1" bit.
10808     //
10809     // The prior implementation zero-extended the 32-bit argument to 64 bit,
10810     // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10811     // We could gain measurable speedup in micro benchmark:
10812     //
10813     //               leading   trailing
10814     //   z10:   int     2.04       1.68
10815     //         long     1.00       1.02
10816     //   z196:  int     0.99       1.23
10817     //         long     1.00       1.11
10818     //
10819     // By shifting the argument into the high-word instead of zero-extending it.
10820     // The add'l branch on condition (taken for a zero argument, very infrequent,
10821     // good prediction) is well compensated for by the savings.
10822     // NOTE(review): the sequence emitted below contains no branch; the remark above may describe another variant - confirm.
10823     // We leave the previous implementation in for some time in the future when
10824     // the "FLOGR" instruction may become less iterative.
10825 
10826     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10827     __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10828     __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
10829     __ z_flogr($dst$$Register, $dst$$Register);  // Leftmost-one position of the shifted value is the result left in dst.
10830   %}
10831   ins_pipe(pipe_class_dummy);
10832 %}
10833 
10834 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
10835   match(Set dst (CountLeadingZerosL src));  // Rule for the long leading-zero-count ideal node.
10836   effect(KILL tmp, KILL cr);  // tmp is never named in the encoding; it and the condition code are only declared as killed.
10837   ins_cost(DEFAULT_COST);
10838   size(4);  // Declared size of the single FLOGR emitted below.
10839   format %{ "FLOGR   $dst,$src \t # count leading zeros (long)" %}
10840   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}  // Leftmost-one position of src is the result left in dst.
10841   ins_pipe(pipe_class_dummy);
10842 %}
10843 
10844 // trailing zeroes
10845 
10846 // We transform the trailing zeroes problem to a leading zeroes problem
10847 // such that we can use the FLOGR instruction to our advantage.
10848 
10849 // With
10850 //   tmp1 = src - 1
10851 // we flip all trailing zeroes to ones and the rightmost one to zero.
10852 // All other bits remain unchanged.
10853 // With the complement
10854 //   tmp2 = ~src
10855 // we get all ones in the trailing zeroes positions. Thus,
10856 //   tmp3 = tmp1 & tmp2
10857 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10858 // Now we can apply FLOGR and get 64-(trailing zeroes).
10859 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10860   match(Set dst (CountTrailingZerosI src));
10861   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10862   ins_cost(8 * DEFAULT_COST);
10863   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10864   format %{ "LLGFR   $dst,$src  \t # clear upper 32 bits (we are dealing with int)\n\t"
10865             "LCGFR   $tmp,$src  \t # load 2's complement (32->64 bit)\n\t"
10866             "AGHI    $dst,-1    \t # tmp1 = src-1\n\t"
10867             "AGHI    $tmp,-1    \t # tmp2 = -src-1 = ~src\n\t"
10868             "NGR     $dst,$tmp  \t # tmp3 = tmp1&tmp2\n\t"
10869             "FLOGR   $dst,$dst  \t # count trailing zeros (int)\n\t"
10870             "AHI     $dst,-64   \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10871             "LCR     $dst,$dst  \t # res = -tmp4"
10872          %}
10873   ins_encode %{
10874     Register Rdst = $dst$$Register;
10875     Register Rsrc = $src$$Register;
10876     // Rtmp only needed for zero-argument shortcut. With kill effect in
10877     // match rule Rsrc = roddReg would be possible, saving one register.
10878     Register Rtmp = $tmp$$Register;
10879 
10880     assert_different_registers(Rdst, Rsrc, Rtmp);  // All three operands must be in distinct registers.
10881 
10882     // Algorithm:
10883     // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10884     //   All other bits in the result are zero.
10885     // - Find the "leftmost one" bit position in the single-bit result from previous step.
10886     // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10887 
10888     // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10889     Label done;  // NOTE(review): the format listing above does not match the instructions visibly emitted here - confirm.
10890     __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10891     __ z_lcgfr(Rtmp, Rsrc);

10897                                        // into upper half of reg. Not relevant with sllg below.
10898     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10899     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10900                                        // Depends on CC set by ahi above.
10901                                        // Taken very infrequently, good prediction, no BHT entry.
10902                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10903                                        // after SLLG Rdst == 0(64bit)).
10904     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10905     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10906     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10907     __ bind(done);
10908   %}
10909   ins_pipe(pipe_class_dummy);
10910 %}
10911 
10912 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10913   match(Set dst (CountTrailingZerosL src));
10914   effect(TEMP_DEF dst, KILL tmp, KILL cr);  // tmp is never named in the encoding; it is only declared as killed.
10915   ins_cost(8 * DEFAULT_COST);
10916   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10917   format %{ "LCGR    $dst,$src  \t # preserve src\n\t"
10918             "NGR     $dst,$src  \t #\n\t"
10919             "AGHI    $dst,-1    \t # tmp1 = src-1\n\t"
10920             "FLOGR   $dst,$dst  \t # count trailing zeros (long), kill $tmp\n\t"
10921             "AHI     $dst,-64   \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10922             "LCR     $dst,$dst  \t #"
10923          %}
10924   ins_encode %{
10925     Register Rdst = $dst$$Register;
10926     Register Rsrc = $src$$Register;
10927     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10928 
10929     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10930     __ z_lcgr(Rdst, Rsrc);  // Rdst = two's complement of Rsrc.
10931     __ z_ngr(Rdst, Rsrc);   // Rdst = src & (-src): only the rightmost set bit of src remains.
10932     __ add2reg(Rdst,   -1); // Turns that single bit into ones over the trailing-zero positions.
10933     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10934     __ add2reg(Rdst,  -64); // Rdst = pos(leftmost one) - 64, i.e. -(number of trailing zeros).
10935     __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost. NOTE(review): the format above prints LCR for this step - confirm.
10936   %}
10937   ins_pipe(pipe_class_dummy);
10938 %}
10939 
10940 
10941 // bit count
10942 
10943 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
10944   match(Set dst (PopCountI src));  // Rule for the int bit-count ideal node.
10945   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10946   predicate(UsePopCountInstruction && VM_Version::has_PopCount());  // Only selectable when the flag is on and the CPU reports POPCNT.
10947   ins_cost(DEFAULT_COST);
10948   size(24);  // Declared size of the six-instruction sequence emitted below.
10949   format %{ "POPCNT  $dst,$src\t # pop count int" %}
10950   ins_encode %{
10951     Register Rdst = $dst$$Register;
10952     Register Rsrc = $src$$Register;
10953     Register Rtmp = $tmp$$Register;
10954 
10955     // Prefer compile-time assertion over run-time SIGILL.
10956     assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10957     assert_different_registers(Rdst, Rtmp);  // Rtmp receives shifted copies of Rdst, so the two must not alias.
10958 
10959     // Version 2: shows 10%(z196) improvement over original.
10960     __ z_popcnt(Rdst, Rsrc);   // per-byte bit counts (as the byte-wise summing below implies)
10961     __ z_srlg(Rtmp, Rdst, 16); // calc  byte4+byte6 and byte5+byte7
10962     __ z_alr(Rdst, Rtmp);      //   into byte6 and byte7
10963     __ z_srlg(Rtmp, Rdst,  8); // calc (byte4+byte6) + (byte5+byte7)
10964     __ z_alr(Rdst, Rtmp);      //   into byte7
10965     __ z_llgcr(Rdst, Rdst);    // zero-extend sum
10966   %}
10967   ins_pipe(pipe_class_dummy);
10968 %}
10969 
10970 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
10971   match(Set dst (PopCountL src));  // Rule for the long bit-count ideal node.
10972   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10973   predicate(UsePopCountInstruction && VM_Version::has_PopCount());  // Only selectable when the flag is on and the CPU reports POPCNT.
10974   ins_cost(DEFAULT_COST);
10975   // TODO: s390 port size(FIXED_SIZE);  -- unlike popCountI, no size() is declared for this rule.
10976   format %{ "POPCNT  $dst,$src\t # pop count long" %}
10977   ins_encode %{
10978     Register Rdst = $dst$$Register;
10979     Register Rsrc = $src$$Register;
10980     Register Rtmp = $tmp$$Register;
10981 
10982     // Prefer compile-time assertion over run-time SIGILL.
10983     assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
10984     assert_different_registers(Rdst, Rtmp);  // Rtmp receives shifted copies of Rdst, so the two must not alias.
10985 
10986     // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
10987     __ z_popcnt(Rdst, Rsrc);       // presumably per-byte bit counts, as in popCountI - TODO confirm
10988     __ z_ahhlr(Rdst, Rdst, Rdst);  // presumably folds the two word halves together - TODO confirm
10989     __ z_sllg(Rtmp, Rdst, 16);     // Shift-left/add steps accumulate the partial sums toward the top byte.
10990     __ z_algr(Rdst, Rtmp);
10991     __ z_sllg(Rtmp, Rdst,  8);
10992     __ z_algr(Rdst, Rtmp);
10993     __ z_srlg(Rdst, Rdst, 56);     // Move the top byte down to the low byte; that is the result.
10994   %}
10995   ins_pipe(pipe_class_dummy);
10996 %}

< prev index next >