jdk Sdiff src/hotspot/cpu/s390

src/hotspot/cpu/s390/s390.ad

rev 54960 : 8213084: Rework and enhance Print[Opto]Assembly output
Reviewed-by: kvn, thartmann

   1 //
   2 // Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2017, SAP SE. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //

1371     // The ic_miss_stub will handle the null pointer exception.
1372     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1373     __ z_br(R1_ic_miss_stub_addr);
1374     __ bind(valid);
1375   }
1376 
1377   // Check whether this method is the proper implementation for the class of
1378   // the receiver (ic miss check).
1379   {
1380     Label valid;
1381     // Compare cached class against klass from receiver.
1382     // This also does an implicit null check!
1383     __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1384     __ z_bre(valid);
1385     // The inline cache points to the wrong method. Call the
1386     // ic_miss_stub to find the proper method.
1387     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1388     __ z_br(R1_ic_miss_stub_addr);
1389     __ bind(valid);
1390   }
1391 
1392 }
1393 
1394 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1395   // No fixed length is declared here; defer to the generic MachNode sizing.
1396   const uint dynamic_size = MachNode::size(ra_);
       return dynamic_size;
1397 }
1398 
1399 //=============================================================================
1400 
1401 %} // interrupt source section
1402 
1403 source_hpp %{ // Header information of the source block.
1404 
1405 class HandlerImpl {
1406  public:
1407 
1408   static int emit_exception_handler(CodeBuffer &cbuf);
1409   static int emit_deopt_handler(CodeBuffer& cbuf);
1410 
1411   static uint size_exception_handler() {

4706 // See cOop encoding classes for elaborate comment.
4707 
4708 // Moved here because it is needed in expand rules for encode.
4709 // Long negation.
4710 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
       // Long negation: matches (SubL zero src) with the immL_0 operand, i.e. dst = 0 - src.
       // Also instantiated by name from expand rules (see the negL_reg_reg(...) calls
       // in the encodeP expand blocks further down).
4711   match(Set dst (SubL zero src));
4712   effect(KILL cr);  // The condition code is declared clobbered.
4713   size(4);          // A single instruction (z_lcgr) is emitted.
4714   format %{ "NEG     $dst, $src\t # long" %}
4715   ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4716   ins_pipe(pipe_class_dummy);
4717 %}
4718 
4719 // Load Compressed Pointer
4720 
4721 // Load narrow oop
4722 instruct loadN(iRegN dst, memory mem) %{
       // Loads a narrow oop (LoadN) from memory into a narrow-oop register; no decoding.
4723   match(Set dst (LoadN mem));
4724   ins_cost(MEMORY_REF_COST);
4725   size(Z_DISP3_SIZE);
4726   format %{ "LoadN  $dst,$mem\t# (cOop)" %}
       // NOTE(review): both opcode slots name LLGF; presumably z_form_rt_mem_opt chooses
       // between the two slots by displacement, so the length is constant here and the
       // fixed size(Z_DISP3_SIZE) holds -- confirm against the enc_class.
4727   opcode(LLGF_ZOPC, LLGF_ZOPC);
4728   ins_encode(z_form_rt_mem_opt(dst, mem));
4729   ins_pipe(pipe_class_dummy);
4730 %}
4731 
4732 // Load narrow Klass Pointer
4733 instruct loadNKlass(iRegN dst, memory mem) %{
       // Loads a narrow klass pointer (LoadNKlass) from memory. Cost, size, opcode and
       // encoding are identical to loadN; only the matched ideal node and format differ.
4734   match(Set dst (LoadNKlass mem));
4735   ins_cost(MEMORY_REF_COST);
4736   size(Z_DISP3_SIZE);
4737   format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %}
4738   opcode(LLGF_ZOPC, LLGF_ZOPC);
4739   ins_encode(z_form_rt_mem_opt(dst, mem));
4740   ins_pipe(pipe_class_dummy);
4741 %}
4742 
4743 // Load constant Compressed Pointer
4744 
4745 instruct loadConN(iRegN dst, immN src) %{
       // Materializes a narrow-oop constant (immN) in a register.
4746   match(Set dst src);
4747   ins_cost(DEFAULT_COST);
4748   size(6);
4749   format %{ "loadConN    $dst,$src\t # (cOop)" %}
4750   ins_encode %{
         // The AddressLiteral supplies both the relocation spec recorded below and the
         // value handed to load_narrow_oop().
4751     AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
         // NOTE(review): the second relocate() argument (1) is presumably the relocation
         // format -- confirm.
4752     __ relocate(cOop.rspec(), 1);
4753     __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4754   %}
4755   ins_pipe(pipe_class_dummy);
4756 %}
4757

4770   match(Set dst src);
4771   ins_cost(DEFAULT_COST);
4772   size(6);
4773   format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4774   ins_encode %{
4775     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4776     __ relocate(NKlass.rspec(), 1);
4777     __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4778   %}
4779   ins_pipe(pipe_class_dummy);
4780 %}
4781 
4782 // Load and Decode Compressed Pointer
4783 // optimized variants for Unscaled cOops
4784 
4785 instruct decodeLoadN(iRegP dst, memory mem) %{
       // Combined load+decode for (DecodeN (LoadN mem)) into a pointer register, intended
       // for a NULL base with zero shift.
4786   match(Set dst (DecodeN (LoadN mem)));
       // The leading `false &&` makes this predicate constant false, so the rule is
       // currently disabled regardless of the compressed-oops mode.
4787   predicate(false && (CompressedOops::base()==NULL)&&(CompressedOops::shift()==0));
4788   ins_cost(MEMORY_REF_COST);
4789   size(Z_DISP3_SIZE);
4790   format %{ "DecodeLoadN  $dst,$mem\t# (cOop Load+Decode)" %}
4791   opcode(LLGF_ZOPC, LLGF_ZOPC);
4792   ins_encode(z_form_rt_mem_opt(dst, mem));
4793   ins_pipe(pipe_class_dummy);
4794 %}
4795 
4796 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
       // Combined load+decode for (DecodeNKlass (LoadNKlass mem)), intended for a NULL
       // klass base with zero shift.
4797   match(Set dst (DecodeNKlass (LoadNKlass mem)));
       // The leading `false &&` makes this predicate constant false: rule is disabled.
4798   predicate(false && (CompressedKlassPointers::base()==NULL)&&(CompressedKlassPointers::shift()==0));
4799   ins_cost(MEMORY_REF_COST);
4800   size(Z_DISP3_SIZE);
4801   format %{ "DecodeLoadNKlass  $dst,$mem\t# (load/decode NKlass)" %}
4802   opcode(LLGF_ZOPC, LLGF_ZOPC);
4803   ins_encode(z_form_rt_mem_opt(dst, mem));
4804   ins_pipe(pipe_class_dummy);
4805 %}
4806 
4807 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
       // Matches DecodeNKlass applied to a narrow-klass constant. Instead of decoding at
       // run time, the Klass* value taken from the AddressLiteral is loaded via load_const.
4808   match(Set dst (DecodeNKlass src));
4809   ins_cost(3 * DEFAULT_COST);
4810   size(12);
4811   format %{ "DecodeLoadConNKlass  $dst,$src\t # decode(cKlass)" %}
4812   ins_encode %{
4813     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
         // Record the metadata relocation before emitting the constant load.
4814     __ relocate(NKlass.rspec(), 1);
4815     __ load_const($dst$$Register, (Klass*)NKlass.value());
4816   %}
4817   ins_pipe(pipe_class_dummy);
4818 %}
4819 
4820 // Decode Compressed Pointer
4821 
4822 // General decoder
4823 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
       // General narrow-oop decode emitted as one oop_decoder() sequence.
4824   match(Set dst (DecodeN src));
4825   effect(KILL cr);
       // Selected when the heap base is NULL or base-load peeling (ExpandLoadingBaseDecode)
       // is off; decodeN_Ex carries the complementary predicate.
4826   predicate(CompressedOops::base() == NULL || !ExpandLoadingBaseDecode);
4827   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4828   // TODO: s390 port size(VARIABLE_SIZE);
4829   format %{ "decodeN  $dst,$src\t# (decode cOop)" %}
       // NOTE(review): the `true` argument presumably requests null handling (decodeN_NN
       // passes false; cf. the format strings of decodeN_base / decodeN_NN_base) -- confirm.
4830   ins_encode %{  __ oop_decoder($dst$$Register, $src$$Register, true); %}
4831   ins_pipe(pipe_class_dummy);
4832 %}
4833 
4834 // General Klass decoder
4835 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
       // General narrow-klass decode: no predicate, delegates to decode_klass_not_null().
       // No size() is declared for this rule.
4836   match(Set dst (DecodeNKlass src));
4837   effect(KILL cr);
4838   ins_cost(3 * DEFAULT_COST);
4839   format %{ "decode_klass $dst,$src" %}
4840   ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4841   ins_pipe(pipe_class_dummy);
4842 %}
4843 
4844 // General decoder
4845 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
       // Narrow-oop decode for values whose type is NotNull or Constant. Applies when the
       // heap base is NULL or ExpandLoadingBaseDecode_NN is off (see predicate).
4846   match(Set dst (DecodeN src));
4847   effect(KILL cr);
4848   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4849              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4850             (CompressedOops::base()== NULL || !ExpandLoadingBaseDecode_NN));
       // Cheaper than decodeN: no BRANCH_COST and one DEFAULT_COST less.
4851   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4852   // TODO: s390 port size(VARIABLE_SIZE);
4853   format %{ "decodeN  $dst,$src\t# (decode cOop NN)" %}
4854   ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4855   ins_pipe(pipe_class_dummy);
4856 %}
4857 
4858   instruct loadBase(iRegL dst, immL baseImm) %{
         // Helper without a match rule: predicate(false) keeps the matcher from selecting
         // it; it is instantiated by name from expand blocks (e.g. decodeN_Ex).
4859     effect(DEF dst, USE baseImm);
4860     predicate(false);
4861     format %{ "llihl    $dst=$baseImm \t// load heap base" %}
4862     ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4863     ins_pipe(pipe_class_dummy);
4864   %}
4865 
4866   // Decoder for heapbased mode peeling off loading the base.
4867   instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
         // Decode step that receives the heap base in a register. predicate(false) means
         // it is only reachable through the expand in decodeN_Ex.
4868     match(Set dst (DecodeN src base));
4869     // Note: Effect TEMP dst was used with the intention to get
4870     // different regs for dst and base, but this has caused ADLC to
4871     // generate wrong code. Oop_decoder generates additional lgr when
4872     // dst==base.
4873     effect(KILL cr);
4874     predicate(false);
4875     // TODO: s390 port size(VARIABLE_SIZE);
4876     format %{ "decodeN  $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4877     ins_encode %{
           // The pow2 offset is recomputed from CompressedOops::base() at emit time rather
           // than passed in as an operand.
4878       __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4879                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4880     %}
4881     ins_pipe(pipe_class_dummy);
4882   %}
4883 
4884   // Decoder for heapbased mode peeling off loading the base.
4885   instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
         // Counterpart of decodeN_base that passes `false` to oop_decoder; its format
         // string shows no null test. Reachable only through expand (predicate(false)).
4886     match(Set dst (DecodeN src base));
4887     effect(KILL cr);
4888     predicate(false);
4889     // TODO: s390 port size(VARIABLE_SIZE);
4890     format %{ "decodeN  $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4891     ins_encode %{
4892       __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4893                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4894     %}
4895     ins_pipe(pipe_class_dummy);
4896   %}
4897 
4898 // Decoder for heapbased mode peeling off loading the base.
4899 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
       // Expanding variant of decodeN: splits the decode into a separate heap-base load
       // (loadBase) followed by decodeN_base, which uses the loaded base register.
4900   match(Set dst (DecodeN src));
       // Complementary to decodeN's predicate: non-NULL base and ExpandLoadingBaseDecode on.
4901   predicate(CompressedOops::base() != NULL && ExpandLoadingBaseDecode);
4902   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4903   // TODO: s390 port size(VARIABLE_SIZE);
4904   expand %{
4905     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4906     iRegL base;
4907     loadBase(base, baseImm);
4908     decodeN_base(dst, src, base, cr);
4909   %}
4910 %}

4920   expand %{
4921     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4922     iRegL base;
4923     loadBase(base, baseImm);
4924     decodeN_NN_base(dst, src, base, cr);
4925   %}
4926 %}
4927 
4928 //  Encode Compressed Pointer
4929 
4930 // General encoder
4931 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
       // General oop encoder for pointers not typed NotNull. Applies when the base is 0,
       // the base is disjoint, or ExpandLoadingBaseEncode is off (see predicate).
4932   match(Set dst (EncodeP src));
4933   effect(KILL cr);
4934   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4935             (CompressedOops::base() == 0 ||
4936              CompressedOops::base_disjoint() ||
4937              !ExpandLoadingBaseEncode));
4938   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4939   // TODO: s390 port size(VARIABLE_SIZE);
4940   format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
       // Z_R1_scratch is handed to oop_encoder as its base/scratch register argument.
       // NOTE(review): the meaning of the -1 offset and of all_outs_are_Stores(this) is
       // defined by oop_encoder, which is not visible here -- confirm before changing.
4941   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4942   ins_pipe(pipe_class_dummy);
4943 %}
4944 
4945 // General class encoder
4946 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
       // General klass-pointer encoder: no predicate, no declared cost or size; delegates
       // to encode_klass_not_null().
4947   match(Set dst (EncodePKlass src));
4948   effect(KILL cr);
4949   format %{ "encode_klass $dst,$src" %}
4950   ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4951   ins_pipe(pipe_class_dummy);
4952 %}
4953 
4954 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
       // Oop encoder for pointers typed NotNull. Same structure as encodeP, but it tests
       // ExpandLoadingBaseEncode_NN and passes `false` as oop_encoder's third argument.
4955   match(Set dst (EncodeP src));
4956   effect(KILL cr);
4957   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4958             (CompressedOops::base() == 0 ||
4959              CompressedOops::base_disjoint() ||
4960              !ExpandLoadingBaseEncode_NN));
4961   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4962   // TODO: s390 port size(VARIABLE_SIZE);
4963   format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
4964   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4965   ins_pipe(pipe_class_dummy);
4966 %}
4967 
4968   // Encoder for heapbased mode peeling off loading the base.
4969   instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
         // Encode step that receives a base register operand. predicate(false) keeps the
         // matcher from selecting it directly.
4970     match(Set dst (EncodeP src (Binary base dst)));
4971     effect(TEMP_DEF dst);
4972     predicate(false);
4973     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4974     // TODO: s390 port size(VARIABLE_SIZE);
4975     format %{ "encodeP  $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %}
4976     ins_encode %{
           // The offset is the negated pow2 offset of the already-shifted heap base
           // (base >> shift), computed at emit time.
4977       jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4978         (((uint64_t)(intptr_t)CompressedOops::base()) >> CompressedOops::shift());
4979       __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4980     %}
4981     ins_pipe(pipe_class_dummy);
4982   %}
4983 
4984   // Encoder for heapbased mode peeling off loading the base.
4985   instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
         // Variant of encodeP_base that passes `false` to oop_encoder and receives the
         // offset as an immediate operand instead of computing it in the encoding.
         // predicate(false): instantiated from expand blocks only, e.g. the
         // encodeP_NN_base(dst, src, negBase, pow2_offset) call further down.
4986     match(Set dst (EncodeP src base));
4987     effect(USE pow2_offset);
4988     predicate(false);
4989     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4990     // TODO: s390 port size(VARIABLE_SIZE);
4991     format %{ "encodeP  $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %}
4992     ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4993     ins_pipe(pipe_class_dummy);
4994   %}
4995 
4996 // Encoder for heapbased mode peeling off loading the base.
4997 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4998   match(Set dst (EncodeP src));
4999   effect(KILL cr);
5000   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
5001             (CompressedOops::base_overlaps() && ExpandLoadingBaseEncode));
5002   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
5003   // TODO: s390 port size(VARIABLE_SIZE);
5004   expand %{
5005     immL baseImm %{ ((jlong)(intptr_t)CompressedOops::base()) >> CompressedOops::shift() %}
5006     immL_0 zero %{ (0) %}
5007     flagsReg ccr;
5008     iRegL base;
5009     iRegL negBase;
5010     loadBase(base, baseImm);
5011     negL_reg_reg(negBase, zero, base, ccr);

5024   expand %{
5025     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
5026     immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)CompressedOops::base())) %}
5027     immL_0 zero %{ 0 %}
5028     flagsReg ccr;
5029     iRegL base;
5030     iRegL negBase;
5031     loadBase(base, baseImm);
5032     negL_reg_reg(negBase, zero, base, ccr);
5033     encodeP_NN_base(dst, src, negBase, pow2_offset);
5034   %}
5035 %}
5036 
5037 //  Store Compressed Pointer
5038 
5039 // Store Compressed Pointer
5040 instruct storeN(memory mem, iRegN_P2N src) %{
       // Stores a narrow oop (StoreN) to memory.
5041   match(Set mem (StoreN mem src));
5042   ins_cost(MEMORY_REF_COST);
5043   size(Z_DISP_SIZE);
5044   format %{ "ST      $src,$mem\t# (cOop)" %}
       // NOTE(review): two different opcodes (STY/ST) are listed, unlike loadN which
       // names LLGF twice; presumably z_form_rt_mem_opt picks one by displacement size,
       // hence Z_DISP_SIZE rather than Z_DISP3_SIZE -- confirm against the enc_class.
5045   opcode(STY_ZOPC, ST_ZOPC);
5046   ins_encode(z_form_rt_mem_opt(src, mem));
5047   ins_pipe(pipe_class_dummy);
5048 %}
5049 
5050 // Store Compressed Klass pointer
5051 instruct storeNKlass(memory mem, iRegN src) %{
       // Stores a narrow klass pointer (StoreNKlass) to memory. Same cost, size, opcodes
       // and encoding as storeN; the source operand class is iRegN instead of iRegN_P2N.
5052   match(Set mem (StoreNKlass mem src));
5053   ins_cost(MEMORY_REF_COST);
5054   size(Z_DISP_SIZE);
5055   format %{ "ST      $src,$mem\t# (cKlass)" %}
5056   opcode(STY_ZOPC, ST_ZOPC);
5057   ins_encode(z_form_rt_mem_opt(src, mem));
5058   ins_pipe(pipe_class_dummy);
5059 %}
5060 
5061 // Compare Compressed Pointers
5062 
5063 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
       // Register-register compare of two narrow oops (CmpN); the result is the
       // condition code. Encoded as a single 2-byte CLR.
5064   match(Set cr (CmpN src1 src2));
5065   ins_cost(DEFAULT_COST);
5066   size(2);
5067   format %{ "CLR     $src1,$src2\t# (cOop)" %}
5068   opcode(CLR_ZOPC);
5069   ins_encode(z_rrform(src1, src2));
5070   ins_pipe(pipe_class_dummy);
5071 %}
5072 
5073 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
       // Compares a narrow oop register against a narrow-oop constant (CmpN with immN).
5074   match(Set cr (CmpN src1 src2));
5075   ins_cost(DEFAULT_COST);
5076   size(6);
5077   format %{ "CLFI    $src1,$src2\t# (cOop) compare immediate narrow" %}
5078   ins_encode %{
5079     AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
         // Record the oop relocation before emitting the compare-immediate.
5080     __ relocate(cOop.rspec(), 1);
5081     __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5082   %}
5083   ins_pipe(pipe_class_dummy);
5084 %}
5085 
5086 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
       // Compares a narrow register against a narrow-klass constant. The ideal node is
       // still CmpN; the immNKlass operand type distinguishes it from compN_iRegN_immN.
5087   match(Set cr (CmpN src1 src2));
5088   ins_cost(DEFAULT_COST);
5089   size(6);
5090   format %{ "CLFI    $src1,$src2\t# (NKlass) compare immediate narrow" %}
5091   ins_encode %{
5092     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
         // Record the metadata relocation before emitting the compare-immediate.
5093     __ relocate(NKlass.rspec(), 1);
5094     __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5095   %}
5096   ins_pipe(pipe_class_dummy);
5097 %}
5098 
5099 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
       // Compares a narrow oop against the immN0 (zero) operand using a 2-byte LTR.
5100   match(Set cr (CmpN src1 src2));
5101   ins_cost(DEFAULT_COST);
5102   size(2);
5103   format %{ "LTR     $src1,$src2\t# (cOop) LTR because comparing against zero" %}
5104   opcode(LTR_ZOPC);
       // The encoding uses src1 for both register fields; src2 appears only in the match
       // rule and in the format text.
5105   ins_encode(z_rrform(src1, src1));
5106   ins_pipe(pipe_class_dummy);
5107 %}
5108 
5109 
5110 //----------MemBar Instructions-----------------------------------------------
5111 
5112 // Memory barrier flavors
5113 
5114 instruct membar_acquire() %{
       // Handles both MemBarAcquire and LoadFence ideal nodes (two match rules).
5115   match(MemBarAcquire);
5116   match(LoadFence);
5117   ins_cost(4*MEMORY_REF_COST);
       // size(0) declares that no bytes are emitted.
       // NOTE(review): z_acquire() is therefore expected to generate no code -- confirm
       // in the macro assembler.
5118   size(0);
5119   format %{ "MEMBAR-acquire" %}
5120   ins_encode %{ __ z_acquire(); %}
5121   ins_pipe(pipe_class_dummy);
5122 %}
5123

6778       __ z_lghi(Z_R0_scratch, divisor);
6779       __ z_lgr($dst$$Register->successor(), $src1$$Register);
6780       __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch);  // Instruction kills tmp.
6781     } else {
6782       __ clear_reg($dst$$Register, true, false);
6783     }
6784   %}
6785   ins_pipe(pipe_class_dummy);
6786 %}
6787 
6788 // SHIFT
6789 
6790 // Shift left logical
6791 
6792 // Register Shift Left variable
6793 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
       // Int shift left by a variable count held in a register.
6794   match(Set dst (LShiftI src nbits));
6795   effect(KILL cr); // R1 is killed, too.
6796   ins_cost(3 * DEFAULT_COST);
6797   size(14);
6798   format %{ "SLL     $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
6799   ins_encode %{
         // Work on a copy of the count in Z_R1_scratch so that nbits itself is not modified.
6800     __ z_lgr(Z_R1_scratch, $nbits$$Register);
         // Mask the count with BitsPerJavaInteger-1 (the "& 31" shown in the format).
6801     __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
6802     __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
6803   %}
6804   ins_pipe(pipe_class_dummy);
6805 %}
6806 
6807 // Register Shift Left Immediate
6808 // Constant shift count is masked in ideal graph already.
6809 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
       // Int shift left by a constant count; a single 6-byte SLLG, no flags operand.
6810   match(Set dst (LShiftI src nbits));
6811   size(6);
6812   format %{ "SLL     $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
6813   ins_encode %{
6814     int Nbit = $nbits$$constant;
         // The assert checks that the count is already masked; the mask is nevertheless
         // applied again in the emitted shift amount.
6815     assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6816     __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6817   %}
6818   ins_pipe(pipe_class_dummy);
6819 %}
6820 
6821 // Register Shift Left Immediate by 1bit
6822 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
       // Special case for a shift count of 1 (immI_1 operand), enabled by PreferLAoverADD.
       // Emits LA with src as both register operands and displacement 0, which the
       // format string documents as "SLL by 1".
6823   match(Set dst (LShiftI src nbits));
6824   predicate(PreferLAoverADD);
6825   ins_cost(DEFAULT_COST_LOW);
6826   size(4);
6827   format %{ "LA      $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6828   ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6829   ins_pipe(pipe_class_dummy);
6830 %}
6831 
6832 // Register Shift Left Long

7108   %}
7109   ins_pipe(pipe_class_dummy);
7110 %}
7111 
7112 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
       // Overflow check for long subtraction with a constant subtrahend. Only the
       // condition code is defined; op1 is declared USE and is not written.
7113   match(Set cr (OverflowSubL op1 op2));
7114   effect(DEF cr, USE op1, USE op2);
7115   // TODO: s390 port size(VARIABLE_SIZE);
7116   format %{ "SGR     $op1,$op2\t # overflow check long" %}
7117   ins_encode %{
         // The constant goes to Z_R1_scratch and a copy of op1 to Z_R0_scratch; the
         // subtraction is then done entirely in the scratch registers.
7118     __ load_const_optimized(Z_R1_scratch, $op2$$constant);
7119     __ z_lgr(Z_R0_scratch, $op1$$Register);
7120     __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7121   %}
7122   ins_pipe(pipe_class_dummy);
7123 %}
7124 
7125 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
       // Overflow check for int negation, matched as (OverflowSubI zero op2).
7126   match(Set cr (OverflowSubI zero op2));
7127   effect(DEF cr, USE op2);
7128   format %{ "NEG    $op2\t# overflow check int" %}
7129   ins_encode %{
         // Computes 0 - op2 in Z_R0_scratch; op2 itself is left untouched.
7130     __ clear_reg(Z_R0_scratch, false, false);
7131     __ z_sr(Z_R0_scratch, $op2$$Register);
7132   %}
7133   ins_pipe(pipe_class_dummy);
7134 %}
7135 
7136 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
       // Overflow check for long negation, matched as (OverflowSubL zero op2).
7137   match(Set cr (OverflowSubL zero op2));
7138   effect(DEF cr, USE op2);
7139   format %{ "NEGG    $op2\t# overflow check long" %}
7140   ins_encode %{
         // Computes 0 - op2 in Z_R0_scratch using z_sgr. Differs from the int variant in
         // the second clear_reg argument (true here, false there).
         // NOTE(review): presumably that flag selects whole-register clearing -- confirm.
7141     __ clear_reg(Z_R0_scratch, true, false);
7142     __ z_sgr(Z_R0_scratch, $op2$$Register);
7143   %}
7144   ins_pipe(pipe_class_dummy);
7145 %}
7146 
7147 // No intrinsics for multiplication, since there is no easy way
7148 // to check for overflow.
7149 
7150 
7151 //----------Floating Point Arithmetic Instructions-----------------------------
7152 
7153 //  ADD
7154 
7155 //  Add float single precision
7156 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7157   match(Set dst (AddF dst src));
7158   effect(KILL cr);
7159   ins_cost(ALU_REG_COST);

9174 // Direct Branch.
9175 instruct branchFar(label labl) %{
       // Unconditional branch (Goto) in its long form: 6 bytes, encoded by z_enc_brul.
9176   match(Goto);
9177   effect(USE labl);
9178   ins_cost(BRANCH_COST);
9179   size(6);
9180   format %{ "BRUL   $labl" %}
9181   ins_encode(z_enc_brul(labl));
9182   ins_pipe(pipe_class_dummy);
9183   // This is not a short variant of a branch, but the long variant.
9184   ins_short_branch(0);
9185 %}
9186 
9187 // Conditional Near Branch
9188 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
       // Short (4-byte) conditional branch; shares its match rule with branchConFar and
       // is marked as the short variant via ins_short_branch(1).
9189   // Same match rule as `branchConFar'.
9190   match(If cmp cr);
9191   effect(USE lbl);
9192   ins_cost(BRANCH_COST);
9193   size(4);
9194   format %{ "branch_con_short,$cmp   $cr, $lbl" %}
9195   ins_encode(z_enc_branch_con_short(cmp, lbl));
9196   ins_pipe(pipe_class_dummy);
9197   // If set to 1 this indicates that the current instruction is a
9198   // short variant of a long branch. This avoids using this
9199   // instruction in first-pass matching. It will then only be used in
9200   // the `Shorten_branches' pass.
9201   ins_short_branch(1);
9202 %}
9203 
9204 // This is for cases when the z/Architecture conditional branch instruction
9205 // does not reach far enough. So we emit a far branch here, which is
9206 // more expensive.
9207 //
9208 // Conditional Far Branch
9209 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
       // Long (6-byte) conditional branch; the first-pass counterpart of branchCon.
9210   // Same match rule as `branchCon'.
9211   match(If cmp cr);
       // Unlike branchCon, cr is listed explicitly as USE here.
9212   effect(USE cr, USE lbl);
9213   // Make more expensive to prefer compare_and_branch over separate instructions.
9214   ins_cost(2 * BRANCH_COST);
9215   size(6);
9216   format %{ "branch_con_far,$cmp   $cr, $lbl" %}
9217   ins_encode(z_enc_branch_con_far(cmp, lbl));
9218   ins_pipe(pipe_class_dummy);
9219   // This is not a short variant of a branch, but the long variant.
9220   ins_short_branch(0);
9221 %}
9222 
9223 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
       // Short (4-byte) conditional branch closing a counted loop (CountedLoopEnd).
       // Uses the same encoding class as branchCon.
9224   match(CountedLoopEnd cmp cr);
9225   effect(USE labl);
9226   ins_cost(BRANCH_COST);
9227   size(4);
9228   format %{ "branch_con_short,$cmp   $labl\t # counted loop end" %}
9229   ins_encode(z_enc_branch_con_short(cmp, labl));
9230   ins_pipe(pipe_class_dummy);
9231   // If set to 1 this indicates that the current instruction is a
9232   // short variant of a long branch. This avoids using this
9233   // instruction in first-pass matching. It will then only be used in
9234   // the `Shorten_branches' pass.
9235   ins_short_branch(1);
9236 %}

9765 instruct CallLeafNoFPDirect(method meth) %{
       // Leaf runtime call without floating point (CallLeafNoFP), emitted through the
       // z_enc_java_to_runtime_call encoding class.
9766   match(CallLeafNoFP);
9767   effect(USE meth);
9768   ins_cost(CALL_COST);
9769   // TODO: s390 port size(VARIABLE_SIZE);
       // NOTE(review): ins_num_consts(1) presumably reserves one constant-table entry for
       // the call target -- confirm against the enc_class.
9770   ins_num_consts(1);
9771   format %{ "CALL,runtime leaf nofp $meth" %}
9772   ins_encode( z_enc_java_to_runtime_call(meth) );
9773   ins_pipe(pipe_class_dummy);
9774   ins_alignment(2);
9775 %}
9776 
9777 // Tail Call; Jump from runtime stub to Java code.
9778 // Also known as an 'interprocedural jump'.
9779 // Target of jump will eventually return to caller.
9780 // TailJump below removes the return address.
9781 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
       // Tail call: a 2-byte register branch to jump_target. method_oop is constrained to
       // the inline_cache_regP operand class by the signature but is not referenced by
       // the encoding.
9782   match(TailCall jump_target method_oop);
9783   ins_cost(CALL_COST);
9784   size(2);
9785   format %{ "Jmp     $jump_target\t# $method_oop holds method oop" %}
9786   ins_encode %{ __ z_br($jump_target$$Register); %}
9787   ins_pipe(pipe_class_dummy);
9788 %}
9789 
9790 // Return Instruction
9791 instruct Ret() %{
       // Method return: a 2-byte branch to the address held in Z_R14.
9792   match(Return);
9793   size(2);
9794   format %{ "BR(Z_R14) // branch to link register" %}
9795   ins_encode %{ __ z_br(Z_R14); %}
9796   ins_pipe(pipe_class_dummy);
9797 %}
9798 
9799 // Tail Jump; remove the return address; jump to target.
9800 // TailCall above leaves the return address around.
9801 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9802 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9803 // "restore" before this instruction (in Epilogue), we need to materialize it
9804 // in %i0.
9805 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{

10773 instruct loadV8(iRegL dst, memory mem) %{
        // Loads an 8-byte vector (LoadVector with memory_size() == 8) into a long register
        // using a plain LG.
10774   match(Set dst (LoadVector mem));
10775   predicate(n->as_LoadVector()->memory_size() == 8);
10776   ins_cost(MEMORY_REF_COST);
        // NOTE(review): both opcode slots name LG, as loadN does with LLGF while declaring
        // size(Z_DISP3_SIZE); a fixed size may be declarable here as well -- confirm.
10777   // TODO: s390 port size(VARIABLE_SIZE);
10778   format %{ "LG      $dst,$mem\t # L(packed8B)" %}
10779   opcode(LG_ZOPC, LG_ZOPC);
10780   ins_encode(z_form_rt_mem_opt(dst, mem));
10781   ins_pipe(pipe_class_dummy);
10782 %}
10783 
10784 //----------POPULATION COUNT RULES--------------------------------------------
10785 
10786 // Byte reverse
10787 
10788 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
        // Byte-reverses an int (ReverseBytesI) with a single 4-byte LRVR; no flags operand.
10789   match(Set dst (ReverseBytesI src));
10790   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10791   ins_cost(DEFAULT_COST);
10792   size(4);
10793   format %{ "LRVR    $dst,$src\t# byte reverse int" %}
10794   opcode(LRVR_ZOPC);
10795   ins_encode(z_rreform(dst, src));
10796   ins_pipe(pipe_class_dummy);
10797 %}
10798 
10799 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
        // Byte-reverses a long (ReverseBytesL) with a single LRVGR; no flags operand.
10800   match(Set dst (ReverseBytesL src));
10801   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10802   ins_cost(DEFAULT_COST);
        // LRVGR goes through the same z_rreform encoding as LRVR in bytes_reverse_int,
        // which declares 4 bytes; declare the same fixed size here.
10803   size(4);
10804   format %{ "LRVGR   $dst,$src\t# byte reverse long" %}
10805   opcode(LRVGR_ZOPC);
10806   ins_encode(z_rreform(dst, src));
10807   ins_pipe(pipe_class_dummy);
10808 %}
10809 
10810 // Leading zeroes
10811 
10812 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10813 // returns the bit position of the leftmost 1 in the 64bit source register.
10814 // As the bits are numbered from left to right (0..63), the returned
10815 // position index is equivalent to the number of leading zeroes.
10816 // If no 1-bit is found (i.e. the register contains zero), the instruction
10817 // returns position 64. That's exactly what we need.
10818 
10819 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
        // Counts leading zeros of an int. dst is bound to the revenRegI operand class and
        // tmp to roddRegI; tmp is not referenced by the encoding but is declared killed
        // together with the condition code.
10820   match(Set dst (CountLeadingZerosI src));
10821   effect(KILL tmp, KILL cr);
10822   ins_cost(3 * DEFAULT_COST);
10823   size(14);
10824   format %{ "SLLG    $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
10825             "IILH    $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
10826             "FLOGR   $dst,$dst"
10827          %}
10828   ins_encode %{
          // NOTE(review): the text below mentions an additional branch on condition and a
          // retained previous implementation; neither is visible in this encoding --
          // confirm whether the comment is stale.
10829     // Performance experiments indicate that "FLOGR" is using some kind of
10830     // iteration to find the leftmost "1" bit.
10831     //
10832     // The prior implementation zero-extended the 32-bit argument to 64 bit,
10833     // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10834     // We could gain measurable speedup in micro benchmark:
10835     //
10836     //               leading   trailing
10837     //   z10:   int     2.04       1.68
10838     //         long     1.00       1.02
10839     //   z196:  int     0.99       1.23
10840     //         long     1.00       1.11
10841     //
10842     // By shifting the argument into the high-word instead of zero-extending it.
10843     // The add'l branch on condition (taken for a zero argument, very infrequent,
10844     // good prediction) is well compensated for by the savings.
10845     //
10846     // We leave the previous implementation in for some time in the future when
10847     // the "FLOGR" instruction may become less iterative.
10848 
10849     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10850     __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10851     __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
10852     __ z_flogr($dst$$Register, $dst$$Register);
10853   %}
10854   ins_pipe(pipe_class_dummy);
10855 %}
10856 
10857 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
        // Counts leading zeros of a long with a single 4-byte FLOGR. dst is bound to the
        // revenRegI operand class and tmp to roddRegI; tmp is not referenced by the
        // encoding but is declared killed together with the condition code.
10858   match(Set dst (CountLeadingZerosL src));
10859   effect(KILL tmp, KILL cr);
10860   ins_cost(DEFAULT_COST);
10861   size(4);
        // Single-line format: no trailing "\n\t", which would print an empty continuation line.
10862   format %{ "FLOGR   $dst,$src \t# count leading zeros (long)" %}
10863   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10864   ins_pipe(pipe_class_dummy);
10865 %}
10866 
10867 // trailing zeroes
10868 
10869 // We transform the trailing zeroes problem to a leading zeroes problem
10870 // such that we can use the FLOGR instruction to our advantage.
10871 
10872 // With
10873 //   tmp1 = src - 1
10874 // we flip all trailing zeroes to ones and the rightmost one to zero.
10875 // All other bits remain unchanged.
10876 // With the complement
10877 //   tmp2 = ~src
10878 // we get all ones in the trailing zeroes positions. Thus,
10879 //   tmp3 = tmp1 & tmp2
10880 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10881 // Now we can apply FLOGR and get 64-(trailing zeroes).
10882 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10883   match(Set dst (CountTrailingZerosI src));
10884   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10885   ins_cost(8 * DEFAULT_COST);
10886   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10887   format %{ "LLGFR   $dst,$src  \t# clear upper 32 bits (we are dealing with int)\n\t"
10888             "LCGFR   $tmp,$src  \t# load 2's complement (32->64 bit)\n\t"
10889             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10890             "AGHI    $tmp,-1    \t# tmp2 = -src-1 = ~src\n\t"
10891             "NGR     $dst,$tmp  \t# tmp3 = tmp1&tmp2\n\t"
10892             "FLOGR   $dst,$dst  \t# count trailing zeros (int)\n\t"
10893             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10894             "LCR     $dst,$dst  \t# res = -tmp4"
10895          %}
10896   ins_encode %{
10897     Register Rdst = $dst$$Register;
10898     Register Rsrc = $src$$Register;
10899     // Rtmp only needed for the zero-argument shortcut. With kill effect in
10900     // match rule Rsrc = roddReg would be possible, saving one register.
10901     Register Rtmp = $tmp$$Register;
10902 
10903     assert_different_registers(Rdst, Rsrc, Rtmp);
10904 
10905     // Algorithm:
10906     // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10907     //   All other bits in the result are zero.
10908     // - Find the "leftmost one" bit position in the single-bit result from previous step.
10909     // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10910 
10911     // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10912     Label done;
10913     __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10914     __ z_lcgfr(Rtmp, Rsrc);

10920                                        // into upper half of reg. Not relevant with sllg below.
10921     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10922     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10923                                        // Depends on CC set by ahi above.
10924                                        // Taken very infrequently, good prediction, no BHT entry.
10925                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10926                                        // after SLLG Rdst == 0(64bit)).
10927     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10928     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10929     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10930     __ bind(done);
10931   %}
10932   ins_pipe(pipe_class_dummy);
10933 %}
10934 
10935 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10936   match(Set dst (CountTrailingZerosL src)); // Number of trailing zero bits of a long; a zero src yields 64.
10937   effect(TEMP_DEF dst, KILL tmp, KILL cr);  // tmp is never referenced in the encoding; it is listed because FLOGR clobbers it.
10938   ins_cost(8 * DEFAULT_COST);
10939   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10940   format %{ "LCGR    $dst,$src  \t# preserve src\n\t"
10941             "NGR     $dst,$src  \t#\n\t"
10942             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10943             "FLOGR   $dst,$dst  \t# count trailing zeros (long), kill $tmp\n\t"
10944             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10945             "LCR     $dst,$dst  \t#"
10946          %}
10947   ins_encode %{
10948     Register Rdst = $dst$$Register;
10949     Register Rsrc = $src$$Register;
10950     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10951 
10952     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10953     __ z_lcgr(Rdst, Rsrc);  // Rdst = -Rsrc (src itself stays untouched).
10954     __ z_ngr(Rdst, Rsrc);   // Rdst = Rsrc & -Rsrc: only the rightmost set bit survives.
10955     __ add2reg(Rdst,   -1); // Single bit -> ones in exactly the trailing-zero positions.
10956     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10957     __ add2reg(Rdst,  -64); // FLOGR gave 64-(trailing zeros); now -(trailing zeros).
10958     __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10959   %}
10960   ins_pipe(pipe_class_dummy);
10961 %}
10962 
10963 
10964 // bit count
10965 
10966 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
10967   match(Set dst (PopCountI src)); // Population count of an int.
10968   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10969   predicate(UsePopCountInstruction && VM_Version::has_PopCount()); // Only selectable when the CPU offers POPCNT.
10970   ins_cost(DEFAULT_COST);
10971   size(24);
10972   format %{ "POPCNT  $dst,$src\t# pop count int" %}
10973   ins_encode %{
10974     Register Rdst = $dst$$Register;
10975     Register Rsrc = $src$$Register;
10976     Register Rtmp = $tmp$$Register;
10977 
10978     // Prefer compile-time assertion over run-time SIGILL.
10979     assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10980     assert_different_registers(Rdst, Rtmp); // Rtmp receives shifted copies of Rdst below.
10981 
10982     // Version 2: shows 10%(z196) improvement over original.
10983     __ z_popcnt(Rdst, Rsrc);   // Per-byte counts; the steps below sum bytes 4..7.
10984     __ z_srlg(Rtmp, Rdst, 16); // calc  byte4+byte6 and byte5+byte7
10985     __ z_alr(Rdst, Rtmp);      //   into byte6 and byte7
10986     __ z_srlg(Rtmp, Rdst,  8); // calc (byte4+byte6) + (byte5+byte7)
10987     __ z_alr(Rdst, Rtmp);      //   into byte7
10988     __ z_llgcr(Rdst, Rdst);    // zero-extend sum
10989   %}
10990   ins_pipe(pipe_class_dummy);
10991 %}
10992 
10993 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
10994   match(Set dst (PopCountL src)); // Population count of a long.
10995   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10996   predicate(UsePopCountInstruction && VM_Version::has_PopCount()); // Only selectable when the CPU offers POPCNT.
10997   ins_cost(DEFAULT_COST);
10998   // TODO: s390 port size(FIXED_SIZE);
10999   format %{ "POPCNT  $dst,$src\t# pop count long" %}
11000   ins_encode %{
11001     Register Rdst = $dst$$Register;
11002     Register Rsrc = $src$$Register;
11003     Register Rtmp = $tmp$$Register;
11004 
11005     // Prefer compile-time assertion over run-time SIGILL.
11006     assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
11007     assert_different_registers(Rdst, Rtmp); // Rtmp receives shifted copies of Rdst below.
11008 
11009     // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
11010     __ z_popcnt(Rdst, Rsrc);      // NOTE(review): presumably per-byte counts, as in popCountI - confirm.
11011     __ z_ahhlr(Rdst, Rdst, Rdst); // NOTE(review): presumably folds the low word's counts into the high word - confirm.
11012     __ z_sllg(Rtmp, Rdst, 16);    // Shift-and-add by 16, then by 8, accumulates
11013     __ z_algr(Rdst, Rtmp);        //   the partial sums towards the leftmost byte.
11014     __ z_sllg(Rtmp, Rdst,  8);
11015     __ z_algr(Rdst, Rtmp);
11016     __ z_srlg(Rdst, Rdst, 56);    // Keep only the leftmost byte as the result.
11017   %}
11018   ins_pipe(pipe_class_dummy);
11019 %}

   1 //
   2 // Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2017, 2019 SAP SE. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //

1371     // The ic_miss_stub will handle the null pointer exception.
1372     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1373     __ z_br(R1_ic_miss_stub_addr);
1374     __ bind(valid);
1375   }
1376 
1377   // Check whether this method is the proper implementation for the class of
1378   // the receiver (ic miss check).
1379   {
1380     Label valid;
1381     // Compare cached class against klass from receiver.
1382     // This also does an implicit null check!
1383     __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1384     __ z_bre(valid);
1385     // The inline cache points to the wrong method. Call the
1386     // ic_miss_stub to find the proper method.
1387     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1388     __ z_br(R1_ic_miss_stub_addr);
1389     __ bind(valid);
1390   }

1391 }
1392 
1393 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1394   // Determine size dynamically: no fixed byte count is reported, the call is forwarded to MachNode::size().
1395   return MachNode::size(ra_);
1396 }
1397 
1398 //=============================================================================
1399 
1400 %} // interrupt source section
1401 
1402 source_hpp %{ // Header information of the source block.
1403 
1404 class HandlerImpl {
1405  public:
1406 
1407   static int emit_exception_handler(CodeBuffer &cbuf);
1408   static int emit_deopt_handler(CodeBuffer& cbuf);
1409 
1410   static uint size_exception_handler() {

4705 // See cOop encoding classes for elaborate comment.
4706 
4707 // Moved here because it is needed in expand rules for encode.
4708 // Long negation: matches (SubL zero src) and is encoded as a single z_lcgr.
4709 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
4710   match(Set dst (SubL zero src));
4711   effect(KILL cr); // Condition code is declared clobbered.
4712   size(4);
4713   format %{ "NEG     $dst, $src\t # long" %}
4714   ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4715   ins_pipe(pipe_class_dummy);
4716 %}
4717 
4718 // Load Compressed Pointer
4719 
4720 // Load narrow oop: matches (LoadN mem) into a narrow-oop register.
4721 instruct loadN(iRegN dst, memory mem) %{
4722   match(Set dst (LoadN mem));
4723   ins_cost(MEMORY_REF_COST);
4724   size(Z_DISP3_SIZE);
4725   format %{ "LoadN   $dst,$mem\t # (cOop)" %}
4726   opcode(LLGF_ZOPC, LLGF_ZOPC); // The same opcode is supplied for both opcode slots.
4727   ins_encode(z_form_rt_mem_opt(dst, mem));
4728   ins_pipe(pipe_class_dummy);
4729 %}
4730 
4731 // Load narrow Klass Pointer: matches (LoadNKlass mem); same opcode and encoding class as loadN.
4732 instruct loadNKlass(iRegN dst, memory mem) %{
4733   match(Set dst (LoadNKlass mem));
4734   ins_cost(MEMORY_REF_COST);
4735   size(Z_DISP3_SIZE);
4736   format %{ "LoadNKlass $dst,$mem\t # (klass cOop)" %}
4737   opcode(LLGF_ZOPC, LLGF_ZOPC); // The same opcode is supplied for both opcode slots.
4738   ins_encode(z_form_rt_mem_opt(dst, mem));
4739   ins_pipe(pipe_class_dummy);
4740 %}
4741 
4742 // Load constant Compressed Pointer
4743 
4744 instruct loadConN(iRegN dst, immN src) %{
4745   match(Set dst src); // Materialize a narrow-oop constant in a register.
4746   ins_cost(DEFAULT_COST);
4747   size(6);
4748   format %{ "loadConN    $dst,$src\t # (cOop)" %}
4749   ins_encode %{
4750     AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant); // Wraps the constant together with its relocation spec.
4751     __ relocate(cOop.rspec(), 1); // NOTE(review): meaning of the second argument (1) is not visible here - confirm.
4752     __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4753   %}
4754   ins_pipe(pipe_class_dummy);
4755 %}
4756

4769   match(Set dst src);
4770   ins_cost(DEFAULT_COST);
4771   size(6);
4772   format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4773   ins_encode %{
4774     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4775     __ relocate(NKlass.rspec(), 1);
4776     __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4777   %}
4778   ins_pipe(pipe_class_dummy);
4779 %}
4780 
4781 // Load and Decode Compressed Pointer
4782 // optimized variants for Unscaled cOops
4783 
4784 instruct decodeLoadN(iRegP dst, memory mem) %{
4785   match(Set dst (DecodeN (LoadN mem))); // Fused load + decode of a narrow oop.
4786   predicate(false && (CompressedOops::base()==NULL)&&(CompressedOops::shift()==0)); // The leading 'false &&' disables this rule unconditionally.
4787   ins_cost(MEMORY_REF_COST);
4788   size(Z_DISP3_SIZE);
4789   format %{ "DecodeLoadN  $dst,$mem\t # (cOop Load+Decode)" %}
4790   opcode(LLGF_ZOPC, LLGF_ZOPC);
4791   ins_encode(z_form_rt_mem_opt(dst, mem));
4792   ins_pipe(pipe_class_dummy);
4793 %}
4794 
4795 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
4796   match(Set dst (DecodeNKlass (LoadNKlass mem))); // Fused load + decode of a narrow klass pointer.
4797   predicate(false && (CompressedKlassPointers::base()==NULL)&&(CompressedKlassPointers::shift()==0)); // The leading 'false &&' disables this rule unconditionally.
4798   ins_cost(MEMORY_REF_COST);
4799   size(Z_DISP3_SIZE);
4800   format %{ "DecodeLoadNKlass  $dst,$mem\t # (load/decode NKlass)" %}
4801   opcode(LLGF_ZOPC, LLGF_ZOPC);
4802   ins_encode(z_form_rt_mem_opt(dst, mem));
4803   ins_pipe(pipe_class_dummy);
4804 %}
4805 
4806 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
4807   match(Set dst (DecodeNKlass src)); // Decode of a constant narrow klass: the Klass* value is loaded directly.
4808   ins_cost(3 * DEFAULT_COST);
4809   size(12);
4810   format %{ "DecodeLoadConNKlass  $dst,$src\t # decode(cKlass)" %}
4811   ins_encode %{
4812     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant); // Wraps the constant together with its relocation spec.
4813     __ relocate(NKlass.rspec(), 1); // NOTE(review): meaning of the second argument (1) is not visible here - confirm.
4814     __ load_const($dst$$Register, (Klass*)NKlass.value());
4815   %}
4816   ins_pipe(pipe_class_dummy);
4817 %}
4818 
4819 // Decode Compressed Pointer
4820 
4821 // General decoder, selected when the heap base is NULL or ExpandLoadingBaseDecode is off.
4822 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
4823   match(Set dst (DecodeN src));
4824   effect(KILL cr);
4825   predicate(CompressedOops::base() == NULL || !ExpandLoadingBaseDecode); // Exact negation of the decodeN_Ex predicate.
4826   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4827   // TODO: s390 port size(VARIABLE_SIZE);
4828   format %{ "decodeN  $dst,$src\t # (decode cOop)" %}
4829   ins_encode %{  __ oop_decoder($dst$$Register, $src$$Register, true); %}
4830   ins_pipe(pipe_class_dummy);
4831 %}
4832 
4833 // General Klass decoder: matches (DecodeNKlass src), delegates to decode_klass_not_null.
4834 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
4835   match(Set dst (DecodeNKlass src));
4836   effect(KILL cr); // Condition code is declared clobbered.
4837   ins_cost(3 * DEFAULT_COST);
4838   format %{ "decode_klass $dst,$src" %}
4839   ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4840   ins_pipe(pipe_class_dummy);
4841 %}
4842 
4843 // General decoder for oops whose pointer type is NotNull or Constant (see predicate).
4844 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
4845   match(Set dst (DecodeN src));
4846   effect(KILL cr);
4847   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4848              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4849             (CompressedOops::base()== NULL || !ExpandLoadingBaseDecode_NN));
4850   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST); // Cheaper than decodeN: no BRANCH_COST term.
4851   // TODO: s390 port size(VARIABLE_SIZE);
4852   format %{ "decodeN  $dst,$src\t # (decode cOop NN)" %}
4853   ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4854   ins_pipe(pipe_class_dummy);
4855 %}
4856 
4857   instruct loadBase(iRegL dst, immL baseImm) %{
4858     effect(DEF dst, USE baseImm); // No match rule: this node is only created by expand rules (e.g. decodeN_Ex).
4859     predicate(false);
4860     format %{ "llihl    $dst=$baseImm \t// load heap base" %}
4861     ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4862     ins_pipe(pipe_class_dummy);
4863   %}
4864 
4865   // Decoder for heapbased mode peeling off loading the base (base arrives in a register).
4866   instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4867     match(Set dst (DecodeN src base));
4868     // Note: Effect TEMP dst was used with the intention to get
4869     // different regs for dst and base, but this has caused ADLC to
4870     // generate wrong code. Oop_decoder generates additional lgr when
4871     // dst==base.
4872     effect(KILL cr);
4873     predicate(false); // Never matched directly; instantiated from the decodeN_Ex expand rule.
4874     // TODO: s390 port size(VARIABLE_SIZE);
4875     format %{ "decodeN  $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4876     ins_encode %{
4877       __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4878                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base())); // pow2_offset is derived from the heap base.
4879     %}
4880     ins_pipe(pipe_class_dummy);
4881   %}
4882 
4883   // Decoder for heapbased mode peeling off loading the base; variant without the null case (see format).
4884   instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4885     match(Set dst (DecodeN src base));
4886     effect(KILL cr);
4887     predicate(false); // Never matched directly.
4888     // TODO: s390 port size(VARIABLE_SIZE);
4889     format %{ "decodeN  $dst = ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4890     ins_encode %{
4891       __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4892                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base())); // pow2_offset is derived from the heap base.
4893     %}
4894     ins_pipe(pipe_class_dummy);
4895   %}
4896 
4897 // Decoder for heapbased mode peeling off loading the base: expands into loadBase + decodeN_base.
4898 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4899   match(Set dst (DecodeN src));
4900   predicate(CompressedOops::base() != NULL && ExpandLoadingBaseDecode); // Exact negation of the decodeN predicate.
4901   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4902   // TODO: s390 port size(VARIABLE_SIZE);
4903   expand %{
4904     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4905     iRegL base;
4906     loadBase(base, baseImm); // Separate node that puts the heap base into 'base'.
4907     decodeN_base(dst, src, base, cr);
4908   %}
4909 %}

4919   expand %{
4920     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4921     iRegL base;
4922     loadBase(base, baseImm);
4923     decodeN_NN_base(dst, src, base, cr);
4924   %}
4925 %}
4926 
4927 //  Encode Compressed Pointer
4928 
4929 // General encoder for oops whose pointer type is not NotNull (see predicate).
4930 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
4931   match(Set dst (EncodeP src));
4932   effect(KILL cr);
4933   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4934             (CompressedOops::base() == 0 ||
4935              CompressedOops::base_disjoint() ||
4936              !ExpandLoadingBaseEncode));
4937   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4938   // TODO: s390 port size(VARIABLE_SIZE);
4939   format %{ "encodeP  $dst,$src\t # (encode cOop)" %}
4940   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4941   ins_pipe(pipe_class_dummy); // NOTE(review): the encoding passes Z_R1_scratch to oop_encoder but no effect declares it - presumably scratch registers are not allocatable; confirm.
4942 %}
4943 
4944 // General class encoder: matches (EncodePKlass src), delegates to encode_klass_not_null.
4945 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
4946   match(Set dst (EncodePKlass src));
4947   effect(KILL cr); // Condition code is declared clobbered.
4948   format %{ "encode_klass $dst,$src" %}
4949   ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4950   ins_pipe(pipe_class_dummy);
4951 %}
4952 
4953 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
4954   match(Set dst (EncodeP src)); // Encoder for oops whose pointer type is NotNull (see predicate).
4955   effect(KILL cr);
4956   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4957             (CompressedOops::base() == 0 ||
4958              CompressedOops::base_disjoint() ||
4959              !ExpandLoadingBaseEncode_NN));
4960   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4961   // TODO: s390 port size(VARIABLE_SIZE);
4962   format %{ "encodeP  $dst,$src\t # (encode cOop)" %}
4963   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4964   ins_pipe(pipe_class_dummy); // Differs from encodeP only in the predicate and the 'false' third oop_encoder argument.
4965 %}
4966 
4967   // Encoder for heapbased mode peeling off loading the base (base arrives in a register).
4968   instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
4969     match(Set dst (EncodeP src (Binary base dst)));
4970     effect(TEMP_DEF dst);
4971     predicate(false); // Never matched directly; NOTE(review): presumably used by an expand rule not fully visible here - confirm.
4972     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4973     // TODO: s390 port size(VARIABLE_SIZE);
4974     format %{ "encodeP  $dst = ($src>>3) +$base + pow2_offset\t # (encode cOop)" %}
4975     ins_encode %{
4976       jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4977         (((uint64_t)(intptr_t)CompressedOops::base()) >> CompressedOops::shift()); // Offset is computed from the already-shifted base and then negated.
4978       __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4979     %}
4980     ins_pipe(pipe_class_dummy);
4981   %}
4982 
4983   // Encoder for heapbased mode peeling off loading the base; pow2_offset is supplied as an immediate operand.
4984   instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
4985     match(Set dst (EncodeP src base));
4986     effect(USE pow2_offset); // pow2_offset is not part of the match tree, so it is declared as an extra input.
4987     predicate(false); // Never matched directly; expand rules pass base and pow2_offset explicitly.
4988     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4989     // TODO: s390 port size(VARIABLE_SIZE);
4990     format %{ "encodeP  $dst = ($src>>3) +$base + $pow2_offset\t # (encode cOop)" %}
4991     ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4992     ins_pipe(pipe_class_dummy);
4993   %}
4994 
4995 // Encoder for heapbased mode peeling off loading the base.
4996 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4997   match(Set dst (EncodeP src));
4998   effect(KILL cr);
4999   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
5000             (CompressedOops::base_overlaps() && ExpandLoadingBaseEncode));
5001   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
5002   // TODO: s390 port size(VARIABLE_SIZE);
5003   expand %{
5004     immL baseImm %{ ((jlong)(intptr_t)CompressedOops::base()) >> CompressedOops::shift() %}
5005     immL_0 zero %{ (0) %}
5006     flagsReg ccr;
5007     iRegL base;
5008     iRegL negBase;
5009     loadBase(base, baseImm);
5010     negL_reg_reg(negBase, zero, base, ccr);

5023   expand %{
5024     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
5025     immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)CompressedOops::base())) %}
5026     immL_0 zero %{ 0 %}
5027     flagsReg ccr;
5028     iRegL base;
5029     iRegL negBase;
5030     loadBase(base, baseImm);
5031     negL_reg_reg(negBase, zero, base, ccr);
5032     encodeP_NN_base(dst, src, negBase, pow2_offset);
5033   %}
5034 %}
5035 
5036 //  Store Compressed Pointer
5037 
5038 // Store Compressed Pointer: matches (StoreN mem src).
5039 instruct storeN(memory mem, iRegN_P2N src) %{
5040   match(Set mem (StoreN mem src));
5041   ins_cost(MEMORY_REF_COST);
5042   size(Z_DISP_SIZE);
5043   format %{ "ST      $src,$mem\t # (cOop)" %}
5044   opcode(STY_ZOPC, ST_ZOPC); // NOTE(review): presumably z_form_rt_mem_opt picks one of the two by displacement range - confirm.
5045   ins_encode(z_form_rt_mem_opt(src, mem));
5046   ins_pipe(pipe_class_dummy);
5047 %}
5048 
5049 // Store Compressed Klass pointer: matches (StoreNKlass mem src); same opcodes and encoding class as storeN.
5050 instruct storeNKlass(memory mem, iRegN src) %{
5051   match(Set mem (StoreNKlass mem src));
5052   ins_cost(MEMORY_REF_COST);
5053   size(Z_DISP_SIZE);
5054   format %{ "ST      $src,$mem\t # (cKlass)" %}
5055   opcode(STY_ZOPC, ST_ZOPC); // NOTE(review): presumably z_form_rt_mem_opt picks one of the two by displacement range - confirm.
5056   ins_encode(z_form_rt_mem_opt(src, mem));
5057   ins_pipe(pipe_class_dummy);
5058 %}
5059 
5060 // Compare Compressed Pointers
5061 
5062 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
5063   match(Set cr (CmpN src1 src2)); // Register-register compare of two narrow oops; the only result is cr.
5064   ins_cost(DEFAULT_COST);
5065   size(2);
5066   format %{ "CLR     $src1,$src2\t # (cOop)" %}
5067   opcode(CLR_ZOPC);
5068   ins_encode(z_rrform(src1, src2));
5069   ins_pipe(pipe_class_dummy);
5070 %}
5071 
5072 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
5073   match(Set cr (CmpN src1 src2)); // Compare a narrow oop register against a narrow-oop constant.
5074   ins_cost(DEFAULT_COST);
5075   size(6);
5076   format %{ "CLFI    $src1,$src2\t # (cOop) compare immediate narrow" %}
5077   ins_encode %{
5078     AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant); // Wraps the constant together with its relocation spec.
5079     __ relocate(cOop.rspec(), 1); // NOTE(review): meaning of the second argument (1) is not visible here - confirm.
5080     __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5081   %}
5082   ins_pipe(pipe_class_dummy);
5083 %}
5084 
5085 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
5086   match(Set cr (CmpN src1 src2)); // Compare a narrow klass register against a narrow-klass constant.
5087   ins_cost(DEFAULT_COST);
5088   size(6);
5089   format %{ "CLFI    $src1,$src2\t # (NKlass) compare immediate narrow" %}
5090   ins_encode %{
5091     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant); // Wraps the constant together with its relocation spec.
5092     __ relocate(NKlass.rspec(), 1); // NOTE(review): meaning of the second argument (1) is not visible here - confirm.
5093     __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5094   %}
5095   ins_pipe(pipe_class_dummy);
5096 %}
5097 
5098 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
5099   match(Set cr (CmpN src1 src2)); // Compare against the zero narrow-oop constant.
5100   ins_cost(DEFAULT_COST);
5101   size(2);
5102   format %{ "LTR     $src1,$src2\t # (cOop) LTR because comparing against zero" %}
5103   opcode(LTR_ZOPC); // NOTE(review): the format prints $src2, but the encoding below passes src1 for both operands.
5104   ins_encode(z_rrform(src1, src1));
5105   ins_pipe(pipe_class_dummy);
5106 %}
5107 
5108 
5109 //----------MemBar Instructions-----------------------------------------------
5110 
5111 // Memory barrier flavors
5112 
5113 instruct membar_acquire() %{
5114   match(MemBarAcquire); // One rule serves both ideal nodes: MemBarAcquire and LoadFence.
5115   match(LoadFence);
5116   ins_cost(4*MEMORY_REF_COST);
5117   size(0); // Declares that z_acquire() contributes no code bytes.
5118   format %{ "MEMBAR-acquire" %}
5119   ins_encode %{ __ z_acquire(); %}
5120   ins_pipe(pipe_class_dummy);
5121 %}
5122

6777       __ z_lghi(Z_R0_scratch, divisor);
6778       __ z_lgr($dst$$Register->successor(), $src1$$Register);
6779       __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch);  // Instruction kills tmp.
6780     } else {
6781       __ clear_reg($dst$$Register, true, false);
6782     }
6783   %}
6784   ins_pipe(pipe_class_dummy);
6785 %}
6786 
6787 // SHIFT
6788 
6789 // Shift left logical
6790 
6791 // Register Shift Left variable: the shift count comes from a register and is masked to 0..31.
6792 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
6793   match(Set dst (LShiftI src nbits));
6794   effect(KILL cr); // R1 is killed, too.
6795   ins_cost(3 * DEFAULT_COST);
6796   size(14);
6797   format %{ "SLL     $dst,$src,[$nbits] & 31\t # use RISC-like SLLG also for int" %}
6798   ins_encode %{
6799     __ z_lgr(Z_R1_scratch, $nbits$$Register);       // Work on a copy so that nbits itself is not modified.
6800     __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);  // Apply the int shift-count mask to the copy.
6801     __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch); // Shift amount = 0 + masked count in R1.
6802   %}
6803   ins_pipe(pipe_class_dummy);
6804 %}
6805 
6806 // Register Shift Left Immediate
6807 // Constant shift count is masked in ideal graph already (asserted below).
6808 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
6809   match(Set dst (LShiftI src nbits));
6810   size(6);
6811   format %{ "SLL     $dst,$src,$nbits\t # use RISC-like SLLG also for int" %}
6812   ins_encode %{
6813     int Nbit = $nbits$$constant;
6814     assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6815     __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0); // Mask is repeated here, so builds without asserts still emit an in-range count.
6816   %}
6817   ins_pipe(pipe_class_dummy);
6818 %}
6819 
6820 // Register Shift Left Immediate by 1bit: computed as src + src with LA when PreferLAoverADD is set.
6821 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
6822   match(Set dst (LShiftI src nbits));
6823   predicate(PreferLAoverADD);
6824   ins_cost(DEFAULT_COST_LOW);
6825   size(4);
6826   format %{ "LA      $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6827   ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6828   ins_pipe(pipe_class_dummy);
6829 %}
6830 
6831 // Register Shift Left Long

7107   %}
7108   ins_pipe(pipe_class_dummy);
7109 %}
7110 
7111 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
7112   match(Set cr (OverflowSubL op1 op2)); // Only the condition code is produced; op1 is left unchanged.
7113   effect(DEF cr, USE op1, USE op2);
7114   // TODO: s390 port size(VARIABLE_SIZE);
7115   format %{ "SGR     $op1,$op2\t # overflow check long" %}
7116   ins_encode %{
7117     __ load_const_optimized(Z_R1_scratch, $op2$$constant); // Immediate goes into scratch R1.
7118     __ z_lgr(Z_R0_scratch, $op1$$Register);                // Subtract on a copy in scratch R0.
7119     __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7120   %}
7121   ins_pipe(pipe_class_dummy);
7122 %}
7123 
7124 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
7125   match(Set cr (OverflowSubI zero op2)); // Overflow check for int negation, expressed as 0 - op2.
7126   effect(DEF cr, USE op2);
7127   format %{ "NEG    $op2\t # overflow check int" %}
7128   ins_encode %{
7129     __ clear_reg(Z_R0_scratch, false, false); // Scratch R0 plays the role of the zero operand.
7130     __ z_sr(Z_R0_scratch, $op2$$Register);    // Result is discarded in R0; op2 is left unchanged.
7131   %}
7132   ins_pipe(pipe_class_dummy);
7133 %}
7134 
7135 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
7136   match(Set cr (OverflowSubL zero op2)); // Overflow check for long negation, expressed as 0 - op2.
7137   effect(DEF cr, USE op2);
7138   format %{ "NEGG    $op2\t # overflow check long" %}
7139   ins_encode %{
7140     __ clear_reg(Z_R0_scratch, true, false); // Scratch R0 plays the role of the zero operand.
7141     __ z_sgr(Z_R0_scratch, $op2$$Register);  // Result is discarded in R0; op2 is left unchanged.
7142   %}
7143   ins_pipe(pipe_class_dummy);
7144 %}
7145 
7146 // No intrinsics for multiplication, since there is no easy way
7147 // to check for overflow.
7148 
7149 
7150 //----------Floating Point Arithmetic Instructions-----------------------------
7151 
7152 //  ADD
7153 
7154 //  Add float single precision
7155 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7156   match(Set dst (AddF dst src));
7157   effect(KILL cr);
7158   ins_cost(ALU_REG_COST);

9173 // Direct Branch: unconditional Goto, emitted in the long (6-byte) form.
9174 instruct branchFar(label labl) %{
9175   match(Goto);
9176   effect(USE labl);
9177   ins_cost(BRANCH_COST);
9178   size(6);
9179   format %{ "BRUL   $labl" %}
9180   ins_encode(z_enc_brul(labl));
9181   ins_pipe(pipe_class_dummy);
9182   // This is not a short variant of a branch, but the long variant.
9183   ins_short_branch(0);
9184 %}
9185 
9186 // Conditional Near Branch: 4-byte short form of branchConFar.
9187 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
9188   // Same match rule as `branchConFar'.
9189   match(If cmp cr);
9190   effect(USE lbl);
9191   ins_cost(BRANCH_COST);
9192   size(4);
9193   format %{ "branch_con_short,$cmp   $lbl" %}
9194   ins_encode(z_enc_branch_con_short(cmp, lbl));
9195   ins_pipe(pipe_class_dummy);
9196   // If set to 1 this indicates that the current instruction is a
9197   // short variant of a long branch. This avoids using this
9198   // instruction in first-pass matching. It will then only be used in
9199   // the `Shorten_branches' pass.
9200   ins_short_branch(1);
9201 %}
9202 
9203 // This is for cases when the z/Architecture conditional branch instruction
9204 // does not reach far enough. So we emit a far branch here, which is
9205 // more expensive.
9206 //
9207 // Conditional Far Branch
9208 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
9209   // Same match rule as `branchCon'.
9210   match(If cmp cr);
9211   effect(USE cr, USE lbl);
9212   // Make more expensive to prefer compare_and_branch over separate instructions.
9213   ins_cost(2 * BRANCH_COST);
9214   size(6); // Two bytes more than the short form branchCon.
9215   format %{ "branch_con_far,$cmp   $lbl" %}
9216   ins_encode(z_enc_branch_con_far(cmp, lbl));
9217   ins_pipe(pipe_class_dummy);
9218   // This is not a short variant of a branch, but the long variant.
9219   ins_short_branch(0);
9220 %}
9221 
9222 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
9223   match(CountedLoopEnd cmp cr); // Back-branch of a counted loop; same short encoding as branchCon.
9224   effect(USE labl);
9225   ins_cost(BRANCH_COST);
9226   size(4);
9227   format %{ "branch_con_short,$cmp   $labl\t # counted loop end" %}
9228   ins_encode(z_enc_branch_con_short(cmp, labl));
9229   ins_pipe(pipe_class_dummy);
9230   // If set to 1 this indicates that the current instruction is a
9231   // short variant of a long branch. This avoids using this
9232   // instruction in first-pass matching. It will then only be used in
9233   // the `Shorten_branches' pass.
9234   ins_short_branch(1);
9235 %}

9764 instruct CallLeafNoFPDirect(method meth) %{
9765   match(CallLeafNoFP); // Runtime leaf call (no-FP flavor); shares the java-to-runtime call encoding.
9766   effect(USE meth);
9767   ins_cost(CALL_COST);
9768   // TODO: s390 port size(VARIABLE_SIZE);
9769   ins_num_consts(1); // NOTE(review): presumably accounts for the call target in the constant table - confirm.
9770   format %{ "CALL,runtime leaf nofp $meth" %}
9771   ins_encode( z_enc_java_to_runtime_call(meth) );
9772   ins_pipe(pipe_class_dummy);
9773   ins_alignment(2);
9774 %}
9775 
9776 // Tail Call; Jump from runtime stub to Java code.
9777 // Also known as an 'interprocedural jump'.
9778 // Target of jump will eventually return to caller.
9779 // TailJump below removes the return address.
9780 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
9781   match(TailCall jump_target method_oop); // method_oop is only an input operand; the encoding does not reference it.
9782   ins_cost(CALL_COST);
9783   size(2);
9784   format %{ "Jmp     $jump_target\t # $method_oop holds method oop" %}
9785   ins_encode %{ __ z_br($jump_target$$Register); %}
9786   ins_pipe(pipe_class_dummy);
9787 %}
9788 
9789 // Return Instruction: a register branch to Z_R14 (the link register, per the format string).
9790 instruct Ret() %{
9791   match(Return);
9792   size(2);
9793   format %{ "BR(Z_R14) // branch to link register" %}
9794   ins_encode %{ __ z_br(Z_R14); %}
9795   ins_pipe(pipe_class_dummy);
9796 %}
9797 
9798 // Tail Jump; remove the return address; jump to target.
9799 // TailCall above leaves the return address around.
9800 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9801 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9802 // "restore" before this instruction (in Epilogue), we need to materialize it
9803 // in %i0.
9804 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{

10772 instruct loadV8(iRegL dst, memory mem) %{
10773   match(Set dst (LoadVector mem)); // 8-byte vector load into a long register (see predicate).
10774   predicate(n->as_LoadVector()->memory_size() == 8);
10775   ins_cost(MEMORY_REF_COST);
10776   // TODO: s390 port size(VARIABLE_SIZE);
10777   format %{ "LG      $dst,$mem\t # L(packed8B)" %}
10778   opcode(LG_ZOPC, LG_ZOPC); // The same opcode is supplied for both opcode slots.
10779   ins_encode(z_form_rt_mem_opt(dst, mem));
10780   ins_pipe(pipe_class_dummy);
10781 %}
10782 
10783 //----------POPULATION COUNT RULES--------------------------------------------
10784 
10785 // Byte reverse
10786 
10787 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
10788   match(Set dst (ReverseBytesI src)); // Byte-swap of an int, register to register.
10789   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10790   ins_cost(DEFAULT_COST);
10791   size(4);
10792   format %{ "LRVR    $dst,$src\t # byte reverse int" %}
10793   opcode(LRVR_ZOPC);
10794   ins_encode(z_rreform(dst, src));
10795   ins_pipe(pipe_class_dummy);
10796 %}
10797 
10798 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
        // Byte-order reversal of a long (ideal node ReverseBytesL), encoded as one
        // register-to-register LRVGR. The rule is only available while
        // UseByteReverseInstruction is set.
10799   match(Set dst (ReverseBytesL src));
10800   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10801   ins_cost(DEFAULT_COST);
        // NOTE(review): no size() is declared here although the int variant, which
        // uses the same z_rreform encoding class, declares size(4) - confirm
        // whether the TODO below can be resolved the same way.
10802   // TODO: s390 port size(FIXED_SIZE);
10803   format %{ "LRVGR   $dst,$src\t # byte reverse long" %}
10804   opcode(LRVGR_ZOPC);
10805   ins_encode(z_rreform(dst, src));
10806   ins_pipe(pipe_class_dummy);
10807 %}
10808 
10809 // Leading zeroes
10810 
10811 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10812 // returns the bit position of the leftmost 1 in the 64bit source register.
10813 // As the bits are numbered from left to right (0..63), the returned
10814 // position index is equivalent to the number of leading zeroes.
10815 // If no 1-bit is found (i.e. the register contains zero), the instruction
10816 // returns position 64. That's exactly what we need.
10817 
10818 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10819   match(Set dst (CountLeadingZerosI src));
10820   effect(KILL tmp, KILL cr);
10821   ins_cost(3 * DEFAULT_COST);
10822   size(14);
10823   format %{ "SLLG    $dst,$src,32\t # no need to always count 32 zeroes first\n\t"
10824             "IILH    $dst,0x8000 \t # insert \"stop bit\" to force result 32 for zero src.\n\t"
10825             "FLOGR   $dst,$dst"
10826          %}
10827   ins_encode %{
10828     // FLOGR operates on 64 bits, but only an int has to be examined here.
10829     // Performance experiments indicate that FLOGR locates the leftmost
10830     // "1" bit by some kind of iteration.
10831     //
10832     // A former implementation zero-extended the int argument to 64 bit and
10833     // thereby made FLOGR count 32 bits that are known to be zero. Shifting
10834     // the argument into the high word instead gave a measurable speedup in
10835     // a micro benchmark:
10836     //
10837     //               leading   trailing
10838     //   z10:   int     2.04       1.68
10839     //         long     1.00       1.02
10840     //   z196:  int     0.99       1.23
10841     //         long     1.00       1.11
10842     //
10843     // The variant emitted below needs no branch for a zero argument: the
10844     // "stop bit" inserted right below the shifted int makes FLOGR return 32.
10845     //
10846     // The locals merely give the two operand registers readable names.
10847
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;

10848     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10849     __ z_sllg(Rdst, Rsrc, 32);  // Move the int into the high word, so 32 known zeroes are never counted.
10850     __ z_iilh(Rdst, 0x8000);    // Stop bit: forces a result of 32 when src is zero.
10851     __ z_flogr(Rdst, Rdst);     // Position of the leftmost one == number of leading zeroes.
10852   %}
10853   ins_pipe(pipe_class_dummy);
10854 %}
10855 
10856 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
        // Leading-zero count of a long: a single FLOGR on the 64-bit source.
        // dst is taken from the even register class and tmp from the odd one;
        // tmp and the condition code are declared as killed.
10857   match(Set dst (CountLeadingZerosL src));
10858   effect(KILL tmp, KILL cr);
10859   ins_cost(DEFAULT_COST);
10860   size(4);
        // One-line format: no trailing "\n\t" continuation, which would only add
        // an empty line to the printed assembly.
10861   format %{ "FLOGR   $dst,$src \t # count leading zeros (long)" %}
10862   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10863   ins_pipe(pipe_class_dummy);
10864 %}
10865 
10866 // trailing zeroes
10867 
10868 // We transform the trailing zeroes problem to a leading zeroes problem
10869 // such that we can use the FLOGR instruction to our advantage.
10870 
10871 // With
10872 //   tmp1 = src - 1
10873 // we flip all trailing zeroes to ones and the rightmost one to zero.
10874 // All other bits remain unchanged.
10875 // With the complement
10876 //   tmp2 = ~src
10877 // we get all ones in the trailing zeroes positions. Thus,
10878 //   tmp3 = tmp1 & tmp2
10879 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10880 // Now we can apply FLOGR and get 64-(trailing zeroes).
10881 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10882   match(Set dst (CountTrailingZerosI src));
10883   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10884   ins_cost(8 * DEFAULT_COST);
10885   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10886   format %{ "LLGFR   $dst,$src  \t # clear upper 32 bits (we are dealing with int)\n\t"
10887             "LCGFR   $tmp,$src  \t # load 2's complement (32->64 bit)\n\t"
10888             "AGHI    $dst,-1    \t # tmp1 = src-1\n\t"
10889             "AGHI    $tmp,-1    \t # tmp2 = -src-1 = ~src\n\t"
10890             "NGR     $dst,$tmp  \t # tmp3 = tmp1&tmp2\n\t"
10891             "FLOGR   $dst,$dst  \t # count trailing zeros (int)\n\t"
10892             "AHI     $dst,-64   \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10893             "LCR     $dst,$dst  \t # res = -tmp4"
10894          %}
10895   ins_encode %{
10896     Register Rdst = $dst$$Register;
10897     Register Rsrc = $src$$Register;
10898     // Rtmp only needed for the zero-argument shortcut. With kill effect in
10899     // match rule Rsrc = roddReg would be possible, saving one register.
10900     Register Rtmp = $tmp$$Register;
10901 
10902     assert_different_registers(Rdst, Rsrc, Rtmp);
10903 
10904     // Algorithm:
10905     // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10906     //   All other bits in the result are zero.
10907     // - Find the "leftmost one" bit position in the single-bit result from previous step.
10908     // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10909 
10910     // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10911     Label done;
10912     __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10913     __ z_lcgfr(Rtmp, Rsrc);

10919                                        // into upper half of reg. Not relevant with sllg below.
10920     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10921     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10922                                        // Depends on CC set by ahi above.
10923                                        // Taken very infrequently, good prediction, no BHT entry.
10924                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10925                                        // after SLLG Rdst == 0(64bit)).
10926     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10927     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10928     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10929     __ bind(done);
10930   %}
10931   ins_pipe(pipe_class_dummy);
10932 %}
10933 
10934 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
        // Trailing-zero count of a long, reduced to a leading-zero count:
        //   1. src & -src        isolates the rightmost one bit,
        //   2. subtracting 1     turns that into ones in exactly the trailing
        //                        zero positions (all ones for src == 0),
        //   3. FLOGR             then returns 64-(trailing zeroes),
        //   4. subtract 64 and negate to obtain the count itself.
        // dst must differ from src because dst is written before src is read
        // for the last time.
10935   match(Set dst (CountTrailingZerosL src));
10936   effect(TEMP_DEF dst, KILL tmp, KILL cr);
10937   ins_cost(8 * DEFAULT_COST);
10938   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
        // The format text follows the encoding below; the two immediate adds are
        // emitted via add2reg, so their actual mnemonic may differ (see TODO above).
10939   format %{ "LCGR    $dst,$src  \t # dst = -src\n\t"
10940             "NGR     $dst,$src  \t # dst = src & -src (isolate rightmost one)\n\t"
10941             "AGHI    $dst,-1    \t # tmp1 = (src & -src)-1\n\t"
10942             "FLOGR   $dst,$dst  \t # count trailing zeros (long), kill $tmp\n\t"
10943             "AHI     $dst,-64   \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10944             "LCGFR   $dst,$dst  \t # res = -tmp4"
10945          %}
10946   ins_encode %{
10947     Register Rdst = $dst$$Register;
10948     Register Rsrc = $src$$Register;
10949     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10950
10951     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10952     __ z_lcgr(Rdst, Rsrc);  // Rdst = -Rsrc
10953     __ z_ngr(Rdst, Rsrc);   // Rdst = Rsrc & -Rsrc: only the rightmost one bit survives.
10954     __ add2reg(Rdst,   -1); // Ones in the former trailing zero positions, zeroes elsewhere.
10955     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10956     __ add2reg(Rdst,  -64); // Rdst = -(trailing zeroes)
10957     __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10958   %}
10959   ins_pipe(pipe_class_dummy);
10960 %}
10961 
10962 
10963 // bit count
10964 
10965 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
        // Bit count of an int. The rule is only available when
        // UsePopCountInstruction is set and VM_Version reports POPCNT support.
        // The shift/add sequence after POPCNT folds the partial counts of the
        // low word (bytes 4..7) into byte 7, which is then zero-extended.
10966   match(Set dst (PopCountI src));
10967   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10968   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10969   ins_cost(DEFAULT_COST);
10970   size(24);
10971   format %{ "POPCNT  $dst,$src\t # pop count int" %}
10972   ins_encode %{
10973     Register Rdst = $dst$$Register;
10974     Register Rsrc = $src$$Register;
10975     Register Rtmp = $tmp$$Register;
10976
10977     // Prefer compile-time assertion over run-time SIGILL.
10978     assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10979     assert_different_registers(Rdst, Rtmp);
10980
10981     // Version 2: shows 10%(z196) improvement over original.
10982     __ z_popcnt(Rdst, Rsrc);   // presumably one partial count per byte - confirm against the POPCNT definition
10983     __ z_srlg(Rtmp, Rdst, 16); // calc  byte4+byte6 and byte5+byte7
10984     __ z_alr(Rdst, Rtmp);      //   into byte6 and byte7
10985     __ z_srlg(Rtmp, Rdst,  8); // calc (byte4+byte6) + (byte5+byte7)
10986     __ z_alr(Rdst, Rtmp);      //   into byte7
10987     __ z_llgcr(Rdst, Rdst);    // zero-extend sum
10988   %}
10989   ins_pipe(pipe_class_dummy);
10990 %}
10991 
10992 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
        // Bit count of a long. The rule is only available when
        // UsePopCountInstruction is set and VM_Version reports POPCNT support.
        // After POPCNT the partial counts are accumulated by shift-left/add steps
        // and the total is finally moved down from the leftmost byte.
10993   match(Set dst (PopCountL src));
10994   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10995   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10996   ins_cost(DEFAULT_COST);
10997   // TODO: s390 port size(FIXED_SIZE);
10998   format %{ "POPCNT  $dst,$src\t # pop count long" %}
10999   ins_encode %{
11000     Register Rdst = $dst$$Register;
11001     Register Rsrc = $src$$Register;
11002     Register Rtmp = $tmp$$Register;
11003
11004     // Prefer compile-time assertion over run-time SIGILL.
11005     assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
11006     assert_different_registers(Rdst, Rtmp);
11007
11008     // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
11009     __ z_popcnt(Rdst, Rsrc);      // presumably one partial count per byte - confirm against the POPCNT definition
11010     __ z_ahhlr(Rdst, Rdst, Rdst); // presumably adds the low word's counts onto the high word - confirm
11011     __ z_sllg(Rtmp, Rdst, 16);
11012     __ z_algr(Rdst, Rtmp);        // fold counts that are two bytes apart
11013     __ z_sllg(Rtmp, Rdst,  8);
11014     __ z_algr(Rdst, Rtmp);        // fold neighbouring bytes; total ends up in the leftmost byte
11015     __ z_srlg(Rdst, Rdst, 56);    // move the leftmost byte down to bits 56..63
11016   %}
11017   ins_pipe(pipe_class_dummy);
11018 %}

< prev index next >