1 //
2 // Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3 // Copyright (c) 2017, SAP SE. All rights reserved.
4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 //
6 // This code is free software; you can redistribute it and/or modify it
7 // under the terms of the GNU General Public License version 2 only, as
8 // published by the Free Software Foundation.
9 //
10 // This code is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // version 2 for more details (a copy is included in the LICENSE file that
14 // accompanied this code).
15 //
16 // You should have received a copy of the GNU General Public License version
17 // 2 along with this work; if not, write to the Free Software Foundation,
18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 //
20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 // or visit www.oracle.com if you need additional information or have any
22 // questions.
23 //
1371 // The ic_miss_stub will handle the null pointer exception.
1372 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1373 __ z_br(R1_ic_miss_stub_addr);
1374 __ bind(valid);
1375 }
1376
1377 // Check whether this method is the proper implementation for the class of
1378 // the receiver (ic miss check).
1379 {
1380 Label valid;
1381 // Compare cached class against klass from receiver.
1382 // This also does an implicit null check!
1383 __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1384 __ z_bre(valid);
1385 // The inline cache points to the wrong method. Call the
1386 // ic_miss_stub to find the proper method.
1387 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1388 __ z_br(R1_ic_miss_stub_addr);
1389 __ bind(valid);
1390 }
1391
1392 }
1393
// Reports the code size of this MachUEPNode. No fixed size is declared;
// the request is simply forwarded to MachNode::size().
1394 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1395 // Determine size dynamically.
1396 return MachNode::size(ra_);
1397 }
1398
1399 //=============================================================================
1400
1401 %} // interrupt source section
1402
1403 source_hpp %{ // Header information of the source block.
1404
1405 class HandlerImpl {
1406 public:
1407
1408 static int emit_exception_handler(CodeBuffer &cbuf);
1409 static int emit_deopt_handler(CodeBuffer& cbuf);
1410
1411 static uint size_exception_handler() {
4706 // See cOop encoding classes for elaborate comment.
4707
4708 // Moved here because it is needed in expand rules for encode.
4709 // Long negation: matches (SubL zero src) where zero is the immL_0 operand
// and emits a single LCGR (declared size 4 bytes). The condition code
// register is declared killed.
4710 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
4711 match(Set dst (SubL zero src));
4712 effect(KILL cr);
4713 size(4);
4714 format %{ "NEG $dst, $src\t # long" %}
4715 ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4716 ins_pipe(pipe_class_dummy);
4717 %}
4718
4719 // Load Compressed Pointer
4720
4721 // Load narrow oop: matches (LoadN mem) and loads the compressed pointer
// into a narrow-oop register. Encoded with the LLGF opcode through the
// z_form_rt_mem_opt encoding class; declared size is Z_DISP3_SIZE.
4722 instruct loadN(iRegN dst, memory mem) %{
4723 match(Set dst (LoadN mem));
4724 ins_cost(MEMORY_REF_COST);
4725 size(Z_DISP3_SIZE);
4726 format %{ "LoadN $dst,$mem\t# (cOop)" %}
4727 opcode(LLGF_ZOPC, LLGF_ZOPC);
4728 ins_encode(z_form_rt_mem_opt(dst, mem));
4729 ins_pipe(pipe_class_dummy);
4730 %}
4731
4732 // Load narrow Klass Pointer: matches (LoadNKlass mem). Uses the same
// opcode (LLGF) and encoding class as loadN; only the matched ideal node
// and the format text differ.
4733 instruct loadNKlass(iRegN dst, memory mem) %{
4734 match(Set dst (LoadNKlass mem));
4735 ins_cost(MEMORY_REF_COST);
4736 size(Z_DISP3_SIZE);
4737 format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %}
4738 opcode(LLGF_ZOPC, LLGF_ZOPC);
4739 ins_encode(z_form_rt_mem_opt(dst, mem));
4740 ins_pipe(pipe_class_dummy);
4741 %}
4742
4743 // Load constant Compressed Pointer
4744
// Materialize a narrow oop constant in a register.
// The constant's oop address literal is looked up, its relocation spec is
// registered, and then load_narrow_oop() emits the load of the compressed
// value. Declared size is 6 bytes.
4745 instruct loadConN(iRegN dst, immN src) %{
4746 match(Set dst src);
4747 ins_cost(DEFAULT_COST);
4748 size(6);
4749 format %{ "loadConN $dst,$src\t # (cOop)" %}
4750 ins_encode %{
4751 AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
4752 __ relocate(cOop.rspec(), 1);
4753 __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4754 %}
4755 ins_pipe(pipe_class_dummy);
4756 %}
4757
4770 match(Set dst src);
4771 ins_cost(DEFAULT_COST);
4772 size(6);
4773 format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4774 ins_encode %{
4775 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4776 __ relocate(NKlass.rspec(), 1);
4777 __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4778 %}
4779 ins_pipe(pipe_class_dummy);
4780 %}
4781
4782 // Load and Decode Compressed Pointer
4783 // optimized variants for Unscaled cOops
4784
// Fused load + decode of a narrow oop for the unscaled case
// (CompressedOops::base() == NULL and shift == 0), where a plain LLGF load
// already yields the decoded pointer.
// The predicate begins with `false &&`, so this rule is currently disabled
// and never selected by the matcher.
4785 instruct decodeLoadN(iRegP dst, memory mem) %{
4786 match(Set dst (DecodeN (LoadN mem)));
4787 predicate(false && (CompressedOops::base()==NULL)&&(CompressedOops::shift()==0));
4788 ins_cost(MEMORY_REF_COST);
4789 size(Z_DISP3_SIZE);
4790 format %{ "DecodeLoadN $dst,$mem\t# (cOop Load+Decode)" %}
4791 opcode(LLGF_ZOPC, LLGF_ZOPC);
4792 ins_encode(z_form_rt_mem_opt(dst, mem));
4793 ins_pipe(pipe_class_dummy);
4794 %}
4795
// Fused load + decode of a narrow klass pointer for the unscaled case
// (CompressedKlassPointers::base() == NULL and shift == 0).
// The predicate begins with `false &&`, so this rule is currently disabled
// and never selected by the matcher.
4796 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
4797 match(Set dst (DecodeNKlass (LoadNKlass mem)));
4798 predicate(false && (CompressedKlassPointers::base()==NULL)&&(CompressedKlassPointers::shift()==0));
4799 ins_cost(MEMORY_REF_COST);
4800 size(Z_DISP3_SIZE);
4801 format %{ "DecodeLoadNKlass $dst,$mem\t# (load/decode NKlass)" %}
4802 opcode(LLGF_ZOPC, LLGF_ZOPC);
4803 ins_encode(z_form_rt_mem_opt(dst, mem));
4804 ins_pipe(pipe_class_dummy);
4805 %}
4806
// Decode of a narrow klass constant: instead of decoding at run time, the
// full Klass* is loaded directly with load_const() after the metadata
// relocation has been registered. Declared size is 12 bytes.
4807 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
4808 match(Set dst (DecodeNKlass src));
4809 ins_cost(3 * DEFAULT_COST);
4810 size(12);
4811 format %{ "DecodeLoadConNKlass $dst,$src\t # decode(cKlass)" %}
4812 ins_encode %{
4813 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4814 __ relocate(NKlass.rspec(), 1);
4815 __ load_const($dst$$Register, (Klass*)NKlass.value());
4816 %}
4817 ins_pipe(pipe_class_dummy);
4818 %}
4819
4820 // Decode Compressed Pointer
4821
4822 // General decoder for narrow oops.
// Selected when the heap base is NULL or ExpandLoadingBaseDecode is off
// (otherwise decodeN_Ex applies). Kills the condition code register.
// NOTE(review): the third oop_decoder() argument (true here, false in
// decodeN_NN) presumably requests NULL handling - confirm in MacroAssembler.
4823 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
4824 match(Set dst (DecodeN src));
4825 effect(KILL cr);
4826 predicate(CompressedOops::base() == NULL || !ExpandLoadingBaseDecode);
4827 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4828 // TODO: s390 port size(VARIABLE_SIZE);
4829 format %{ "decodeN $dst,$src\t# (decode cOop)" %}
4830 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, true); %}
4831 ins_pipe(pipe_class_dummy);
4832 %}
4833
4834 // General Klass decoder: matches (DecodeNKlass src) without a predicate
// and delegates to decode_klass_not_null(). Kills the condition code
// register. No size is declared.
4835 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
4836 match(Set dst (DecodeNKlass src));
4837 effect(KILL cr);
4838 ins_cost(3 * DEFAULT_COST);
4839 format %{ "decode_klass $dst,$src" %}
4840 ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4841 ins_pipe(pipe_class_dummy);
4842 %}
4843
4844 // General decoder, not-null (NN) variant.
// Selected only when the node's type is known to be NotNull or Constant,
// and the heap base is NULL or ExpandLoadingBaseDecode_NN is off.
// Cheaper than decodeN (no BRANCH_COST in ins_cost); passes false as the
// third oop_decoder() argument.
4845 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
4846 match(Set dst (DecodeN src));
4847 effect(KILL cr);
4848 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4849 n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4850 (CompressedOops::base()== NULL || !ExpandLoadingBaseDecode_NN));
4851 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4852 // TODO: s390 port size(VARIABLE_SIZE);
4853 format %{ "decodeN $dst,$src\t# (decode cOop NN)" %}
4854 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4855 ins_pipe(pipe_class_dummy);
4856 %}
4857
// Helper rule that loads the heap base constant into a long register via
// get_oop_base(). It has no match rule and predicate(false), so the matcher
// never selects it on its own; it is instantiated from expand blocks
// (e.g. decodeN_Ex).
4858 instruct loadBase(iRegL dst, immL baseImm) %{
4859 effect(DEF dst, USE baseImm);
4860 predicate(false);
4861 format %{ "llihl $dst=$baseImm \t// load heap base" %}
4862 ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4863 ins_pipe(pipe_class_dummy);
4864 %}
4865
4866 // Decoder for heapbased mode peeling off loading the base.
// The heap base arrives in a register ($base) instead of being loaded by
// the decoder itself. predicate(false) keeps the matcher from selecting
// this rule directly; it is instantiated from the expand block of decodeN_Ex.
// As the format string shows, this variant handles a NULL source.
4867 instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4868 match(Set dst (DecodeN src base));
4869 // Note: Effect TEMP dst was used with the intention to get
4870 // different regs for dst and base, but this has caused ADLC to
4871 // generate wrong code. Oop_decoder generates additional lgr when
4872 // dst==base.
4873 effect(KILL cr);
4874 predicate(false);
4875 // TODO: s390 port size(VARIABLE_SIZE);
4876 format %{ "decodeN $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4877 ins_encode %{
4878 __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4879 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4880 %}
4881 ins_pipe(pipe_class_dummy);
4882 %}
4883
4884 // Decoder for heapbased mode peeling off loading the base.
// Not-null (NN) counterpart of decodeN_base: same operands, but false is
// passed as the third oop_decoder() argument and the format string shows
// no NULL case. predicate(false) keeps the matcher from selecting this
// rule directly.
4885 instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4886 match(Set dst (DecodeN src base));
4887 effect(KILL cr);
4888 predicate(false);
4889 // TODO: s390 port size(VARIABLE_SIZE);
4890 format %{ "decodeN $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4891 ins_encode %{
4892 __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4893 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4894 %}
4895 ins_pipe(pipe_class_dummy);
4896 %}
4897
4898 // Decoder for heapbased mode peeling off loading the base.
// Selected when the heap base is non-NULL and ExpandLoadingBaseDecode is on.
// Has no encoding of its own: it expands into loadBase (materializes the
// heap base in a fresh long register) followed by decodeN_base.
4899 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4900 match(Set dst (DecodeN src));
4901 predicate(CompressedOops::base() != NULL && ExpandLoadingBaseDecode);
4902 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4903 // TODO: s390 port size(VARIABLE_SIZE);
4904 expand %{
4905 immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4906 iRegL base;
4907 loadBase(base, baseImm);
4908 decodeN_base(dst, src, base, cr);
4909 %}
4910 %}
4920 expand %{
4921 immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4922 iRegL base;
4923 loadBase(base, baseImm);
4924 decodeN_NN_base(dst, src, base, cr);
4925 %}
4926 %}
4927
4928 // Encode Compressed Pointer
4929
4930 // General encoder for oops that may be NULL.
// Selected when the node's type is not NotNull and either the heap base is
// zero, the base is disjoint, or ExpandLoadingBaseEncode is off.
// Uses Z_R1_scratch as the base register argument of oop_encoder() and
// passes the result of all_outs_are_Stores(this) as its last argument.
// NOTE(review): meaning of the -1 offset argument is not visible here -
// confirm in MacroAssembler::oop_encoder.
4931 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
4932 match(Set dst (EncodeP src));
4933 effect(KILL cr);
4934 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4935 (CompressedOops::base() == 0 ||
4936 CompressedOops::base_disjoint() ||
4937 !ExpandLoadingBaseEncode));
4938 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4939 // TODO: s390 port size(VARIABLE_SIZE);
4940 format %{ "encodeP $dst,$src\t# (encode cOop)" %}
4941 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4942 ins_pipe(pipe_class_dummy);
4943 %}
4944
4945 // General class encoder: matches (EncodePKlass src) without a predicate
// and delegates to encode_klass_not_null(). Kills the condition code
// register. Neither cost nor size is declared.
4946 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
4947 match(Set dst (EncodePKlass src));
4948 effect(KILL cr);
4949 format %{ "encode_klass $dst,$src" %}
4950 ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4951 ins_pipe(pipe_class_dummy);
4952 %}
4953
// Encoder for oops known to be not null (NN).
// Selected when the node's type is NotNull and either the heap base is
// zero, the base is disjoint, or ExpandLoadingBaseEncode_NN is off.
// Identical to encodeP except that false is passed as the third
// oop_encoder() argument.
4954 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
4955 match(Set dst (EncodeP src));
4956 effect(KILL cr);
4957 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4958 (CompressedOops::base() == 0 ||
4959 CompressedOops::base_disjoint() ||
4960 !ExpandLoadingBaseEncode_NN));
4961 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4962 // TODO: s390 port size(VARIABLE_SIZE);
4963 format %{ "encodeP $dst,$src\t# (encode cOop)" %}
4964 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4965 ins_pipe(pipe_class_dummy);
4966 %}
4967
4968 // Encoder for heapbased mode peeling off loading the base.
// The base is supplied in a register; dst is declared TEMP_DEF.
// predicate(false) keeps the matcher from selecting this rule directly.
// The offset handed to oop_encoder() is the negated pow2 offset of the
// heap base shifted right by CompressedOops::shift().
4969 instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
4970 match(Set dst (EncodeP src (Binary base dst)));
4971 effect(TEMP_DEF dst);
4972 predicate(false);
4973 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4974 // TODO: s390 port size(VARIABLE_SIZE);
4975 format %{ "encodeP $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %}
4976 ins_encode %{
4977 jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4978 (((uint64_t)(intptr_t)CompressedOops::base()) >> CompressedOops::shift());
4979 __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4980 %}
4981 ins_pipe(pipe_class_dummy);
4982 %}
4983
4984 // Encoder for heapbased mode peeling off loading the base.
// Not-null (NN) variant: the base register and the pow2 offset are both
// supplied as operands (the offset as an immL constant).
// predicate(false) keeps the matcher from selecting this rule directly.
4985 instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
4986 match(Set dst (EncodeP src base));
4987 effect(USE pow2_offset);
4988 predicate(false);
4989 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4990 // TODO: s390 port size(VARIABLE_SIZE);
4991 format %{ "encodeP $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %}
4992 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4993 ins_pipe(pipe_class_dummy);
4994 %}
4995
4996 // Encoder for heapbased mode peeling off loading the base.
4997 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4998 match(Set dst (EncodeP src));
4999 effect(KILL cr);
5000 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
5001 (CompressedOops::base_overlaps() && ExpandLoadingBaseEncode));
5002 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
5003 // TODO: s390 port size(VARIABLE_SIZE);
5004 expand %{
5005 immL baseImm %{ ((jlong)(intptr_t)CompressedOops::base()) >> CompressedOops::shift() %}
5006 immL_0 zero %{ (0) %}
5007 flagsReg ccr;
5008 iRegL base;
5009 iRegL negBase;
5010 loadBase(base, baseImm);
5011 negL_reg_reg(negBase, zero, base, ccr);
5024 expand %{
5025 immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
5026 immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)CompressedOops::base())) %}
5027 immL_0 zero %{ 0 %}
5028 flagsReg ccr;
5029 iRegL base;
5030 iRegL negBase;
5031 loadBase(base, baseImm);
5032 negL_reg_reg(negBase, zero, base, ccr);
5033 encodeP_NN_base(dst, src, negBase, pow2_offset);
5034 %}
5035 %}
5036
5037 // Store Compressed Pointer
5038
5039 // Store Compressed Pointer: matches (StoreN mem src).
// Two opcodes (STY, ST) are handed to the z_form_rt_mem_opt encoding
// class; declared size is Z_DISP_SIZE.
// NOTE(review): presumably the encoding class picks the shorter ST form
// when the displacement allows - confirm in z_form_rt_mem_opt.
5040 instruct storeN(memory mem, iRegN_P2N src) %{
5041 match(Set mem (StoreN mem src));
5042 ins_cost(MEMORY_REF_COST);
5043 size(Z_DISP_SIZE);
5044 format %{ "ST $src,$mem\t# (cOop)" %}
5045 opcode(STY_ZOPC, ST_ZOPC);
5046 ins_encode(z_form_rt_mem_opt(src, mem));
5047 ins_pipe(pipe_class_dummy);
5048 %}
5049
5050 // Store Compressed Klass pointer: matches (StoreNKlass mem src).
// Same opcodes (STY, ST) and encoding class as storeN; the source operand
// class is iRegN rather than iRegN_P2N.
5051 instruct storeNKlass(memory mem, iRegN src) %{
5052 match(Set mem (StoreNKlass mem src));
5053 ins_cost(MEMORY_REF_COST);
5054 size(Z_DISP_SIZE);
5055 format %{ "ST $src,$mem\t# (cKlass)" %}
5056 opcode(STY_ZOPC, ST_ZOPC);
5057 ins_encode(z_form_rt_mem_opt(src, mem));
5058 ins_pipe(pipe_class_dummy);
5059 %}
5060
5061 // Compare Compressed Pointers
5062
// Compare two narrow oops held in registers: matches (CmpN src1 src2) and
// emits a CLR in RR form (declared size 2 bytes). The result is the
// condition code register cr.
5063 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
5064 match(Set cr (CmpN src1 src2));
5065 ins_cost(DEFAULT_COST);
5066 size(2);
5067 format %{ "CLR $src1,$src2\t# (cOop)" %}
5068 opcode(CLR_ZOPC);
5069 ins_encode(z_rrform(src1, src2));
5070 ins_pipe(pipe_class_dummy);
5071 %}
5072
// Compare a narrow oop register against a narrow oop constant.
// The constant's relocation spec is registered before
// compare_immediate_narrow_oop() emits the compare. Declared size is 6 bytes.
5073 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
5074 match(Set cr (CmpN src1 src2));
5075 ins_cost(DEFAULT_COST);
5076 size(6);
5077 format %{ "CLFI $src1,$src2\t# (cOop) compare immediate narrow" %}
5078 ins_encode %{
5079 AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
5080 __ relocate(cOop.rspec(), 1);
5081 __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5082 %}
5083 ins_pipe(pipe_class_dummy);
5084 %}
5085
// Compare a narrow klass register against a narrow klass constant.
// The constant's metadata relocation spec is registered before
// compare_immediate_narrow_klass() emits the compare. Declared size is
// 6 bytes.
5086 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
5087 match(Set cr (CmpN src1 src2));
5088 ins_cost(DEFAULT_COST);
5089 size(6);
5090 format %{ "CLFI $src1,$src2\t# (NKlass) compare immediate narrow" %}
5091 ins_encode %{
5092 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
5093 __ relocate(NKlass.rspec(), 1);
5094 __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5095 %}
5096 ins_pipe(pipe_class_dummy);
5097 %}
5098
// Compare a narrow oop against the narrow NULL constant (immN0).
// No immediate is encoded: the register is tested against itself with LTR
// (both RR operands are src1), declared size 2 bytes.
// The format string names $src1 twice so that the listing shows the
// instruction that is actually emitted.
5099 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
5100 match(Set cr (CmpN src1 src2));
5101 ins_cost(DEFAULT_COST);
5102 size(2);
5103 format %{ "LTR $src1,$src1\t# (cOop) LTR because comparing against zero" %}
5104 opcode(LTR_ZOPC);
5105 ins_encode(z_rrform(src1, src1));
5106 ins_pipe(pipe_class_dummy);
5107 %}
5108
5109
5110 //----------MemBar Instructions-----------------------------------------------
5111
5112 // Memory barrier flavors
5113
// Acquire barrier: matches both MemBarAcquire and LoadFence and calls
// z_acquire().
// NOTE(review): size(0) declares that no code bytes are emitted, so
// z_acquire() is presumably a no-op on this platform - confirm.
5114 instruct membar_acquire() %{
5115 match(MemBarAcquire);
5116 match(LoadFence);
5117 ins_cost(4*MEMORY_REF_COST);
5118 size(0);
5119 format %{ "MEMBAR-acquire" %}
5120 ins_encode %{ __ z_acquire(); %}
5121 ins_pipe(pipe_class_dummy);
5122 %}
5123
6778 __ z_lghi(Z_R0_scratch, divisor);
6779 __ z_lgr($dst$$Register->successor(), $src1$$Register);
6780 __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
6781 } else {
6782 __ clear_reg($dst$$Register, true, false);
6783 }
6784 %}
6785 ins_pipe(pipe_class_dummy);
6786 %}
6787
6788 // SHIFT
6789
6790 // Shift left logical
6791
6792 // Register Shift Left variable (int).
// The shift count is copied to Z_R1_scratch and masked with
// BitsPerJavaInteger-1 before the 64-bit SLLG is applied, so only the low
// bits of the count take effect. Z_R1_scratch and the condition code are
// clobbered. Three instructions, declared size 14 bytes.
6793 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
6794 match(Set dst (LShiftI src nbits));
6795 effect(KILL cr); // R1 is killed, too.
6796 ins_cost(3 * DEFAULT_COST);
6797 size(14);
6798 format %{ "SLL $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
6799 ins_encode %{
6800 __ z_lgr(Z_R1_scratch, $nbits$$Register);
6801 __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
6802 __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
6803 %}
6804 ins_pipe(pipe_class_dummy);
6805 %}
6806
6807 // Register Shift Left Immediate (int).
6808 // Constant shift count is masked in ideal graph already.
// The assert checks that assumption; the count is masked again when it is
// passed to SLLG. Z_R0 is given as the base register operand of SLLG.
// Declared size is 6 bytes.
6809 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
6810 match(Set dst (LShiftI src nbits));
6811 size(6);
6812 format %{ "SLL $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
6813 ins_encode %{
6814 int Nbit = $nbits$$constant;
6815 assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6816 __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6817 %}
6818 ins_pipe(pipe_class_dummy);
6819 %}
6820
6821 // Register Shift Left Immediate by 1bit (int).
// Only available when PreferLAoverADD is set. The shift is expressed as
// LA dst,0(src,src), i.e. an address computation with src as both index
// and base. Declared size is 4 bytes; cost is DEFAULT_COST_LOW.
6822 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
6823 match(Set dst (LShiftI src nbits));
6824 predicate(PreferLAoverADD);
6825 ins_cost(DEFAULT_COST_LOW);
6826 size(4);
6827 format %{ "LA $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6828 ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6829 ins_pipe(pipe_class_dummy);
6830 %}
6831
6832 // Register Shift Left Long
7108 %}
7109 ins_pipe(pipe_class_dummy);
7110 %}
7111
// Overflow check for long subtraction with a constant subtrahend.
// Only the condition code is produced: the constant is loaded into
// Z_R1_scratch, op1 is copied to Z_R0_scratch, and the SGR result in
// Z_R0_scratch is not used further. op1 itself is left unmodified.
7112 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
7113 match(Set cr (OverflowSubL op1 op2));
7114 effect(DEF cr, USE op1, USE op2);
7115 // TODO: s390 port size(VARIABLE_SIZE);
7116 format %{ "SGR $op1,$op2\t # overflow check long" %}
7117 ins_encode %{
7118 __ load_const_optimized(Z_R1_scratch, $op2$$constant);
7119 __ z_lgr(Z_R0_scratch, $op1$$Register);
7120 __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7121 %}
7122 ins_pipe(pipe_class_dummy);
7123 %}
7124
// Overflow check for int negation, matched as (OverflowSubI zero op2).
// Z_R0_scratch is cleared and op2 is subtracted from it with SR; only the
// resulting condition code is of interest. op2 is left unmodified.
7125 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
7126 match(Set cr (OverflowSubI zero op2));
7127 effect(DEF cr, USE op2);
7128 format %{ "NEG $op2\t# overflow check int" %}
7129 ins_encode %{
7130 __ clear_reg(Z_R0_scratch, false, false);
7131 __ z_sr(Z_R0_scratch, $op2$$Register);
7132 %}
7133 ins_pipe(pipe_class_dummy);
7134 %}
7135
// Overflow check for long negation, matched as (OverflowSubL zero op2).
// Z_R0_scratch is cleared and op2 is subtracted from it with SGR; only the
// resulting condition code is of interest. op2 is left unmodified.
7136 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
7137 match(Set cr (OverflowSubL zero op2));
7138 effect(DEF cr, USE op2);
7139 format %{ "NEGG $op2\t# overflow check long" %}
7140 ins_encode %{
7141 __ clear_reg(Z_R0_scratch, true, false);
7142 __ z_sgr(Z_R0_scratch, $op2$$Register);
7143 %}
7144 ins_pipe(pipe_class_dummy);
7145 %}
7146
7147 // No intrinsics for multiplication, since there is no easy way
7148 // to check for overflow.
7149
7150
7151 //----------Floating Point Arithmetic Instructions-----------------------------
7152
7153 // ADD
7154
7155 // Add float single precision
7156 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7157 match(Set dst (AddF dst src));
7158 effect(KILL cr);
7159 ins_cost(ALU_REG_COST);
9174 // Direct Branch (far form): matches Goto and is encoded by z_enc_brul.
// Declared size is 6 bytes. Marked with ins_short_branch(0), i.e. this is
// the long variant, not a shortened replacement.
9175 instruct branchFar(label labl) %{
9176 match(Goto);
9177 effect(USE labl);
9178 ins_cost(BRANCH_COST);
9179 size(6);
9180 format %{ "BRUL $labl" %}
9181 ins_encode(z_enc_brul(labl));
9182 ins_pipe(pipe_class_dummy);
9183 // This is not a short variant of a branch, but the long variant.
9184 ins_short_branch(0);
9185 %}
9186
9187 // Conditional Near Branch: short (4-byte) form of branchConFar.
// Shares its match rule with branchConFar and is flagged with
// ins_short_branch(1); see the comment above that attribute for how the
// flag affects rule selection.
9188 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
9189 // Same match rule as `branchConFar'.
9190 match(If cmp cr);
9191 effect(USE lbl);
9192 ins_cost(BRANCH_COST);
9193 size(4);
9194 format %{ "branch_con_short,$cmp $cr, $lbl" %}
9195 ins_encode(z_enc_branch_con_short(cmp, lbl));
9196 ins_pipe(pipe_class_dummy);
9197 // If set to 1 this indicates that the current instruction is a
9198 // short variant of a long branch. This avoids using this
9199 // instruction in first-pass matching. It will then only be used in
9200 // the `Shorten_branches' pass.
9201 ins_short_branch(1);
9202 %}
9203
9204 // This is for cases when the z/Architecture conditional branch instruction
9205 // does not reach far enough. So we emit a far branch here, which is
9206 // more expensive.
9207 //
9208 // Conditional Far Branch (declared size 6 bytes, twice the cost of branchCon).
9209 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
9210 // Same match rule as `branchCon'.
9211 match(If cmp cr);
9212 effect(USE cr, USE lbl);
9213 // Make more expensive to prefer compare_and_branch over separate instructions.
9214 ins_cost(2 * BRANCH_COST);
9215 size(6);
9216 format %{ "branch_con_far,$cmp $cr, $lbl" %}
9217 ins_encode(z_enc_branch_con_far(cmp, lbl));
9218 ins_pipe(pipe_class_dummy);
9219 // This is not a short variant of a branch, but the long variant.
9220 ins_short_branch(0);
9221 %}
9222
// Conditional near branch closing a counted loop: matches
// (CountedLoopEnd cmp cr) and uses the same short encoding class as
// branchCon (declared size 4 bytes). Flagged as a short branch variant.
9223 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
9224 match(CountedLoopEnd cmp cr);
9225 effect(USE labl);
9226 ins_cost(BRANCH_COST);
9227 size(4);
9228 format %{ "branch_con_short,$cmp $labl\t # counted loop end" %}
9229 ins_encode(z_enc_branch_con_short(cmp, labl));
9230 ins_pipe(pipe_class_dummy);
9231 // If set to 1 this indicates that the current instruction is a
9232 // short variant of a long branch. This avoids using this
9233 // instruction in first-pass matching. It will then only be used in
9234 // the `Shorten_branches' pass.
9235 ins_short_branch(1);
9236 %}
// Leaf runtime call (CallLeafNoFP), encoded by the
// z_enc_java_to_runtime_call encoding class. Reserves one constant
// (ins_num_consts(1)) and requests an alignment of 2. No fixed size is
// declared (see TODO).
9765 instruct CallLeafNoFPDirect(method meth) %{
9766 match(CallLeafNoFP);
9767 effect(USE meth);
9768 ins_cost(CALL_COST);
9769 // TODO: s390 port size(VARIABLE_SIZE);
9770 ins_num_consts(1);
9771 format %{ "CALL,runtime leaf nofp $meth" %}
9772 ins_encode( z_enc_java_to_runtime_call(meth) );
9773 ins_pipe(pipe_class_dummy);
9774 ins_alignment(2);
9775 %}
9776
9777 // Tail Call; Jump from runtime stub to Java code.
9778 // Also known as an 'interprocedural jump'.
9779 // Target of jump will eventually return to caller.
9780 // TailJump below removes the return address.
// Emitted as a single BR to the jump target (declared size 2 bytes). The
// method_oop operand is not referenced by the encoding; it only constrains
// the value to the inline_cache_regP register class.
9781 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
9782 match(TailCall jump_target method_oop);
9783 ins_cost(CALL_COST);
9784 size(2);
9785 format %{ "Jmp $jump_target\t# $method_oop holds method oop" %}
9786 ins_encode %{ __ z_br($jump_target$$Register); %}
9787 ins_pipe(pipe_class_dummy);
9788 %}
9789
9790 // Return Instruction: matches Return and branches to the address held in
// Z_R14 (the link register, per the format string). Declared size is
// 2 bytes.
9791 instruct Ret() %{
9792 match(Return);
9793 size(2);
9794 format %{ "BR(Z_R14) // branch to link register" %}
9795 ins_encode %{ __ z_br(Z_R14); %}
9796 ins_pipe(pipe_class_dummy);
9797 %}
9798
9799 // Tail Jump; remove the return address; jump to target.
9800 // TailCall above leaves the return address around.
9801 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9802 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9803 // "restore" before this instruction (in Epilogue), we need to materialize it
9804 // in %i0.
9805 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{
// Load an 8-byte vector into a long register: matches LoadVector only when
// the vector's memory size is exactly 8 bytes. Encoded with the LG opcode
// through the z_form_rt_mem_opt encoding class.
10773 instruct loadV8(iRegL dst, memory mem) %{
10774 match(Set dst (LoadVector mem));
10775 predicate(n->as_LoadVector()->memory_size() == 8);
10776 ins_cost(MEMORY_REF_COST);
10777 // TODO: s390 port size(VARIABLE_SIZE);
10778 format %{ "LG $dst,$mem\t # L(packed8B)" %}
10779 opcode(LG_ZOPC, LG_ZOPC);
10780 ins_encode(z_form_rt_mem_opt(dst, mem));
10781 ins_pipe(pipe_class_dummy);
10782 %}
10783
10784 //----------POPULATION COUNT RULES--------------------------------------------
10785
10786 // Byte reverse
10787
// Byte reversal of an int: matches (ReverseBytesI src) when
// UseByteReverseInstruction is set and emits LRVR in RRE form (declared
// size 4 bytes).
10788 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
10789 match(Set dst (ReverseBytesI src));
10790 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10791 ins_cost(DEFAULT_COST);
10792 size(4);
10793 format %{ "LRVR $dst,$src\t# byte reverse int" %}
10794 opcode(LRVR_ZOPC);
10795 ins_encode(z_rreform(dst, src));
10796 ins_pipe(pipe_class_dummy);
10797 %}
10798
// Byte reversal of a long: matches (ReverseBytesL src) when
// UseByteReverseInstruction is set and emits LRVGR in RRE form. Unlike
// bytes_reverse_int, no size is declared yet (see TODO).
10799 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
10800 match(Set dst (ReverseBytesL src));
10801 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10802 ins_cost(DEFAULT_COST);
10803 // TODO: s390 port size(FIXED_SIZE);
10804 format %{ "LRVGR $dst,$src\t# byte reverse long" %}
10805 opcode(LRVGR_ZOPC);
10806 ins_encode(z_rreform(dst, src));
10807 ins_pipe(pipe_class_dummy);
10808 %}
10809
10810 // Leading zeroes
10811
10812 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10813 // returns the bit position of the leftmost 1 in the 64bit source register.
10814 // As the bits are numbered from left to right (0..63), the returned
10815 // position index is equivalent to the number of leading zeroes.
10816 // If no 1-bit is found (i.e. the register contains zero), the instruction
10817 // returns position 64. That's exactly what we need.
10818
// Count leading zeroes of an int.
// The 32-bit argument is shifted into the upper half of dst and a "stop
// bit" is inserted just below it, so that FLOGR returns 32 for a zero
// argument. dst must be the even register of a pair (revenRegI); the odd
// partner (tmp) and the condition code are declared killed.
// Three instructions, declared size 14 bytes.
10819 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10820 match(Set dst (CountLeadingZerosI src));
10821 effect(KILL tmp, KILL cr);
10822 ins_cost(3 * DEFAULT_COST);
10823 size(14);
10824 format %{ "SLLG $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
10825 "IILH $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
10826 "FLOGR $dst,$dst"
10827 %}
10828 ins_encode %{
10829 // Performance experiments indicate that "FLOGR" is using some kind of
10830 // iteration to find the leftmost "1" bit.
10831 //
10832 // The prior implementation zero-extended the 32-bit argument to 64 bit,
10833 // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10834 // We could gain measurable speedup in micro benchmark:
10835 //
10836 // leading trailing
10837 // z10: int 2.04 1.68
10838 // long 1.00 1.02
10839 // z196: int 0.99 1.23
10840 // long 1.00 1.11
10841 //
10842 // By shifting the argument into the high-word instead of zero-extending it.
10843 // The add'l branch on condition (taken for a zero argument, very infrequent,
10844 // good prediction) is well compensated for by the savings.
10845 //
10846 // We leave the previous implementation in for some time in the future when
10847 // the "FLOGR" instruction may become less iterative.
10848
10849 // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10850 __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10851 __ z_iilh($dst$$Register, 0x8000); // Insert "stop bit" to force result 32 for zero src.
10852 __ z_flogr($dst$$Register, $dst$$Register);
10853 %}
10854 ins_pipe(pipe_class_dummy);
10855 %}
10856
// Count leading zeroes of a long: a single FLOGR on the source (declared
// size 4 bytes). dst must be the even register of a pair (revenRegI); the
// odd partner (tmp) and the condition code are declared killed.
10857 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
10858 match(Set dst (CountLeadingZerosL src));
10859 effect(KILL tmp, KILL cr);
10860 ins_cost(DEFAULT_COST);
10861 size(4);
10862 format %{ "FLOGR $dst,$src \t# count leading zeros (long)\n\t" %}
10863 ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10864 ins_pipe(pipe_class_dummy);
10865 %}
10866
10867 // trailing zeroes
10868
10869 // We transform the trailing zeroes problem to a leading zeroes problem
10870 // such that can use the FLOGR instruction to our advantage.
10871
10872 // With
10873 // tmp1 = src - 1
10874 // we flip all trailing zeroes to ones and the rightmost one to zero.
10875 // All other bits remain unchanged.
10876 // With the complement
10877 // tmp2 = ~src
10878 // we get all ones in the trailing zeroes positions. Thus,
10879 // tmp3 = tmp1 & tmp2
10880 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10881 // Now we can apply FLOGR and get 64-(trailing zeroes).
10882 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10883 match(Set dst (CountTrailingZerosI src));
10884 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10885 ins_cost(8 * DEFAULT_COST);
10886 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
10887 format %{ "LLGFR $dst,$src \t# clear upper 32 bits (we are dealing with int)\n\t"
10888 "LCGFR $tmp,$src \t# load 2's complement (32->64 bit)\n\t"
10889 "AGHI $dst,-1 \t# tmp1 = src-1\n\t"
10890 "AGHI $tmp,-1 \t# tmp2 = -src-1 = ~src\n\t"
10891 "NGR $dst,$tmp \t# tmp3 = tmp1&tmp2\n\t"
10892 "FLOGR $dst,$dst \t# count trailing zeros (int)\n\t"
10893 "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10894 "LCR $dst,$dst \t# res = -tmp4"
10895 %}
10896 ins_encode %{
10897 Register Rdst = $dst$$Register;
10898 Register Rsrc = $src$$Register;
10899 // Rtmp only needed for zero-argument shortcut. With kill effect in
10900 // match rule Rsrc = roddReg would be possible, saving one register.
10901 Register Rtmp = $tmp$$Register;
10902
10903 assert_different_registers(Rdst, Rsrc, Rtmp);
10904
10905 // Algorithm:
10906 // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10907 // All other bits in the result are zero.
10908 // - Find the "leftmost one" bit position in the single-bit result from previous step.
10909 // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10910
10911 // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10912 Label done;
10913 __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10914 __ z_lcgfr(Rtmp, Rsrc);
10920 // into upper half of reg. Not relevant with sllg below.
10921 __ z_sllg(Rdst, Rtmp, 32); // Shift interesting contents to upper half of register.
10922 __ z_bre(done); // Shortcut for argument = 1, result will be 0.
10923 // Depends on CC set by ahi above.
10924 // Taken very infrequently, good prediction, no BHT entry.
10925 // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10926 // after SLLG Rdst == 0(64bit)).
10927 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10928 __ add2reg(Rdst, -32); // 32-pos(leftmost1) is #trailing zeros
10929 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10930 __ bind(done);
10931 %}
10932 ins_pipe(pipe_class_dummy);
10933 %}
10934
// Count trailing zeroes of a long.
// (-src & src) isolates the rightmost set bit; subtracting 1 turns that
// into a mask of ones covering exactly the trailing-zero positions. FLOGR on
// the mask yields 64-(trailing zeroes), which the final two instructions
// convert into the trailing-zero count.
// dst must be the even register of a pair; FLOGR overwrites the odd
// partner, hence KILL tmp. dst must differ from src, whereas tmp may
// coincide with src.
10935 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10936 match(Set dst (CountTrailingZerosL src));
10937 effect(TEMP_DEF dst, KILL tmp, KILL cr);
10938 ins_cost(8 * DEFAULT_COST);
10939 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
10940 format %{ "LCGR $dst,$src \t# preserve src\n\t"
10941 "NGR $dst,$src \t#\n\t"
10942 "AGHI $dst,-1 \t# tmp1 = src-1\n\t"
10943 "FLOGR $dst,$dst \t# count trailing zeros (long), kill $tmp\n\t"
10944 "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10945 "LCR $dst,$dst \t#"
10946 %}
10947 ins_encode %{
10948 Register Rdst = $dst$$Register;
10949 Register Rsrc = $src$$Register;
10950 assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10951
10952 // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10953 __ z_lcgr(Rdst, Rsrc);
10954 __ z_ngr(Rdst, Rsrc);
10955 __ add2reg(Rdst, -1);
10956 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10957 __ add2reg(Rdst, -64);
10958 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10959 %}
10960 ins_pipe(pipe_class_dummy);
10961 %}
10962
10963
10964 // bit count
10965
10966 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
10967 match(Set dst (PopCountI src));
10968 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10969 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10970 ins_cost(DEFAULT_COST);
10971 size(24);
10972 format %{ "POPCNT $dst,$src\t# pop count int" %}
10973 ins_encode %{
10974 Register Rdst = $dst$$Register;
10975 Register Rsrc = $src$$Register;
10976 Register Rtmp = $tmp$$Register;
10977
10978 // Prefer compile-time assertion over run-time SIGILL.
10979 assert(VM_Version::has_PopCount(), "bad predicate for countLeadingZerosI");
10980 assert_different_registers(Rdst, Rtmp);
10981
10982 // Version 2: shows 10%(z196) improvement over original.
10983 __ z_popcnt(Rdst, Rsrc);
10984 __ z_srlg(Rtmp, Rdst, 16); // calc byte4+byte6 and byte5+byte7
10985 __ z_alr(Rdst, Rtmp); // into byte6 and byte7
10986 __ z_srlg(Rtmp, Rdst, 8); // calc (byte4+byte6) + (byte5+byte7)
10987 __ z_alr(Rdst, Rtmp); // into byte7
10988 __ z_llgcr(Rdst, Rdst); // zero-extend sum
10989 %}
10990 ins_pipe(pipe_class_dummy);
10991 %}
10992
10993 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
10994 match(Set dst (PopCountL src));
10995 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10996 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10997 ins_cost(DEFAULT_COST);
10998 // TODO: s390 port size(FIXED_SIZE);
10999 format %{ "POPCNT $dst,$src\t# pop count long" %}
11000 ins_encode %{
11001 Register Rdst = $dst$$Register;
11002 Register Rsrc = $src$$Register;
11003 Register Rtmp = $tmp$$Register;
11004
11005 // Prefer compile-time assertion over run-time SIGILL.
11006 assert(VM_Version::has_PopCount(), "bad predicate for countLeadingZerosI");
11007 assert_different_registers(Rdst, Rtmp);
11008
11009 // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
11010 __ z_popcnt(Rdst, Rsrc);
11011 __ z_ahhlr(Rdst, Rdst, Rdst);
11012 __ z_sllg(Rtmp, Rdst, 16);
11013 __ z_algr(Rdst, Rtmp);
11014 __ z_sllg(Rtmp, Rdst, 8);
11015 __ z_algr(Rdst, Rtmp);
11016 __ z_srlg(Rdst, Rdst, 56);
11017 %}
11018 ins_pipe(pipe_class_dummy);
11019 %}
|
1 //
2 // Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
3 // Copyright (c) 2017, 2019 SAP SE. All rights reserved.
4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 //
6 // This code is free software; you can redistribute it and/or modify it
7 // under the terms of the GNU General Public License version 2 only, as
8 // published by the Free Software Foundation.
9 //
10 // This code is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // version 2 for more details (a copy is included in the LICENSE file that
14 // accompanied this code).
15 //
16 // You should have received a copy of the GNU General Public License version
17 // 2 along with this work; if not, write to the Free Software Foundation,
18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 //
20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 // or visit www.oracle.com if you need additional information or have any
22 // questions.
23 //
1371 // The ic_miss_stub will handle the null pointer exception.
1372 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1373 __ z_br(R1_ic_miss_stub_addr);
1374 __ bind(valid);
1375 }
1376
1377 // Check whether this method is the proper implementation for the class of
1378 // the receiver (ic miss check).
1379 {
1380 Label valid;
1381 // Compare cached class against klass from receiver.
1382 // This also does an implicit null check!
1383 __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1384 __ z_bre(valid);
1385 // The inline cache points to the wrong method. Call the
1386 // ic_miss_stub to find the proper method.
1387 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1388 __ z_br(R1_ic_miss_stub_addr);
1389 __ bind(valid);
1390 }
1391 }
1392
1393 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1394 // Determine size dynamically.
1395 return MachNode::size(ra_);
1396 }
1397
1398 //=============================================================================
1399
1400 %} // interrupt source section
1401
1402 source_hpp %{ // Header information of the source block.
1403
1404 class HandlerImpl {
1405 public:
1406
1407 static int emit_exception_handler(CodeBuffer &cbuf);
1408 static int emit_deopt_handler(CodeBuffer& cbuf);
1409
1410 static uint size_exception_handler() {
4705 // See cOop encoding classes for elaborate comment.
4706
4707 // Moved here because it is needed in expand rules for encode.
4708 // Long negation.
4709 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
4710 match(Set dst (SubL zero src));
4711 effect(KILL cr);
4712 size(4);
4713 format %{ "NEG $dst, $src\t # long" %}
4714 ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4715 ins_pipe(pipe_class_dummy);
4716 %}
4717
4718 // Load Compressed Pointer
4719
4720 // Load narrow oop
4721 instruct loadN(iRegN dst, memory mem) %{
4722 match(Set dst (LoadN mem));
4723 ins_cost(MEMORY_REF_COST);
4724 size(Z_DISP3_SIZE);
4725 format %{ "LoadN $dst,$mem\t # (cOop)" %}
4726 opcode(LLGF_ZOPC, LLGF_ZOPC);
4727 ins_encode(z_form_rt_mem_opt(dst, mem));
4728 ins_pipe(pipe_class_dummy);
4729 %}
4730
4731 // Load narrow Klass Pointer
4732 instruct loadNKlass(iRegN dst, memory mem) %{
4733 match(Set dst (LoadNKlass mem));
4734 ins_cost(MEMORY_REF_COST);
4735 size(Z_DISP3_SIZE);
4736 format %{ "LoadNKlass $dst,$mem\t # (klass cOop)" %}
4737 opcode(LLGF_ZOPC, LLGF_ZOPC);
4738 ins_encode(z_form_rt_mem_opt(dst, mem));
4739 ins_pipe(pipe_class_dummy);
4740 %}
4741
4742 // Load constant Compressed Pointer
4743
4744 instruct loadConN(iRegN dst, immN src) %{
4745 match(Set dst src);
4746 ins_cost(DEFAULT_COST);
4747 size(6);
4748 format %{ "loadConN $dst,$src\t # (cOop)" %}
4749 ins_encode %{
4750 AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
4751 __ relocate(cOop.rspec(), 1);
4752 __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4753 %}
4754 ins_pipe(pipe_class_dummy);
4755 %}
4756
4769 match(Set dst src);
4770 ins_cost(DEFAULT_COST);
4771 size(6);
4772 format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4773 ins_encode %{
4774 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4775 __ relocate(NKlass.rspec(), 1);
4776 __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4777 %}
4778 ins_pipe(pipe_class_dummy);
4779 %}
4780
4781 // Load and Decode Compressed Pointer
4782 // optimized variants for Unscaled cOops
4783
4784 instruct decodeLoadN(iRegP dst, memory mem) %{
4785 match(Set dst (DecodeN (LoadN mem)));
4786 predicate(false && (CompressedOops::base()==NULL)&&(CompressedOops::shift()==0));
4787 ins_cost(MEMORY_REF_COST);
4788 size(Z_DISP3_SIZE);
4789 format %{ "DecodeLoadN $dst,$mem\t # (cOop Load+Decode)" %}
4790 opcode(LLGF_ZOPC, LLGF_ZOPC);
4791 ins_encode(z_form_rt_mem_opt(dst, mem));
4792 ins_pipe(pipe_class_dummy);
4793 %}
4794
4795 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
4796 match(Set dst (DecodeNKlass (LoadNKlass mem)));
4797 predicate(false && (CompressedKlassPointers::base()==NULL)&&(CompressedKlassPointers::shift()==0));
4798 ins_cost(MEMORY_REF_COST);
4799 size(Z_DISP3_SIZE);
4800 format %{ "DecodeLoadNKlass $dst,$mem\t # (load/decode NKlass)" %}
4801 opcode(LLGF_ZOPC, LLGF_ZOPC);
4802 ins_encode(z_form_rt_mem_opt(dst, mem));
4803 ins_pipe(pipe_class_dummy);
4804 %}
4805
4806 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
4807 match(Set dst (DecodeNKlass src));
4808 ins_cost(3 * DEFAULT_COST);
4809 size(12);
4810 format %{ "DecodeLoadConNKlass $dst,$src\t # decode(cKlass)" %}
4811 ins_encode %{
4812 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4813 __ relocate(NKlass.rspec(), 1);
4814 __ load_const($dst$$Register, (Klass*)NKlass.value());
4815 %}
4816 ins_pipe(pipe_class_dummy);
4817 %}
4818
4819 // Decode Compressed Pointer
4820
4821 // General decoder
4822 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
4823 match(Set dst (DecodeN src));
4824 effect(KILL cr);
4825 predicate(CompressedOops::base() == NULL || !ExpandLoadingBaseDecode);
4826 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4827 // TODO: s390 port size(VARIABLE_SIZE);
4828 format %{ "decodeN $dst,$src\t # (decode cOop)" %}
4829 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, true); %}
4830 ins_pipe(pipe_class_dummy);
4831 %}
4832
4833 // General Klass decoder
4834 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
4835 match(Set dst (DecodeNKlass src));
4836 effect(KILL cr);
4837 ins_cost(3 * DEFAULT_COST);
4838 format %{ "decode_klass $dst,$src" %}
4839 ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4840 ins_pipe(pipe_class_dummy);
4841 %}
4842
4843 // General decoder
4844 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
4845 match(Set dst (DecodeN src));
4846 effect(KILL cr);
4847 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4848 n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4849 (CompressedOops::base()== NULL || !ExpandLoadingBaseDecode_NN));
4850 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4851 // TODO: s390 port size(VARIABLE_SIZE);
4852 format %{ "decodeN $dst,$src\t # (decode cOop NN)" %}
4853 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4854 ins_pipe(pipe_class_dummy);
4855 %}
4856
4857 instruct loadBase(iRegL dst, immL baseImm) %{
4858 effect(DEF dst, USE baseImm);
4859 predicate(false);
4860 format %{ "llihl $dst=$baseImm \t// load heap base" %}
4861 ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4862 ins_pipe(pipe_class_dummy);
4863 %}
4864
4865 // Decoder for heapbased mode peeling off loading the base.
4866 instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4867 match(Set dst (DecodeN src base));
4868 // Note: Effect TEMP dst was used with the intention to get
4869 // different regs for dst and base, but this has caused ADLC to
4870 // generate wrong code. Oop_decoder generates additional lgr when
4871 // dst==base.
4872 effect(KILL cr);
4873 predicate(false);
4874 // TODO: s390 port size(VARIABLE_SIZE);
4875 format %{ "decodeN $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4876 ins_encode %{
4877 __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4878 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4879 %}
4880 ins_pipe(pipe_class_dummy);
4881 %}
4882
4883 // Decoder for heapbased mode peeling off loading the base.
4884 instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4885 match(Set dst (DecodeN src base));
4886 effect(KILL cr);
4887 predicate(false);
4888 // TODO: s390 port size(VARIABLE_SIZE);
4889 format %{ "decodeN $dst = ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4890 ins_encode %{
4891 __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4892 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4893 %}
4894 ins_pipe(pipe_class_dummy);
4895 %}
4896
4897 // Decoder for heapbased mode peeling off loading the base.
4898 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4899 match(Set dst (DecodeN src));
4900 predicate(CompressedOops::base() != NULL && ExpandLoadingBaseDecode);
4901 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4902 // TODO: s390 port size(VARIABLE_SIZE);
4903 expand %{
4904 immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4905 iRegL base;
4906 loadBase(base, baseImm);
4907 decodeN_base(dst, src, base, cr);
4908 %}
4909 %}
4919 expand %{
4920 immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4921 iRegL base;
4922 loadBase(base, baseImm);
4923 decodeN_NN_base(dst, src, base, cr);
4924 %}
4925 %}
4926
4927 // Encode Compressed Pointer
4928
4929 // General encoder
4930 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
4931 match(Set dst (EncodeP src));
4932 effect(KILL cr);
4933 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4934 (CompressedOops::base() == 0 ||
4935 CompressedOops::base_disjoint() ||
4936 !ExpandLoadingBaseEncode));
4937 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4938 // TODO: s390 port size(VARIABLE_SIZE);
4939 format %{ "encodeP $dst,$src\t # (encode cOop)" %}
4940 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4941 ins_pipe(pipe_class_dummy);
4942 %}
4943
4944 // General class encoder
4945 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
4946 match(Set dst (EncodePKlass src));
4947 effect(KILL cr);
4948 format %{ "encode_klass $dst,$src" %}
4949 ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4950 ins_pipe(pipe_class_dummy);
4951 %}
4952
4953 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
4954 match(Set dst (EncodeP src));
4955 effect(KILL cr);
4956 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4957 (CompressedOops::base() == 0 ||
4958 CompressedOops::base_disjoint() ||
4959 !ExpandLoadingBaseEncode_NN));
4960 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4961 // TODO: s390 port size(VARIABLE_SIZE);
4962 format %{ "encodeP $dst,$src\t # (encode cOop)" %}
4963 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4964 ins_pipe(pipe_class_dummy);
4965 %}
4966
4967 // Encoder for heapbased mode peeling off loading the base.
4968 instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
4969 match(Set dst (EncodeP src (Binary base dst)));
4970 effect(TEMP_DEF dst);
4971 predicate(false);
4972 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4973 // TODO: s390 port size(VARIABLE_SIZE);
4974 format %{ "encodeP $dst = ($src>>3) +$base + pow2_offset\t # (encode cOop)" %}
4975 ins_encode %{
4976 jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4977 (((uint64_t)(intptr_t)CompressedOops::base()) >> CompressedOops::shift());
4978 __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4979 %}
4980 ins_pipe(pipe_class_dummy);
4981 %}
4982
4983 // Encoder for heapbased mode peeling off loading the base.
4984 instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
4985 match(Set dst (EncodeP src base));
4986 effect(USE pow2_offset);
4987 predicate(false);
4988 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4989 // TODO: s390 port size(VARIABLE_SIZE);
4990 format %{ "encodeP $dst = ($src>>3) +$base + $pow2_offset\t # (encode cOop)" %}
4991 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4992 ins_pipe(pipe_class_dummy);
4993 %}
4994
4995 // Encoder for heapbased mode peeling off loading the base.
4996 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4997 match(Set dst (EncodeP src));
4998 effect(KILL cr);
4999 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
5000 (CompressedOops::base_overlaps() && ExpandLoadingBaseEncode));
5001 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
5002 // TODO: s390 port size(VARIABLE_SIZE);
5003 expand %{
5004 immL baseImm %{ ((jlong)(intptr_t)CompressedOops::base()) >> CompressedOops::shift() %}
5005 immL_0 zero %{ (0) %}
5006 flagsReg ccr;
5007 iRegL base;
5008 iRegL negBase;
5009 loadBase(base, baseImm);
5010 negL_reg_reg(negBase, zero, base, ccr);
5023 expand %{
5024 immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
5025 immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)CompressedOops::base())) %}
5026 immL_0 zero %{ 0 %}
5027 flagsReg ccr;
5028 iRegL base;
5029 iRegL negBase;
5030 loadBase(base, baseImm);
5031 negL_reg_reg(negBase, zero, base, ccr);
5032 encodeP_NN_base(dst, src, negBase, pow2_offset);
5033 %}
5034 %}
5035
5036 // Store Compressed Pointer
5037
5038 // Store Compressed Pointer
5039 instruct storeN(memory mem, iRegN_P2N src) %{
5040 match(Set mem (StoreN mem src));
5041 ins_cost(MEMORY_REF_COST);
5042 size(Z_DISP_SIZE);
5043 format %{ "ST $src,$mem\t # (cOop)" %}
5044 opcode(STY_ZOPC, ST_ZOPC);
5045 ins_encode(z_form_rt_mem_opt(src, mem));
5046 ins_pipe(pipe_class_dummy);
5047 %}
5048
5049 // Store Compressed Klass pointer
5050 instruct storeNKlass(memory mem, iRegN src) %{
5051 match(Set mem (StoreNKlass mem src));
5052 ins_cost(MEMORY_REF_COST);
5053 size(Z_DISP_SIZE);
5054 format %{ "ST $src,$mem\t # (cKlass)" %}
5055 opcode(STY_ZOPC, ST_ZOPC);
5056 ins_encode(z_form_rt_mem_opt(src, mem));
5057 ins_pipe(pipe_class_dummy);
5058 %}
5059
5060 // Compare Compressed Pointers
5061
5062 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
5063 match(Set cr (CmpN src1 src2));
5064 ins_cost(DEFAULT_COST);
5065 size(2);
5066 format %{ "CLR $src1,$src2\t # (cOop)" %}
5067 opcode(CLR_ZOPC);
5068 ins_encode(z_rrform(src1, src2));
5069 ins_pipe(pipe_class_dummy);
5070 %}
5071
5072 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
5073 match(Set cr (CmpN src1 src2));
5074 ins_cost(DEFAULT_COST);
5075 size(6);
5076 format %{ "CLFI $src1,$src2\t # (cOop) compare immediate narrow" %}
5077 ins_encode %{
5078 AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
5079 __ relocate(cOop.rspec(), 1);
5080 __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5081 %}
5082 ins_pipe(pipe_class_dummy);
5083 %}
5084
5085 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
5086 match(Set cr (CmpN src1 src2));
5087 ins_cost(DEFAULT_COST);
5088 size(6);
5089 format %{ "CLFI $src1,$src2\t # (NKlass) compare immediate narrow" %}
5090 ins_encode %{
5091 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
5092 __ relocate(NKlass.rspec(), 1);
5093 __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5094 %}
5095 ins_pipe(pipe_class_dummy);
5096 %}
5097
5098 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
5099 match(Set cr (CmpN src1 src2));
5100 ins_cost(DEFAULT_COST);
5101 size(2);
5102 format %{ "LTR $src1,$src2\t # (cOop) LTR because comparing against zero" %}
5103 opcode(LTR_ZOPC);
5104 ins_encode(z_rrform(src1, src1));
5105 ins_pipe(pipe_class_dummy);
5106 %}
5107
5108
5109 //----------MemBar Instructions-----------------------------------------------
5110
5111 // Memory barrier flavors
5112
5113 instruct membar_acquire() %{
5114 match(MemBarAcquire);
5115 match(LoadFence);
5116 ins_cost(4*MEMORY_REF_COST);
5117 size(0);
5118 format %{ "MEMBAR-acquire" %}
5119 ins_encode %{ __ z_acquire(); %}
5120 ins_pipe(pipe_class_dummy);
5121 %}
5122
6777 __ z_lghi(Z_R0_scratch, divisor);
6778 __ z_lgr($dst$$Register->successor(), $src1$$Register);
6779 __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
6780 } else {
6781 __ clear_reg($dst$$Register, true, false);
6782 }
6783 %}
6784 ins_pipe(pipe_class_dummy);
6785 %}
6786
6787 // SHIFT
6788
6789 // Shift left logical
6790
6791 // Register Shift Left variable
6792 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
6793 match(Set dst (LShiftI src nbits));
6794 effect(KILL cr); // R1 is killed, too.
6795 ins_cost(3 * DEFAULT_COST);
6796 size(14);
6797 format %{ "SLL $dst,$src,[$nbits] & 31\t # use RISC-like SLLG also for int" %}
6798 ins_encode %{
6799 __ z_lgr(Z_R1_scratch, $nbits$$Register);
6800 __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
6801 __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
6802 %}
6803 ins_pipe(pipe_class_dummy);
6804 %}
6805
6806 // Register Shift Left Immediate
6807 // Constant shift count is masked in ideal graph already.
6808 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
6809 match(Set dst (LShiftI src nbits));
6810 size(6);
6811 format %{ "SLL $dst,$src,$nbits\t # use RISC-like SLLG also for int" %}
6812 ins_encode %{
6813 int Nbit = $nbits$$constant;
6814 assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6815 __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6816 %}
6817 ins_pipe(pipe_class_dummy);
6818 %}
6819
6820 // Register Shift Left Immediate by 1bit
6821 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
6822 match(Set dst (LShiftI src nbits));
6823 predicate(PreferLAoverADD);
6824 ins_cost(DEFAULT_COST_LOW);
6825 size(4);
6826 format %{ "LA $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6827 ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6828 ins_pipe(pipe_class_dummy);
6829 %}
6830
6831 // Register Shift Left Long
7107 %}
7108 ins_pipe(pipe_class_dummy);
7109 %}
7110
7111 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
7112 match(Set cr (OverflowSubL op1 op2));
7113 effect(DEF cr, USE op1, USE op2);
7114 // TODO: s390 port size(VARIABLE_SIZE);
7115 format %{ "SGR $op1,$op2\t # overflow check long" %}
7116 ins_encode %{
7117 __ load_const_optimized(Z_R1_scratch, $op2$$constant);
7118 __ z_lgr(Z_R0_scratch, $op1$$Register);
7119 __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7120 %}
7121 ins_pipe(pipe_class_dummy);
7122 %}
7123
7124 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
7125 match(Set cr (OverflowSubI zero op2));
7126 effect(DEF cr, USE op2);
7127 format %{ "NEG $op2\t # overflow check int" %}
7128 ins_encode %{
7129 __ clear_reg(Z_R0_scratch, false, false);
7130 __ z_sr(Z_R0_scratch, $op2$$Register);
7131 %}
7132 ins_pipe(pipe_class_dummy);
7133 %}
7134
7135 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
7136 match(Set cr (OverflowSubL zero op2));
7137 effect(DEF cr, USE op2);
7138 format %{ "NEGG $op2\t # overflow check long" %}
7139 ins_encode %{
7140 __ clear_reg(Z_R0_scratch, true, false);
7141 __ z_sgr(Z_R0_scratch, $op2$$Register);
7142 %}
7143 ins_pipe(pipe_class_dummy);
7144 %}
7145
7146 // No intrinsics for multiplication, since there is no easy way
7147 // to check for overflow.
7148
7149
7150 //----------Floating Point Arithmetic Instructions-----------------------------
7151
7152 // ADD
7153
7154 // Add float single precision
7155 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7156 match(Set dst (AddF dst src));
7157 effect(KILL cr);
7158 ins_cost(ALU_REG_COST);
9173 // Direct Branch.
9174 instruct branchFar(label labl) %{
9175 match(Goto);
9176 effect(USE labl);
9177 ins_cost(BRANCH_COST);
9178 size(6);
9179 format %{ "BRUL $labl" %}
9180 ins_encode(z_enc_brul(labl));
9181 ins_pipe(pipe_class_dummy);
9182 // This is not a short variant of a branch, but the long variant.
9183 ins_short_branch(0);
9184 %}
9185
9186 // Conditional Near Branch
9187 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
9188 // Same match rule as `branchConFar'.
9189 match(If cmp cr);
9190 effect(USE lbl);
9191 ins_cost(BRANCH_COST);
9192 size(4);
9193 format %{ "branch_con_short,$cmp $lbl" %}
9194 ins_encode(z_enc_branch_con_short(cmp, lbl));
9195 ins_pipe(pipe_class_dummy);
9196 // If set to 1 this indicates that the current instruction is a
9197 // short variant of a long branch. This avoids using this
9198 // instruction in first-pass matching. It will then only be used in
9199 // the `Shorten_branches' pass.
9200 ins_short_branch(1);
9201 %}
9202
9203 // This is for cases when the z/Architecture conditional branch instruction
9204 // does not reach far enough. So we emit a far branch here, which is
9205 // more expensive.
9206 //
9207 // Conditional Far Branch
9208 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
9209 // Same match rule as `branchCon'.
9210 match(If cmp cr);
9211 effect(USE cr, USE lbl);
9212 // Make more expensive to prefer compare_and_branch over separate instructions.
9213 ins_cost(2 * BRANCH_COST);
9214 size(6);
9215 format %{ "branch_con_far,$cmp $lbl" %}
9216 ins_encode(z_enc_branch_con_far(cmp, lbl));
9217 ins_pipe(pipe_class_dummy);
9218 // This is not a short variant of a branch, but the long variant..
9219 ins_short_branch(0);
9220 %}
9221
9222 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
9223 match(CountedLoopEnd cmp cr);
9224 effect(USE labl);
9225 ins_cost(BRANCH_COST);
9226 size(4);
9227 format %{ "branch_con_short,$cmp $labl\t # counted loop end" %}
9228 ins_encode(z_enc_branch_con_short(cmp, labl));
9229 ins_pipe(pipe_class_dummy);
9230 // If set to 1 this indicates that the current instruction is a
9231 // short variant of a long branch. This avoids using this
9232 // instruction in first-pass matching. It will then only be used in
9233 // the `Shorten_branches' pass.
9234 ins_short_branch(1);
9235 %}
9764 instruct CallLeafNoFPDirect(method meth) %{
9765 match(CallLeafNoFP);
9766 effect(USE meth);
9767 ins_cost(CALL_COST);
9768 // TODO: s390 port size(VARIABLE_SIZE);
9769 ins_num_consts(1);
9770 format %{ "CALL,runtime leaf nofp $meth" %}
9771 ins_encode( z_enc_java_to_runtime_call(meth) );
9772 ins_pipe(pipe_class_dummy);
9773 ins_alignment(2);
9774 %}
9775
9776 // Tail Call; Jump from runtime stub to Java code.
9777 // Also known as an 'interprocedural jump'.
9778 // Target of jump will eventually return to caller.
9779 // TailJump below removes the return address.
9780 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
9781 match(TailCall jump_target method_oop);
9782 ins_cost(CALL_COST);
9783 size(2);
9784 format %{ "Jmp $jump_target\t # $method_oop holds method oop" %}
9785 ins_encode %{ __ z_br($jump_target$$Register); %}
9786 ins_pipe(pipe_class_dummy);
9787 %}
9788
9789 // Return Instruction
9790 instruct Ret() %{
9791 match(Return);
9792 size(2);
9793 format %{ "BR(Z_R14) // branch to link register" %}
9794 ins_encode %{ __ z_br(Z_R14); %}
9795 ins_pipe(pipe_class_dummy);
9796 %}
9797
9798 // Tail Jump; remove the return address; jump to target.
9799 // TailCall above leaves the return address around.
9800 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9801 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9802 // "restore" before this instruction (in Epilogue), we need to materialize it
9803 // in %i0.
9804 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{
10772 instruct loadV8(iRegL dst, memory mem) %{
10773 match(Set dst (LoadVector mem));
10774 predicate(n->as_LoadVector()->memory_size() == 8);
10775 ins_cost(MEMORY_REF_COST);
10776 // TODO: s390 port size(VARIABLE_SIZE);
10777 format %{ "LG $dst,$mem\t # L(packed8B)" %}
10778 opcode(LG_ZOPC, LG_ZOPC);
10779 ins_encode(z_form_rt_mem_opt(dst, mem));
10780 ins_pipe(pipe_class_dummy);
10781 %}
10782
10783 //----------POPULATION COUNT RULES--------------------------------------------
10784
10785 // Byte reverse
10786
10787 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
10788 match(Set dst (ReverseBytesI src));
10789 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10790 ins_cost(DEFAULT_COST);
10791 size(4);
10792 format %{ "LRVR $dst,$src\t # byte reverse int" %}
10793 opcode(LRVR_ZOPC);
10794 ins_encode(z_rreform(dst, src));
10795 ins_pipe(pipe_class_dummy);
10796 %}
10797
10798 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
10799 match(Set dst (ReverseBytesL src));
10800 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10801 ins_cost(DEFAULT_COST);
10802 // TODO: s390 port size(FIXED_SIZE);
10803 format %{ "LRVGR $dst,$src\t # byte reverse long" %}
10804 opcode(LRVGR_ZOPC);
10805 ins_encode(z_rreform(dst, src));
10806 ins_pipe(pipe_class_dummy);
10807 %}
10808
10809 // Leading zeroes
10810
10811 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10812 // returns the bit position of the leftmost 1 in the 64bit source register.
10813 // As the bits are numbered from left to right (0..63), the returned
10814 // position index is equivalent to the number of leading zeroes.
// If no 1-bit is found (i.e. the register contains zero), the instruction
10816 // returns position 64. That's exactly what we need.
10817
10818 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10819 match(Set dst (CountLeadingZerosI src));
10820 effect(KILL tmp, KILL cr);
10821 ins_cost(3 * DEFAULT_COST);
10822 size(14);
10823 format %{ "SLLG $dst,$src,32\t # no need to always count 32 zeroes first\n\t"
10824 "IILH $dst,0x8000 \t # insert \"stop bit\" to force result 32 for zero src.\n\t"
10825 "FLOGR $dst,$dst"
10826 %}
10827 ins_encode %{
10828 // Performance experiments indicate that "FLOGR" is using some kind of
10829 // iteration to find the leftmost "1" bit.
10830 //
10831 // The prior implementation zero-extended the 32-bit argument to 64 bit,
10832 // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10833 // We could gain measurable speedup in micro benchmark:
10834 //
10835 // leading trailing
10836 // z10: int 2.04 1.68
10837 // long 1.00 1.02
10838 // z196: int 0.99 1.23
10839 // long 1.00 1.11
10840 //
10841 // By shifting the argument into the high-word instead of zero-extending it.
10842 // The add'l branch on condition (taken for a zero argument, very infrequent,
10843 // good prediction) is well compensated for by the savings.
10844 //
10845 // We leave the previous implementation in for some time in the future when
10846 // the "FLOGR" instruction may become less iterative.
10847
10848 // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10849 __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10850 __ z_iilh($dst$$Register, 0x8000); // Insert "stop bit" to force result 32 for zero src.
10851 __ z_flogr($dst$$Register, $dst$$Register);
10852 %}
10853 ins_pipe(pipe_class_dummy);
10854 %}
10855
10856 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
10857 match(Set dst (CountLeadingZerosL src));
10858 effect(KILL tmp, KILL cr);
10859 ins_cost(DEFAULT_COST);
10860 size(4);
10861 format %{ "FLOGR $dst,$src \t # count leading zeros (long)\n\t" %}
10862 ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10863 ins_pipe(pipe_class_dummy);
10864 %}
10865
10866 // trailing zeroes
10867
10868 // We transform the trailing zeroes problem to a leading zeroes problem
// such that we can use the FLOGR instruction to our advantage.
10870
10871 // With
10872 // tmp1 = src - 1
10873 // we flip all trailing zeroes to ones and the rightmost one to zero.
10874 // All other bits remain unchanged.
10875 // With the complement
10876 // tmp2 = ~src
10877 // we get all ones in the trailing zeroes positions. Thus,
10878 // tmp3 = tmp1 & tmp2
10879 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10880 // Now we can apply FLOGR and get 64-(trailing zeroes).
// Count trailing zeros of a 32-bit value.
// The problem is mapped to a leading-zeros count so that FLOGR can be used.
// FLOGR writes a register pair: dst is the even register, tmp the associated
// odd register. tmp is also used as scratch register before FLOGR executes.
// A zero argument yields 32.
// NOTE(review): the format text below still prints the instruction sequence of
// the earlier implementation, not the "Version 2" sequence actually emitted.
10881 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10882 match(Set dst (CountTrailingZerosI src));
10883 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10884 ins_cost(8 * DEFAULT_COST);
10885 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
10886 format %{ "LLGFR $dst,$src \t # clear upper 32 bits (we are dealing with int)\n\t"
10887 "LCGFR $tmp,$src \t # load 2's complement (32->64 bit)\n\t"
10888 "AGHI $dst,-1 \t # tmp1 = src-1\n\t"
10889 "AGHI $tmp,-1 \t # tmp2 = -src-1 = ~src\n\t"
10890 "NGR $dst,$tmp \t # tmp3 = tmp1&tmp2\n\t"
10891 "FLOGR $dst,$dst \t # count trailing zeros (int)\n\t"
10892 "AHI $dst,-64 \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10893 "LCR $dst,$dst \t # res = -tmp4"
10894 %}
10895 ins_encode %{
10896 Register Rdst = $dst$$Register;
10897 Register Rsrc = $src$$Register;
10898 // Rtmp only needed for zero-argument shortcut. With kill effect in
10899 // match rule Rsrc = roddReg would be possible, saving one register.
10900 Register Rtmp = $tmp$$Register;
10901
10902 assert_different_registers(Rdst, Rsrc, Rtmp);
10903
10904 // Algorithm:
10905 // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10906 // All other bits in the result are zero.
10907 // - Find the "leftmost one" bit position in the single-bit result from previous step.
10908 // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10909
10910 // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10911 Label done;
10912 __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10913 __ z_lcgfr(Rtmp, Rsrc);
10914 __ z_bre(done); // Taken very infrequently, good prediction, no BHT entry.
10915
10916 __ z_nr(Rtmp, Rsrc); // (src) & (-src) leaves nothing but least significant bit.
10917 __ z_ahi(Rtmp, -1); // Subtract one to fill all trailing zero positions with ones.
10918 // Use 32bit op to prevent borrow propagation (case Rdst = 0x80000000)
10919 // into upper half of reg. Not relevant with sllg below.
10920 __ z_sllg(Rdst, Rtmp, 32); // Shift interesting contents to upper half of register.
10921 __ z_bre(done); // Shortcut for argument = 1, result will be 0.
10922 // Depends on CC set by ahi above.
10923 // Taken very infrequently, good prediction, no BHT entry.
10924 // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10925 // after SLLG Rdst == 0(64bit)).
10926 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10927 __ add2reg(Rdst, -32); // 32-pos(leftmost1) is #trailing zeros
10928 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10929 __ bind(done);
10930 %}
10931 ins_pipe(pipe_class_dummy);
10932 %}
10933
// Count trailing zeros of a 64-bit value.
// The problem is mapped to a leading-zeros count so that FLOGR can be used.
// FLOGR writes a register pair: dst is the even register, tmp the associated
// odd register, which is therefore declared as killed together with the
// condition code.
10934 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10935 match(Set dst (CountTrailingZerosL src));
10936 effect(TEMP_DEF dst, KILL tmp, KILL cr);
10937 ins_cost(8 * DEFAULT_COST);
10938 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
10939 format %{ "LCGR $dst,$src \t # preserve src\n\t"
10940 "NGR $dst,$src \t #\n\t"
10941 "AGHI $dst,-1 \t # tmp1 = src-1\n\t"
10942 "FLOGR $dst,$dst \t # count trailing zeros (long), kill $tmp\n\t"
10943 "AHI $dst,-64 \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10944 "LCR $dst,$dst \t #"
10945 %}
10946 ins_encode %{
10947 Register Rdst = $dst$$Register;
10948 Register Rsrc = $src$$Register;
10949 assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10950
10951 // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
// Rdst = -src; src itself is left untouched.
10952 __ z_lcgr(Rdst, Rsrc);
// Rdst = src & (-src): only the rightmost set bit of src survives.
10953 __ z_ngr(Rdst, Rsrc);
// Subtract one: the trailing zero positions become ones, everything else is zero.
10954 __ add2reg(Rdst, -1);
10955 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
// FLOGR delivered 64-(trailing zeroes); subtract 64 and negate to get the result.
10956 __ add2reg(Rdst, -64);
10957 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10958 %}
10959 ins_pipe(pipe_class_dummy);
10960 %}
10961
10962
10963 // bit count
10964
// Count the one bits of a 32-bit value.
// Only selected if UsePopCountInstruction is set and the CPU provides POPCNT.
// POPCNT delivers a separate bit count for each byte of the register; the
// shift/add steps below sum up the counts of the four low-order bytes
// (byte4..byte7) into byte7, which is then zero-extended into dst.
// tmp is a scratch register and must differ from dst.
10965 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
10966 match(Set dst (PopCountI src));
10967 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10968 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10969 ins_cost(DEFAULT_COST);
10970 size(24);
10971 format %{ "POPCNT $dst,$src\t # pop count int" %}
10972 ins_encode %{
10973 Register Rdst = $dst$$Register;
10974 Register Rsrc = $src$$Register;
10975 Register Rtmp = $tmp$$Register;
10976
10977 // Prefer compile-time assertion over run-time SIGILL.
10978 assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10979 assert_different_registers(Rdst, Rtmp);
10980
10981 // Version 2: shows 10%(z196) improvement over original.
10982 __ z_popcnt(Rdst, Rsrc);
10983 __ z_srlg(Rtmp, Rdst, 16); // calc byte4+byte6 and byte5+byte7
10984 __ z_alr(Rdst, Rtmp); // into byte6 and byte7
10985 __ z_srlg(Rtmp, Rdst, 8); // calc (byte4+byte6) + (byte5+byte7)
10986 __ z_alr(Rdst, Rtmp); // into byte7
10987 __ z_llgcr(Rdst, Rdst); // zero-extend sum
10988 %}
10989 ins_pipe(pipe_class_dummy);
10990 %}
10991
// Count the one bits of a 64-bit value.
// Only selected if UsePopCountInstruction is set and the CPU provides POPCNT.
// POPCNT delivers a separate bit count for each byte of the register; the
// steps below accumulate all eight byte counts in the leftmost byte, which
// is finally shifted down to the low-order end of dst.
// tmp is a scratch register and must differ from dst.
10992 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
10993 match(Set dst (PopCountL src));
10994 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10995 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10996 ins_cost(DEFAULT_COST);
10997 // TODO: s390 port size(FIXED_SIZE);
10998 format %{ "POPCNT $dst,$src\t # pop count long" %}
10999 ins_encode %{
11000 Register Rdst = $dst$$Register;
11001 Register Rsrc = $src$$Register;
11002 Register Rtmp = $tmp$$Register;
11003
11004 // Prefer compile-time assertion over run-time SIGILL.
11005 assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
11006 assert_different_registers(Rdst, Rtmp);
11007
11008 // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
11009 __ z_popcnt(Rdst, Rsrc);
11010 __ z_ahhlr(Rdst, Rdst, Rdst); // high word += low word (byte counts of both halves combined)
11011 __ z_sllg(Rtmp, Rdst, 16);
11012 __ z_algr(Rdst, Rtmp); // byte0 += byte2, byte1 += byte3
11013 __ z_sllg(Rtmp, Rdst, 8);
11014 __ z_algr(Rdst, Rtmp); // byte0 += byte1, total is now in the leftmost byte
11015 __ z_srlg(Rdst, Rdst, 56); // move total to the low-order byte, zeroing the rest
11016 %}
11017 ins_pipe(pipe_class_dummy);
11018 %}
|