1 //
2 // Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3 // Copyright (c) 2017, SAP SE. All rights reserved.
4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 //
6 // This code is free software; you can redistribute it and/or modify it
7 // under the terms of the GNU General Public License version 2 only, as
8 // published by the Free Software Foundation.
9 //
10 // This code is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // version 2 for more details (a copy is included in the LICENSE file that
14 // accompanied this code).
15 //
16 // You should have received a copy of the GNU General Public License version
17 // 2 along with this work; if not, write to the Free Software Foundation,
18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 //
20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 // or visit www.oracle.com if you need additional information or have any
22 // questions.
23 //
1358 // The ic_miss_stub will handle the null pointer exception.
1359 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1360 __ z_br(R1_ic_miss_stub_addr);
1361 __ bind(valid);
1362 }
1363
1364 // Check whether this method is the proper implementation for the class of
1365 // the receiver (ic miss check).
1366 {
1367 Label valid;
1368 // Compare cached class against klass from receiver.
1369 // This also does an implicit null check!
1370 __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1371 __ z_bre(valid);
1372 // The inline cache points to the wrong method. Call the
1373 // ic_miss_stub to find the proper method.
1374 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1375 __ z_br(R1_ic_miss_stub_addr);
1376 __ bind(valid);
1377 }
1378
1379 }
1380
// Size query for MachUEPNode. No fixed size is hard-coded for this node;
// the request is forwarded unchanged to the generic MachNode::size().
1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1382 // Determine size dynamically.
1383 return MachNode::size(ra_);
1384 }
1385
1386 //=============================================================================
1387
1388 %} // interrupt source section
1389
1390 source_hpp %{ // Header information of the source block.
1391
1392 class HandlerImpl {
1393 public:
1394
1395 static int emit_exception_handler(CodeBuffer &cbuf);
1396 static int emit_deopt_handler(CodeBuffer& cbuf);
1397
1398 static uint size_exception_handler() {
4684 // See cOop encoding classes for elaborate comment.
4685
4686 // Moved here because it is needed in expand rules for encode.
4687 // Long negation.
// Matches (SubL zero src) where `zero` is an immL_0 operand and emits a
// single z_lcgr(dst, src). The condition code is declared as killed.
4688 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
4689 match(Set dst (SubL zero src));
4690 effect(KILL cr);
4691 size(4); // Fixed size declared for the one z_lcgr emitted below.
4692 format %{ "NEG $dst, $src\t # long" %}
4693 ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4694 ins_pipe(pipe_class_dummy);
4695 %}
4696
4697 // Load Compressed Pointer
4698
4699 // Load narrow oop
// Matches a LoadN from `mem` into a narrow-oop register. The encoding is
// produced by z_form_rt_mem_opt with LLGF_ZOPC supplied in both opcode slots.
4700 instruct loadN(iRegN dst, memory mem) %{
4701 match(Set dst (LoadN mem));
4702 ins_cost(MEMORY_REF_COST);
4703 size(Z_DISP3_SIZE);
4704 format %{ "LoadN $dst,$mem\t# (cOop)" %}
4705 opcode(LLGF_ZOPC, LLGF_ZOPC); // Same opcode in both slots.
4706 ins_encode(z_form_rt_mem_opt(dst, mem));
4707 ins_pipe(pipe_class_dummy);
4708 %}
4709
4710 // Load narrow Klass Pointer
// Matches a LoadNKlass from `mem`. Apart from the matched ideal node and the
// format text, the rule is identical to loadN (LLGF_ZOPC via z_form_rt_mem_opt).
4711 instruct loadNKlass(iRegN dst, memory mem) %{
4712 match(Set dst (LoadNKlass mem));
4713 ins_cost(MEMORY_REF_COST);
4714 size(Z_DISP3_SIZE);
4715 format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %}
4716 opcode(LLGF_ZOPC, LLGF_ZOPC); // Same opcode in both slots.
4717 ins_encode(z_form_rt_mem_opt(dst, mem));
4718 ins_pipe(pipe_class_dummy);
4719 %}
4720
4721 // Load constant Compressed Pointer
4722
// Materializes a narrow-oop constant (immN) in a narrow-oop register.
// The encoding looks up an AddressLiteral for the constant, records its
// relocation spec and then emits load_narrow_oop() for the literal's value.
4723 instruct loadConN(iRegN dst, immN src) %{
4724 match(Set dst src);
4725 ins_cost(DEFAULT_COST);
4726 size(6);
4727 format %{ "loadConN $dst,$src\t # (cOop)" %}
4728 ins_encode %{
4729 AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
4730 __ relocate(cOop.rspec(), 1); // Relocation is registered before the load is emitted.
4731 __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4732 %}
4733 ins_pipe(pipe_class_dummy);
4734 %}
4735
4748 match(Set dst src);
4749 ins_cost(DEFAULT_COST);
4750 size(6);
4751 format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4752 ins_encode %{
4753 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4754 __ relocate(NKlass.rspec(), 1);
4755 __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4756 %}
4757 ins_pipe(pipe_class_dummy);
4758 %}
4759
4760 // Load and Decode Compressed Pointer
4761 // optimized variants for Unscaled cOops
4762
// Fused (DecodeN (LoadN mem)): the pointer is loaded with the same LLGF
// encoding as loadN and no separate decode step is emitted.
// NOTE: the predicate begins with `false &&`, so this rule can never be
// selected, whatever the narrow-oop base and shift are.
4763 instruct decodeLoadN(iRegP dst, memory mem) %{
4764 match(Set dst (DecodeN (LoadN mem)));
4765 predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0));
4766 ins_cost(MEMORY_REF_COST);
4767 size(Z_DISP3_SIZE);
4768 format %{ "DecodeLoadN $dst,$mem\t# (cOop Load+Decode)" %}
4769 opcode(LLGF_ZOPC, LLGF_ZOPC);
4770 ins_encode(z_form_rt_mem_opt(dst, mem));
4771 ins_pipe(pipe_class_dummy);
4772 %}
4773
// Fused (DecodeNKlass (LoadNKlass mem)), encoded like loadNKlass (LLGF).
// NOTE: the predicate begins with `false &&`, so this rule can never be
// selected, whatever the narrow-klass base and shift are.
4774 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
4775 match(Set dst (DecodeNKlass (LoadNKlass mem)));
4776 predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0));
4777 ins_cost(MEMORY_REF_COST);
4778 size(Z_DISP3_SIZE);
4779 format %{ "DecodeLoadNKlass $dst,$mem\t# (load/decode NKlass)" %}
4780 opcode(LLGF_ZOPC, LLGF_ZOPC);
4781 ins_encode(z_form_rt_mem_opt(dst, mem));
4782 ins_pipe(pipe_class_dummy);
4783 %}
4784
// Matches DecodeNKlass applied to a narrow-klass constant (immNKlass).
// Instead of loading the narrow value and decoding it, the encoding loads
// the literal's value cast to Klass* with load_const(), after recording the
// metadata relocation.
4785 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
4786 match(Set dst (DecodeNKlass src));
4787 ins_cost(3 * DEFAULT_COST);
4788 size(12);
4789 format %{ "DecodeLoadConNKlass $dst,$src\t # decode(cKlass)" %}
4790 ins_encode %{
4791 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4792 __ relocate(NKlass.rspec(), 1); // Relocation is registered before the load is emitted.
4793 __ load_const($dst$$Register, (Klass*)NKlass.value());
4794 %}
4795 ins_pipe(pipe_class_dummy);
4796 %}
4797
4798 // Decode Compressed Pointer
4799
4800 // General decoder
// Matches DecodeN when the narrow-oop base is NULL or ExpandLoadingBaseDecode
// is off (the complementary case is covered by decodeN_Ex's predicate).
// Calls oop_decoder() with `true` as third argument; judging by the format
// strings of decodeN_base / decodeN_NN_base this selects the null-checking
// variant -- confirm against MacroAssembler::oop_decoder.
4801 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
4802 match(Set dst (DecodeN src));
4803 effect(KILL cr);
4804 predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode);
4805 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4806 // TODO: s390 port size(VARIABLE_SIZE);
4807 format %{ "decodeN $dst,$src\t# (decode cOop)" %}
4808 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, true); %}
4809 ins_pipe(pipe_class_dummy);
4810 %}
4811
4812 // General Klass decoder
// Matches DecodeNKlass of a narrow register value and delegates to
// decode_klass_not_null(dst, src). The condition code is declared as killed.
// No predicate and no size() are given for this rule.
4813 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
4814 match(Set dst (DecodeNKlass src));
4815 effect(KILL cr);
4816 ins_cost(3 * DEFAULT_COST);
4817 format %{ "decode_klass $dst,$src" %}
4818 ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4819 ins_pipe(pipe_class_dummy);
4820 %}
4821
4822 // General decoder for values whose type is NotNull or Constant.
// Same match rule as decodeN, but restricted by predicate to nodes whose
// pointer type is TypePtr::NotNull or TypePtr::Constant, and to the case
// where the base is NULL or ExpandLoadingBaseDecode_NN is off.
// Calls oop_decoder() with `false` as third argument and carries a lower
// cost than decodeN (no BRANCH_COST term).
4823 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
4824 match(Set dst (DecodeN src));
4825 effect(KILL cr);
4826 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4827 n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4828 (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN));
4829 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4830 // TODO: s390 port size(VARIABLE_SIZE);
4831 format %{ "decodeN $dst,$src\t# (decode cOop NN)" %}
4832 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4833 ins_pipe(pipe_class_dummy);
4834 %}
4835
// Helper rule that loads the heap base constant `baseImm` into `dst` via
// get_oop_base(). It has no match rule and predicate(false), so it is never
// selected by matching; it is instantiated from expand blocks only
// (see e.g. decodeN_Ex).
4836 instruct loadBase(iRegL dst, immL baseImm) %{
4837 effect(DEF dst, USE baseImm);
4838 predicate(false);
4839 format %{ "llihl $dst=$baseImm \t// load heap base" %}
4840 ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4841 ins_pipe(pipe_class_dummy);
4842 %}
4843
4844 // Decoder for heapbased mode peeling off loading the base.
// The heap base arrives in register `base` (loaded by a separate loadBase
// node) instead of being materialized inside the decoder. predicate(false):
// only reachable through an expand block (decodeN_Ex). This is the variant
// that passes `true` to oop_decoder; its format shows the ($src == 0) check.
4845 instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4846 match(Set dst (DecodeN src base));
4847 // Note: Effect TEMP dst was used with the intention to get
4848 // different regs for dst and base, but this has caused ADLC to
4849 // generate wrong code. Oop_decoder generates additional lgr when
4850 // dst==base.
4851 effect(KILL cr);
4852 predicate(false);
4853 // TODO: s390 port size(VARIABLE_SIZE);
4854 format %{ "decodeN $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4855 ins_encode %{
// The last argument is the pow2 offset derived from the narrow-oop base.
4856 __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4857 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4858 %}
4859 ins_pipe(pipe_class_dummy);
4860 %}
4861
4862 // Decoder for heapbased mode peeling off loading the base.
// Counterpart of decodeN_base that passes `false` to oop_decoder; its format
// shows no ($src == 0) check. predicate(false): only reachable through an
// expand block, which passes the base in register `base`.
4863 instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4864 match(Set dst (DecodeN src base));
4865 effect(KILL cr);
4866 predicate(false);
4867 // TODO: s390 port size(VARIABLE_SIZE);
4868 format %{ "decodeN $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4869 ins_encode %{
// The last argument is the pow2 offset derived from the narrow-oop base.
4870 __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4871 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4872 %}
4873 ins_pipe(pipe_class_dummy);
4874 %}
4875
4876 // Decoder for heapbased mode peeling off loading the base.
// Selected when the narrow-oop base is non-NULL and ExpandLoadingBaseDecode
// is on. The rule has no encoding of its own: it expands into two nodes,
// loadBase (materializes Universe::narrow_oop_base() in a fresh register)
// followed by decodeN_base, which consumes that register.
4877 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4878 match(Set dst (DecodeN src));
4879 predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode);
4880 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4881 // TODO: s390 port size(VARIABLE_SIZE);
4882 expand %{
4883 immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4884 iRegL base;
4885 loadBase(base, baseImm);
4886 decodeN_base(dst, src, base, cr);
4887 %}
4888 %}
4898 expand %{
4899 immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4900 iRegL base;
4901 loadBase(base, baseImm);
4902 decodeN_NN_base(dst, src, base, cr);
4903 %}
4904 %}
4905
4906 // Encode Compressed Pointer
4907
4908 // General encoder
// Matches EncodeP for nodes whose pointer type is not TypePtr::NotNull, when
// the base is 0, the base is disjoint, or ExpandLoadingBaseEncode is off.
// Calls oop_encoder() with `true` as third argument, Z_R1_scratch, -1 and the
// result of all_outs_are_Stores(this); the meaning of the -1 and of the last
// flag is defined by MacroAssembler::oop_encoder (not visible here).
4909 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
4910 match(Set dst (EncodeP src));
4911 effect(KILL cr);
4912 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4913 (Universe::narrow_oop_base() == 0 ||
4914 Universe::narrow_oop_base_disjoint() ||
4915 !ExpandLoadingBaseEncode));
4916 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4917 // TODO: s390 port size(VARIABLE_SIZE);
4918 format %{ "encodeP $dst,$src\t# (encode cOop)" %}
4919 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4920 ins_pipe(pipe_class_dummy);
4921 %}
4922
4923 // General class encoder
// Matches EncodePKlass and delegates to encode_klass_not_null(dst, src).
// The condition code is declared as killed. No predicate, cost or size is
// given for this rule.
4924 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
4925 match(Set dst (EncodePKlass src));
4926 effect(KILL cr);
4927 format %{ "encode_klass $dst,$src" %}
4928 ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4929 ins_pipe(pipe_class_dummy);
4930 %}
4931
// Encoder for pointers whose type is TypePtr::NotNull.
// Mirrors encodeP, but the predicate requires NotNull and consults
// ExpandLoadingBaseEncode_NN, and oop_encoder() is called with `false` as
// third argument. Format text and cost are the same as for encodeP.
4932 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
4933 match(Set dst (EncodeP src));
4934 effect(KILL cr);
4935 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4936 (Universe::narrow_oop_base() == 0 ||
4937 Universe::narrow_oop_base_disjoint() ||
4938 !ExpandLoadingBaseEncode_NN));
4939 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4940 // TODO: s390 port size(VARIABLE_SIZE);
4941 format %{ "encodeP $dst,$src\t# (encode cOop)" %}
4942 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4943 ins_pipe(pipe_class_dummy);
4944 %}
4945
4946 // Encoder for heapbased mode peeling off loading the base.
// The base is supplied in register `base` rather than being materialized by
// the encoder itself. predicate(false): never selected by matching.
// `offset` is the negated pow2 offset of (narrow_oop_base >> narrow_oop_shift).
4947 instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
4948 match(Set dst (EncodeP src (Binary base dst)));
4949 effect(TEMP_DEF dst);
4950 predicate(false);
4951 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4952 // TODO: s390 port size(VARIABLE_SIZE);
4953 format %{ "encodeP $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %}
4954 ins_encode %{
4955 jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4956 (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift());
4957 __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4958 %}
4959 ins_pipe(pipe_class_dummy);
4960 %}
4961
4962 // Encoder for heapbased mode peeling off loading the base.
// Variant that passes `false` to oop_encoder. Unlike encodeP_base, the offset
// is not computed here but handed in as the immL operand `pow2_offset`.
// predicate(false): only reachable through an expand block.
4963 instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
4964 match(Set dst (EncodeP src base));
4965 effect(USE pow2_offset);
4966 predicate(false);
4967 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4968 // TODO: s390 port size(VARIABLE_SIZE);
4969 format %{ "encodeP $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %}
4970 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4971 ins_pipe(pipe_class_dummy);
4972 %}
4973
4974 // Encoder for heapbased mode peeling off loading the base.
4975 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4976 match(Set dst (EncodeP src));
4977 effect(KILL cr);
4978 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4979 (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode));
4980 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4981 // TODO: s390 port size(VARIABLE_SIZE);
4982 expand %{
4983 immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %}
4984 immL_0 zero %{ (0) %}
4985 flagsReg ccr;
4986 iRegL base;
4987 iRegL negBase;
4988 loadBase(base, baseImm);
4989 negL_reg_reg(negBase, zero, base, ccr);
5002 expand %{
5003 immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
5004 immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %}
5005 immL_0 zero %{ 0 %}
5006 flagsReg ccr;
5007 iRegL base;
5008 iRegL negBase;
5009 loadBase(base, baseImm);
5010 negL_reg_reg(negBase, zero, base, ccr);
5011 encodeP_NN_base(dst, src, negBase, pow2_offset);
5012 %}
5013 %}
5014
5015 // Store Compressed Pointer
5016
5017 // Store Compressed Pointer
// Matches StoreN of a register value (operand class iRegN_P2N) to `mem`.
// Two opcodes (STY_ZOPC, ST_ZOPC) are handed to z_form_rt_mem_opt;
// presumably the encoder chooses between them depending on the displacement
// -- confirm in the z_form_rt_mem_opt encoding class.
5018 instruct storeN(memory mem, iRegN_P2N src) %{
5019 match(Set mem (StoreN mem src));
5020 ins_cost(MEMORY_REF_COST);
5021 size(Z_DISP_SIZE);
5022 format %{ "ST $src,$mem\t# (cOop)" %}
5023 opcode(STY_ZOPC, ST_ZOPC);
5024 ins_encode(z_form_rt_mem_opt(src, mem));
5025 ins_pipe(pipe_class_dummy);
5026 %}
5027
5028 // Store Compressed Klass pointer
// Matches StoreNKlass. Same encoding, opcodes, cost and size as storeN; only
// the matched node, the source operand class (iRegN) and the format differ.
5029 instruct storeNKlass(memory mem, iRegN src) %{
5030 match(Set mem (StoreNKlass mem src));
5031 ins_cost(MEMORY_REF_COST);
5032 size(Z_DISP_SIZE);
5033 format %{ "ST $src,$mem\t# (cKlass)" %}
5034 opcode(STY_ZOPC, ST_ZOPC);
5035 ins_encode(z_form_rt_mem_opt(src, mem));
5036 ins_pipe(pipe_class_dummy);
5037 %}
5038
5039 // Compare Compressed Pointers
5040
// Compares two narrow oops held in registers (CmpN) and sets the condition
// code. Encoded as a single register-register CLR via z_rrform.
5041 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
5042 match(Set cr (CmpN src1 src2));
5043 ins_cost(DEFAULT_COST);
5044 size(2);
5045 format %{ "CLR $src1,$src2\t# (cOop)" %}
5046 opcode(CLR_ZOPC);
5047 ins_encode(z_rrform(src1, src2));
5048 ins_pipe(pipe_class_dummy);
5049 %}
5050
// Compares a narrow oop register against a narrow-oop constant (immN).
// The constant's AddressLiteral is looked up and its relocation recorded
// before compare_immediate_narrow_oop() emits the compare.
5051 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
5052 match(Set cr (CmpN src1 src2));
5053 ins_cost(DEFAULT_COST);
5054 size(6);
5055 format %{ "CLFI $src1,$src2\t# (cOop) compare immediate narrow" %}
5056 ins_encode %{
5057 AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
5058 __ relocate(cOop.rspec(), 1);
5059 __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5060 %}
5061 ins_pipe(pipe_class_dummy);
5062 %}
5063
// Compares a narrow register value against a narrow-klass constant
// (immNKlass). Same structure as compN_iRegN_immN, but the literal comes from
// constant_metadata_address() and compare_immediate_narrow_klass() is used.
5064 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
5065 match(Set cr (CmpN src1 src2));
5066 ins_cost(DEFAULT_COST);
5067 size(6);
5068 format %{ "CLFI $src1,$src2\t# (NKlass) compare immediate narrow" %}
5069 ins_encode %{
5070 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
5071 __ relocate(NKlass.rspec(), 1);
5072 __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5073 %}
5074 ins_pipe(pipe_class_dummy);
5075 %}
5076
// Compares a narrow oop register against the immN0 constant.
// No immediate is encoded: LTR is emitted with src1 as both operands, which
// sets the condition code from the register's own value.
// NOTE: $src2 appears in the format text only; the encoding never uses it.
5077 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
5078 match(Set cr (CmpN src1 src2));
5079 ins_cost(DEFAULT_COST);
5080 size(2);
5081 format %{ "LTR $src1,$src2\t# (cOop) LTR because comparing against zero" %}
5082 opcode(LTR_ZOPC);
5083 ins_encode(z_rrform(src1, src1)); // src1 is passed twice on purpose.
5084 ins_pipe(pipe_class_dummy);
5085 %}
5086
5087
5088 //----------MemBar Instructions-----------------------------------------------
5089
5090 // Memory barrier flavors
5091
// Acquire barrier: one rule serves both MemBarAcquire and LoadFence.
// The rule declares size(0) while still invoking z_acquire(); presumably
// z_acquire() emits no code on this platform -- confirm in the assembler.
5092 instruct membar_acquire() %{
5093 match(MemBarAcquire);
5094 match(LoadFence);
5095 ins_cost(4*MEMORY_REF_COST);
5096 size(0);
5097 format %{ "MEMBAR-acquire" %}
5098 ins_encode %{ __ z_acquire(); %}
5099 ins_pipe(pipe_class_dummy);
5100 %}
5101
6756 __ z_lghi(Z_R0_scratch, divisor);
6757 __ z_lgr($dst$$Register->successor(), $src1$$Register);
6758 __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
6759 } else {
6760 __ clear_reg($dst$$Register, true, false);
6761 }
6762 %}
6763 ins_pipe(pipe_class_dummy);
6764 %}
6765
6766 // SHIFT
6767
6768 // Shift left logical
6769
6770 // Register Shift Left variable
// Matches LShiftI with the shift count in a register. The count is copied to
// Z_R1_scratch and masked with BitsPerJavaInteger-1 (the "& 31" shown in the
// format) before the 64-bit SLLG performs the shift.
6771 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
6772 match(Set dst (LShiftI src nbits));
6773 effect(KILL cr); // R1 is killed, too.
6774 ins_cost(3 * DEFAULT_COST);
6775 size(14);
6776 format %{ "SLL $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
6777 ins_encode %{
6778 __ z_lgr(Z_R1_scratch, $nbits$$Register); // Work on a copy so $nbits stays intact.
6779 __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1); // Mask the shift count.
6780 __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch); // Shift amount comes from R1.
6781 %}
6782 ins_pipe(pipe_class_dummy);
6783 %}
6784
6785 // Register Shift Left Immediate
6786 // Constant shift count is masked in ideal graph already.
// The assert checks that assumption; the count is masked again when passed
// to z_sllg, so the emitted shift amount is always within 0..31.
// Z_R0 is passed as the base register argument of z_sllg.
6787 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
6788 match(Set dst (LShiftI src nbits));
6789 size(6);
6790 format %{ "SLL $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
6791 ins_encode %{
6792 int Nbit = $nbits$$constant;
6793 assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6794 __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6795 %}
6796 ins_pipe(pipe_class_dummy);
6797 %}
6798
6799 // Register Shift Left Immediate by 1bit
// Only enabled when PreferLAoverADD is set. A shift by one is computed as
// src + src using LA with displacement 0 and src as both index and base.
// The rule uses DEFAULT_COST_LOW and has no flagsReg operand or KILL effect.
6800 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
6801 match(Set dst (LShiftI src nbits));
6802 predicate(PreferLAoverADD);
6803 ins_cost(DEFAULT_COST_LOW);
6804 size(4);
6805 format %{ "LA $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6806 ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6807 ins_pipe(pipe_class_dummy);
6808 %}
6809
6810 // Register Shift Left Long
7086 %}
7087 ins_pipe(pipe_class_dummy);
7088 %}
7089
// Overflow check for long subtraction with a constant subtrahend.
// The subtraction is carried out in the scratch registers only: the constant
// is loaded into Z_R1_scratch, op1 is copied to Z_R0_scratch, and SGR then
// computes R0 - R1. op1 itself is not written; the rule only defines `cr`.
7090 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
7091 match(Set cr (OverflowSubL op1 op2));
7092 effect(DEF cr, USE op1, USE op2);
7093 // TODO: s390 port size(VARIABLE_SIZE);
7094 format %{ "SGR $op1,$op2\t # overflow check long" %}
7095 ins_encode %{
7096 __ load_const_optimized(Z_R1_scratch, $op2$$constant);
7097 __ z_lgr(Z_R0_scratch, $op1$$Register);
7098 __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7099 %}
7100 ins_pipe(pipe_class_dummy);
7101 %}
7102
// Overflow check for int negation, matched as (OverflowSubI zero op2).
// Z_R0_scratch is cleared and op2 is subtracted from it with the 32-bit SR.
// op2 is only read; the rule only defines `cr`.
7103 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
7104 match(Set cr (OverflowSubI zero op2));
7105 effect(DEF cr, USE op2);
7106 format %{ "NEG $op2\t# overflow check int" %}
7107 ins_encode %{
7108 __ clear_reg(Z_R0_scratch, false, false);
7109 __ z_sr(Z_R0_scratch, $op2$$Register);
7110 %}
7111 ins_pipe(pipe_class_dummy);
7112 %}
7113
// Overflow check for long negation, matched as (OverflowSubL zero op2).
// Long counterpart of overflowNegI_rReg: clear_reg() is called with `true`
// as its second argument and the 64-bit SGR is used instead of SR.
7114 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
7115 match(Set cr (OverflowSubL zero op2));
7116 effect(DEF cr, USE op2);
7117 format %{ "NEGG $op2\t# overflow check long" %}
7118 ins_encode %{
7119 __ clear_reg(Z_R0_scratch, true, false);
7120 __ z_sgr(Z_R0_scratch, $op2$$Register);
7121 %}
7122 ins_pipe(pipe_class_dummy);
7123 %}
7124
7125 // No intrinsics for multiplication, since there is no easy way
7126 // to check for overflow.
7127
7128
7129 //----------Floating Point Arithmetic Instructions-----------------------------
7130
7131 // ADD
7132
7133 // Add float single precision
7134 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7135 match(Set dst (AddF dst src));
7136 effect(KILL cr);
7137 ins_cost(ALU_REG_COST);
9152 // Direct Branch.
// Matches an unconditional Goto and emits the long form via z_enc_brul
// (declared size 6). ins_short_branch(0) marks this rule as the long variant.
9153 instruct branchFar(label labl) %{
9154 match(Goto);
9155 effect(USE labl);
9156 ins_cost(BRANCH_COST);
9157 size(6);
9158 format %{ "BRUL $labl" %}
9159 ins_encode(z_enc_brul(labl));
9160 ins_pipe(pipe_class_dummy);
9161 // This is not a short variant of a branch, but the long variant.
9162 ins_short_branch(0);
9163 %}
9164
9165 // Conditional Near Branch
// Short (declared size 4) form of the conditional branch on `cr`, encoded by
// z_enc_branch_con_short. It shares its match rule with branchConFar and is
// flagged with ins_short_branch(1), see the comment before that attribute.
9166 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
9167 // Same match rule as `branchConFar'.
9168 match(If cmp cr);
9169 effect(USE lbl);
9170 ins_cost(BRANCH_COST);
9171 size(4);
9172 format %{ "branch_con_short,$cmp $cr, $lbl" %}
9173 ins_encode(z_enc_branch_con_short(cmp, lbl));
9174 ins_pipe(pipe_class_dummy);
9175 // If set to 1 this indicates that the current instruction is a
9176 // short variant of a long branch. This avoids using this
9177 // instruction in first-pass matching. It will then only be used in
9178 // the `Shorten_branches' pass.
9179 ins_short_branch(1);
9180 %}
9181
9182 // This is for cases when the z/Architecture conditional branch instruction
9183 // does not reach far enough. So we emit a far branch here, which is
9184 // more expensive.
9185 //
9186 // Conditional Far Branch
// Long (declared size 6) counterpart of branchCon, encoded by
// z_enc_branch_con_far and costed at twice BRANCH_COST.
9187 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
9188 // Same match rule as `branchCon'.
9189 match(If cmp cr);
9190 effect(USE cr, USE lbl);
9191 // Make more expensive to prefer compare_and_branch over separate instructions.
9192 ins_cost(2 * BRANCH_COST);
9193 size(6);
9194 format %{ "branch_con_far,$cmp $cr, $lbl" %}
9195 ins_encode(z_enc_branch_con_far(cmp, lbl));
9196 ins_pipe(pipe_class_dummy);
9197 // This is not a short variant of a branch, but the long variant.
9198 ins_short_branch(0);
9199 %}
9200
// Short conditional branch closing a counted loop (CountedLoopEnd).
// Uses the same short encoding (z_enc_branch_con_short, declared size 4) as
// branchCon and is likewise flagged as a short-branch variant.
9201 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
9202 match(CountedLoopEnd cmp cr);
9203 effect(USE labl);
9204 ins_cost(BRANCH_COST);
9205 size(4);
9206 format %{ "branch_con_short,$cmp $labl\t # counted loop end" %}
9207 ins_encode(z_enc_branch_con_short(cmp, labl));
9208 ins_pipe(pipe_class_dummy);
9209 // If set to 1 this indicates that the current instruction is a
9210 // short variant of a long branch. This avoids using this
9211 // instruction in first-pass matching. It will then only be used in
9212 // the `Shorten_branches' pass.
9213 ins_short_branch(1);
9214 %}
// Runtime leaf call (CallLeafNoFP). The call sequence is emitted by the
// z_enc_java_to_runtime_call encoding class; the rule declares one constant
// (ins_num_consts(1)) and an alignment of 2. No fixed size is declared.
9743 instruct CallLeafNoFPDirect(method meth) %{
9744 match(CallLeafNoFP);
9745 effect(USE meth);
9746 ins_cost(CALL_COST);
9747 // TODO: s390 port size(VARIABLE_SIZE);
9748 ins_num_consts(1);
9749 format %{ "CALL,runtime leaf nofp $meth" %}
9750 ins_encode( z_enc_java_to_runtime_call(meth) );
9751 ins_pipe(pipe_class_dummy);
9752 ins_alignment(2);
9753 %}
9754
9755 // Tail Call; Jump from runtime stub to Java code.
9756 // Also known as an 'interprocedural jump'.
9757 // Target of jump will eventually return to caller.
9758 // TailJump below removes the return address.
// The encoding is a single register branch to jump_target. method_oop is an
// operand of the match rule but is not referenced by the encoding.
9759 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
9760 match(TailCall jump_target method_oop);
9761 ins_cost(CALL_COST);
9762 size(2);
9763 format %{ "Jmp $jump_target\t# $method_oop holds method oop" %}
9764 ins_encode %{ __ z_br($jump_target$$Register); %}
9765 ins_pipe(pipe_class_dummy);
9766 %}
9767
9768 // Return Instruction
// Matches the ideal Return node and emits a register branch to Z_R14, which
// the format text describes as the link register (declared size 2).
9769 instruct Ret() %{
9770 match(Return);
9771 size(2);
9772 format %{ "BR(Z_R14) // branch to link register" %}
9773 ins_encode %{ __ z_br(Z_R14); %}
9774 ins_pipe(pipe_class_dummy);
9775 %}
9776
9777 // Tail Jump; remove the return address; jump to target.
9778 // TailCall above leaves the return address around.
9779 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9780 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9781 // "restore" before this instruction (in Epilogue), we need to materialize it
9782 // in %i0.
9783 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{
// Vector load restricted by predicate to LoadVector nodes whose memory size
// is exactly 8. The value is loaded into a long register with LG via
// z_form_rt_mem_opt.
10751 instruct loadV8(iRegL dst, memory mem) %{
10752 match(Set dst (LoadVector mem));
10753 predicate(n->as_LoadVector()->memory_size() == 8);
10754 ins_cost(MEMORY_REF_COST);
10755 // TODO: s390 port size(VARIABLE_SIZE);
10756 format %{ "LG $dst,$mem\t # L(packed8B)" %}
10757 opcode(LG_ZOPC, LG_ZOPC);
10758 ins_encode(z_form_rt_mem_opt(dst, mem));
10759 ins_pipe(pipe_class_dummy);
10760 %}
10761
10762 //----------POPULATION COUNT RULES--------------------------------------------
10763
10764 // Byte reverse
10765
// Byte reversal of an int (ReverseBytesI), enabled by
// UseByteReverseInstruction. Emits a single LRVR through the z_rreform
// encoding class (declared size 4).
10766 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
10767 match(Set dst (ReverseBytesI src));
10768 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10769 ins_cost(DEFAULT_COST);
10770 size(4);
10771 format %{ "LRVR $dst,$src\t# byte reverse int" %}
10772 opcode(LRVR_ZOPC);
10773 ins_encode(z_rreform(dst, src));
10774 ins_pipe(pipe_class_dummy);
10775 %}
10776
// Byte reversal of a long (ReverseBytesL), enabled by
// UseByteReverseInstruction. Emits a single LRVGR through z_rreform.
// Unlike bytes_reverse_int, no size() is declared yet (see the TODO).
10777 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
10778 match(Set dst (ReverseBytesL src));
10779 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10780 ins_cost(DEFAULT_COST);
10781 // TODO: s390 port size(FIXED_SIZE);
10782 format %{ "LRVGR $dst,$src\t# byte reverse long" %}
10783 opcode(LRVGR_ZOPC);
10784 ins_encode(z_rreform(dst, src));
10785 ins_pipe(pipe_class_dummy);
10786 %}
10787
10788 // Leading zeroes
10789
10790 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10791 // returns the bit position of the leftmost 1 in the 64bit source register.
10792 // As the bits are numbered from left to right (0..63), the returned
10793 // position index is equivalent to the number of leading zeroes.
10794 // If no 1-bit is found (i.e. the register contains zero), the instruction
10795 // returns position 64. That's exactly what we need.
10796
// Leading-zero count of an int, built on the 64-bit FLOGR.
// The source is shifted into the upper word and a "stop bit" is inserted
// below it, so that a zero source yields 32 (see the format text).
// dst must be an even register (revenRegI); the odd register `tmp` is
// declared killed because FLOGR operates on the even/odd pair.
10797 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10798 match(Set dst (CountLeadingZerosI src));
10799 effect(KILL tmp, KILL cr);
10800 ins_cost(3 * DEFAULT_COST);
10801 size(14);
10802 format %{ "SLLG $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
10803 "IILH $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
10804 "FLOGR $dst,$dst"
10805 %}
10806 ins_encode %{
10807 // Performance experiments indicate that "FLOGR" is using some kind of
10808 // iteration to find the leftmost "1" bit.
10809 //
10810 // The prior implementation zero-extended the 32-bit argument to 64 bit,
10811 // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10812 // We could gain measurable speedup in micro benchmark:
10813 //
10814 // leading trailing
10815 // z10: int 2.04 1.68
10816 // long 1.00 1.02
10817 // z196: int 0.99 1.23
10818 // long 1.00 1.11
10819 //
10820 // By shifting the argument into the high-word instead of zero-extending it.
10821 // The add'l branch on condition (taken for a zero argument, very infrequent,
10822 // good prediction) is well compensated for by the savings.
10823 //
10824 // We leave the previous implementation in for some time in the future when
10825 // the "FLOGR" instruction may become less iterative.
10826
10827 // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10828 __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10829 __ z_iilh($dst$$Register, 0x8000); // Insert "stop bit" to force result 32 for zero src.
10830 __ z_flogr($dst$$Register, $dst$$Register);
10831 %}
10832 ins_pipe(pipe_class_dummy);
10833 %}
10834
// Leading-zero count of a long: a single FLOGR applied directly to the
// source. dst must be an even register (revenRegI); the odd register `tmp`
// and the condition code are declared killed.
10835 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
10836 match(Set dst (CountLeadingZerosL src));
10837 effect(KILL tmp, KILL cr);
10838 ins_cost(DEFAULT_COST);
10839 size(4);
10840 format %{ "FLOGR $dst,$src \t# count leading zeros (long)\n\t" %}
10841 ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10842 ins_pipe(pipe_class_dummy);
10843 %}
10844
10845 // trailing zeroes
10846
10847 // We transform the trailing zeroes problem to a leading zeroes problem
10848 // such that we can use the FLOGR instruction to our advantage.
10849
10850 // With
10851 // tmp1 = src - 1
10852 // we flip all trailing zeroes to ones and the rightmost one to zero.
10853 // All other bits remain unchanged.
10854 // With the complement
10855 // tmp2 = ~src
10856 // we get all ones in the trailing zeroes positions. Thus,
10857 // tmp3 = tmp1 & tmp2
10858 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10859 // Now we can apply FLOGR and get 64-(trailing zeroes).
10860 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10861 match(Set dst (CountTrailingZerosI src));
10862 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10863 ins_cost(8 * DEFAULT_COST);
10864 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
10865 format %{ "LLGFR $dst,$src \t# clear upper 32 bits (we are dealing with int)\n\t"
10866 "LCGFR $tmp,$src \t# load 2's complement (32->64 bit)\n\t"
10867 "AGHI $dst,-1 \t# tmp1 = src-1\n\t"
10868 "AGHI $tmp,-1 \t# tmp2 = -src-1 = ~src\n\t"
10869 "NGR $dst,$tmp \t# tmp3 = tmp1&tmp2\n\t"
10870 "FLOGR $dst,$dst \t# count trailing zeros (int)\n\t"
10871 "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10872 "LCR $dst,$dst \t# res = -tmp4"
10873 %}
10874 ins_encode %{
10875 Register Rdst = $dst$$Register;
10876 Register Rsrc = $src$$Register;
10877 // Rtmp only needed for zero-argument shortcut. With kill effect in
10878 // match rule Rsrc = roddReg would be possible, saving one register.
10879 Register Rtmp = $tmp$$Register;
10880
10881 assert_different_registers(Rdst, Rsrc, Rtmp);
10882
10883 // Algorithm:
10884 // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10885 // All other bits in the result are zero.
10886 // - Find the "leftmost one" bit position in the single-bit result from previous step.
10887 // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10888
10889 // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10890 Label done;
10891 __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10892 __ z_lcgfr(Rtmp, Rsrc);
10898 // into upper half of reg. Not relevant with sllg below.
10899 __ z_sllg(Rdst, Rtmp, 32); // Shift interesting contents to upper half of register.
10900 __ z_bre(done); // Shortcut for argument = 1, result will be 0.
10901 // Depends on CC set by ahi above.
10902 // Taken very infrequently, good prediction, no BHT entry.
10903 // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10904 // after SLLG Rdst == 0(64bit)).
10905 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10906 __ add2reg(Rdst, -32); // 32-pos(leftmost1) is #trailing zeros
10907 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10908 __ bind(done);
10909 %}
10910 ins_pipe(pipe_class_dummy);
10911 %}
10912
// Trailing-zero count of a long, reduced to a leading-zero count (FLOGR).
//   dst = src & -src   isolates the rightmost set bit (0 if src == 0)
//   dst = dst - 1      ones in exactly the trailing-zero positions
//                      (all ones if src == 0)
//   dst = FLOGR(dst)   64 - (#trailing zeros)
//   dst = -(dst - 64)  #trailing zeros; 64 for src == 0
// dst must be an even register (revenRegI); FLOGR kills its odd partner,
// hence KILL tmp. The format string mirrors the emitted sequence, with one
// line per instruction.
10913 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10914 match(Set dst (CountTrailingZerosL src));
10915 effect(TEMP_DEF dst, KILL tmp, KILL cr);
10916 ins_cost(8 * DEFAULT_COST);
10917 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
10918 format %{ "LCGR $dst,$src \t# dst = -src\n\t"
10919 "NGR $dst,$src \t# isolate rightmost set bit (src & -src)\n\t"
10920 "AGHI $dst,-1 \t# tmp1 = (src & -src)-1\n\t"
10921 "FLOGR $dst,$dst \t# count trailing zeros (long), kill $tmp\n\t"
10922 "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10923 "LCGFR $dst,$dst \t# res = -tmp4"
10924 %}
10925 ins_encode %{
10926 Register Rdst = $dst$$Register;
10927 Register Rsrc = $src$$Register;
10928 assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10929
10930 // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10931 __ z_lcgr(Rdst, Rsrc); // Rdst = -Rsrc
10932 __ z_ngr(Rdst, Rsrc); // Rdst = Rsrc & -Rsrc
10933 __ add2reg(Rdst, -1);
10934 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10935 __ add2reg(Rdst, -64);
10936 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10937 %}
10938 ins_pipe(pipe_class_dummy);
10939 %}
10940
10941
10942 // bit count
10943
// Population count of an int.
// POPCNT delivers one bit count per byte; the shift/add steps below fold the counts
// of the four low-order bytes (byte4..byte7) into byte7, which is then zero-extended.
// Only enabled when UsePopCountInstruction is set and the CPU offers POPCNT.
10944 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
10945 match(Set dst (PopCountI src));
10946 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10947 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10948 ins_cost(DEFAULT_COST);
10949 size(24);
10950 format %{ "POPCNT $dst,$src\t# pop count int" %}
10951 ins_encode %{
10952 Register Rdst = $dst$$Register;
10953 Register Rsrc = $src$$Register;
10954 Register Rtmp = $tmp$$Register;
10955
10956 // Prefer compile-time assertion over run-time SIGILL.
10957 assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10958 assert_different_registers(Rdst, Rtmp);
10959
10960 // Version 2: shows 10%(z196) improvement over original.
10961 __ z_popcnt(Rdst, Rsrc);
10962 __ z_srlg(Rtmp, Rdst, 16); // calc byte4+byte6 and byte5+byte7
10963 __ z_alr(Rdst, Rtmp); // into byte6 and byte7
10964 __ z_srlg(Rtmp, Rdst, 8); // calc (byte4+byte6) + (byte5+byte7)
10965 __ z_alr(Rdst, Rtmp); // into byte7
10966 __ z_llgcr(Rdst, Rdst); // zero-extend sum
10967 %}
10968 ins_pipe(pipe_class_dummy);
10969 %}
10970
// Population count of a long.
// POPCNT delivers one bit count per byte. The per-byte counts are first folded from
// the low word into the high word (AHHLR), then accumulated into the leftmost byte
// by two shift-left/add steps; the final SRLG by 56 moves that byte down as result.
// Only enabled when UsePopCountInstruction is set and the CPU offers POPCNT.
10971 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
10972 match(Set dst (PopCountL src));
10973 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10974 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10975 ins_cost(DEFAULT_COST);
10976 // TODO: s390 port size(FIXED_SIZE);
10977 format %{ "POPCNT $dst,$src\t# pop count long" %}
10978 ins_encode %{
10979 Register Rdst = $dst$$Register;
10980 Register Rsrc = $src$$Register;
10981 Register Rtmp = $tmp$$Register;
10982
10983 // Prefer compile-time assertion over run-time SIGILL.
10984 assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
10985 assert_different_registers(Rdst, Rtmp);
10986
10987 // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
10988 __ z_popcnt(Rdst, Rsrc);
10989 __ z_ahhlr(Rdst, Rdst, Rdst); // high word += low word (per-byte counts cannot overflow a byte)
10990 __ z_sllg(Rtmp, Rdst, 16);
10991 __ z_algr(Rdst, Rtmp); // byte0 and byte1 now hold partial sums
10992 __ z_sllg(Rtmp, Rdst, 8);
10993 __ z_algr(Rdst, Rtmp); // byte0 holds the total
10994 __ z_srlg(Rdst, Rdst, 56); // move total into the low-order byte, clearing the rest
10995 %}
10996 ins_pipe(pipe_class_dummy);
10997 %}
|
1 //
2 // Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
3 // Copyright (c) 2017, 2019 SAP SE. All rights reserved.
4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 //
6 // This code is free software; you can redistribute it and/or modify it
7 // under the terms of the GNU General Public License version 2 only, as
8 // published by the Free Software Foundation.
9 //
10 // This code is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // version 2 for more details (a copy is included in the LICENSE file that
14 // accompanied this code).
15 //
16 // You should have received a copy of the GNU General Public License version
17 // 2 along with this work; if not, write to the Free Software Foundation,
18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 //
20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 // or visit www.oracle.com if you need additional information or have any
22 // questions.
23 //
1358 // The ic_miss_stub will handle the null pointer exception.
1359 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1360 __ z_br(R1_ic_miss_stub_addr);
1361 __ bind(valid);
1362 }
1363
1364 // Check whether this method is the proper implementation for the class of
1365 // the receiver (ic miss check).
1366 {
1367 Label valid;
1368 // Compare cached class against klass from receiver.
1369 // This also does an implicit null check!
1370 __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1371 __ z_bre(valid);
1372 // The inline cache points to the wrong method. Call the
1373 // ic_miss_stub to find the proper method.
1374 __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1375 __ z_br(R1_ic_miss_stub_addr);
1376 __ bind(valid);
1377 }
1378 }
1379
// Returns the code size of this node. No fixed size is declared for it;
// the request is simply forwarded to the generic MachNode::size().
1380 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1381 // Determine size dynamically.
1382 return MachNode::size(ra_);
1383 }
1384
1385 //=============================================================================
1386
1387 %} // interrupt source section
1388
1389 source_hpp %{ // Header information of the source block.
1390
1391 class HandlerImpl {
1392 public:
1393
1394 static int emit_exception_handler(CodeBuffer &cbuf);
1395 static int emit_deopt_handler(CodeBuffer& cbuf);
1396
1397 static uint size_exception_handler() {
4683 // See cOop encoding classes for elaborate comment.
4684
4685 // Moved here because it is needed in expand rules for encode.
4686 // Long negation.
// Matches the ideal pattern (0 - src) for longs and emits a single 4-byte LCGR.
// The condition code is declared as killed.
4687 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
4688 match(Set dst (SubL zero src));
4689 effect(KILL cr);
4690 size(4);
4691 format %{ "NEG $dst, $src\t # long" %}
4692 ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4693 ins_pipe(pipe_class_dummy);
4694 %}
4695
4696 // Load Compressed Pointer
4697
4698 // Load narrow oop
// Loads a compressed oop from memory using LLGF.
// NOTE(review): both opcode slots name LLGF, presumably because there is no
// short-displacement form to choose from - confirm against z_form_rt_mem_opt.
4699 instruct loadN(iRegN dst, memory mem) %{
4700 match(Set dst (LoadN mem));
4701 ins_cost(MEMORY_REF_COST);
4702 size(Z_DISP3_SIZE);
4703 format %{ "LoadN $dst,$mem\t # (cOop)" %}
4704 opcode(LLGF_ZOPC, LLGF_ZOPC);
4705 ins_encode(z_form_rt_mem_opt(dst, mem));
4706 ins_pipe(pipe_class_dummy);
4707 %}
4708
4709 // Load narrow Klass Pointer
// Loads a compressed klass pointer from memory. Same encoding as loadN (LLGF),
// but matches the LoadNKlass ideal node.
4710 instruct loadNKlass(iRegN dst, memory mem) %{
4711 match(Set dst (LoadNKlass mem));
4712 ins_cost(MEMORY_REF_COST);
4713 size(Z_DISP3_SIZE);
4714 format %{ "LoadNKlass $dst,$mem\t # (klass cOop)" %}
4715 opcode(LLGF_ZOPC, LLGF_ZOPC);
4716 ins_encode(z_form_rt_mem_opt(dst, mem));
4717 ins_pipe(pipe_class_dummy);
4718 %}
4719
4720 // Load constant Compressed Pointer
4721
// Materializes a compressed oop constant in a register (fixed 6-byte encoding).
4722 instruct loadConN(iRegN dst, immN src) %{
4723 match(Set dst src);
4724 ins_cost(DEFAULT_COST);
4725 size(6);
4726 format %{ "loadConN $dst,$src\t # (cOop)" %}
4727 ins_encode %{
4728 AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
// Record the oop relocation before emitting the load that embeds the constant.
// NOTE(review): the format argument 1 presumably marks a narrow-oop immediate - confirm.
4729 __ relocate(cOop.rspec(), 1);
4730 __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4731 %}
4732 ins_pipe(pipe_class_dummy);
4733 %}
4734
4747 match(Set dst src);
4748 ins_cost(DEFAULT_COST);
4749 size(6);
4750 format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4751 ins_encode %{
4752 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4753 __ relocate(NKlass.rspec(), 1);
4754 __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4755 %}
4756 ins_pipe(pipe_class_dummy);
4757 %}
4758
4759 // Load and Decode Compressed Pointer
4760 // optimized variants for Unscaled cOops
4761
// Combined load + decode of a compressed oop for the unscaled case (no base, no shift),
// where the decode step is a no-op and a plain LLGF suffices.
// The rule is currently disabled: its predicate starts with "false &&".
4762 instruct decodeLoadN(iRegP dst, memory mem) %{
4763 match(Set dst (DecodeN (LoadN mem)));
4764 predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0));
4765 ins_cost(MEMORY_REF_COST);
4766 size(Z_DISP3_SIZE);
4767 format %{ "DecodeLoadN $dst,$mem\t # (cOop Load+Decode)" %}
4768 opcode(LLGF_ZOPC, LLGF_ZOPC);
4769 ins_encode(z_form_rt_mem_opt(dst, mem));
4770 ins_pipe(pipe_class_dummy);
4771 %}
4772
// Combined load + decode of a compressed klass pointer for the unscaled case
// (no klass base, no shift). Like decodeLoadN, the rule is currently disabled
// because its predicate starts with "false &&".
4773 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
4774 match(Set dst (DecodeNKlass (LoadNKlass mem)));
4775 predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0));
4776 ins_cost(MEMORY_REF_COST);
4777 size(Z_DISP3_SIZE);
4778 format %{ "DecodeLoadNKlass $dst,$mem\t # (load/decode NKlass)" %}
4779 opcode(LLGF_ZOPC, LLGF_ZOPC);
4780 ins_encode(z_form_rt_mem_opt(dst, mem));
4781 ins_pipe(pipe_class_dummy);
4782 %}
4783
// Decode of a constant compressed klass pointer: since the klass is known at
// compile time, the full Klass* is loaded directly with load_const
// (fixed 12-byte encoding) and a metadata relocation is recorded for it.
4784 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
4785 match(Set dst (DecodeNKlass src));
4786 ins_cost(3 * DEFAULT_COST);
4787 size(12);
4788 format %{ "DecodeLoadConNKlass $dst,$src\t # decode(cKlass)" %}
4789 ins_encode %{
4790 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4791 __ relocate(NKlass.rspec(), 1);
4792 __ load_const($dst$$Register, (Klass*)NKlass.value());
4793 %}
4794 ins_pipe(pipe_class_dummy);
4795 %}
4796
4797 // Decode Compressed Pointer
4798
4799 // General decoder
// Decodes a compressed oop in one rule. Used when there is no heap base or when
// expanding into separate "load base" + "decode" nodes is switched off.
// NOTE(review): the 'true' passed to oop_decoder presumably requests the
// null-preserving variant (the _NN rule passes 'false') - confirm.
4800 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
4801 match(Set dst (DecodeN src));
4802 effect(KILL cr);
4803 predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode);
4804 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4805 // TODO: s390 port size(VARIABLE_SIZE);
4806 format %{ "decodeN $dst,$src\t # (decode cOop)" %}
4807 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, true); %}
4808 ins_pipe(pipe_class_dummy);
4809 %}
4810
4811 // General Klass decoder
// Decodes a compressed klass pointer held in a register by delegating to
// MacroAssembler::decode_klass_not_null. The condition code is declared as killed.
4812 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
4813 match(Set dst (DecodeNKlass src));
4814 effect(KILL cr);
4815 ins_cost(3 * DEFAULT_COST);
4816 format %{ "decode_klass $dst,$src" %}
4817 ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4818 ins_pipe(pipe_class_dummy);
4819 %}
4820
4821 // General decoder
// Decoder variant for oops whose type is known to be NotNull or Constant.
// It is cheaper than decodeN (no BRANCH_COST in ins_cost) and calls oop_decoder
// with 'false' as third argument.
// NOTE(review): 'false' presumably means "no null check needed" - confirm.
4822 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
4823 match(Set dst (DecodeN src));
4824 effect(KILL cr);
4825 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4826 n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4827 (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN));
4828 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4829 // TODO: s390 port size(VARIABLE_SIZE);
4830 format %{ "decodeN $dst,$src\t # (decode cOop NN)" %}
4831 ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4832 ins_pipe(pipe_class_dummy);
4833 %}
4834
// Loads the (possibly pre-shifted) heap base constant into a register.
// There is no match rule and the predicate is false, so the matcher never selects
// this rule on its own; it is instantiated from the expand blocks of the *_Ex rules.
4835 instruct loadBase(iRegL dst, immL baseImm) %{
4836 effect(DEF dst, USE baseImm);
4837 predicate(false);
4838 format %{ "llihl $dst=$baseImm \t// load heap base" %}
4839 ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4840 ins_pipe(pipe_class_dummy);
4841 %}
4842
4843 // Decoder for heapbased mode peeling off loading the base.
// Null-preserving decode step that receives the heap base in a register.
// predicate(false): only reachable through the expand block of decodeN_Ex.
4844 instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4845 match(Set dst (DecodeN src base));
4846 // Note: Effect TEMP dst was used with the intention to get
4847 // different regs for dst and base, but this has caused ADLC to
4848 // generate wrong code. Oop_decoder generates additional lgr when
4849 // dst==base.
4850 effect(KILL cr);
4851 predicate(false);
4852 // TODO: s390 port size(VARIABLE_SIZE);
4853 format %{ "decodeN $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4854 ins_encode %{
// The pow2 offset is derived from the heap base and handed to oop_decoder as a separate addend.
4855 __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4856 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4857 %}
4858 ins_pipe(pipe_class_dummy);
4859 %}
4860
4861 // Decoder for heapbased mode peeling off loading the base.
// Decode step for oops that need no null handling, with the heap base supplied
// in a register. predicate(false): the matcher never selects it directly; it is
// only instantiated from an expand block.
4862 instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4863 match(Set dst (DecodeN src base));
4864 effect(KILL cr);
4865 predicate(false);
4866 // TODO: s390 port size(VARIABLE_SIZE);
4867 format %{ "decodeN $dst = ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4868 ins_encode %{
4869 __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4870 (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));
4871 %}
4872 ins_pipe(pipe_class_dummy);
4873 %}
4874
4875 // Decoder for heapbased mode peeling off loading the base.
// Heap-based decode that is expanded into two nodes: loadBase (materializes the
// heap base) followed by decodeN_base (performs the actual decode).
// Selected when a heap base exists and ExpandLoadingBaseDecode is on, i.e. the
// complement of the decodeN predicate.
4876 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4877 match(Set dst (DecodeN src));
4878 predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode);
4879 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4880 // TODO: s390 port size(VARIABLE_SIZE);
4881 expand %{
4882 immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4883 iRegL base;
4884 loadBase(base, baseImm);
4885 decodeN_base(dst, src, base, cr);
4886 %}
4887 %}
4897 expand %{
4898 immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
4899 iRegL base;
4900 loadBase(base, baseImm);
4901 decodeN_NN_base(dst, src, base, cr);
4902 %}
4903 %}
4904
4905 // Encode Compressed Pointer
4906
4907 // General encoder
// Encodes an oop that may be null (type is not NotNull) in a single rule.
// Applies when there is no heap base, the base is disjoint, or expansion into
// separate base-loading nodes is switched off.
4908 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
4909 match(Set dst (EncodeP src));
4910 effect(KILL cr);
4911 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4912 (Universe::narrow_oop_base() == 0 ||
4913 Universe::narrow_oop_base_disjoint() ||
4914 !ExpandLoadingBaseEncode));
4915 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4916 // TODO: s390 port size(VARIABLE_SIZE);
4917 format %{ "encodeP $dst,$src\t # (encode cOop)" %}
// Z_R1_scratch is handed to oop_encoder as base register argument.
// NOTE(review): -1 and all_outs_are_Stores(this) presumably mean "no precomputed
// offset" and "only the low 32 bits of the result are consumed" - confirm.
4918 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4919 ins_pipe(pipe_class_dummy);
4920 %}
4921
4922 // General class encoder
// Encodes a klass pointer into its compressed form by delegating to
// MacroAssembler::encode_klass_not_null. The condition code is declared as killed.
4923 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
4924 match(Set dst (EncodePKlass src));
4925 effect(KILL cr);
4926 format %{ "encode_klass $dst,$src" %}
4927 ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4928 ins_pipe(pipe_class_dummy);
4929 %}
4930
// Encodes an oop whose type is known to be NotNull in a single rule.
// Same conditions on the heap base as encodeP, but controlled by
// ExpandLoadingBaseEncode_NN and calling oop_encoder with 'false' as third argument.
4931 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
4932 match(Set dst (EncodeP src));
4933 effect(KILL cr);
4934 predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4935 (Universe::narrow_oop_base() == 0 ||
4936 Universe::narrow_oop_base_disjoint() ||
4937 !ExpandLoadingBaseEncode_NN));
4938 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4939 // TODO: s390 port size(VARIABLE_SIZE);
4940 format %{ "encodeP $dst,$src\t # (encode cOop)" %}
4941 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4942 ins_pipe(pipe_class_dummy);
4943 %}
4944
4945 // Encoder for heapbased mode peeling off loading the base.
// Null-preserving encode step with the base supplied in a register.
// predicate(false): never selected by the matcher directly; intended to be
// instantiated from an expand block.
4946 instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
4947 match(Set dst (EncodeP src (Binary base dst)));
4948 effect(TEMP_DEF dst);
4949 predicate(false);
4950 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4951 // TODO: s390 port size(VARIABLE_SIZE);
4952 format %{ "encodeP $dst = ($src>>3) +$base + pow2_offset\t # (encode cOop)" %}
4953 ins_encode %{
// The offset is the negated pow2 offset of the already shifted heap base.
4954 jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4955 (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift());
4956 __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4957 %}
4958 ins_pipe(pipe_class_dummy);
4959 %}
4960
4961 // Encoder for heapbased mode peeling off loading the base.
// Encode step for not-null oops with base register and pow2 offset supplied as
// operands. predicate(false): only instantiated from an expand block, which
// passes the negated base and the precomputed offset constant.
4962 instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
4963 match(Set dst (EncodeP src base));
4964 effect(USE pow2_offset);
4965 predicate(false);
4966 ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4967 // TODO: s390 port size(VARIABLE_SIZE);
4968 format %{ "encodeP $dst = ($src>>3) +$base + $pow2_offset\t # (encode cOop)" %}
4969 ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4970 ins_pipe(pipe_class_dummy);
4971 %}
4972
4973 // Encoder for heapbased mode peeling off loading the base.
4974 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4975 match(Set dst (EncodeP src));
4976 effect(KILL cr);
4977 predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4978 (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode));
4979 ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4980 // TODO: s390 port size(VARIABLE_SIZE);
4981 expand %{
4982 immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %}
4983 immL_0 zero %{ (0) %}
4984 flagsReg ccr;
4985 iRegL base;
4986 iRegL negBase;
4987 loadBase(base, baseImm);
4988 negL_reg_reg(negBase, zero, base, ccr);
5001 expand %{
5002 immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}
5003 immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %}
5004 immL_0 zero %{ 0 %}
5005 flagsReg ccr;
5006 iRegL base;
5007 iRegL negBase;
5008 loadBase(base, baseImm);
5009 negL_reg_reg(negBase, zero, base, ccr);
5010 encodeP_NN_base(dst, src, negBase, pow2_offset);
5011 %}
5012 %}
5013
5014 // Store Compressed Pointer
5015
5016 // Store Compressed Pointer
// Stores a compressed oop (32 bit) to memory.
// Two opcodes are offered (STY and ST).
// NOTE(review): z_form_rt_mem_opt presumably picks the short ST form when the
// displacement allows it - confirm.
5017 instruct storeN(memory mem, iRegN_P2N src) %{
5018 match(Set mem (StoreN mem src));
5019 ins_cost(MEMORY_REF_COST);
5020 size(Z_DISP_SIZE);
5021 format %{ "ST $src,$mem\t # (cOop)" %}
5022 opcode(STY_ZOPC, ST_ZOPC);
5023 ins_encode(z_form_rt_mem_opt(src, mem));
5024 ins_pipe(pipe_class_dummy);
5025 %}
5026
5027 // Store Compressed Klass pointer
// Stores a compressed klass pointer (32 bit) to memory; same encoding as storeN
// but matching StoreNKlass and taking a plain iRegN source.
5028 instruct storeNKlass(memory mem, iRegN src) %{
5029 match(Set mem (StoreNKlass mem src));
5030 ins_cost(MEMORY_REF_COST);
5031 size(Z_DISP_SIZE);
5032 format %{ "ST $src,$mem\t # (cKlass)" %}
5033 opcode(STY_ZOPC, ST_ZOPC);
5034 ins_encode(z_form_rt_mem_opt(src, mem));
5035 ins_pipe(pipe_class_dummy);
5036 %}
5037
5038 // Compare Compressed Pointers
5039
// Compares two compressed oops held in registers with a 2-byte CLR
// (unsigned 32-bit compare); the result is delivered in the condition code.
5040 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
5041 match(Set cr (CmpN src1 src2));
5042 ins_cost(DEFAULT_COST);
5043 size(2);
5044 format %{ "CLR $src1,$src2\t # (cOop)" %}
5045 opcode(CLR_ZOPC);
5046 ins_encode(z_rrform(src1, src2));
5047 ins_pipe(pipe_class_dummy);
5048 %}
5049
// Compares a compressed oop in a register against a compressed oop constant
// (fixed 6-byte encoding). An oop relocation is recorded for the embedded constant.
5050 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
5051 match(Set cr (CmpN src1 src2));
5052 ins_cost(DEFAULT_COST);
5053 size(6);
5054 format %{ "CLFI $src1,$src2\t # (cOop) compare immediate narrow" %}
5055 ins_encode %{
5056 AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
5057 __ relocate(cOop.rspec(), 1);
5058 __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5059 %}
5060 ins_pipe(pipe_class_dummy);
5061 %}
5062
// Compares a compressed klass pointer in a register against a compressed klass
// constant (fixed 6-byte encoding). A metadata relocation is recorded for the constant.
5063 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
5064 match(Set cr (CmpN src1 src2));
5065 ins_cost(DEFAULT_COST);
5066 size(6);
5067 format %{ "CLFI $src1,$src2\t # (NKlass) compare immediate narrow" %}
5068 ins_encode %{
5069 AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
5070 __ relocate(NKlass.rspec(), 1);
5071 __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5072 %}
5073 ins_pipe(pipe_class_dummy);
5074 %}
5075
// Compares a compressed oop against the null constant using a 2-byte LTR
// (load and test), which sets the condition code from the register value.
5076 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
5077 match(Set cr (CmpN src1 src2));
5078 ins_cost(DEFAULT_COST);
5079 size(2);
5080 format %{ "LTR $src1,$src2\t # (cOop) LTR because comparing against zero" %}
5081 opcode(LTR_ZOPC);
// Note: src1 is used as both operands; the zero constant src2 is not encoded.
5082 ins_encode(z_rrform(src1, src1));
5083 ins_pipe(pipe_class_dummy);
5084 %}
5085
5086
5087 //----------MemBar Instructions-----------------------------------------------
5088
5089 // Memory barrier flavors
5090
// Acquire barrier; matches both MemBarAcquire and LoadFence.
// The declared size is 0, i.e. z_acquire() is expected to emit no instruction bytes.
5091 instruct membar_acquire() %{
5092 match(MemBarAcquire);
5093 match(LoadFence);
5094 ins_cost(4*MEMORY_REF_COST);
5095 size(0);
5096 format %{ "MEMBAR-acquire" %}
5097 ins_encode %{ __ z_acquire(); %}
5098 ins_pipe(pipe_class_dummy);
5099 %}
5100
6755 __ z_lghi(Z_R0_scratch, divisor);
6756 __ z_lgr($dst$$Register->successor(), $src1$$Register);
6757 __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
6758 } else {
6759 __ clear_reg($dst$$Register, true, false);
6760 }
6761 %}
6762 ins_pipe(pipe_class_dummy);
6763 %}
6764
6765 // SHIFT
6766
6767 // Shift left logical
6768
6769 // Register Shift Left variable
// Int shift left by a variable amount.
// The shift count is copied to Z_R1_scratch and masked to 0..31 there, so the
// nbits operand itself stays unmodified. Size: LGR (4) + NILL (4) + SLLG (6) bytes.
6770 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
6771 match(Set dst (LShiftI src nbits));
6772 effect(KILL cr); // R1 is killed, too.
6773 ins_cost(3 * DEFAULT_COST);
6774 size(14);
6775 format %{ "SLL $dst,$src,[$nbits] & 31\t # use RISC-like SLLG also for int" %}
6776 ins_encode %{
6777 __ z_lgr(Z_R1_scratch, $nbits$$Register);
6778 __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1); // Java semantics: only the low 5 bits of the count are used.
6779 __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
6780 %}
6781 ins_pipe(pipe_class_dummy);
6782 %}
6783
6784 // Register Shift Left Immediate
6785 // Constant shift count is masked in ideal graph already.
// Int shift left by a constant amount using a single 6-byte SLLG.
// The assert documents that the constant must already be in range 0..31;
// the mask applied in the emit call is therefore only a product-build safety net.
6786 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
6787 match(Set dst (LShiftI src nbits));
6788 size(6);
6789 format %{ "SLL $dst,$src,$nbits\t # use RISC-like SLLG also for int" %}
6790 ins_encode %{
6791 int Nbit = $nbits$$constant;
6792 assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6793 __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6794 %}
6795 ins_pipe(pipe_class_dummy);
6796 %}
6797
6798 // Register Shift Left Immediate by 1bit
// Int shift left by exactly one bit, implemented as src + src via a 4-byte LA
// with src as both base and index register. Only used when PreferLAoverADD is
// set; the low cost makes it win over sllI_reg_imm in that case.
6799 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
6800 match(Set dst (LShiftI src nbits));
6801 predicate(PreferLAoverADD);
6802 ins_cost(DEFAULT_COST_LOW);
6803 size(4);
6804 format %{ "LA $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6805 ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6806 ins_pipe(pipe_class_dummy);
6807 %}
6808
6809 // Register Shift Left Long
7085 %}
7086 ins_pipe(pipe_class_dummy);
7087 %}
7088
// Overflow check for (op1 - constant) on longs. Only the condition code is the
// result: the subtraction is carried out in Z_R0_scratch so that op1 is not
// modified, with the constant materialized in Z_R1_scratch.
7089 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
7090 match(Set cr (OverflowSubL op1 op2));
7091 effect(DEF cr, USE op1, USE op2);
7092 // TODO: s390 port size(VARIABLE_SIZE);
7093 format %{ "SGR $op1,$op2\t # overflow check long" %}
7094 ins_encode %{
7095 __ load_const_optimized(Z_R1_scratch, $op2$$constant);
7096 __ z_lgr(Z_R0_scratch, $op1$$Register);
7097 __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7098 %}
7099 ins_pipe(pipe_class_dummy);
7100 %}
7101
// Overflow check for int negation, matched as (0 - op2). Z_R0_scratch is cleared
// and op2 subtracted from it with SR; only the resulting condition code is used.
// NOTE(review): the two boolean arguments of clear_reg presumably select
// 32-bit width and "do not set CC" - confirm.
7102 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
7103 match(Set cr (OverflowSubI zero op2));
7104 effect(DEF cr, USE op2);
7105 format %{ "NEG $op2\t # overflow check int" %}
7106 ins_encode %{
7107 __ clear_reg(Z_R0_scratch, false, false);
7108 __ z_sr(Z_R0_scratch, $op2$$Register);
7109 %}
7110 ins_pipe(pipe_class_dummy);
7111 %}
7112
// Overflow check for long negation, matched as (0 - op2). Z_R0_scratch is cleared
// and op2 subtracted from it with SGR; only the resulting condition code is used.
7113 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
7114 match(Set cr (OverflowSubL zero op2));
7115 effect(DEF cr, USE op2);
7116 format %{ "NEGG $op2\t # overflow check long" %}
7117 ins_encode %{
7118 __ clear_reg(Z_R0_scratch, true, false);
7119 __ z_sgr(Z_R0_scratch, $op2$$Register);
7120 %}
7121 ins_pipe(pipe_class_dummy);
7122 %}
7123
7124 // No intrinsics for multiplication, since there is no easy way
7125 // to check for overflow.
7126
7127
7128 //----------Floating Point Arithmetic Instructions-----------------------------
7129
7130 // ADD
7131
7132 // Add float single precision
7133 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7134 match(Set dst (AddF dst src));
7135 effect(KILL cr);
7136 ins_cost(ALU_REG_COST);
9151 // Direct Branch.
// Unconditional branch (Goto) in its long form: a 6-byte BRUL.
9152 instruct branchFar(label labl) %{
9153 match(Goto);
9154 effect(USE labl);
9155 ins_cost(BRANCH_COST);
9156 size(6);
9157 format %{ "BRUL $labl" %}
9158 ins_encode(z_enc_brul(labl));
9159 ins_pipe(pipe_class_dummy);
9160 // This is not a short variant of a branch, but the long variant.
9161 ins_short_branch(0);
9162 %}
9163
9164 // Conditional Near Branch
// Conditional branch in its short (4-byte) form. It shares its match rule with
// branchConFar and replaces that rule when the target is close enough.
9165 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
9166 // Same match rule as `branchConFar'.
9167 match(If cmp cr);
9168 effect(USE lbl);
9169 ins_cost(BRANCH_COST);
9170 size(4);
9171 format %{ "branch_con_short,$cmp $lbl" %}
9172 ins_encode(z_enc_branch_con_short(cmp, lbl));
9173 ins_pipe(pipe_class_dummy);
9174 // If set to 1 this indicates that the current instruction is a
9175 // short variant of a long branch. This avoids using this
9176 // instruction in first-pass matching. It will then only be used in
9177 // the `Shorten_branches' pass.
9178 ins_short_branch(1);
9179 %}
9180
9181 // This is for cases when the z/Architecture conditional branch instruction
9182 // does not reach far enough. So we emit a far branch here, which is
9183 // more expensive.
9184 //
9185 // Conditional Far Branch
// Conditional branch in its long (6-byte) form; this is the variant chosen in
// first-pass matching, since branchCon is marked as short branch.
9186 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
9187 // Same match rule as `branchCon'.
9188 match(If cmp cr);
9189 effect(USE cr, USE lbl);
9190 // Make more expensive to prefer compare_and_branch over separate instructions.
9191 ins_cost(2 * BRANCH_COST);
9192 size(6);
9193 format %{ "branch_con_far,$cmp $lbl" %}
9194 ins_encode(z_enc_branch_con_far(cmp, lbl));
9195 ins_pipe(pipe_class_dummy);
9196 // This is not a short variant of a branch, but the long variant.
9197 ins_short_branch(0);
9198 %}
9199
// Conditional back-branch closing a counted loop, in its short (4-byte) form.
// Uses the same encoding as branchCon but matches CountedLoopEnd.
9200 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
9201 match(CountedLoopEnd cmp cr);
9202 effect(USE labl);
9203 ins_cost(BRANCH_COST);
9204 size(4);
9205 format %{ "branch_con_short,$cmp $labl\t # counted loop end" %}
9206 ins_encode(z_enc_branch_con_short(cmp, labl));
9207 ins_pipe(pipe_class_dummy);
9208 // If set to 1 this indicates that the current instruction is a
9209 // short variant of a long branch. This avoids using this
9210 // instruction in first-pass matching. It will then only be used in
9211 // the `Shorten_branches' pass.
9212 ins_short_branch(1);
9213 %}
// Call of a runtime leaf routine that does not use floating point (CallLeafNoFP).
// Emitted through the common java-to-runtime call encoding; one constant is
// reserved for the rule (ins_num_consts(1)).
9742 instruct CallLeafNoFPDirect(method meth) %{
9743 match(CallLeafNoFP);
9744 effect(USE meth);
9745 ins_cost(CALL_COST);
9746 // TODO: s390 port size(VARIABLE_SIZE);
9747 ins_num_consts(1);
9748 format %{ "CALL,runtime leaf nofp $meth" %}
9749 ins_encode( z_enc_java_to_runtime_call(meth) );
9750 ins_pipe(pipe_class_dummy);
9751 ins_alignment(2);
9752 %}
9753
9754 // Tail Call; Jump from runtime stub to Java code.
9755 // Also known as an 'interprocedural jump'.
9756 // Target of jump will eventually return to caller.
9757 // TailJump below removes the return address.
// Tail call: a plain 2-byte register branch to jump_target. method_oop is not
// referenced by the encoding; it appears as operand so that it is kept in the
// inline cache register at the jump.
9758 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
9759 match(TailCall jump_target method_oop);
9760 ins_cost(CALL_COST);
9761 size(2);
9762 format %{ "Jmp $jump_target\t # $method_oop holds method oop" %}
9763 ins_encode %{ __ z_br($jump_target$$Register); %}
9764 ins_pipe(pipe_class_dummy);
9765 %}
9766
9767 // Return Instruction
// Method return: a 2-byte branch to the address held in Z_R14 (the link register).
9768 instruct Ret() %{
9769 match(Return);
9770 size(2);
9771 format %{ "BR(Z_R14) // branch to link register" %}
9772 ins_encode %{ __ z_br(Z_R14); %}
9773 ins_pipe(pipe_class_dummy);
9774 %}
9775
9776 // Tail Jump; remove the return address; jump to target.
9777 // TailCall above leaves the return address around.
9778 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9779 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9780 // "restore" before this instruction (in Epilogue), we need to materialize it
9781 // in %i0.
9782 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{
// Loads an 8-byte vector into a general purpose (long) register with LG.
// Restricted by the predicate to LoadVector nodes of exactly 8 bytes.
10750 instruct loadV8(iRegL dst, memory mem) %{
10751 match(Set dst (LoadVector mem));
10752 predicate(n->as_LoadVector()->memory_size() == 8);
10753 ins_cost(MEMORY_REF_COST);
10754 // TODO: s390 port size(VARIABLE_SIZE);
10755 format %{ "LG $dst,$mem\t # L(packed8B)" %}
10756 opcode(LG_ZOPC, LG_ZOPC);
10757 ins_encode(z_form_rt_mem_opt(dst, mem));
10758 ins_pipe(pipe_class_dummy);
10759 %}
10760
10761 //----------POPULATION COUNT RULES--------------------------------------------
10762
10763 // Byte reverse
10764
// Reverses the byte order of an int with a single 4-byte LRVR.
// Only available when UseByteReverseInstruction is set.
10765 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
10766 match(Set dst (ReverseBytesI src));
10767 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10768 ins_cost(DEFAULT_COST);
10769 size(4);
10770 format %{ "LRVR $dst,$src\t # byte reverse int" %}
10771 opcode(LRVR_ZOPC);
10772 ins_encode(z_rreform(dst, src));
10773 ins_pipe(pipe_class_dummy);
10774 %}
10775
// Reverses the byte order of a long with a single LRVGR.
// Only available when UseByteReverseInstruction is set.
10776 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
10777 match(Set dst (ReverseBytesL src));
10778 predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported
10779 ins_cost(DEFAULT_COST);
10780 // TODO: s390 port size(FIXED_SIZE);
10781 format %{ "LRVGR $dst,$src\t # byte reverse long" %}
10782 opcode(LRVGR_ZOPC);
10783 ins_encode(z_rreform(dst, src));
10784 ins_pipe(pipe_class_dummy);
10785 %}
10786
10787 // Leading zeroes
10788
10789 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10790 // returns the bit position of the leftmost 1 in the 64bit source register.
10791 // As the bits are numbered from left to right (0..63), the returned
10792 // position index is equivalent to the number of leading zeroes.
10793 // If no 1-bit is found (i.e. the register contains zero), the instruction
10794 // returns position 64. That's exactly what we need.
10795
// Count leading zeros of an int.
// The argument is shifted into the upper word and a "stop bit" is inserted right
// below it, so FLOGR returns at most 32. dst/tmp form an even/odd register pair;
// tmp is declared as killed. Size: SLLG (6) + IILH (4) + FLOGR (4) bytes.
10796 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10797 match(Set dst (CountLeadingZerosI src));
10798 effect(KILL tmp, KILL cr);
10799 ins_cost(3 * DEFAULT_COST);
10800 size(14);
10801 format %{ "SLLG $dst,$src,32\t # no need to always count 32 zeroes first\n\t"
10802 "IILH $dst,0x8000 \t # insert \"stop bit\" to force result 32 for zero src.\n\t"
10803 "FLOGR $dst,$dst"
10804 %}
10805 ins_encode %{
10806 // Performance experiments indicate that "FLOGR" is using some kind of
10807 // iteration to find the leftmost "1" bit.
10808 //
10809 // The prior implementation zero-extended the 32-bit argument to 64 bit,
10810 // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10811 // We could gain measurable speedup in micro benchmark:
10812 //
10813 // leading trailing
10814 // z10: int 2.04 1.68
10815 // long 1.00 1.02
10816 // z196: int 0.99 1.23
10817 // long 1.00 1.11
10818 //
10819 // By shifting the argument into the high-word instead of zero-extending it.
10820 // The add'l branch on condition (taken for a zero argument, very infrequent,
10821 // good prediction) is well compensated for by the savings.
10822 //
10823 // We leave the previous implementation in for some time in the future when
10824 // the "FLOGR" instruction may become less iterative.
10825
10826 // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10827 __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10828 __ z_iilh($dst$$Register, 0x8000); // Insert "stop bit" to force result 32 for zero src.
10829 __ z_flogr($dst$$Register, $dst$$Register);
10830 %}
10831 ins_pipe(pipe_class_dummy);
10832 %}
10833
// Count leading zeros of a long: a single 4-byte FLOGR does the whole job.
// dst/tmp form an even/odd register pair; tmp is declared as killed.
10834 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
10835 match(Set dst (CountLeadingZerosL src));
10836 effect(KILL tmp, KILL cr);
10837 ins_cost(DEFAULT_COST);
10838 size(4);
10839 format %{ "FLOGR $dst,$src \t # count leading zeros (long)\n\t" %}
10840 ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10841 ins_pipe(pipe_class_dummy);
10842 %}
10843
10844 // trailing zeroes
10845
10846 // We transform the trailing zeroes problem to a leading zeroes problem
10847 // such that we can use the FLOGR instruction to our advantage.
10848
10849 // With
10850 // tmp1 = src - 1
10851 // we flip all trailing zeroes to ones and the rightmost one to zero.
10852 // All other bits remain unchanged.
10853 // With the complement
10854 // tmp2 = ~src
10855 // we get all ones in the trailing zeroes positions. Thus,
10856 // tmp3 = tmp1 & tmp2
10857 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10858 // Now we can apply FLOGR and get 64-(trailing zeroes).
// Count trailing zeros of an int value.
// Matches the ideal node CountTrailingZerosI.
//   dst - result, even register of an even/odd pair (TEMP_DEF: written before src is dead).
//   src - 32-bit input value, must stay intact (different register than dst and tmp).
//   tmp - odd register of the pair, used as scratch and clobbered by FLOGR.
//   cr  - condition code, killed (LCGFR/NR/AHI set it).
// Result is 32 for a zero argument.
10859 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10860 match(Set dst (CountTrailingZerosI src));
10861 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10862 ins_cost(8 * DEFAULT_COST);
10863 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
// NOTE(review): the format text below still describes the original instruction
// sequence, not the "Version 2" sequence emitted by ins_encode.
10864 format %{ "LLGFR $dst,$src \t # clear upper 32 bits (we are dealing with int)\n\t"
10865 "LCGFR $tmp,$src \t # load 2's complement (32->64 bit)\n\t"
10866 "AGHI $dst,-1 \t # tmp1 = src-1\n\t"
10867 "AGHI $tmp,-1 \t # tmp2 = -src-1 = ~src\n\t"
10868 "NGR $dst,$tmp \t # tmp3 = tmp1&tmp2\n\t"
10869 "FLOGR $dst,$dst \t # count trailing zeros (int)\n\t"
10870 "AHI $dst,-64 \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10871 "LCR $dst,$dst \t # res = -tmp4"
10872 %}
10873 ins_encode %{
10874 Register Rdst = $dst$$Register;
10875 Register Rsrc = $src$$Register;
10876 // Rtmp only needed for zero-argument shortcut. With kill effect in
10877 // match rule Rsrc = roddReg would be possible, saving one register.
10878 Register Rtmp = $tmp$$Register;
10879
10880 assert_different_registers(Rdst, Rsrc, Rtmp);
10881
10882 // Algorithm:
10883 // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10884 // All other bits in the result are zero.
10885 // - Find the "leftmost one" bit position in the single-bit result from previous step.
10886 // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10887
10888 // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10889 Label done;
10890 __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10891 __ z_lcgfr(Rtmp, Rsrc); // Rtmp = -src, sets CC (zero iff src == 0).
10892 __ z_bre(done); // Shortcut for argument = 0. Taken very infrequently, good prediction, no BHT entry.
10893
10894 __ z_nr(Rtmp, Rsrc); // (src) & (-src) leaves nothing but least significant bit.
10895 __ z_ahi(Rtmp, -1); // Subtract one to fill all trailing zero positions with ones.
10896 // Use 32bit op to prevent borrow propagation (case Rtmp = 0x80000000)
10897 // into upper half of reg. Not relevant with sllg below.
10898 __ z_sllg(Rdst, Rtmp, 32); // Shift interesting contents to upper half of register.
10899 __ z_bre(done); // Shortcut for argument = 1, result will be 0.
10900 // Depends on CC set by ahi above.
10901 // Taken very infrequently, good prediction, no BHT entry.
10902 // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10903 // after SLLG Rdst == 0(64bit)).
10904 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10905 __ add2reg(Rdst, -32); // 32-pos(leftmost1) is #trailing zeros
10906 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10907 __ bind(done);
10908 %}
10909 ins_pipe(pipe_class_dummy);
10910 %}
10911
// Count trailing zeros of a long value.
// Matches the ideal node CountTrailingZerosL.
//   dst - result, even register (TEMP_DEF: must not share a register with src).
//   src - 64-bit input value, left unchanged.
//   tmp - odd register, killed by FLOGR (see comment at the FLOGR below).
//   cr  - condition code, killed.
// NOTE(review): the format text annotates the AGHI as "tmp1 = src-1" and shows
// LCR as the last instruction; the encoder below decrements the isolated bit
// (src & -src) and ends with LCGFR.
10912 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10913 match(Set dst (CountTrailingZerosL src));
10914 effect(TEMP_DEF dst, KILL tmp, KILL cr);
10915 ins_cost(8 * DEFAULT_COST);
10916 // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off.
10917 format %{ "LCGR $dst,$src \t # preserve src\n\t"
10918 "NGR $dst,$src \t #\n\t"
10919 "AGHI $dst,-1 \t # tmp1 = src-1\n\t"
10920 "FLOGR $dst,$dst \t # count trailing zeros (long), kill $tmp\n\t"
10921 "AHI $dst,-64 \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10922 "LCR $dst,$dst \t #"
10923 %}
10924 ins_encode %{
10925 Register Rdst = $dst$$Register;
10926 Register Rsrc = $src$$Register;
10927 assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10928
10929 // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
// Step 1+2: Rdst = src & (-src), i.e. only the rightmost set bit of src survives.
10930 __ z_lcgr(Rdst, Rsrc);
10931 __ z_ngr(Rdst, Rsrc);
// Step 3: subtracting one turns the single bit into a mask of ones covering
// exactly the trailing-zero positions of src.
10932 __ add2reg(Rdst, -1);
// Step 4: leading zeros of that mask = 64 - (trailing zeros of src).
10933 __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
// Step 5: subtract 64 and negate to obtain the trailing zero count.
10934 __ add2reg(Rdst, -64);
10935 __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10936 %}
10937 ins_pipe(pipe_class_dummy);
10938 %}
10939
10940
10941 // bit count
10942
// Population count (number of one bits) of an int value.
// Matches the ideal node PopCountI; only selected when UsePopCountInstruction
// is set and the CPU provides the POPCNT instruction.
//   dst - result (TEMP_DEF: written before the rule is done with its inputs).
//   src - 32-bit input value.
//   tmp - scratch register, must differ from dst.
//   cr  - condition code, killed.
// The per-byte counts produced by POPCNT for the low-order word (bytes 4..7)
// are summed up into byte 7, which is then zero-extended into dst.
10943 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
10944 match(Set dst (PopCountI src));
10945 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10946 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10947 ins_cost(DEFAULT_COST);
10948 size(24);
10949 format %{ "POPCNT $dst,$src\t # pop count int" %}
10950 ins_encode %{
10951 Register Rdst = $dst$$Register;
10952 Register Rsrc = $src$$Register;
10953 Register Rtmp = $tmp$$Register;
10954
10955 // Prefer compile-time assertion over run-time SIGILL.
10956 assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10957 assert_different_registers(Rdst, Rtmp);
10958
10959 // Version 2: shows 10%(z196) improvement over original.
10960 __ z_popcnt(Rdst, Rsrc);
10961 __ z_srlg(Rtmp, Rdst, 16); // calc byte4+byte6 and byte5+byte7
10962 __ z_alr(Rdst, Rtmp); // into byte6 and byte7
10963 __ z_srlg(Rtmp, Rdst, 8); // calc (byte4+byte6) + (byte5+byte7)
10964 __ z_alr(Rdst, Rtmp); // into byte7
10965 __ z_llgcr(Rdst, Rdst); // zero-extend sum
10966 %}
10967 ins_pipe(pipe_class_dummy);
10968 %}
10969
// Population count (number of one bits) of a long value.
// Matches the ideal node PopCountL; only selected when UsePopCountInstruction
// is set and the CPU provides the POPCNT instruction.
//   dst - int result (TEMP_DEF: written before the rule is done with its inputs).
//   src - 64-bit input value.
//   tmp - scratch register, must differ from dst.
//   cr  - condition code, killed.
10970 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
10971 match(Set dst (PopCountL src));
10972 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10973 predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10974 ins_cost(DEFAULT_COST);
10975 // TODO: s390 port size(FIXED_SIZE);
10976 format %{ "POPCNT $dst,$src\t # pop count long" %}
10977 ins_encode %{
10978 Register Rdst = $dst$$Register;
10979 Register Rsrc = $src$$Register;
10980 Register Rtmp = $tmp$$Register;
10981
10982 // Prefer compile-time assertion over run-time SIGILL.
10983 assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
10984 assert_different_registers(Rdst, Rtmp);
10985
10986 // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
10987 __ z_popcnt(Rdst, Rsrc); // per-byte bit counts
10988 __ z_ahhlr(Rdst, Rdst, Rdst); // high word += low word: bytes 0..3 now hold pairwise sums
10989 __ z_sllg(Rtmp, Rdst, 16);
10990 __ z_algr(Rdst, Rtmp); // fold bytes 2,3 into bytes 0,1
10991 __ z_sllg(Rtmp, Rdst, 8);
10992 __ z_algr(Rdst, Rtmp); // fold byte 1 into byte 0: byte 0 holds the total
10993 __ z_srlg(Rdst, Rdst, 56); // move total from byte 0 down to the low-order byte
10994 %}
10995 ins_pipe(pipe_class_dummy);
10996 %}
|