1 /*
   2  * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
  26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP
  27 
  28 // C2_MacroAssembler contains high-level macros for C2
  29 
  30 public:
       // Returns an Assembler::AvxVectorLen value for a vector size given in
       // bytes (vlen_in_bytes).
       // NOTE(review): the size-to-encoding mapping is defined in the
       // implementation, which is not visible from this header.
  31   Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
  32 
  33   // special instructions for EVEX
       // NOTE(review): judging by the names only, setvectmask() sets up a vector
       // mask from src (using dst) and restorevectmask() restores the previous
       // state -- confirm against the definitions.
  34   void setvectmask(Register dst, Register src);
  35   void restorevectmask();
  36 
  37   // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  38   // See full description in macroAssembler_x86.cpp.
       //
       // fast_lock() receives the obj and box registers, the additional registers
       // tmp, scr, cx1 and cx2, three counter objects (counters, rtm_counters,
       // stack_rtm_counters), a Metadata* method_data, and the use_rtm /
       // profile_rtm flags.
       // NOTE(review): the RTM-related arguments are presumably only consulted
       // when use_rtm is true -- confirm in the implementation.
  39   void fast_lock(Register obj, Register box, Register tmp,
  40                  Register scr, Register cx1, Register cx2,
  41                  BiasedLockingCounters* counters,
  42                  RTMLockingCounters* rtm_counters,
  43                  RTMLockingCounters* stack_rtm_counters,
  44                  Metadata* method_data,
  45                  bool use_rtm, bool profile_rtm);
       // fast_unlock() receives obj, box, a single tmp register and use_rtm.
  46   void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
  47 
  48 #if INCLUDE_RTM_OPT
       // The declarations up to the matching #endif exist only when
       // INCLUDE_RTM_OPT evaluates to non-zero. Most of them take
       // RTMLockingCounters*, an abort-status register or retry-count registers.
       // Label& parameters are passed by reference.
       // NOTE(review): presumably the Label& arguments are caller-owned branch
       // targets and these helpers are used by fast_lock()/fast_unlock() when
       // use_rtm is set -- confirm in the implementation.
  49   void rtm_counters_update(Register abort_status, Register rtm_counters);
  50   void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
  51   void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
  52                                    RTMLockingCounters* rtm_counters,
  53                                    Metadata* method_data);
  54   void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
  55                      RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
  56   void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
  57   void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
       // Stack-lock variant: takes two labels, DONE_LABEL and IsInflated.
  58   void rtm_stack_locking(Register obj, Register tmp, Register scr,
  59                          Register retry_on_abort_count,
  60                          RTMLockingCounters* stack_rtm_counters,
  61                          Metadata* method_data, bool profile_rtm,
  62                          Label& DONE_LABEL, Label& IsInflated);
       // Inflated-lock variant: takes separate retry-on-busy and retry-on-abort
       // count registers and a single DONE_LABEL.
  63   void rtm_inflated_locking(Register obj, Register box, Register tmp,
  64                             Register scr, Register retry_on_busy_count,
  65                             Register retry_on_abort_count,
  66                             RTMLockingCounters* rtm_counters,
  67                             Metadata* method_data, bool profile_rtm,
  68                             Label& DONE_LABEL);
  69 #endif
  70 
  71   // Generic instructions support for use in .ad files C2 code generation
       //
       // Most helpers below take an `opcode` argument; which operation each
       // opcode value selects is decided in the implementation (not visible here).

       // vabsnegd / vabsnegf: each has a form without and a form with an explicit
       // vector_len; both forms take a scratch Register (scr).
       // NOTE(review): presumably abs/negate on double (d) and float (f) data.
  72   void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
  73   void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
  74   void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
  75   void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
  76 
       // pminmax: two-operand form; tmp is optional and defaults to xnoreg.
       // vpminmax: three-operand form with an explicit vlen_enc.
  77   void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
  78                XMMRegister tmp = xnoreg);
  79   void vpminmax(int opcode, BasicType elem_bt,
  80                 XMMRegister dst, XMMRegister src1, XMMRegister src2,
  81                 int vlen_enc);
  82 
       // vminmax_fp and evminmax_fp have parallel signatures; they differ only in
       // the fourth register argument: an XMMRegister tmp vs. a KRegister ktmp.
  83   void vminmax_fp(int opcode, BasicType elem_bt,
  84                   XMMRegister dst, XMMRegister a, XMMRegister b,
  85                   XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
  86                   int vlen_enc);
  87   void evminmax_fp(int opcode, BasicType elem_bt,
  88                    XMMRegister dst, XMMRegister a, XMMRegister b,
  89                    KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
  90                    int vlen_enc);
  91 
       // NOTE(review): `sign` presumably selects sign- vs. zero-extension and the
       // suffix names the element widths (bw, bd, wd) -- confirm.
       // vextendbw also has an overload without vector_len.
  92   void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  93   void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
  94   void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  95   void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  96 
       // Shifts. vshiftd/vshiftw/vshiftq each come in a (dst, shift) form and a
       // (dst, src, shift, vlen_enc) form. The varshift* family only has the
       // latter shape; varshiftq adds an optional vtmp (default xnoreg), while
       // varshiftbw and evarshiftb require both a vtmp and a scratch Register.
  97   void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
  98   void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  99   void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
 100   void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 101   void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
 102   void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 103   void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 104   void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 105   void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
 106   void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
 107   void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
 108 
       // Insert / gather / scatter, all parameterized by element type `typ`.
       // vgather takes its mask in an XMMRegister; evgather and evscatter take it
       // in a KRegister. evscatter has no dst: it takes base, idx, mask and src.
 109   void insert(BasicType typ, XMMRegister dst, Register val, int idx);
 110   void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
 111   void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
 112   void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
 113   void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
 114 
 115   // extract
       // extract() and the first get_elem() overload write to a general-purpose
       // Register; the second get_elem() overload writes to an XMMRegister and
       // accepts optional temporaries (tmp defaults to noreg, vtmp to xnoreg).
       // get_lane() returns an XMMRegister.
       // NOTE(review): presumably the register that holds the lane containing
       // element `elemindex` -- confirm in the implementation.
 116   void extract(BasicType typ, Register dst, XMMRegister src, int idx);
 117   XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
 118   void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
 119   void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg);
 120 
 121   // blend
       // evpcmp() takes src1 and an AddressLiteral operand (adr) plus two
       // KRegisters (kdmask, ksmask); its scratch register defaults to rscratch1.
       // evpblend() takes a KRegister mask, two XMM sources and a `merge` flag.
 122   void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
 123   void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
 124 
       // Both helpers below take the vector size in bytes (vlen_in_bytes).
       // load_iota_indices() additionally needs a scratch Register.
 125   void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
 126   void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
 127 
 128   // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
       // These are the public entry points. Each takes the operation as `opcode`
       // and a `vlen` argument.
       // NOTE(review): vlen is presumably the element count of the vector, given
       // the per-width private helpers (reduce2I, reduce4I, ...) -- confirm.
 129 
 130   // dst = src1  reduce(op, src2) using vtmp as temps
       // src1 and dst are general-purpose registers, src2 is the vector operand.
 131   void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
       // reduceL is only declared when _LP64 is defined.
 132 #ifdef _LP64
 133   void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 134 #endif // _LP64
 135 
 136   // dst = reduce(op, src2) using vtmp as temps
       // For reduce_fp both dst and src are XMM registers; vtmp2 is optional and
       // defaults to xnoreg.
 137   void reduce_fp(int opcode, int vlen,
 138                  XMMRegister dst, XMMRegister src,
 139                  XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
       // reduceB, mulreduceB and reduceS share the (dst, src1, src2, vtmp1, vtmp2)
       // shape of reduceI.
 140   void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 141   void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 142   void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
       // Float/double min-max reductions: same parameter list for both; xmm_1 is
       // optional and defaults to xnoreg.
       // NOTE(review): is_dst_valid presumably says whether dst already holds a
       // value that must take part in the reduction -- confirm.
 143   void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
 144                          XMMRegister dst, XMMRegister src,
 145                          XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
 146   void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
 147                           XMMRegister dst, XMMRegister src,
 148                           XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
 149  private:
       // Private reduction helpers. Unlike the public entry points, the
       // width-specific helpers below (reduce2I, reduce4I, ...) take no vlen
       // argument; the number is part of the function name.
       // NOTE(review): presumably the public reduce* functions dispatch on vlen
       // to these helpers -- confirm in the implementation.
 150   void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
 151   void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
 152 
 153   // Int Reduction
 154   void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 155   void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 156   void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 157   void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 158 
 159   // Byte Reduction
       // The mulreduce*B helpers mirror the reduce*B signatures.
 160   void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 161   void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 162   void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 163   void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 164   void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 165   void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 166   void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 167   void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 168 
 169   // Short Reduction
 170   void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 171   void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 172   void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 173   void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 174 
 175   // Long Reduction
       // Only declared when _LP64 is defined, matching the public reduceL.
 176 #ifdef _LP64
 177   void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 178   void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 179   void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
 180 #endif // _LP64
 181 
 182   // Float Reduction
       // reduce2F and reduce4F take a single vtmp; reduce8F and reduce16F take two.
 183   void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
 184   void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
 185   void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
 186   void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
 187 
 188   // Double Reduction
       // reduce2D takes a single vtmp; reduce4D and reduce8D take two.
 189   void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
 190   void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
 191   void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
 192 
 193   // Base reduction instruction
       // The _128 form is two-operand (dst, src); the _256 form is three-operand
       // (dst, src1, src2). Both are parameterized by element type and opcode.
 194   void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
 195   void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
 196 
 197  public:
 198 
       // Single-character search: takes a string register (str1) with its count
       // (cnt1), the character register (ch), a result register, three XMM
       // temporaries and one general-purpose tmp.
       // NOTE(review): the value left in `result` when ch is not found is not
       // visible from this header -- see the implementation.
 199   void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
 200                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
 201 
 202   // IndexOf strings.
 203   // Small strings are loaded through stack if they cross page boundary.
       // NOTE(review): int_cnt2 is presumably the substring length when it is a
       // compile-time constant, and `ae` presumably selects the encoding
       // combination of the two strings -- confirm in the implementation.
 204   void string_indexof(Register str1, Register str2,
 205                       Register cnt1, Register cnt2,
 206                       int int_cnt2,  Register result,
 207                       XMMRegister vec, Register tmp,
 208                       int ae);
 209 
 210   // IndexOf for constant substrings with size >= 8 elements
 211   // which don't need to be loaded through stack.
       // Same parameter list as string_indexof().
 212   void string_indexofC8(Register str1, Register str2,
 213                       Register cnt1, Register cnt2,
 214                       int int_cnt2,  Register result,
 215                       XMMRegister vec, Register tmp,
 216                       int ae);
 217 
 218     // Smallest code: we don't need to load through stack,
 219     // check string tail.
 220 
 221   // helper function for string_compare
       // Takes two element registers (elem1, elem2), the two string registers,
       // three Address::ScaleFactor values (scale, scale1, scale2), an index
       // register and `ae`.
       // NOTE(review): which scale factor applies to which string for a given
       // `ae` value is decided in the implementation -- not visible here.
 222   void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
 223                           Address::ScaleFactor scale, Address::ScaleFactor scale1,
 224                           Address::ScaleFactor scale2, Register index, int ae);
 225   // Compare strings.
       // Takes both strings (str1, str2) with their counts (cnt1, cnt2), a result
       // register, one XMM temporary and `ae`.
       // NOTE(review): the encoding of the comparison outcome in `result` is not
       // visible from this header -- see the implementation.
 226   void string_compare(Register str1, Register str2,
 227                       Register cnt1, Register cnt2, Register result,
 228                       XMMRegister vec1, int ae);
 229 
 230   // Search for Non-ASCII character (Negative byte value) in a byte array,
 231   // return true if it has any and false otherwise.
       // The array and its length are passed in ary1 and len; the answer is
       // produced in the `result` register (the function itself returns void).
       // tmp1, vec1 and vec2 are temporaries.
 232   void has_negatives(Register ary1, Register len,
 233                      Register result, Register tmp1,
 234                      XMMRegister vec1, XMMRegister vec2);
 235 
 236   // Compare char[] or byte[] arrays.
       // Takes the two arrays (ary1, ary2), a limit register, a result register,
       // a chr register and two XMM temporaries.
       // NOTE(review): is_char presumably selects char[] vs. byte[] elements, and
       // is_array_equ presumably distinguishes whole-array comparison from a
       // comparison of raw regions of length `limit` -- confirm in the
       // implementation.
 237   void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
 238                      Register limit, Register result, Register chr,
 239                      XMMRegister vec1, XMMRegister vec2, bool is_char);
 240 
 241 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP