1 /*
   2  * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2019, SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "gc/shared/gcLocker.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "oops/compiledICHolder.hpp"
  36 #include "oops/klass.inline.hpp"
  37 #include "registerSaver_s390.hpp"
  38 #include "runtime/safepointMechanism.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/vframeArray.hpp"
  41 #include "utilities/align.hpp"
  42 #include "vmreg_s390.inline.hpp"
  43 #ifdef COMPILER1
  44 #include "c1/c1_Runtime1.hpp"
  45 #endif
  46 #ifdef COMPILER2
  47 #include "opto/ad.hpp"
  48 #include "opto/runtime.hpp"
  49 #endif
  50 
  51 #ifdef PRODUCT
  52 #define __ masm->
  53 #else
  54 #define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
  55 #endif
  56 
  57 #define BLOCK_COMMENT(str) __ block_comment(str)
  58 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  59 
  60 #define RegisterSaver_LiveIntReg(regname) \
  61   { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
  62 
  63 #define RegisterSaver_LiveFloatReg(regname) \
  64   { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
  65 
  66 // Registers which are not saved/restored, but which still get a frame slot.
  67 // Used to keep the frame size the same for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  68 #define RegisterSaver_ExcludedIntReg(regname) \
  69   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  70 
  71 // Registers which are not saved/restored, but which still get a frame slot.
  72 // Used to keep the frame size the same for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  73 #define RegisterSaver_ExcludedFloatReg(regname) \
  74   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
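     // For illustration: an entry like RegisterSaver_LiveIntReg(Z_R2) expands to
     //   { RegisterSaver::int_reg, Z_R2->encoding(), Z_R2->as_VMReg() }
     // so each table entry records the register kind, its hardware encoding, and its VMReg.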
  75 
  76 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  77   // Live registers which get spilled to the stack. Register positions
  78   // in this array correspond directly to the stack layout.
  79   //
  80   // live float registers:
  81   //
  82   RegisterSaver_LiveFloatReg(Z_F0 ),
  83   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  84   RegisterSaver_LiveFloatReg(Z_F2 ),
  85   RegisterSaver_LiveFloatReg(Z_F3 ),
  86   RegisterSaver_LiveFloatReg(Z_F4 ),
  87   RegisterSaver_LiveFloatReg(Z_F5 ),
  88   RegisterSaver_LiveFloatReg(Z_F6 ),
  89   RegisterSaver_LiveFloatReg(Z_F7 ),
  90   RegisterSaver_LiveFloatReg(Z_F8 ),
  91   RegisterSaver_LiveFloatReg(Z_F9 ),
  92   RegisterSaver_LiveFloatReg(Z_F10),
  93   RegisterSaver_LiveFloatReg(Z_F11),
  94   RegisterSaver_LiveFloatReg(Z_F12),
  95   RegisterSaver_LiveFloatReg(Z_F13),
  96   RegisterSaver_LiveFloatReg(Z_F14),
  97   RegisterSaver_LiveFloatReg(Z_F15),
  98   //
  99   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 100   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 101   RegisterSaver_LiveIntReg(Z_R2 ),
 102   RegisterSaver_LiveIntReg(Z_R3 ),
 103   RegisterSaver_LiveIntReg(Z_R4 ),
 104   RegisterSaver_LiveIntReg(Z_R5 ),
 105   RegisterSaver_LiveIntReg(Z_R6 ),
 106   RegisterSaver_LiveIntReg(Z_R7 ),
 107   RegisterSaver_LiveIntReg(Z_R8 ),
 108   RegisterSaver_LiveIntReg(Z_R9 ),
 109   RegisterSaver_LiveIntReg(Z_R10),
 110   RegisterSaver_LiveIntReg(Z_R11),
 111   RegisterSaver_LiveIntReg(Z_R12),
 112   RegisterSaver_LiveIntReg(Z_R13),
 113   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 114   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 115 };
 116 
 117 static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
 118   // Live registers which get spilled to the stack. Register positions
 119   // in this array correspond directly to the stack layout.
 120   //
 121   // live float registers: All excluded, but they still get a stack slot to keep the frame size the same.
 122   //
 123   RegisterSaver_ExcludedFloatReg(Z_F0 ),
 124   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 125   RegisterSaver_ExcludedFloatReg(Z_F2 ),
 126   RegisterSaver_ExcludedFloatReg(Z_F3 ),
 127   RegisterSaver_ExcludedFloatReg(Z_F4 ),
 128   RegisterSaver_ExcludedFloatReg(Z_F5 ),
 129   RegisterSaver_ExcludedFloatReg(Z_F6 ),
 130   RegisterSaver_ExcludedFloatReg(Z_F7 ),
 131   RegisterSaver_ExcludedFloatReg(Z_F8 ),
 132   RegisterSaver_ExcludedFloatReg(Z_F9 ),
 133   RegisterSaver_ExcludedFloatReg(Z_F10),
 134   RegisterSaver_ExcludedFloatReg(Z_F11),
 135   RegisterSaver_ExcludedFloatReg(Z_F12),
 136   RegisterSaver_ExcludedFloatReg(Z_F13),
 137   RegisterSaver_ExcludedFloatReg(Z_F14),
 138   RegisterSaver_ExcludedFloatReg(Z_F15),
 139   //
 140   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 141   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 142   RegisterSaver_LiveIntReg(Z_R2 ),
 143   RegisterSaver_LiveIntReg(Z_R3 ),
 144   RegisterSaver_LiveIntReg(Z_R4 ),
 145   RegisterSaver_LiveIntReg(Z_R5 ),
 146   RegisterSaver_LiveIntReg(Z_R6 ),
 147   RegisterSaver_LiveIntReg(Z_R7 ),
 148   RegisterSaver_LiveIntReg(Z_R8 ),
 149   RegisterSaver_LiveIntReg(Z_R9 ),
 150   RegisterSaver_LiveIntReg(Z_R10),
 151   RegisterSaver_LiveIntReg(Z_R11),
 152   RegisterSaver_LiveIntReg(Z_R12),
 153   RegisterSaver_LiveIntReg(Z_R13),
 154   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 155   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 156 };
 157 
 158 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
 159   // Live registers which get spilled to the stack. Register positions
 160   // in this array correspond directly to the stack layout.
 161   //
 162   // live float registers:
 163   //
 164   RegisterSaver_LiveFloatReg(Z_F0 ),
 165   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 166   RegisterSaver_LiveFloatReg(Z_F2 ),
 167   RegisterSaver_LiveFloatReg(Z_F3 ),
 168   RegisterSaver_LiveFloatReg(Z_F4 ),
 169   RegisterSaver_LiveFloatReg(Z_F5 ),
 170   RegisterSaver_LiveFloatReg(Z_F6 ),
 171   RegisterSaver_LiveFloatReg(Z_F7 ),
 172   RegisterSaver_LiveFloatReg(Z_F8 ),
 173   RegisterSaver_LiveFloatReg(Z_F9 ),
 174   RegisterSaver_LiveFloatReg(Z_F10),
 175   RegisterSaver_LiveFloatReg(Z_F11),
 176   RegisterSaver_LiveFloatReg(Z_F12),
 177   RegisterSaver_LiveFloatReg(Z_F13),
 178   RegisterSaver_LiveFloatReg(Z_F14),
 179   RegisterSaver_LiveFloatReg(Z_F15),
 180   //
 181   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 182   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 183   RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
 184   RegisterSaver_LiveIntReg(Z_R3 ),
 185   RegisterSaver_LiveIntReg(Z_R4 ),
 186   RegisterSaver_LiveIntReg(Z_R5 ),
 187   RegisterSaver_LiveIntReg(Z_R6 ),
 188   RegisterSaver_LiveIntReg(Z_R7 ),
 189   RegisterSaver_LiveIntReg(Z_R8 ),
 190   RegisterSaver_LiveIntReg(Z_R9 ),
 191   RegisterSaver_LiveIntReg(Z_R10),
 192   RegisterSaver_LiveIntReg(Z_R11),
 193   RegisterSaver_LiveIntReg(Z_R12),
 194   RegisterSaver_LiveIntReg(Z_R13),
 195   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 196   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 197 };
 198 
 199 // Live argument registers which get spilled to the stack.
 200 static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
 201   RegisterSaver_LiveFloatReg(Z_FARG1),
 202   RegisterSaver_LiveFloatReg(Z_FARG2),
 203   RegisterSaver_LiveFloatReg(Z_FARG3),
 204   RegisterSaver_LiveFloatReg(Z_FARG4),
 205   RegisterSaver_LiveIntReg(Z_ARG1),
 206   RegisterSaver_LiveIntReg(Z_ARG2),
 207   RegisterSaver_LiveIntReg(Z_ARG3),
 208   RegisterSaver_LiveIntReg(Z_ARG4),
 209   RegisterSaver_LiveIntReg(Z_ARG5)
 210 };
 211 
 212 static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
 213   // Live registers which get spilled to the stack. Register positions
 214   // in this array correspond directly to the stack layout.
 215   //
 216   // live float registers:
 217   //
 218   RegisterSaver_LiveFloatReg(Z_F0 ),
 219   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 220   RegisterSaver_LiveFloatReg(Z_F2 ),
 221   RegisterSaver_LiveFloatReg(Z_F3 ),
 222   RegisterSaver_LiveFloatReg(Z_F4 ),
 223   RegisterSaver_LiveFloatReg(Z_F5 ),
 224   RegisterSaver_LiveFloatReg(Z_F6 ),
 225   RegisterSaver_LiveFloatReg(Z_F7 ),
 226   // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
 227   // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
 228   // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
 229   // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
 230   // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
 231   // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
 232   // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
 233   // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
 234   //
 235   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 236   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 237   RegisterSaver_LiveIntReg(Z_R2 ),
 238   RegisterSaver_LiveIntReg(Z_R3 ),
 239   RegisterSaver_LiveIntReg(Z_R4 ),
 240   RegisterSaver_LiveIntReg(Z_R5 ),
 241   // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
 242   // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
 243   // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
 244   // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
 245   // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
 246   // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
 247   // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
 248   // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
 249   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 250   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 251 };
 252 
 253 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
 254   int reg_space = -1;
 255   switch (reg_set) {
 256     case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
 257     case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
 258     case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
 259     case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
 260     case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
 261     default: ShouldNotReachHere();
 262   }
 263   return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
 264 }
 265 
 266 
 267 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
 268   return live_reg_save_size(reg_set) + frame::z_abi_160_size;
 269 }
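     // Worked example (illustrative; assumes reg_size == 8 and frame::z_abi_160_size == 160):
     // RegisterSaver_LiveRegs has 27 entries (15 float + 12 int), so
     //   live_reg_save_size(all_registers)  == 27 * 8    == 216 bytes
     //   live_reg_frame_size(all_registers) == 216 + 160 == 376 bytes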
 270 
 271 
 272 // return_pc: Specify the register that should be stored as the return pc in the current frame.
 273 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
 274   // Record volatile registers as callee-save values in an OopMap so
 275   // their save locations will be propagated to the caller frame's
 276   // RegisterMap during StackFrameStream construction (needed for
 277   // deoptimization; see compiledVFrame::create_stack_value).
 278 
 279   // Calculate frame size.
 280   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 281   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 282   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 283 
 284   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 285   OopMap* map = new OopMap(frame_size_in_slots, 0);
 286 
 287   int regstosave_num = 0;
 288   const RegisterSaver::LiveRegType* live_regs = NULL;
 289 
 290   switch (reg_set) {
 291     case all_registers:
 292       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 293       live_regs      = RegisterSaver_LiveRegs;
 294       break;
 295     case all_registers_except_r2:
 296       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 297       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 298       break;
 299     case all_integer_registers:
 300       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 301       live_regs      = RegisterSaver_LiveIntRegs;
 302       break;
 303     case all_volatile_registers:
 304       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 305       live_regs      = RegisterSaver_LiveVolatileRegs;
 306       break;
 307     case arg_registers:
 308       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 309       live_regs      = RegisterSaver_LiveArgRegs;
 310       break;
 311     default: ShouldNotReachHere();
 312   }
 313 
 314   // Save return pc in old frame.
 315   __ save_return_pc(return_pc);
 316 
 317   // Push a new frame (includes stack linkage).
 318   // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
 319   // illegally used to pass parameters by RangeCheckStub::emit_code().
 320   __ push_frame(frame_size_in_bytes, return_pc);
 321   // We have to restore return_pc right away.
 322   // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
 323   // Nobody else knows which register we saved.
 324   __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);
 325 
 326   // Register save area in new frame starts above z_abi_160 area.
 327   int offset = register_save_offset;
 328 
 329   Register first = noreg;
 330   Register last  = noreg;
 331   int      first_offset = -1;
 332   bool     float_spilled = false;
 333 
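       // The loop below coalesces runs of consecutive integer registers (tracked via first/last/
       // first_offset) so that each run is spilled with a single STMG; float registers are stored
       // individually with STD.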
 334   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 335     int reg_num  = live_regs[i].reg_num;
 336     int reg_type = live_regs[i].reg_type;
 337 
 338     switch (reg_type) {
 339       case RegisterSaver::int_reg: {
 340         Register reg = as_Register(reg_num);
 341         if (last != reg->predecessor()) {
 342           if (first != noreg) {
 343             __ z_stmg(first, last, first_offset, Z_SP);
 344           }
 345           first = reg;
 346           first_offset = offset;
 347           DEBUG_ONLY(float_spilled = false);
 348         }
 349         last = reg;
 350         assert(last != Z_R0, "r0 would require special treatment");
 351         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 352         break;
 353       }
 354 
 355       case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
 356         continue; // Continue with next loop iteration.
 357 
 358       case RegisterSaver::float_reg: {
 359         FloatRegister freg = as_FloatRegister(reg_num);
 360         __ z_std(freg, offset, Z_SP);
 361         DEBUG_ONLY(float_spilled = true);
 362         break;
 363       }
 364 
 365       default:
 366         ShouldNotReachHere();
 367         break;
 368     }
 369 
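         // 'offset' is a byte offset from Z_SP; ">> 2" converts it to a 4-byte VMReg stack slot index.
         // Both 4-byte halves of the 8-byte save slot get an entry (vmreg and vmreg->next()).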
 370     // Second set_callee_saved is really a waste but we'll keep things as they were for now
 371     map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
 372     map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
 373   }
 374   assert(first != noreg, "Should spill at least one int reg.");
 375   __ z_stmg(first, last, first_offset, Z_SP);
 376 
 377   // And we're done.
 378   return map;
 379 }
 380 
 381 
 382 // Generate the OopMap (again, for regs that were saved before).
 383 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
 384   // Calculate frame size.
 385   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 386   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 387   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 388 
 389   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 390   OopMap* map = new OopMap(frame_size_in_slots, 0);
 391 
 392   int regstosave_num = 0;
 393   const RegisterSaver::LiveRegType* live_regs = NULL;
 394 
 395   switch (reg_set) {
 396     case all_registers:
 397       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 398       live_regs      = RegisterSaver_LiveRegs;
 399       break;
 400     case all_registers_except_r2:
 401       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 402       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 403       break;
 404     case all_integer_registers:
 405       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 406       live_regs      = RegisterSaver_LiveIntRegs;
 407       break;
 408     case all_volatile_registers:
 409       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 410       live_regs      = RegisterSaver_LiveVolatileRegs;
 411       break;
 412     case arg_registers:
 413       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 414       live_regs      = RegisterSaver_LiveArgRegs;
 415       break;
 416     default: ShouldNotReachHere();
 417   }
 418 
 419   // Register save area in new frame starts above z_abi_160 area.
 420   int offset = register_save_offset;
 421   for (int i = 0; i < regstosave_num; i++) {
 422     if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
 423       map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
 424       map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
 425     }
 426     offset += reg_size;
 427   }
 428   return map;
 429 }
 430 
 431 
 432 // Pop the current frame and restore all the registers that we saved.
 433 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
 434   int offset;
 435   const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
 436 
 437   Register first = noreg;
 438   Register last = noreg;
 439   int      first_offset = -1;
 440   bool     float_spilled = false;
 441 
 442   int regstosave_num = 0;
 443   const RegisterSaver::LiveRegType* live_regs = NULL;
 444 
 445   switch (reg_set) {
 446     case all_registers:
 447       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 448       live_regs      = RegisterSaver_LiveRegs;
 449       break;
 450     case all_registers_except_r2:
 451       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 452       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 453       break;
 454     case all_integer_registers:
 455       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 456       live_regs      = RegisterSaver_LiveIntRegs;
 457       break;
 458     case all_volatile_registers:
 459       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 460       live_regs      = RegisterSaver_LiveVolatileRegs;
 461       break;
 462     case arg_registers:
 463       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 464       live_regs      = RegisterSaver_LiveArgRegs;
 465       break;
 466     default: ShouldNotReachHere();
 467   }
 468 
 469   // Restore all registers (ints and floats).
 470 
 471   // Register save area in new frame starts above z_abi_160 area.
 472   offset = register_save_offset;
 473 
 474   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 475     int reg_num  = live_regs[i].reg_num;
 476     int reg_type = live_regs[i].reg_type;
 477 
 478     switch (reg_type) {
 479       case RegisterSaver::excluded_reg:
 480         continue; // Continue with next loop iteration.
 481 
 482       case RegisterSaver::int_reg: {
 483         Register reg = as_Register(reg_num);
 484         if (last != reg->predecessor()) {
 485           if (first != noreg) {
 486             __ z_lmg(first, last, first_offset, Z_SP);
 487           }
 488           first = reg;
 489           first_offset = offset;
 490           DEBUG_ONLY(float_spilled = false);
 491         }
 492         last = reg;
 493         assert(last != Z_R0, "r0 would require special treatment");
 494         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 495         break;
 496       }
 497 
 498       case RegisterSaver::float_reg: {
 499         FloatRegister freg = as_FloatRegister(reg_num);
 500         __ z_ld(freg, offset, Z_SP);
 501         DEBUG_ONLY(float_spilled = true);
 502         break;
 503       }
 504 
 505       default:
 506         ShouldNotReachHere();
 507     }
 508   }
 509   assert(first != noreg, "Should spill at least one int reg.");
 510   __ z_lmg(first, last, first_offset, Z_SP);
 511 
 512   // Pop the frame.
 513   __ pop_frame();
 514 
 515   // Restore the return pc.
 516   __ restore_return_pc();
 517 }
 518 
 519 
 520 // Restore the registers that might be holding a result.
 521 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 523   int offset;
 524   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 525                                    sizeof(RegisterSaver::LiveRegType);
 526   const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
 527 
 528   // Restore all result registers (ints and floats).
 529   offset = register_save_offset;
 530   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 531     int reg_num = RegisterSaver_LiveRegs[i].reg_num;
 532     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 533     switch (reg_type) {
 534       case RegisterSaver::excluded_reg:
 535         continue; // Continue with next loop iteration.
 536       case RegisterSaver::int_reg: {
 537         if (as_Register(reg_num) == Z_RET) { // int result_reg
 538           __ z_lg(as_Register(reg_num), offset, Z_SP);
 539         }
 540         break;
 541       }
 542       case RegisterSaver::float_reg: {
 543         if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
 544           __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
 545         }
 546         break;
 547       }
 548       default:
 549         ShouldNotReachHere();
 550     }
 551   }
 552 }
 553 
 554 size_t SharedRuntime::trampoline_size() {
 555   return MacroAssembler::load_const_size() + 2;
 556 }
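     // The trampoline (see generate_trampoline below) is a load_const of the destination into
     // Z_R1_scratch followed by a branch via BCR; BCR is a 2-byte instruction, hence the "+ 2".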
 557 
 558 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 559   // Think about using pc-relative branch.
 560   __ load_const(Z_R1_scratch, destination);
 561   __ z_br(Z_R1_scratch);
 562 }
 563 
 564 // ---------------------------------------------------------------------------
 565 void SharedRuntime::save_native_result(MacroAssembler * masm,
 566                                        BasicType ret_type,
 567                                        int frame_slots) {
 568   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
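       // frame_slots is an index in 4-byte stack slots; the matching restore_native_result below
       // must be called with the same frame_slots value to find the result again.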
 569 
 570   switch (ret_type) {
 571     case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
 572     case T_BYTE:
 573     case T_CHAR:
 574     case T_SHORT:
 575     case T_INT:
 576       __ reg2mem_opt(Z_RET, memaddr, false);
 577       break;
 578     case T_OBJECT:   // Save pointer types as long.
 579     case T_ARRAY:
 580     case T_ADDRESS:
 581     case T_VOID:
 582     case T_LONG:
 583       __ reg2mem_opt(Z_RET, memaddr);
 584       break;
 585     case T_FLOAT:
 586       __ freg2mem_opt(Z_FRET, memaddr, false);
 587       break;
 588     case T_DOUBLE:
 589       __ freg2mem_opt(Z_FRET, memaddr);
 590       break;
 591     default:
 592       ShouldNotReachHere();
 593       break;
 594   }
 595 }
 596 
 597 void SharedRuntime::restore_native_result(MacroAssembler *masm,
 598                                           BasicType       ret_type,
 599                                           int             frame_slots) {
 600   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 601 
 602   switch (ret_type) {
 603     case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
 604     case T_BYTE:
 605     case T_CHAR:
 606     case T_SHORT:
 607     case T_INT:
 608       __ mem2reg_opt(Z_RET, memaddr, false);
 609       break;
 610     case T_OBJECT:   // Restore pointer types as long.
 611     case T_ARRAY:
 612     case T_ADDRESS:
 613     case T_VOID:
 614     case T_LONG:
 615       __ mem2reg_opt(Z_RET, memaddr);
 616       break;
 617     case T_FLOAT:
 618       __ mem2freg_opt(Z_FRET, memaddr, false);
 619       break;
 620     case T_DOUBLE:
 621       __ mem2freg_opt(Z_FRET, memaddr);
 622       break;
 623     default:
 624       ShouldNotReachHere();
 625       break;
 626   }
 627 }
 628 
 629 // ---------------------------------------------------------------------------
 630 // Read the array of BasicTypes from a signature, and compute where the
 631 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 632 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 633 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 634 // as framesizes are fixed.
 635 // VMRegImpl::stack0 refers to the first slot 0(sp).
 636 // VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
 637 // up to RegisterImpl::number_of_registers are the 64-bit integer registers.
 638 
 639 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 640 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 641 // units regardless of build.
 642 
 643 // The Java calling convention is a "shifted" version of the C ABI.
 644 // By skipping the first C ABI register we can call non-static jni methods
 645 // with small numbers of arguments without having to shuffle the arguments
 646 // at all. Since we control the java ABI we ought to at least get some
 647 // advantage out of it.
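     // Illustrative example (not part of the original comment): for a static method with
     // signature (int, long, float, double, Object), sig_bt is
     //   { T_INT, T_LONG, T_VOID, T_FLOAT, T_DOUBLE, T_VOID, T_OBJECT }
     // and the loop below assigns Z_R2 (int), Z_R3 (long), Z_F0 (float), Z_F2 (double) and
     // Z_R4 (Object); the T_VOID halves are set_bad(). Only once the 5 integer or 4 float
     // argument registers are exhausted do values go to stack slots (2-slot aligned where needed).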
 648 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 649                                            VMRegPair *regs,
 650                                            int total_args_passed,
 651                                            int is_outgoing) {
 652   // c2c calling conventions for compiled-compiled calls.
 653 
 654   // An int/float occupies 1 slot here.
 655   const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats.
 656   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 657 
 658   const VMReg z_iarg_reg[5] = {
 659     Z_R2->as_VMReg(),
 660     Z_R3->as_VMReg(),
 661     Z_R4->as_VMReg(),
 662     Z_R5->as_VMReg(),
 663     Z_R6->as_VMReg()
 664   };
 665   const VMReg z_farg_reg[4] = {
 666     Z_F0->as_VMReg(),
 667     Z_F2->as_VMReg(),
 668     Z_F4->as_VMReg(),
 669     Z_F6->as_VMReg()
 670   };
 671   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 672   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 673 
 674   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 675   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 676 
 678   int stk = 0;
 679   int ireg = 0;
 680   int freg = 0;
 681 
 682   for (int i = 0; i < total_args_passed; ++i) {
 683     switch (sig_bt[i]) {
 684       case T_BOOLEAN:
 685       case T_CHAR:
 686       case T_BYTE:
 687       case T_SHORT:
 688       case T_INT:
 689         if (ireg < z_num_iarg_registers) {
 690           // Put int/ptr in register.
 691           regs[i].set1(z_iarg_reg[ireg]);
 692           ++ireg;
 693         } else {
 694           // Put int/ptr on stack.
 695           regs[i].set1(VMRegImpl::stack2reg(stk));
 696           stk += inc_stk_for_intfloat;
 697         }
 698         break;
 699       case T_LONG:
 700         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 701         if (ireg < z_num_iarg_registers) {
 702           // Put long in register.
 703           regs[i].set2(z_iarg_reg[ireg]);
 704           ++ireg;
 705         } else {
 706           // Put long on stack and align to 2 slots.
 707           if (stk & 0x1) { ++stk; }
 708           regs[i].set2(VMRegImpl::stack2reg(stk));
 709           stk += inc_stk_for_longdouble;
 710         }
 711         break;
 712       case T_OBJECT:
 713       case T_ARRAY:
 714       case T_ADDRESS:
 715         if (ireg < z_num_iarg_registers) {
 716           // Put ptr in register.
 717           regs[i].set2(z_iarg_reg[ireg]);
 718           ++ireg;
 719         } else {
 720           // Put ptr on stack and align to 2 slots, because
 721           // "64-bit pointers record oop-ishness on 2 aligned adjacent
 722           // registers." (see OopFlow::build_oop_map).
 723           if (stk & 0x1) { ++stk; }
 724           regs[i].set2(VMRegImpl::stack2reg(stk));
 725           stk += inc_stk_for_longdouble;
 726         }
 727         break;
 728       case T_FLOAT:
 729         if (freg < z_num_farg_registers) {
 730           // Put float in register.
 731           regs[i].set1(z_farg_reg[freg]);
 732           ++freg;
 733         } else {
 734           // Put float on stack.
 735           regs[i].set1(VMRegImpl::stack2reg(stk));
 736           stk += inc_stk_for_intfloat;
 737         }
 738         break;
 739       case T_DOUBLE:
 740         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 741         if (freg < z_num_farg_registers) {
 742           // Put double in register.
 743           regs[i].set2(z_farg_reg[freg]);
 744           ++freg;
 745         } else {
 746           // Put double on stack and align to 2 slots.
 747           if (stk & 0x1) { ++stk; }
 748           regs[i].set2(VMRegImpl::stack2reg(stk));
 749           stk += inc_stk_for_longdouble;
 750         }
 751         break;
 752       case T_VOID:
 753         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 754         // Do not count halves.
 755         regs[i].set_bad();
 756         break;
 757       default:
 758         ShouldNotReachHere();
 759     }
 760   }
 761   return align_up(stk, 2);
 762 }
 763 
 764 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 765                                         VMRegPair *regs,
 766                                         VMRegPair *regs2,
 767                                         int total_args_passed) {
 768   assert(regs2 == NULL, "second VMRegPair array not used on this platform");
 769 
 770   // Calling conventions for C runtime calls and calls to JNI native methods.
 771   const VMReg z_iarg_reg[5] = {
 772     Z_R2->as_VMReg(),
 773     Z_R3->as_VMReg(),
 774     Z_R4->as_VMReg(),
 775     Z_R5->as_VMReg(),
 776     Z_R6->as_VMReg()
 777   };
 778   const VMReg z_farg_reg[4] = {
 779     Z_F0->as_VMReg(),
 780     Z_F2->as_VMReg(),
 781     Z_F4->as_VMReg(),
 782     Z_F6->as_VMReg()
 783   };
 784   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 785   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 786 
 787   // Check calling conventions consistency.
 788   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 789   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 790 
 791   // Avoid passing C arguments in the wrong stack slots.
 792 
 793   // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
 794   // 2 such slots, like 64 bit values do.
 795   const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
 796   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 797 
 799   // Leave room for C-compatible ABI
 800   int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
 801   int freg = 0;
 802   int ireg = 0;
 803 
 804   // We put the first 5 arguments into registers and the rest on the
 805   // stack. Float arguments are already in their argument registers
 806   // due to c2c calling conventions (see calling_convention).
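       // Illustrative example (not part of the original comment): for
       //   sig_bt = { T_ADDRESS, T_OBJECT, T_INT, T_LONG, T_VOID, T_FLOAT }
       // the loop assigns Z_R2, Z_R3, Z_R4, Z_R5 and Z_F0; once the 5 integer or 4 float argument
       // registers are used up, each remaining value occupies 2 stack slots starting at 'stk'.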
 807   for (int i = 0; i < total_args_passed; ++i) {
 808     switch (sig_bt[i]) {
 809       case T_BOOLEAN:
 810       case T_CHAR:
 811       case T_BYTE:
 812       case T_SHORT:
 813       case T_INT:
 814         // Fall through, handle as long.
 815       case T_LONG:
 816       case T_OBJECT:
 817       case T_ARRAY:
 818       case T_ADDRESS:
 819       case T_METADATA:
 820         // Oops are already boxed if required (JNI).
 821         if (ireg < z_num_iarg_registers) {
 822           regs[i].set2(z_iarg_reg[ireg]);
 823           ++ireg;
 824         } else {
 825           regs[i].set2(VMRegImpl::stack2reg(stk));
 826           stk += inc_stk_for_longdouble;
 827         }
 828         break;
 829       case T_FLOAT:
 830         if (freg < z_num_farg_registers) {
 831           regs[i].set1(z_farg_reg[freg]);
 832           ++freg;
 833         } else {
 834           regs[i].set1(VMRegImpl::stack2reg(stk+1));
 835           stk +=  inc_stk_for_intfloat;
 836         }
 837         break;
 838       case T_DOUBLE:
 839         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 840         if (freg < z_num_farg_registers) {
 841           regs[i].set2(z_farg_reg[freg]);
 842           ++freg;
 843         } else {
 844           // Put double on stack.
 845           regs[i].set2(VMRegImpl::stack2reg(stk));
 846           stk += inc_stk_for_longdouble;
 847         }
 848         break;
 849       case T_VOID:
 850         // Do not count halves.
 851         regs[i].set_bad();
 852         break;
 853       default:
 854         ShouldNotReachHere();
 855     }
 856   }
 857   return align_up(stk, 2);
 858 }
 859 
 860 ////////////////////////////////////////////////////////////////////////
 861 //
 862 //  Argument shufflers
 863 //
 864 ////////////////////////////////////////////////////////////////////////
 865 
 866 //----------------------------------------------------------------------
 867 // The java_calling_convention describes stack locations as ideal slots on
 868 // a frame with no abi restrictions. Since we must observe abi restrictions
 869 // (like the placement of the register window) the slots must be biased by
 870 // the following value.
 871 //----------------------------------------------------------------------
 872 static int reg2slot(VMReg r) {
 873   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 874 }
 875 
 876 static int reg2offset(VMReg r) {
 877   return reg2slot(r) * VMRegImpl::stack_slot_size;
 878 }
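     // Illustrative example (values assumed): with VMRegImpl::stack_slot_size == 4 and, say,
     // out_preserve_stack_slots() == 2, a VMReg at stack slot 3 gives reg2slot == 5 and
     // reg2offset == 20 bytes.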
 879 
 880 static void verify_oop_args(MacroAssembler *masm,
 881                             int total_args_passed,
 882                             const BasicType *sig_bt,
 883                             const VMRegPair *regs) {
 884   if (!VerifyOops) { return; }
 885 
 886   for (int i = 0; i < total_args_passed; i++) {
 887     if (is_reference_type(sig_bt[i])) {
 888       VMReg r = regs[i].first();
 889       assert(r->is_valid(), "bad oop arg");
 890 
 891       if (r->is_stack()) {
 892         __ z_lg(Z_R0_scratch,
 893                 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
 894         __ verify_oop(Z_R0_scratch);
 895       } else {
 896         __ verify_oop(r->as_Register());
 897       }
 898     }
 899   }
 900 }
 901 
 902 static void gen_special_dispatch(MacroAssembler *masm,
 903                                  int total_args_passed,
 904                                  vmIntrinsics::ID special_dispatch,
 905                                  const BasicType *sig_bt,
 906                                  const VMRegPair *regs) {
 907   verify_oop_args(masm, total_args_passed, sig_bt, regs);
 908 
 909   // Now write the args into the outgoing interpreter space.
 910   bool     has_receiver   = false;
 911   Register receiver_reg   = noreg;
 912   int      member_arg_pos = -1;
 913   Register member_reg     = noreg;
 914   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
 915 
 916   if (ref_kind != 0) {
 917     member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
 918     member_reg = Z_R9;                       // Known to be free at this point.
 919     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 920   } else {
 921     guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
 922     has_receiver = true;
 923   }
 924 
 925   if (member_reg != noreg) {
 926     // Load the member_arg into register, if necessary.
 927     assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
 928     assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
 929 
 930     VMReg r = regs[member_arg_pos].first();
 931     assert(r->is_valid(), "bad member arg");
 932 
 933     if (r->is_stack()) {
 934       __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
 935     } else {
 936       // No data motion is needed.
 937       member_reg = r->as_Register();
 938     }
 939   }
 940 
 941   if (has_receiver) {
 942     // Make sure the receiver is loaded into a register.
 943     assert(total_args_passed > 0, "oob");
 944     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 945 
 946     VMReg r = regs[0].first();
 947     assert(r->is_valid(), "bad receiver arg");
 948 
 949     if (r->is_stack()) {
 950       // Porting note: This assumes that compiled calling conventions always
 951       // pass the receiver oop in a register. If this is not true on some
 952       // platform, pick a temp and load the receiver from stack.
 953       assert(false, "receiver always in a register");
 954       receiver_reg = Z_R13;  // Known to be free at this point.
 955       __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
 956     } else {
 957       // No data motion is needed.
 958       receiver_reg = r->as_Register();
 959     }
 960   }
 961 
 962   // Figure out which address we are really jumping to:
 963   MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
 964                                                  receiver_reg, member_reg,
 965                                                  /*for_compiler_entry:*/ true);
 966 }
 967 
 968 ////////////////////////////////////////////////////////////////////////
 969 //
 970 //  Argument shufflers
 971 //
 972 ////////////////////////////////////////////////////////////////////////
 973 
 974 // Is the given vector size (in bytes) bigger than the size saved by default?
 975 // 8-byte registers are saved by default on z/Architecture.
 976 bool SharedRuntime::is_wide_vector(int size) {
 977   // Note, MaxVectorSize == 8 on this platform.
 978   assert(size <= 8, "%d byte vectors are not supported", size);
 979   return size > 8;
 980 }
 981 
 982 //----------------------------------------------------------------------
 983 // An oop arg. Must pass a handle, not the oop itself.
 984 //----------------------------------------------------------------------
 985 static void object_move(MacroAssembler *masm,
 986                         OopMap *map,
 987                         int oop_handle_offset,
 988                         int framesize_in_slots,
 989                         VMRegPair src,
 990                         VMRegPair dst,
 991                         bool is_receiver,
 992                         int *receiver_offset) {
 993   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
 994 
 995   assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
 996 
 997   // Must pass a handle. First figure out the location we use as a handle.
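       // (A "handle" here is simply the address of a stack slot holding the oop, or NULL if the
       //  oop itself is NULL; the slot is recorded in the OopMap so the GC can find and update it.)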
 998 
 999   if (src.first()->is_stack()) {
1000     // Oop is already on the stack, put handle on stack or in register
1001     // If handle will be on the stack, use temp reg to calculate it.
1002     Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1003     Label    skip;
1004     int      slot_in_older_frame = reg2slot(src.first());
1005 
1006     guarantee(!is_receiver, "expecting receiver in register");
1007     map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));
1008 
1009     __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
1010     __ load_and_test_long(Z_R0, Address(rHandle));
1011     __ z_brne(skip);
1012     // Use a NULL handle if oop is NULL.
1013     __ clear_reg(rHandle, true, false);
1014     __ bind(skip);
1015 
1016     // Copy handle to the right place (register or stack).
1017     if (dst.first()->is_stack()) {
1018       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1019     } // else
1020       // nothing to do. rHandle uses the correct register
1021   } else {
1022     // Oop is passed in an input register. We must flush it to the stack.
1023     const Register rOop = src.first()->as_Register();
1024     const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1025     int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1026     int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
1027     NearLabel skip;
1028 
1029     if (is_receiver) {
1030       *receiver_offset = oop_slot_offset;
1031     }
1032     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1033 
1034     // Flush Oop to stack, calculate handle.
1035     __ z_stg(rOop, oop_slot_offset, Z_SP);
1036     __ add2reg(rHandle, oop_slot_offset, Z_SP);
1037 
1038     // If Oop == NULL, use a NULL handle.
1039     __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
1040     __ clear_reg(rHandle, true, false);
1041     __ bind(skip);
1042 
1043     // Copy handle to the right place (register or stack).
1044     if (dst.first()->is_stack()) {
1045       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1046     } // else
1047       // nothing to do here, since rHandle = dst.first()->as_Register in this case.
1048   }
1049 }
1050 
1051 //----------------------------------------------------------------------
1052 // A float arg. May have to do float reg to int reg conversion
1053 //----------------------------------------------------------------------
1054 static void float_move(MacroAssembler *masm,
1055                        VMRegPair src,
1056                        VMRegPair dst,
1057                        int framesize_in_slots,
1058                        int workspace_slot_offset) {
1059   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1060   int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1061 
1062   // We do not accept an argument in a VMRegPair to be spread over two slots,
1063   // no matter what physical location (reg or stack) the slots may have.
1064   // We just check for the unaccepted slot to be invalid.
1065   assert(!src.second()->is_valid(), "float in arg spread over two slots");
1066   assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1067 
1068   if (src.first()->is_stack()) {
1069     if (dst.first()->is_stack()) {
1070       // stack -> stack. The easiest of the bunch.
1071       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1072                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1073     } else {
1074       // stack to reg
1075       Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1076       if (dst.first()->is_Register()) {
1077         __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1078       } else {
1079         __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1080       }
1081     }
1082   } else if (src.first()->is_Register()) {
1083     if (dst.first()->is_stack()) {
1084       // gpr -> stack
1085       __ reg2mem_opt(src.first()->as_Register(),
1086                      Address(Z_SP, reg2offset(dst.first())), false);
1087     } else {
1088       if (dst.first()->is_Register()) {
1089         // gpr -> gpr
1090         __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1091                               src.first()->as_Register(), T_INT);
1092       } else {
1093         if (VM_Version::has_FPSupportEnhancements()) {
1094           // gpr -> fpr. Exploit z10 capability of direct transfer.
1095           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1096         } else {
1097           // gpr -> fpr. Use work space on stack to transfer data.
1098           Address   stackaddr(Z_SP, workspace_offset);
1099 
1100           __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1101           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1102         }
1103       }
1104     }
1105   } else {
1106     if (dst.first()->is_stack()) {
1107       // fpr -> stack
1108       __ freg2mem_opt(src.first()->as_FloatRegister(),
1109                       Address(Z_SP, reg2offset(dst.first())), false);
1110     } else {
1111       if (dst.first()->is_Register()) {
1112         if (VM_Version::has_FPSupportEnhancements()) {
1113           // fpr -> gpr.
1114           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1115         } else {
1116           // fpr -> gpr. Use work space on stack to transfer data.
1117           Address   stackaddr(Z_SP, workspace_offset);
1118 
1119           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1120           __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1121         }
1122       } else {
1123         // fpr -> fpr
1124         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1125                                src.first()->as_FloatRegister(), T_FLOAT);
1126       }
1127     }
1128   }
1129 }
1130 
1131 //----------------------------------------------------------------------
1132 // A double arg. May have to do double reg to long reg conversion
1133 //----------------------------------------------------------------------
1134 static void double_move(MacroAssembler *masm,
1135                         VMRegPair src,
1136                         VMRegPair dst,
1137                         int framesize_in_slots,
1138                         int workspace_slot_offset) {
1139   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1140   int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1141 
1142   // Since src is always a java calling convention we know that the
1143   // src pair is always either all registers or all stack (and aligned?)
1144 
1145   if (src.first()->is_stack()) {
1146     if (dst.first()->is_stack()) {
1147       // stack -> stack. The easiest of the bunch.
1148       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1149                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1150     } else {
1151       // stack to reg
1152       Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1153 
1154       if (dst.first()->is_Register()) {
1155         __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1156       } else {
1157         __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1158       }
1159     }
1160   } else if (src.first()->is_Register()) {
1161     if (dst.first()->is_stack()) {
1162       // gpr -> stack
1163       __ reg2mem_opt(src.first()->as_Register(),
1164                      Address(Z_SP, reg2offset(dst.first())));
1165     } else {
1166       if (dst.first()->is_Register()) {
1167         // gpr -> gpr
1168         __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1169                               src.first()->as_Register(), T_LONG);
1170       } else {
1171         if (VM_Version::has_FPSupportEnhancements()) {
1172           // gpr -> fpr. Exploit z10 capability of direct transfer.
1173           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1174         } else {
1175           // gpr -> fpr. Use work space on stack to transfer data.
1176           Address stackaddr(Z_SP, workspace_offset);
1177           __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1178           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1179         }
1180       }
1181     }
1182   } else {
1183     if (dst.first()->is_stack()) {
1184       // fpr -> stack
1185       __ freg2mem_opt(src.first()->as_FloatRegister(),
1186                       Address(Z_SP, reg2offset(dst.first())));
1187     } else {
1188       if (dst.first()->is_Register()) {
1189         if (VM_Version::has_FPSupportEnhancements()) {
1190           // fpr -> gpr. Exploit z10 capability of direct transfer.
1191           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1192         } else {
1193           // fpr -> gpr. Use work space on stack to transfer data.
1194           Address stackaddr(Z_SP, workspace_offset);
1195 
1196           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1197           __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1198         }
1199       } else {
1200         // fpr -> fpr
1201         // In theory these overlap but the ordering is such that this is likely a nop.
1202         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1203                                src.first()->as_FloatRegister(), T_DOUBLE);
1204       }
1205     }
1206   }
1207 }
1208 
1209 //----------------------------------------------------------------------
1210 // A long arg.
1211 //----------------------------------------------------------------------
1212 static void long_move(MacroAssembler *masm,
1213                       VMRegPair src,
1214                       VMRegPair dst,
1215                       int framesize_in_slots) {
1216   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1217 
1218   if (src.first()->is_stack()) {
1219     if (dst.first()->is_stack()) {
1220       // stack -> stack. The easiest of the bunch.
1221       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1222                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1223     } else {
1224       // stack to reg
1225       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1226       __ mem2reg_opt(dst.first()->as_Register(),
1227                       Address(Z_SP, reg2offset(src.first()) + frame_offset));
1228     }
1229   } else {
1230     // reg to reg
1231     assert(src.first()->is_Register(), "long src value must be in GPR");
1232     if (dst.first()->is_stack()) {
1233       // reg -> stack
1234       __ reg2mem_opt(src.first()->as_Register(),
1235                      Address(Z_SP, reg2offset(dst.first())));
1236     } else {
1237       // reg -> reg
1238       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1239       __ move_reg_if_needed(dst.first()->as_Register(),
1240                             T_LONG, src.first()->as_Register(), T_LONG);
1241     }
1242   }
1243 }
1244 
1245 
1246 //----------------------------------------------------------------------
1247 // A int-like arg.
1248 //----------------------------------------------------------------------
1249 // On z/Architecture we will store integer-like items to the stack as 64-bit
1250 // items, according to the z/Architecture ABI, even though Java would only store
1251 // 32 bits for a parameter.
1252 // We do sign extension for all base types. That is ok since the only
1253 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1254 // Sign extension 32->64 bit will thus not affect the value.
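     // Example: a T_CHAR value 0xFFFF is held in the 32-bit int as 0x0000FFFF; sign extending
     // 32->64 bit yields 0x000000000000FFFF, so the value is unchanged.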
1255 //----------------------------------------------------------------------
1256 static void move32_64(MacroAssembler *masm,
1257                       VMRegPair src,
1258                       VMRegPair dst,
1259                       int framesize_in_slots) {
1260   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1261 
1262   if (src.first()->is_stack()) {
1263     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1264     if (dst.first()->is_stack()) {
1265       // stack -> stack. MVC not possible due to sign extension.
1266       Address firstaddr(Z_SP, reg2offset(dst.first()));
1267       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1268       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1269     } else {
1270       // stack -> reg, sign extended
1271       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1272     }
1273   } else {
1274     if (dst.first()->is_stack()) {
1275       // reg -> stack, sign extended
1276       Address firstaddr(Z_SP, reg2offset(dst.first()));
1277       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1278       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1279     } else {
1280       // reg -> reg, sign extended
1281       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1282     }
1283   }
1284 }
1285 
1286 static void save_or_restore_arguments(MacroAssembler *masm,
1287                                       const int stack_slots,
1288                                       const int total_in_args,
1289                                       const int arg_save_area,
1290                                       OopMap *map,
1291                                       VMRegPair *in_regs,
1292                                       BasicType *in_sig_bt) {
1293 
1294   // If map is non-NULL then the code should store the values,
1295   // otherwise it should load them.
1296   int slot = arg_save_area;
1297   // Handle double words first.
1298   for (int i = 0; i < total_in_args; i++) {
1299     if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
1300       int offset = slot * VMRegImpl::stack_slot_size;
1301       slot += VMRegImpl::slots_per_word;
1302       assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
1303       const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1304       Address   stackaddr(Z_SP, offset);
1305       if (map != NULL) {
1306         __ freg2mem_opt(freg, stackaddr);
1307       } else {
1308         __ mem2freg_opt(freg, stackaddr);
1309       }
1310     } else if (in_regs[i].first()->is_Register() &&
1311                (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
1312       int offset = slot * VMRegImpl::stack_slot_size;
1313       const Register   reg = in_regs[i].first()->as_Register();
1314       if (map != NULL) {
1315         __ z_stg(reg, offset, Z_SP);
1316         if (in_sig_bt[i] == T_ARRAY) {
1317           map->set_oop(VMRegImpl::stack2reg(slot));
1318         }
1319       } else {
1320         __ z_lg(reg, offset, Z_SP);
1321       }
1322       slot += VMRegImpl::slots_per_word;
1323       assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
1324     }
1325   }
1326 
1327   // Save or restore single word registers.
1328   for (int i = 0; i < total_in_args; i++) {
1329     if (in_regs[i].first()->is_Register()) {
1330       int offset = slot * VMRegImpl::stack_slot_size;
1331       // Value lives in an input register. Save or restore it on the stack.
1332       switch (in_sig_bt[i]) {
1333         case T_BOOLEAN:
1334         case T_CHAR:
1335         case T_BYTE:
1336         case T_SHORT:
1337         case T_INT: {
1338           const Register   reg = in_regs[i].first()->as_Register();
1339           Address   stackaddr(Z_SP, offset);
1340           if (map != NULL) {
1341             __ z_st(reg, stackaddr);
1342           } else {
1343             __ z_lgf(reg, stackaddr);
1344           }
1345           slot++;
1346           assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
1347           break;
1348         }
1349         case T_ARRAY:
1350         case T_LONG:
1351           // handled above
1352           break;
1353         case T_OBJECT:
1354         default: ShouldNotReachHere();
1355       }
1356     } else if (in_regs[i].first()->is_FloatRegister()) {
1357       if (in_sig_bt[i] == T_FLOAT) {
1358         int offset = slot * VMRegImpl::stack_slot_size;
1359         slot++;
1360         assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
1361         const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1362         Address   stackaddr(Z_SP, offset);
1363         if (map != NULL) {
1364           __ freg2mem_opt(freg, stackaddr, false);
1365         } else {
1366           __ mem2freg_opt(freg, stackaddr, false);
1367         }
1368       }
1369     } else if (in_regs[i].first()->is_stack() &&
1370                in_sig_bt[i] == T_ARRAY && map != NULL) {
1371       int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1372       map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1373     }
1374   }
1375 }
1376 
1377 // Check GCLocker::needs_gc and enter the runtime if it's true. This
1378 // keeps a new JNI critical region from starting until a GC has been
1379 // forced. Save down any oops in registers and describe them in an OopMap.
1380 static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
1381                                                 const int stack_slots,
1382                                                 const int total_in_args,
1383                                                 const int arg_save_area,
1384                                                 OopMapSet *oop_maps,
1385                                                 VMRegPair *in_regs,
1386                                                 BasicType *in_sig_bt) {
1387   __ block_comment("check GCLocker::needs_gc");
1388   Label cont;
1389 
1390   // Check GCLocker::_needs_gc flag.
1391   __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
1392   __ z_cli(0, Z_R1_scratch, 0);
1393   __ z_bre(cont);
1394 
1395   // Save down any values that are live in registers and call into the
1396   // runtime to halt for a GC.
1397   OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1398 
1399   save_or_restore_arguments(masm, stack_slots, total_in_args,
1400                             arg_save_area, map, in_regs, in_sig_bt);
1401   address the_pc = __ pc();
1402   __ set_last_Java_frame(Z_SP, noreg);
1403 
1404   __ block_comment("block_for_jni_critical");
1405   __ z_lgr(Z_ARG1, Z_thread);
1406 
1407   address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1408   __ call_c(entry_point);
1409   oop_maps->add_gc_map(__ offset(), map);
1410 
1411   __ reset_last_Java_frame();
1412 
1413   // Reload all the register arguments.
1414   save_or_restore_arguments(masm, stack_slots, total_in_args,
1415                             arg_save_area, NULL, in_regs, in_sig_bt);
1416 
1417   __ bind(cont);
1418 
1419   if (StressCriticalJNINatives) {
1420     // Stress register saving
1421     OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1422     save_or_restore_arguments(masm, stack_slots, total_in_args,
1423                               arg_save_area, map, in_regs, in_sig_bt);
1424 
1425     // Destroy argument registers.
1426     for (int i = 0; i < total_in_args; i++) {
1427       if (in_regs[i].first()->is_Register()) {
1428         // Don't set CC.
1429         __ clear_reg(in_regs[i].first()->as_Register(), true, false);
1430       } else {
1431         if (in_regs[i].first()->is_FloatRegister()) {
1432           FloatRegister fr = in_regs[i].first()->as_FloatRegister();
1433           __ z_lcdbr(fr, fr);
1434         }
1435       }
1436     }
1437 
1438     save_or_restore_arguments(masm, stack_slots, total_in_args,
1439                               arg_save_area, NULL, in_regs, in_sig_bt);
1440   }
1441 }
1442 
1443 static void move_ptr(MacroAssembler *masm,
1444                      VMRegPair src,
1445                      VMRegPair dst,
1446                      int framesize_in_slots) {
1447   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1448 
1449   if (src.first()->is_stack()) {
1450     if (dst.first()->is_stack()) {
1451       // stack to stack
1452       __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1453       __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1454     } else {
1455       // stack to reg
1456       __ mem2reg_opt(dst.first()->as_Register(),
1457                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
1458     }
1459   } else {
1460     if (dst.first()->is_stack()) {
1461       // reg to stack
1462       __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1463     } else {
1464       __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1465     }
1466   }
1467 }
1468 
1469 // Unpack an array argument into a pointer to the body and the length
1470 // if the array is non-null, otherwise pass 0 for both.
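     // Illustrative effect for an int[] argument: length_arg receives the value at
     // arrayOopDesc::length_offset_in_bytes() and body_arg receives the address of
     // the first element (base_offset_in_bytes(T_INT) past the oop); for a null
     // array, both outgoing values are zero.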
1471 static void unpack_array_argument(MacroAssembler *masm,
1472                                    VMRegPair reg,
1473                                    BasicType in_elem_type,
1474                                    VMRegPair body_arg,
1475                                    VMRegPair length_arg,
1476                                    int framesize_in_slots) {
1477   Register tmp_reg = Z_tmp_2;
1478   Register tmp2_reg = Z_tmp_1;
1479 
1480   assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1481          "possible collision");
1482   assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1483          "possible collision");
1484 
1485   // Pass the length, ptr pair.
1486   NearLabel set_out_args;
1487   VMRegPair tmp, tmp2;
1488 
1489   tmp.set_ptr(tmp_reg->as_VMReg());
1490   tmp2.set_ptr(tmp2_reg->as_VMReg());
1491   if (reg.first()->is_stack()) {
1492     // Load the arg up from the stack.
1493     move_ptr(masm, reg, tmp, framesize_in_slots);
1494     reg = tmp;
1495   }
1496 
1497   const Register first = reg.first()->as_Register();
1498 
1499   // Don't set CC, indicate unused result.
1500   (void) __ clear_reg(tmp2_reg, true, false);
1501   if (tmp_reg != first) {
1502     __ clear_reg(tmp_reg, true, false);  // Don't set CC.
1503   }
1504   __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
1505   __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
1506   __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
1507 
1508   __ bind(set_out_args);
1509   move_ptr(masm, tmp, body_arg, framesize_in_slots);
1510   move32_64(masm, tmp2, length_arg, framesize_in_slots);
1511 }
1512 
1513 //----------------------------------------------------------------------
1514 // Wrap a JNI call.
1515 //----------------------------------------------------------------------
1516 #undef USE_RESIZE_FRAME
1517 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1518                                                 const methodHandle& method,
1519                                                 int compile_id,
1520                                                 BasicType *in_sig_bt,
1521                                                 VMRegPair *in_regs,
1522                                                 BasicType ret_type,
1523                                                 address critical_entry) {
1524 #ifdef COMPILER2
1525   int total_in_args = method->size_of_parameters();
1526   if (method->is_method_handle_intrinsic()) {
1527     vmIntrinsics::ID iid = method->intrinsic_id();
1528     intptr_t start = (intptr_t) __ pc();
1529     int vep_offset = ((intptr_t) __ pc()) - start;
1530 
1531     gen_special_dispatch(masm, total_in_args,
1532                          method->intrinsic_id(), in_sig_bt, in_regs);
1533 
1534     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1535 
1536     __ flush();
1537 
1538     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1539 
1540     return nmethod::new_native_nmethod(method,
1541                                        compile_id,
1542                                        masm->code(),
1543                                        vep_offset,
1544                                        frame_complete,
1545                                        stack_slots / VMRegImpl::slots_per_word,
1546                                        in_ByteSize(-1),
1547                                        in_ByteSize(-1),
1548                                        (OopMapSet *) NULL);
1549   }
1550 
1551 
1552   ///////////////////////////////////////////////////////////////////////
1553   //
1554   //  Precalculations before generating any code
1555   //
1556   ///////////////////////////////////////////////////////////////////////
1557 
1558   bool is_critical_native = true;
1559   address native_func = critical_entry;
1560   if (native_func == NULL) {
1561     native_func = method->native_function();
1562     is_critical_native = false;
1563   }
1564   assert(native_func != NULL, "must have function");
1565 
1566   //---------------------------------------------------------------------
1567   // We have received a description of where all the java args are located
1568   // on entry to the wrapper. We need to convert these args to where
1569   // the jni function will expect them. To figure out where they go
1570   // we convert the java signature to a C signature by inserting
1571   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1572   //
1573   // The first hidden argument arg[0] is a pointer to the JNI environment.
1574   // It is generated for every call.
1575   // The second argument arg[1] to the JNI call, which is hidden for static
1576   // methods, is the boxed lock object. For static calls, the lock object
1577   // is the class mirror of the method's holder; the oop is constructed here.
1578   // For instance calls, the lock is performed on the object itself, the
1579   // pointer of which is passed as the first visible argument.
1580   //---------------------------------------------------------------------
1581 
1582   // Additionally, on z/Architecture we must convert integers
1583   // to longs in the C signature. We do this in advance in order to have
1584   // no trouble with indexes into the bt-arrays.
1585   // So convert the signature and registers now, and adjust the total number
1586   // of in-arguments accordingly.
1587   bool method_is_static = method->is_static();
1588   int  total_c_args     = total_in_args;
1589 
1590   if (!is_critical_native) {
1591     int n_hidden_args = method_is_static ? 2 : 1;
1592     total_c_args += n_hidden_args;
1593   } else {
1594     // No JNIEnv*, no this*, but unpacked arrays (base+length).
1595     for (int i = 0; i < total_in_args; i++) {
1596       if (in_sig_bt[i] == T_ARRAY) {
1597         total_c_args++;
1598       }
1599     }
1600   }
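       // Illustration: a non-critical instance method adds only the JNIEnv*, so
       // total_c_args == total_in_args + 1; a static method also gets the class
       // mirror, giving total_in_args + 2. A critical native with two T_ARRAY
       // parameters likewise ends up at total_in_args + 2, because each array
       // expands into a (length, pointer) pair below.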
1601 
1602   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1603   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1604   BasicType* in_elem_bt = NULL;
1605 
1606   // Create the signature for the C call:
1607   //   1) add the JNIEnv*
1608   //   2) add the class if the method is static
1609   //   3) copy the rest of the incoming signature (shifted by the number of
1610   //      hidden arguments)
1611 
1612   int argc = 0;
1613   if (!is_critical_native) {
1614     out_sig_bt[argc++] = T_ADDRESS;
1615     if (method->is_static()) {
1616       out_sig_bt[argc++] = T_OBJECT;
1617     }
1618 
1619     for (int i = 0; i < total_in_args; i++) {
1620       out_sig_bt[argc++] = in_sig_bt[i];
1621     }
1622   } else {
1623     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1624     SignatureStream ss(method->signature());
1625     int o = 0;
1626     for (int i = 0; i < total_in_args; i++, o++) {
1627       if (in_sig_bt[i] == T_ARRAY) {
1628         // Arrays are passed as tuples (int, elem*).
1629         Symbol* atype = ss.as_symbol();
1630         const char* at = atype->as_C_string();
1631         if (strlen(at) == 2) {
1632           assert(at[0] == '[', "must be");
1633           switch (at[1]) {
1634             case 'B': in_elem_bt[o]  = T_BYTE; break;
1635             case 'C': in_elem_bt[o]  = T_CHAR; break;
1636             case 'D': in_elem_bt[o]  = T_DOUBLE; break;
1637             case 'F': in_elem_bt[o]  = T_FLOAT; break;
1638             case 'I': in_elem_bt[o]  = T_INT; break;
1639             case 'J': in_elem_bt[o]  = T_LONG; break;
1640             case 'S': in_elem_bt[o]  = T_SHORT; break;
1641             case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
1642             default: ShouldNotReachHere();
1643           }
1644         }
1645       } else {
1646         in_elem_bt[o] = T_VOID;
1647       }
1648       if (in_sig_bt[i] != T_VOID) {
1649         assert(in_sig_bt[i] == ss.type(), "must match");
1650         ss.next();
1651       }
1652     }
1653     assert(total_in_args == o, "must match");
1654 
1655     for (int i = 0; i < total_in_args; i++) {
1656       if (in_sig_bt[i] == T_ARRAY) {
1657         // Arrays are passed as tuples (int, elem*).
1658         out_sig_bt[argc++] = T_INT;
1659         out_sig_bt[argc++] = T_ADDRESS;
1660       } else {
1661         out_sig_bt[argc++] = in_sig_bt[i];
1662       }
1663     }
1664   }
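       // Example (hypothetical critical native taking (int, byte[])): out_sig_bt
       // becomes { T_INT, T_INT, T_ADDRESS }, i.e. the array is passed as its
       // length followed by a pointer to its body.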
1665 
1666   ///////////////////////////////////////////////////////////////////////
1667   // Now figure out where the args must be stored and how much stack space
1668   // they require (neglecting out_preserve_stack_slots but providing space
1669   // for storing the first five register arguments).
1670   // It's weird, see int_stk_helper.
1671   ///////////////////////////////////////////////////////////////////////
1672 
1673   //---------------------------------------------------------------------
1674   // Compute framesize for the wrapper.
1675   //
1676   // - We need to handlize all oops passed in registers.
1677   // - We must create space for them here that is disjoint from the save area.
1678   // - We always just allocate 5 words for storing down these objects.
1679   //   This allows us to simply record the base and use the Ireg number to
1680   //   decide which slot to use.
1681   // - Note that the reg number used to index the stack slot is the inbound
1682   //   number, not the outbound number.
1683   // - We must shuffle args to match the native convention,
1684   //   and to include var-args space.
1685   //---------------------------------------------------------------------
1686 
1687   //---------------------------------------------------------------------
1688   // Calculate the total number of stack slots we will need:
1689   // - 1) abi requirements
1690   // - 2) outgoing args
1691   // - 3) space for inbound oop handle area
1692   // - 4) space for handlizing a klass if static method
1693   // - 5) space for a lock if synchronized method
1694   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1695   // - 7) filler slots for alignment
1696   //---------------------------------------------------------------------
1697   // Here is what the space we have allocated will look like.
1698   // Since we use resize_frame, we do not create a new stack frame,
1699   // but just extend the one we got with our own data area.
1700   //
1701   // If an offset or pointer name points to a separator line, it is
1702   // assumed that addressing with offset 0 selects storage starting
1703   // at the first byte above the separator line.
1704   //
1705   //
1706   //     ...                   ...
1707   //      | caller's frame      |
1708   // FP-> |---------------------|
1709   //      | filler slots, if any|
1710   //     7| #slots == mult of 2 |
1711   //      |---------------------|
1712   //      | work space          |
1713   //     6| 2 slots = 8 bytes   |
1714   //      |---------------------|
1715   //     5| lock box (if sync)  |
1716   //      |---------------------| <- lock_slot_offset
1717   //     4| klass (if static)   |
1718   //      |---------------------| <- klass_slot_offset
1719   //     3| oopHandle area      |
1720   //      | (save area for      |
1721   //      |  critical natives)  |
1722   //      |                     |
1723   //      |                     |
1724   //      |---------------------| <- oop_handle_offset
1725   //     2| outbound memory     |
1726   //     ...                   ...
1727   //      | based arguments     |
1728   //      |---------------------|
1729   //      | vararg              |
1730   //     ...                   ...
1731   //      | area                |
1732   //      |---------------------| <- out_arg_slot_offset
1733   //     1| out_preserved_slots |
1734   //     ...                   ...
1735   //      | (z_abi spec)        |
1736   // SP-> |---------------------| <- FP_slot_offset (back chain)
1737   //     ...                   ...
1738   //
1739   //---------------------------------------------------------------------
1740 
1741   // *_slot_offset indicates offset from SP in #stack slots
1742   // *_offset      indicates offset from SP in #bytes
1743 
1744   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1745                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1746 
1747   // Now the space for the inbound oop handle area.
1748   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1749   if (is_critical_native) {
1750     // Critical natives may have to call out so they need a save area
1751     // for register arguments.
1752     int double_slots = 0;
1753     int single_slots = 0;
1754     for (int i = 0; i < total_in_args; i++) {
1755       if (in_regs[i].first()->is_Register()) {
1756         const Register reg = in_regs[i].first()->as_Register();
1757         switch (in_sig_bt[i]) {
1758           case T_BOOLEAN:
1759           case T_BYTE:
1760           case T_SHORT:
1761           case T_CHAR:
1762           case T_INT:
1763           // Fall through.
1764           case T_ARRAY:
1765           case T_LONG: double_slots++; break;
1766           default:  ShouldNotReachHere();
1767         }
1768       } else {
1769         if (in_regs[i].first()->is_FloatRegister()) {
1770           switch (in_sig_bt[i]) {
1771             case T_FLOAT:  single_slots++; break;
1772             case T_DOUBLE: double_slots++; break;
1773             default:  ShouldNotReachHere();
1774           }
1775         }
1776       }
1777     }  // for
1778     total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even.
1779   }
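       // Worked example (illustrative): three register-passed args of types
       // T_LONG, T_INT and T_FLOAT yield double_slots == 2 and single_slots == 1,
       // so total_save_slots == 2 * 2 + align_up(1, 2) == 6.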
1780 
1781   int oop_handle_slot_offset = stack_slots;
1782   stack_slots += total_save_slots;                                        // 3)
1783 
1784   int klass_slot_offset = 0;
1785   int klass_offset      = -1;
1786   if (method_is_static && !is_critical_native) {                          // 4)
1787     klass_slot_offset  = stack_slots;
1788     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1789     stack_slots       += VMRegImpl::slots_per_word;
1790   }
1791 
1792   int lock_slot_offset = 0;
1793   int lock_offset      = -1;
1794   if (method->is_synchronized()) {                                        // 5)
1795     lock_slot_offset   = stack_slots;
1796     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1797     stack_slots       += VMRegImpl::slots_per_word;
1798   }
1799 
1800   int workspace_slot_offset = stack_slots;                                // 6)
1801   stack_slots         += 2;
1802 
1803   // Now compute actual number of stack words we need.
1804   // Round to align stack properly.
1805   stack_slots = align_up(stack_slots,                                     // 7)
1806                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1807   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
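       // Rounding example (assuming 4-byte stack slots and 8-byte frame alignment):
       // 37 raw slots are padded to 38, giving frame_size_in_bytes == 38 * 4 == 152.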
1808 
1809 
1810   ///////////////////////////////////////////////////////////////////////
1811   // Now we can start generating code
1812   ///////////////////////////////////////////////////////////////////////
1813 
1814   unsigned int wrapper_CodeStart  = __ offset();
1815   unsigned int wrapper_UEPStart;
1816   unsigned int wrapper_VEPStart;
1817   unsigned int wrapper_FrameDone;
1818   unsigned int wrapper_CRegsSet;
1819   Label     handle_pending_exception;
1820   Label     ic_miss;
1821 
1822   //---------------------------------------------------------------------
1823   // Unverified entry point (UEP)
1824   //---------------------------------------------------------------------
1825   wrapper_UEPStart = __ offset();
1826 
1827   // check ic: object class <-> cached class
1828   if (!method_is_static) __ nmethod_UEP(ic_miss);
1829   // Fill with nops (alignment of verified entry point).
1830   __ align(CodeEntryAlignment);
1831 
1832   //---------------------------------------------------------------------
1833   // Verified entry point (VEP)
1834   //---------------------------------------------------------------------
1835   wrapper_VEPStart = __ offset();
1836 
1837   if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
1838     Label L_skip_barrier;
1839     Register klass = Z_R1_scratch;
1840     // Notify OOP recorder (don't need the relocation)
1841     AddressLiteral md = __ constant_metadata_address(method->method_holder());
1842     __ load_const_optimized(klass, md.value());
1843     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
1844 
1845     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
1846     __ z_br(klass);
1847 
1848     __ bind(L_skip_barrier);
1849   }
1850 
1851   __ save_return_pc();
1852   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1853 #ifndef USE_RESIZE_FRAME
1854   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1855 #else
1856   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1857                                                           // Just resize the existing one.
1858 #endif
1859 
1860   wrapper_FrameDone = __ offset();
1861 
1862   __ verify_thread();
1863 
1864   // Native nmethod wrappers never take possession of the oop arguments.
1865   // So the caller will gc the arguments.
1866   // The only thing we need an oopMap for is if the call is static.
1867   //
1868   // An OopMap for lock (and class if static), and one for the VM call itself
1869   OopMapSet  *oop_maps        = new OopMapSet();
1870   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1871 
1872   if (is_critical_native) {
1873     check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1874                                        oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1875   }
1876 
1877 
1878   //////////////////////////////////////////////////////////////////////
1879   //
1880   // The Grand Shuffle
1881   //
1882   //////////////////////////////////////////////////////////////////////
1883   //
1884   // We immediately shuffle the arguments so that for any vm call we have
1885   // to make from here on out (sync slow path, jvmti, etc.) we will have
1886   // captured the oops from our caller and have a valid oopMap for them.
1887   //
1888   //--------------------------------------------------------------------
1889   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1890   // (derived from JavaThread* which is in Z_thread) and, if static,
1891   // the class mirror instead of a receiver. This pretty much guarantees that
1892   // register layout will not match. We ignore these extra arguments during
1893   // the shuffle. The shuffle is described by the two calling convention
1894   // vectors we have in our possession. We simply walk the java vector to
1895   // get the source locations and the c vector to get the destinations.
1896   //
1897   // This is a trick. We double the stack slots so we can claim
1898   // the oops in the caller's frame. Since we are sure to have
1899   // more args than the caller, doubling is enough to make
1900   // sure we can capture all the incoming oop args from the caller.
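       // Concretely: the OopMap above was sized with stack_slots * 2 entries, so
       // slot indices >= stack_slots can describe oops that still live in the
       // caller's frame (see how save_or_restore_arguments records stack-resident
       // T_ARRAY oops at offset_in_older_frame + stack_slots).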
1901   //--------------------------------------------------------------------
1902 
1903   // Record sp-based slot for receiver on stack for non-static methods.
1904   int receiver_offset = -1;
1905 
1906   //--------------------------------------------------------------------
1907   // We move the arguments backwards because the floating point registers
1908   // destination will always be to a register with a greater or equal
1909   // register number or the stack.
1910   //   jix is the index of the incoming Java arguments.
1911   //   cix is the index of the outgoing C arguments.
1912   //--------------------------------------------------------------------
1913 
1914 #ifdef ASSERT
1915   bool reg_destroyed[RegisterImpl::number_of_registers];
1916   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1917   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1918     reg_destroyed[r] = false;
1919   }
1920   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1921     freg_destroyed[f] = false;
1922   }
1923 #endif // ASSERT
1924 
1925   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1926 #ifdef ASSERT
1927     if (in_regs[jix].first()->is_Register()) {
1928       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1929     } else {
1930       if (in_regs[jix].first()->is_FloatRegister()) {
1931         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1932       }
1933     }
1934     if (out_regs[cix].first()->is_Register()) {
1935       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1936     } else {
1937       if (out_regs[cix].first()->is_FloatRegister()) {
1938         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1939       }
1940     }
1941 #endif // ASSERT
1942 
1943     switch (in_sig_bt[jix]) {
1944       // Due to casting, small integers should only occur in pairs with type T_LONG.
1945       case T_BOOLEAN:
1946       case T_CHAR:
1947       case T_BYTE:
1948       case T_SHORT:
1949       case T_INT:
1950         // Move int and do sign extension.
1951         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1952         break;
1953 
1954       case T_LONG :
1955         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1956         break;
1957 
1958       case T_ARRAY:
1959         if (is_critical_native) {
1960           int body_arg = cix;
1961           cix -= 1; // Point to length arg.
1962           unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1963           break;
1964         }
1965         // else fallthrough
1966       case T_OBJECT:
1967         assert(!is_critical_native, "no oop arguments");
1968         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1969                     ((jix == 0) && (!method_is_static)),
1970                     &receiver_offset);
1971         break;
1972       case T_VOID:
1973         break;
1974 
1975       case T_FLOAT:
1976         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1977         break;
1978 
1979       case T_DOUBLE:
1980         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1981         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1982         break;
1983 
1984       case T_ADDRESS:
1985         assert(false, "found T_ADDRESS in java args");
1986         break;
1987 
1988       default:
1989         ShouldNotReachHere();
1990     }
1991   }
1992 
1993   //--------------------------------------------------------------------
1994   // Pre-load a static method's oop into ARG2.
1995   // Used both by locking code and the normal JNI call code.
1996   //--------------------------------------------------------------------
1997   if (method_is_static && !is_critical_native) {
1998     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1999 
2000     // Now handlize the static class mirror in ARG2. It's known not-null.
2001     __ z_stg(Z_ARG2, klass_offset, Z_SP);
2002     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2003     __ add2reg(Z_ARG2, klass_offset, Z_SP);
2004   }
2005 
2006   // Get JNIEnv* which is first argument to native.
2007   if (!is_critical_native) {
2008     __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
2009   }
2010 
2011   //////////////////////////////////////////////////////////////////////
2012   // We have all of the arguments setup at this point.
2013   // We MUST NOT touch any outgoing regs from this point on.
2014   // So if we must call out we must push a new frame.
2015   //////////////////////////////////////////////////////////////////////
2016 
2017 
2018   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
2019   // Both values represent the same position.
2020   __ get_PC(Z_R10);                // PC into register
2021   wrapper_CRegsSet = __ offset();  // and into a variable.
2022 
2023   // Z_R10 now has the pc loaded that we will use when we finally call to native.
2024 
2025   // We use the same pc/oopMap repeatedly when we call out.
2026   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
2027 
2028   // Lock a synchronized method.
2029 
2030   if (method->is_synchronized()) {
2031     assert(!is_critical_native, "unhandled");
2032 
2033     // ATTENTION: args and Z_R10 must be preserved.
2034     Register r_oop  = Z_R11;
2035     Register r_box  = Z_R12;
2036     Register r_tmp1 = Z_R13;
2037     Register r_tmp2 = Z_R7;
2038     Label done;
2039 
2040     // Load the oop for the object or class. R_carg2_classorobject contains
2041     // either the handlized oop from the incoming arguments or the handlized
2042     // class mirror (if the method is static).
2043     __ z_lg(r_oop, 0, Z_ARG2);
2044 
2045     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
2046     // Get the lock box slot's address.
2047     __ add2reg(r_box, lock_offset, Z_SP);
2048 
2049 #ifdef ASSERT
2050     if (UseBiasedLocking)
2051       // Making the box point to itself will make it clear it went unused
2052       // but also be obviously invalid.
2053       __ z_stg(r_box, 0, r_box);
2054 #endif // ASSERT
2055 
2056     // Try fastpath for locking.
2057     // Fast_lock kills r_tmp1, r_tmp2. (Don't use R1 as temp, won't work!)
2058     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2059     __ z_bre(done);
2060 
2061     //-------------------------------------------------------------------------
2062     // None of the above fast optimizations worked so we have to get into the
2063     // slow case of monitor enter. Inline a special case of call_VM that
2064     // disallows any pending_exception.
2065     //-------------------------------------------------------------------------
2066 
2067     Register oldSP = Z_R11;
2068 
2069     __ z_lgr(oldSP, Z_SP);
2070 
2071     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2072 
2073     // Prepare arguments for call.
2074     __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
2075     __ add2reg(Z_ARG2, lock_offset, oldSP);
2076     __ z_lgr(Z_ARG3, Z_thread);
2077 
2078     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2079 
2080     // Do the call.
2081     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2082     __ call(Z_R1_scratch);
2083 
2084     __ reset_last_Java_frame();
2085 
2086     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2087 #ifdef ASSERT
2088     { Label L;
2089       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2090       __ z_bre(L);
2091       __ stop("no pending exception allowed on exit from IR::monitorenter");
2092       __ bind(L);
2093     }
2094 #endif
2095     __ bind(done);
2096   } // lock for synchronized methods
2097 
2098 
2099   //////////////////////////////////////////////////////////////////////
2100   // Finally just about ready to make the JNI call.
2101   //////////////////////////////////////////////////////////////////////
2102 
2103   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2104   __ set_last_Java_frame(Z_SP, Z_R10);
2105 
2106   // Transition from _thread_in_Java to _thread_in_native.
2107   __ set_thread_state(_thread_in_native);
2108 
2109 
2110   //////////////////////////////////////////////////////////////////////
2111   // This is the JNI call.
2112   //////////////////////////////////////////////////////////////////////
2113 
2114   __ call_c(native_func);
2115 
2116 
2117   //////////////////////////////////////////////////////////////////////
2118   // We have survived the call once we reach here.
2119   //////////////////////////////////////////////////////////////////////
2120 
2121 
2122   //--------------------------------------------------------------------
2123   // Unpack native results.
2124   //--------------------------------------------------------------------
2125   // For int-types, we do any needed sign-extension required.
2126   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2127   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2128   // blocking or unlocking.
2129   // An OOP result (handle) is done specially in the slow-path code.
2130   //--------------------------------------------------------------------
2131   switch (ret_type) {
2132     case T_VOID:    break;         // Nothing to do!
2133     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
2134     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
2135     case T_LONG:    break;         // Got it where we want it (unless slow-path)
2136     case T_OBJECT:  break;         // Really a handle.
2137                                    // Cannot de-handlize until after reclaiming jvm_lock.
2138     case T_ARRAY:   break;
2139 
2140     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
2141       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
2142       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
2143       break;
2144     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
2145     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2146     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
2147     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
2148 
2149     default:
2150       ShouldNotReachHere();
2151       break;
2152   }
2153 
2154 
2155   // Switch thread to "native transition" state before reading the synchronization state.
2156   // This additional state is necessary because reading and testing the synchronization
2157   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2158   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2159   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
2160   //   - Thread A is resumed to finish this native method, but doesn't block here since it
2161   //     didn't see any synchronization in progress, and escapes.
2162 
2163   // Transition from _thread_in_native to _thread_in_native_trans.
2164   __ set_thread_state(_thread_in_native_trans);
2165 
2166   // Safepoint synchronization
2167   //--------------------------------------------------------------------
2168   // Must we block?
2169   //--------------------------------------------------------------------
2170   // Block, if necessary, before resuming in _thread_in_Java state.
2171   // In order for GC to work, don't clear the last_Java_sp until after blocking.
2172   //--------------------------------------------------------------------
2173   Label after_transition;
2174   {
2175     Label no_block, sync;
2176 
2177     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2178 
2179     // Force this write out before the read below.
2180     __ z_fence();
2181 
2182     __ safepoint_poll(sync, Z_R1);
2183 
2184     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2185     __ z_bre(no_block);
2186 
2187     // Block. Save any potential method result value before the operation and
2188     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2189     // lets us share the oopMap we used when we went native rather than create
2190     // a distinct one for this pc.
2191     //
2192     __ bind(sync);
2193     __ z_acquire();
2194 
2195     address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2196                                              : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2197 
2198     __ call_VM_leaf(entry_point, Z_thread);
2199 
2200     if (is_critical_native) {
2201       restore_native_result(masm, ret_type, workspace_slot_offset);
2202       __ z_bru(after_transition); // No thread state transition here.
2203     }
2204     __ bind(no_block);
2205     restore_native_result(masm, ret_type, workspace_slot_offset);
2206   }
2207 
2208   //--------------------------------------------------------------------
2209   // Thread state is thread_in_native_trans. Any safepoint blocking has
2210   // already happened so we can now change state to _thread_in_Java.
2211   //--------------------------------------------------------------------
2212   // Transition from _thread_in_native_trans to _thread_in_Java.
2213   __ set_thread_state(_thread_in_Java);
2214   __ bind(after_transition);
2215 
2216 
2217   //--------------------------------------------------------------------
2218   // Reguard any pages if necessary.
2219   // Protect native result from being destroyed.
2220   //--------------------------------------------------------------------
2221 
2222   Label no_reguard;
2223 
2224   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2225            JavaThread::stack_guard_yellow_reserved_disabled);
2226 
2227   __ z_bre(no_reguard);
2228 
2229   save_native_result(masm, ret_type, workspace_slot_offset);
2230   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2231   restore_native_result(masm, ret_type, workspace_slot_offset);
2232 
2233   __ bind(no_reguard);
2234 
2235 
2236   // Synchronized methods (slow path only)
2237   // No pending exceptions for now.
2238   //--------------------------------------------------------------------
2239   // Handle possibly pending exception (will unlock if necessary).
2240   // Native result is, if any is live, in Z_FRES or Z_RES.
2241   //--------------------------------------------------------------------
2242   // Unlock
2243   //--------------------------------------------------------------------
2244   if (method->is_synchronized()) {
2245     const Register r_oop        = Z_R11;
2246     const Register r_box        = Z_R12;
2247     const Register r_tmp1       = Z_R13;
2248     const Register r_tmp2       = Z_R7;
2249     Label done;
2250 
2251     // Get unboxed oop of class mirror or object ...
2252     int   offset = method_is_static ? klass_offset : receiver_offset;
2253 
2254     assert(offset != -1, "");
2255     __ z_lg(r_oop, offset, Z_SP);
2256 
2257     // ... and address of lock object box.
2258     __ add2reg(r_box, lock_offset, Z_SP);
2259 
2260     // Try fastpath for unlocking.
2261     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2262     __ z_bre(done);
2263 
2264     // Slow path for unlocking.
2265     // Save and restore any potential method result value around the unlocking operation.
2266     const Register R_exc = Z_R11;
2267 
2268     save_native_result(masm, ret_type, workspace_slot_offset);
2269 
2270     // Must save pending exception around the slow-path VM call. Since it's a
2271     // leaf call, the pending exception (if any) can be kept in a register.
2272     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2273     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2274 
2275     // Must clear pending-exception before re-entering the VM. Since this is
2276     // a leaf call, pending-exception-oop can be safely kept in a register.
2277     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2278 
2279     // Inline a special case of call_VM that disallows any pending_exception.
2280 
2281     // Get locked oop from the handle we passed to jni.
2282     __ z_lg(Z_ARG1, offset, Z_SP);
2283     __ add2reg(Z_ARG2, lock_offset, Z_SP);
2284     __ z_lgr(Z_ARG3, Z_thread);
2285 
2286     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2287 
2288     __ call(Z_R1_scratch);
2289 
2290 #ifdef ASSERT
2291     {
2292       Label L;
2293       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2294       __ z_bre(L);
2295       __ stop("no pending exception allowed on exit from IR::monitorexit");
2296       __ bind(L);
2297     }
2298 #endif
2299 
2300     // Check_forward_pending_exception jumps to forward_exception if any pending
2301     // exception is set. The forward_exception routine expects to see the
2302     // exception in pending_exception and not in a register. Kind of clumsy,
2303     // since all folks who branch to forward_exception must have tested
2304     // pending_exception first and hence have it in a register already.
2305     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2306     restore_native_result(masm, ret_type, workspace_slot_offset);
2307     __ z_bru(done);
2308     __ z_illtrap(0x66);
2309 
2310     __ bind(done);
2311   }
2312 
2313 
2314   //--------------------------------------------------------------------
2315   // Clear "last Java frame" SP and PC.
2316   //--------------------------------------------------------------------
2317   __ verify_thread(); // Z_thread must be correct.
2318 
2319   __ reset_last_Java_frame();
2320 
2321   // Unpack oop result, e.g. JNIHandles::resolve result.
2322   if (is_reference_type(ret_type)) {
2323     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2324   }
2325 
2326   if (CheckJNICalls) {
2327     // clear_pending_jni_exception_check
2328     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2329   }
2330 
2331   // Reset handle block.
2332   if (!is_critical_native) {
2333     __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2334     __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2335 
2336     // Check for pending exceptions.
2337     __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2338     __ z_brne(handle_pending_exception);
2339   }
2340 
2341 
2342   //////////////////////////////////////////////////////////////////////
2343   // Return
2344   //////////////////////////////////////////////////////////////////////
2345 
2346 
2347 #ifndef USE_RESIZE_FRAME
2348   __ pop_frame();                     // Pop wrapper frame.
2349 #else
2350   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2351 #endif
2352   __ restore_return_pc();             // This is the way back to the caller.
2353   __ z_br(Z_R14);
2354 
2355 
2356   //////////////////////////////////////////////////////////////////////
2357   // Out-of-line calls to the runtime.
2358   //////////////////////////////////////////////////////////////////////
2359 
2360 
2361   if (!is_critical_native) {
2362 
2363     //---------------------------------------------------------------------
2364     // Handler for pending exceptions (out-of-line).
2365     //---------------------------------------------------------------------
2366     // Since this is a native call, we know the proper exception handler
2367     // is the empty function. We just pop this frame and then jump to
2368     // forward_exception_entry. Z_R14 will contain the native caller's
2369     // return PC.
2370     __ bind(handle_pending_exception);
2371     __ pop_frame();
2372     __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2373     __ restore_return_pc();
2374     __ z_br(Z_R1_scratch);
2375 
2376     //---------------------------------------------------------------------
2377     // Handler for a cache miss (out-of-line)
2378     //---------------------------------------------------------------------
2379     __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2380   }
2381   __ flush();
2382 
2383 
2384   //////////////////////////////////////////////////////////////////////
2385   // end of code generation
2386   //////////////////////////////////////////////////////////////////////
2387 
2388 
2389   nmethod *nm = nmethod::new_native_nmethod(method,
2390                                             compile_id,
2391                                             masm->code(),
2392                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2393                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2394                                             stack_slots / VMRegImpl::slots_per_word,
2395                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2396                                             in_ByteSize(lock_offset),
2397                                             oop_maps);
2398 
2399   if (is_critical_native) {
2400     nm->set_lazy_critical_native(true);
2401   }
2402 
2403   return nm;
2404 #else
2405   ShouldNotReachHere();
2406   return NULL;
2407 #endif // COMPILER2
2408 }
2409 
2410 static address gen_c2i_adapter(MacroAssembler  *masm,
2411                                int total_args_passed,
2412                                int comp_args_on_stack,
2413                                const BasicType *sig_bt,
2414                                const VMRegPair *regs,
2415                                Label &skip_fixup) {
2416   // Before we get into the guts of the C2I adapter, see if we should be here
2417   // at all. We've come from compiled code and are attempting to jump to the
2418   // interpreter, which means the caller made a static call to get here
2419   // (vcalls always get a compiled target if there is one). Check for a
2420   // compiled target. If there is one, we need to patch the caller's call.
2421 
2422   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2423   const Register ientry = Z_R11;
2424   const Register code   = Z_R11;
2425 
2426   address c2i_entrypoint;
2427   Label   patch_callsite;
2428 
2429   // Regular (verified) c2i entry point.
2430   c2i_entrypoint = __ pc();
2431 
2432   // Call patching needed?
2433   __ load_and_test_long(Z_R0_scratch, method_(code));
2434   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2435   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2436 
2437   __ bind(skip_fixup);  // Return point from patch_callsite.
2438 
2439   // Since all args are passed on the stack, total_args_passed*wordSize is the
2440   // space we need. We need an ABI scratch area, but we use the caller's since
2441   // it has already been allocated.
2442 
2443   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2444   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2445   Register  sender_SP   = Z_R10;
2446   Register  value       = Z_R12;
2447 
2448   // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2449   // In addition, frame manager expects initial_caller_sp in Z_R10.
2450   __ z_lgr(sender_SP, Z_SP);
2451 
2452   // This should always fit in a 14-bit immediate.
2453   __ resize_frame(-extraspace, Z_R0_scratch);
2454 
2455   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2456   // args. This essentially moves the caller's ABI scratch area from the top to the
2457   // bottom of the arg area.
2458 
2459   int st_off =  extraspace - wordSize;
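       // Layout note (illustrative, 8-byte words): with 3 args, extraspace is
       // align_up(3, 2) * 8 + abi_scratch == 32 + abi_scratch bytes; st_off starts
       // at the highest interpreter arg slot and moves toward lower addresses.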
2460 
2461   // Now write the args into the outgoing interpreter space.
2462   for (int i = 0; i < total_args_passed; i++) {
2463     VMReg r_1 = regs[i].first();
2464     VMReg r_2 = regs[i].second();
2465     if (!r_1->is_valid()) {
2466       assert(!r_2->is_valid(), "");
2467       continue;
2468     }
2469     if (r_1->is_stack()) {
2470       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2471       // We must account for it here.
2472       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2473 
2474       if (!r_2->is_valid()) {
2475         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2476       } else {
2477         // longs are given 2 64-bit slots in the interpreter,
2478         // but the data is passed in only 1 slot.
2479         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2480 #ifdef ASSERT
2481           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2482 #endif
2483           st_off -= wordSize;
2484         }
2485         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2486       }
2487     } else {
2488       if (r_1->is_Register()) {
2489         if (!r_2->is_valid()) {
2490           __ z_st(r_1->as_Register(), st_off, Z_SP);
2491         } else {
2492           // longs are given 2 64-bit slots in the interpreter, but the
2493           // data is passed in only 1 slot.
2494           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2495 #ifdef ASSERT
2496             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2497 #endif
2498             st_off -= wordSize;
2499           }
2500           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2501         }
2502       } else {
2503         assert(r_1->is_FloatRegister(), "");
2504         if (!r_2->is_valid()) {
2505           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2506         } else {
2507           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2508           // data is passed in only 1 slot.
2509           // One of these should get known junk...
2510 #ifdef ASSERT
2511           __ z_lzdr(Z_F1);
2512           __ z_std(Z_F1, st_off, Z_SP);
2513 #endif
2514           st_off -= wordSize;
2515           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2516         }
2517       }
2518     }
2519     st_off -= wordSize;
2520   }
2521 
2522 
2523   // Jump to the interpreter just as if interpreter was doing it.
2524   __ add2reg(Z_esp, st_off, Z_SP);
2525 
2526   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2527   __ z_br(ientry);
2528 
2529 
2530   // Prevent illegal entry to out-of-line code.
2531   __ z_illtrap(0x22);
2532 
2533   // Generate out-of-line runtime call to patch caller,
2534   // then continue as interpreted.
2535 
2536   // If you lose the race you go interpreted.
2537   // We don't see any possible endless c2i -> i2c -> c2i ...
2538   // transitions no matter how rare.
2539   __ bind(patch_callsite);
2540 
2541   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2542   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2543   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2544   __ z_bru(skip_fixup);
2545 
2546   // end of out-of-line code
2547 
2548   return c2i_entrypoint;
2549 }
2550 
2551 // On entry, the following registers are set
2552 //
2553 //    Z_thread  r8  - JavaThread*
2554 //    Z_method  r9  - callee's method (method to be invoked)
2555 //    Z_esp     r7  - operand (or expression) stack pointer of caller. One slot above last arg.
2556 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2557 //
2558 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2559                                     int total_args_passed,
2560                                     int comp_args_on_stack,
2561                                     const BasicType *sig_bt,
2562                                     const VMRegPair *regs) {
2563   const Register value = Z_R12;
2564   const Register ld_ptr= Z_esp;
2565 
2566   int ld_offset = total_args_passed * wordSize;
2567 
2568   // Cut-out for having no stack args.
2569   if (comp_args_on_stack) {
2570     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2571     // registers are below. By subtracting stack0, we either get a negative
2572     // number (all values in registers) or the maximum stack slot accessed.
2573     // Convert VMRegImpl (4 byte) stack slots to words.
2574     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2575     // Round up to minimum stack alignment, in units of wordSize.
2576     comp_words_on_stack = align_up(comp_words_on_stack, 2);
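         // Example (illustrative): 5 outgoing 4-byte stack slots occupy 20 bytes,
         // i.e. 3 words after rounding up, then 4 words after alignment to an even
         // count, so the frame below is extended by 32 bytes.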
2577 
2578     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2579   }
2580 
2581   // Now generate the shuffle code. Pick up all register args and move the
2582   // rest through register value=Z_R12.
2583   for (int i = 0; i < total_args_passed; i++) {
2584     if (sig_bt[i] == T_VOID) {
2585       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2586       continue;
2587     }
2588 
2589     // Pick up 0, 1 or 2 words from ld_ptr.
2590     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2591            "scrambled load targets?");
2592     VMReg r_1 = regs[i].first();
2593     VMReg r_2 = regs[i].second();
2594     if (!r_1->is_valid()) {
2595       assert(!r_2->is_valid(), "");
2596       continue;
2597     }
2598     if (r_1->is_FloatRegister()) {
2599       if (!r_2->is_valid()) {
2600         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2601         ld_offset -= wordSize;
2602       } else {
2603         // Skip the unused interpreter slot.
2604         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2605         ld_offset -= 2 * wordSize;
2606       }
2607     } else {
2608       if (r_1->is_stack()) {
2609         // Must do a memory to memory move.
2610         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2611 
2612         if (!r_2->is_valid()) {
2613           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2614         } else {
2615           // In 64-bit mode, longs are given 2 64-bit slots in the interpreter, but the
2616           // data is passed in only 1 slot.
2617           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2618             ld_offset -= wordSize;
2619           }
2620           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2621         }
2622       } else {
2623         if (!r_2->is_valid()) {
2624           // Not sure we need to do this but it shouldn't hurt.
2625           if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
2626             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2627           } else {
2628             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2629           }
2630         } else {
2631           // In 64-bit mode, longs are given 2 64-bit slots in the interpreter, but the
2632           // data is passed in only 1 slot.
2633           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2634             ld_offset -= wordSize;
2635           }
2636           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2637         }
2638       }
2639       ld_offset -= wordSize;
2640     }
2641   }
2642 
2643   // Jump to the compiled code just as if compiled code was doing it.
2644   // load target address from method oop:
2645   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2646 
2647   // Store method oop into thread->callee_target.
2648   // 6243940: We might end up in handle_wrong_method if
2649   // the callee is deoptimized as we race thru here. If that
2650   // happens we don't want to take a safepoint because the
2651   // caller frame will look interpreted and arguments are now
2652   // "compiled" so it is much better to make this transition
2653   // invisible to the stack walking code. Unfortunately, if
2654   // we try and find the callee by normal means a safepoint
2655   // is possible. So we stash the desired callee in the thread
2656   // and the vm will find it there should this case occur.
2657   __ z_stg(Z_method, thread_(callee_target));
2658 
2659   __ z_br(Z_R1_scratch);
2660 }
2661 
2662 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2663                                                             int total_args_passed,
2664                                                             int comp_args_on_stack,
2665                                                             const BasicType *sig_bt,
2666                                                             const VMRegPair *regs,
2667                                                             AdapterFingerPrint* fingerprint) {
2668   __ align(CodeEntryAlignment);
2669   address i2c_entry = __ pc();
2670   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2671 
2672   address c2i_unverified_entry;
2673 
2674   Label skip_fixup;
2675   {
2676     Label ic_miss;
2677     const int klass_offset           = oopDesc::klass_offset_in_bytes();
2678     const int holder_klass_offset    = CompiledICHolder::holder_klass_offset();
2679     const int holder_metadata_offset = CompiledICHolder::holder_metadata_offset();
2680 
2681     // Out-of-line call to ic_miss handler.
2682     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2683 
2684     // Unverified Entry Point UEP
2685     __ align(CodeEntryAlignment);
2686     c2i_unverified_entry = __ pc();
2687 
2688     // Check the pointers.
2689     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2690       __ z_ltgr(Z_ARG1, Z_ARG1);
2691       __ z_bre(ic_miss);
2692     }
2693     __ verify_oop(Z_ARG1);
2694 
2695     // Check ic: object class <-> cached class
2696     // Compress cached class for comparison. That's more efficient.
2697     if (UseCompressedClassPointers) {
2698       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2699       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2700     } else {
2701       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2702     }
2703     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2704 
2705     // This def MUST MATCH code in gen_c2i_adapter!
2706     const Register code = Z_R11;
2707 
2708     __ z_lg(Z_method, holder_metadata_offset, Z_method);
2709     __ load_and_test_long(Z_R0, method_(code));
2710     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2711 
2712     // Fall through to the VEP. Duplicates the LTG, but saves a taken branch.
2713   }
2714 
2715   address c2i_entry = __ pc();
2716 
2717   // Class initialization barrier for static methods
2718   address c2i_no_clinit_check_entry = NULL;
2719   if (VM_Version::supports_fast_class_init_checks()) {
2720     Label L_skip_barrier;
2721 
2722     { // Bypass the barrier for non-static methods
2723       __ testbit(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
2724       __ z_bfalse(L_skip_barrier); // non-static
2725     }
2726 
2727     Register klass = Z_R11;
2728     __ load_method_holder(klass, Z_method);
2729     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
2730 
2731     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
2732     __ z_br(klass);
2733 
2734     __ bind(L_skip_barrier);
2735     c2i_no_clinit_check_entry = __ pc();
2736   }
2737 
2738   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2739 
2740   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
2741 }
2742 
2743 // This function returns the adjustment size (in number of words) to a c2i adapter
2744 // activation for use during deoptimization.
2745 //
2746 // Actually only compiled frames need to be adjusted, but it
2747 // does no harm to adjust entry and interpreter frames, too.
2748 //
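     // For illustration (hypothetical numbers): a callee with 2 parameters and 5
     // locals needs room for 3 extra stack elements, so the adjustment is
     // 3 * Interpreter::stackElementWords plus the parent ijava frame ABI size in words.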
2749 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2750   assert(callee_locals >= callee_parameters,
2751           "test and remove; got more parms than locals");
2752   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2753   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2754          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2755 }
2756 
2757 uint SharedRuntime::out_preserve_stack_slots() {
2758   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2759 }
2760 
2761 //
2762 // Frame generation for deopt and uncommon trap blobs.
2763 //
2764 static void push_skeleton_frame(MacroAssembler* masm,
2765                           /* Unchanged */
2766                           Register frame_sizes_reg,
2767                           Register pcs_reg,
2768                           /* Invalidate */
2769                           Register frame_size_reg,
2770                           Register pc_reg) {
2771   BLOCK_COMMENT("  push_skeleton_frame {");
2772    __ z_lg(pc_reg, 0, pcs_reg);
2773    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2774    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2775    Register fp = pc_reg;
2776    __ push_frame(frame_size_reg, fp);
2777 #ifdef ASSERT
2778    // The magic is required for successfully walking skeletal frames.
2779    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2780    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2781    // Fill other slots that are supposedly not necessary with eye catchers.
2782    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2783    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2784    // The sender_sp of the bottom frame is set before pushing it.
2785    // The sender_sp of non-bottom frames is their caller's top_frame_sp, which
2786    // is unknown here. Luckily it is not needed before the frame is filled in by
2787    // layout_activation(); we assert this by setting an eye catcher (see
2788    // comments on sender_sp in frame_s390.hpp).
2789    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2790 #endif // ASSERT
2791   BLOCK_COMMENT("  } push_skeleton_frame");
2792 }
2793 
2794 // Loop through the UnrollBlock info and create new frames.
2795 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2796                             /* read */
2797                             Register unroll_block_reg,
2798                             /* invalidate */
2799                             Register frame_sizes_reg,
2800                             Register number_of_frames_reg,
2801                             Register pcs_reg,
2802                             Register tmp1,
2803                             Register tmp2) {
2804   BLOCK_COMMENT("push_skeleton_frames {");
2805   // _number_of_frames is of type int (deoptimization.hpp).
2806   __ z_lgf(number_of_frames_reg,
2807            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2808   __ z_lg(pcs_reg,
2809           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2810   __ z_lg(frame_sizes_reg,
2811           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2812 
2813   // stack: (caller_of_deoptee, ...).
2814 
2815   // If caller_of_deoptee is a compiled frame, then we extend it to make
2816   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2817   // See also Deoptimization::last_frame_adjust() above.
2818   // Note: entry and interpreted frames are adjusted, too. But this does no harm.
2819 
2820   __ z_lgf(Z_R1_scratch,
2821            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2822   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2823   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2824   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2825   // (it is required to find the original pc of caller_of_deoptee if it is marked
2826   // for deoptimization - see nmethod::orig_pc_addr()).
2827   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2828 
2829   // Now push the new interpreter frames.
2830   Label loop, loop_entry;
2831 
2832   // Make sure that there is at least one entry in the array.
2833   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2834   __ asm_assert_ne("array_size must be > 0", 0x205);
2835 
2836   __ z_bru(loop_entry);
2837 
2838   __ bind(loop);
2839 
2840   __ add2reg(frame_sizes_reg, wordSize);
2841   __ add2reg(pcs_reg, wordSize);
2842 
2843   __ bind(loop_entry);
2844 
2845   // Allocate a new frame, fill in the pc.
2846   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2847 
2848   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2849   __ z_brne(loop);
2850 
2851   // Set the top frame's return pc.
2852   __ add2reg(pcs_reg, wordSize);
2853   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2854   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2855   BLOCK_COMMENT("} push_skeleton_frames");
2856 }
2857 
2858 //------------------------------generate_deopt_blob----------------------------
2859 void SharedRuntime::generate_deopt_blob() {
2860   // Allocate space for the code.
2861   ResourceMark rm;
2862   // Setup code generation tools.
2863   CodeBuffer buffer("deopt_blob", 2048, 1024);
2864   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2865   Label exec_mode_initialized;
2866   OopMap* map = NULL;
2867   OopMapSet *oop_maps = new OopMapSet();
2868 
2869   unsigned int start_off = __ offset();
2870   Label cont;
2871 
2872   // --------------------------------------------------------------------------
2873   // Normal entry (non-exception case)
2874   //
2875   // We have been called from the deopt handler of the deoptee.
2876   // Z_R14 points behind the call in the deopt handler. We adjust
2877   // it such that it points to the start of the deopt handler.
2878   // The return_pc has been stored in the frame of the deoptee and
2879   // will replace the address of the deopt_handler in the call
2880   // to Deoptimization::fetch_unroll_info below.
2881   // The (int) cast is necessary, because -((unsigned int)14)
2882   // is an unsigned int.
2883   __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
2884 
2885   const Register   exec_mode_reg = Z_tmp_1;
2886 
2887   // stack: (deoptee, caller of deoptee, ...)
2888 
2889   // Pushes an "unpack" frame.
2890   // R14 contains the return address pointing into the deoptimized
2891   // nmethod that was valid just before the nmethod was deoptimized.
2892   // Save R14 into the deoptee frame; the `fetch_unroll_info'
2893   // procedure called below will read it from there.
2894   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2895 
2896   // note the entry point.
2897   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2898   __ z_bru(exec_mode_initialized);
2899 
2900 #ifndef COMPILER1
2901   int reexecute_offset = 1; // Odd offset will produce an odd pc, which triggers a hardware trap.
2902 #else
2903   // --------------------------------------------------------------------------
2904   // Reexecute entry
2905   // - Z_R14 = Deopt Handler in nmethod
2906 
2907   int reexecute_offset = __ offset() - start_off;
2908 
2909   // No need to update map as each call to save_live_registers will produce an identical oopmap.
2910   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2911 
2912   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2913   __ z_bru(exec_mode_initialized);
2914 #endif
2915 
2916 
2917   // --------------------------------------------------------------------------
2918   // Exception entry. We reached here via a branch. Registers on entry:
2919   // - Z_EXC_OOP (Z_ARG1) = exception oop
2920   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2921 
2922   int exception_offset = __ offset() - start_off;
2923 
2924   // All registers are dead at this entry point, except for Z_EXC_OOP and
2925   // Z_EXC_PC, which contain the exception oop and exception pc,
2926   // respectively. Set them in TLS and fall thru to the
2927   // unpack_with_exception_in_tls entry point.
2928 
2929   // Store exception oop and pc in thread (location known to GC).
2930   // Need this since the call to "fetch_unroll_info()" may safepoint.
2931   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2932   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2933 
2934   // fall through
2935 
2936   int exception_in_tls_offset = __ offset() - start_off;
2937 
2938   // new implementation because exception oop is now passed in JavaThread
2939 
2940   // Prolog for the exception case.
2941   // All registers must be preserved because they might be used by LinearScan.
2942   // Exception oop and throwing PC are passed in JavaThread.
2943 
2944   // Load the throwing pc from JavaThread and use it as the return address of the current frame.
2945   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2946 
2947   // Save everything in sight.
2948   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2949 
2950   // Now it is safe to overwrite any register
2951 
2952   // Clear the exception pc field in JavaThread
2953   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2954 
2955   // Deopt during an exception.  Save exec mode for unpack_frames.
2956   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2957 
2958 
2959 #ifdef ASSERT
2960   // verify that there is really an exception oop in JavaThread
2961   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2962   __ verify_oop(Z_ARG1);
2963 
2964   // verify that there is no pending exception
2965   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2966                              "must not have pending exception here", __LINE__);
2967 #endif
2968 
2969   // --------------------------------------------------------------------------
2970   // At this point, the live registers are saved and
2971   // the exec_mode_reg has been set up correctly.
2972   __ bind(exec_mode_initialized);
2973 
2974   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2975 
2976   {
2977   const Register unroll_block_reg  = Z_tmp_2;
2978 
2979   // We need to set `last_Java_frame' because `fetch_unroll_info' will
2980   // call `last_Java_frame()'. However, we can't block and no GC will
2981   // occur, so we don't need an oopmap. The value of the pc in the
2982   // frame is not particularly important; it just needs to identify the blob.
2983 
2984   // Don't set last_Java_pc anymore here (is implicitly NULL then).
2985   // the correct PC is retrieved in pd_last_frame() in that case.
2986   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
2987   // With EscapeAnalysis turned on, this call may safepoint
2988   // even though it is marked as a "leaf call"!
2989   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
2990   // Set an oopmap for the call site. This describes all our saved volatile registers.
2991   int offs = __ offset();
2992   oop_maps->add_gc_map(offs, map);
2993 
2994   __ reset_last_Java_frame();
2995   // save the return value.
2996   __ z_lgr(unroll_block_reg, Z_RET);
2997   // restore the return registers that have been saved
2998   // (among other registers) by save_live_registers(...).
2999   RegisterSaver::restore_result_registers(masm);
3000 
3001   // reload the exec mode from the UnrollBlock (it might have changed)
3002   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
3003 
3004   // In excp_deopt_mode, restore and clear exception oop which we
3005   // stored in the thread during exception entry above. The exception
3006   // oop will be the return value of this stub.
3007   NearLabel skip_restore_excp;
3008   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
3009   __ z_lg(Z_RET, thread_(exception_oop));
3010   __ clear_mem(thread_(exception_oop), 8);
3011   __ bind(skip_restore_excp);
3012 
3013   // remove the "unpack" frame
3014   __ pop_frame();
3015 
3016   // stack: (deoptee, caller of deoptee, ...).
3017 
3018   // pop the deoptee's frame
3019   __ pop_frame();
3020 
3021   // stack: (caller_of_deoptee, ...).
3022 
3023   // loop through the `UnrollBlock' info and create interpreter frames.
3024   push_skeleton_frames(masm, true/*deopt*/,
3025                   unroll_block_reg,
3026                   Z_tmp_3,
3027                   Z_tmp_4,
3028                   Z_ARG5,
3029                   Z_ARG4,
3030                   Z_ARG3);
3031 
3032   // stack: (skeletal interpreter frame, ..., optional skeletal
3033   // interpreter frame, caller of deoptee, ...).
3034   }
3035 
3036   // push an "unpack" frame taking care of float / int return values.
3037   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
3038 
3039   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3040   // skeletal interpreter frame, caller of deoptee, ...).
3041 
3042   // spill live volatile registers since we'll do a call.
3043   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
3044   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
3045 
3046   // Let the unpacker lay out the information in the skeletal frames just allocated.
3047   __ get_PC(Z_RET);
3048   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
3049   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
3050                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
3051 
3052   __ reset_last_Java_frame();
3053 
3054   // restore the volatiles saved above.
3055   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
3056   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
3057 
3058   // pop the "unpack" frame.
3059   __ pop_frame();
3060   __ restore_return_pc();
3061 
3062   // stack: (top interpreter frame, ..., optional interpreter frame,
3063   // caller of deoptee, ...).
3064 
3065   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3066   __ restore_bcp();
3067   __ restore_locals();
3068   __ restore_esp();
3069 
3070   // return to the interpreter entry point.
3071   __ z_br(Z_R14);
3072 
3073   // Make sure all code is generated
3074   masm->flush();
3075 
3076   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3077   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3078 }
3079 
3080 
3081 #ifdef COMPILER2
3082 //------------------------------generate_uncommon_trap_blob--------------------
3083 void SharedRuntime::generate_uncommon_trap_blob() {
3084   // Allocate space for the code
3085   ResourceMark rm;
3086   // Setup code generation tools
3087   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3088   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3089 
3090   Register unroll_block_reg = Z_tmp_1;
3091   Register klass_index_reg  = Z_ARG2;
3092   Register unc_trap_reg     = Z_ARG2;
3093 
3094   // stack: (deoptee, caller_of_deoptee, ...).
3095 
3096   // push a dummy "unpack" frame and call
3097   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3098   // vframe array and return the `UnrollBlock' information.
3099 
3100   // save R14 to compiled frame.
3101   __ save_return_pc();
3102   // push the "unpack_frame".
3103   __ push_frame_abi160(0);
3104 
3105   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
3106 
3107   // set the "unpack" frame as last_Java_frame.
3108   // `Deoptimization::uncommon_trap' expects it and considers its
3109   // sender frame as the deoptee frame.
3110   __ get_PC(Z_R1_scratch);
3111   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3112 
3113   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
3114   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
3115   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
3116   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
3117 
3118   __ reset_last_Java_frame();
3119 
3120   // pop the "unpack" frame
3121   __ pop_frame();
3122 
3123   // stack: (deoptee, caller_of_deoptee, ...).
3124 
3125   // save the return value.
3126   __ z_lgr(unroll_block_reg, Z_RET);
3127 
3128   // pop the deoptee frame.
3129   __ pop_frame();
3130 
3131   // stack: (caller_of_deoptee, ...).
3132 
3133 #ifdef ASSERT
3134   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
3135   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
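       // unpack_kind is a 4-byte int, while CLI/CLIY compare a single byte.
       // On big-endian machines the least significant byte of the int is
       // therefore 3 bytes past the field offset.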
3136   const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
3137 #ifndef VM_LITTLE_ENDIAN
3138   + 3
3139 #endif
3140   ;
3141   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
3142     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3143   } else {
3144     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3145   }
3146   __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
3147 #endif
3148 
3149   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
3150 
3151   // Allocate new interpreter frame(s) and possibly resize the caller's frame
3152   // (no more adapters!).
3153   push_skeleton_frames(masm, false/*deopt*/,
3154                   unroll_block_reg,
3155                   Z_tmp_2,
3156                   Z_tmp_3,
3157                   Z_tmp_4,
3158                   Z_ARG5,
3159                   Z_ARG4);
3160 
3161   // stack: (skeletal interpreter frame, ..., optional skeletal
3162   // interpreter frame, (resized) caller of deoptee, ...).
3163 
3164   // push a dummy "unpack" frame taking care of float return values.
3165   // call `Deoptimization::unpack_frames' to lay out the information in the
3166   // interpreter frames just created.
3167 
3168   // push the "unpack" frame
3169   const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
3170 
3171   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3172   // skeletal interpreter frame, (resized) caller of deoptee, ...).
3173 
3174   // set the "unpack" frame as last_Java_frame
3175   __ get_PC(Z_R1_scratch);
3176   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3177 
3178   // indicate it is the uncommon trap case
3179   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
3180   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3181   // Let the unpacker lay out the information in the skeletal frames just allocated.
3182   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
3183 
3184   __ reset_last_Java_frame();
3185   // pop the "unpack" frame
3186   __ pop_frame();
3187   // restore LR from top interpreter frame
3188   __ restore_return_pc();
3189 
3190   // stack: (top interpreter frame, ..., optional interpreter frame,
3191   // (resized) caller of deoptee, ...).
3192 
3193   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3194   __ restore_bcp();
3195   __ restore_locals();
3196   __ restore_esp();
3197 
3198   // return to the interpreter entry point
3199   __ z_br(Z_R14);
3200 
3201   masm->flush();
3202   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
3203 }
3204 #endif // COMPILER2
3205 
3206 
3207 //------------------------------generate_handler_blob------
3208 //
3209 // Generate a special Compile2Runtime blob that saves all registers
3210 // and sets up the oopmap.
3211 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3212   assert(StubRoutines::forward_exception_entry() != NULL,
3213          "must be generated before");
3214 
3215   ResourceMark rm;
3216   OopMapSet *oop_maps = new OopMapSet();
3217   OopMap* map;
3218 
3219   // Allocate space for the code. Setup code generation tools.
3220   CodeBuffer buffer("handler_blob", 2048, 1024);
3221   MacroAssembler* masm = new MacroAssembler(&buffer);
3222 
3223   unsigned int start_off = __ offset();
3224   address call_pc = NULL;
3225   int frame_size_in_bytes;
3226 
3227   bool cause_return = (poll_type == POLL_AT_RETURN);
3228   // Make room for return address (or push it again)
3229   if (!cause_return) {
3230     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
3231   }
3232 
3233   // Save registers, fpu state, and flags
3234   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3235 
3236   if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
3237     // Keep a copy of the return pc to detect if it gets modified.
3238     __ z_lgr(Z_R6, Z_R14);
3239   }
3240 
3241   // The following is basically a call_VM. However, we need the precise
3242   // address of the call in order to generate an oopmap. Hence, we do all the
3243   // work ourselves.
3244   __ set_last_Java_frame(Z_SP, noreg);
3245 
3246   // call into the runtime to handle the safepoint poll
3247   __ call_VM_leaf(call_ptr, Z_thread);
3248 
3249 
3250   // Set an oopmap for the call site. This oopmap will map all
3251   // oop-registers and debug-info registers as callee-saved. This
3252   // will allow deoptimization at this safepoint to find all possible
3253   // debug-info recordings, as well as let GC find all oops.
3254 
3255   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
3256 
3257   Label noException;
3258 
3259   __ reset_last_Java_frame();
3260 
3261   __ load_and_test_long(Z_R1, thread_(pending_exception));
3262   __ z_bre(noException);
3263 
3264   // Pending exception case, used (sporadically) by
3265   // api/java_lang/Thread.State/index#ThreadState et al.
3266   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3267 
3268   // Jump to forward_exception_entry, with the issuing PC in Z_R14
3269   // so it looks like the original nmethod called forward_exception_entry.
3270   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3271   __ z_br(Z_R1_scratch);
3272 
3273   // No exception case
3274   __ bind(noException);
3275 
3276   if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
3277     Label no_adjust;
3278     // If our stashed return pc was modified by the runtime, we avoid touching it.
3279     const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
3280     __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
3281     __ z_brne(no_adjust);
3282 
3283     // Adjust return pc forward to step over the safepoint poll instruction
3284     __ instr_size(Z_R1_scratch, Z_R6);
3285     __ z_agr(Z_R6, Z_R1_scratch);
3286     __ z_stg(Z_R6, offset_of_return_pc, Z_SP);
3287 
3288     __ bind(no_adjust);
3289   }
3290 
3291   // Normal exit, restore registers and exit.
3292   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3293 
3294   __ z_br(Z_R14);
3295 
3296   // Make sure all code is generated
3297   masm->flush();
3298 
3299   // Fill-out other meta info
3300   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3301 }
3302 
3303 
3304 //
3305 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3306 //
3307 // Generate a stub that calls into vm to find out the proper destination
3308 // of a Java call. All the argument registers are live at this point,
3309 // but since this is generic code we don't know what they are, and the caller
3310 // must do any gc of the args.
3311 //
3312 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3313   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3314 
3315   // allocate space for the code
3316   ResourceMark rm;
3317 
3318   CodeBuffer buffer(name, 1000, 512);
3319   MacroAssembler* masm                = new MacroAssembler(&buffer);
3320 
3321   OopMapSet *oop_maps = new OopMapSet();
3322   OopMap* map = NULL;
3323 
3324   unsigned int start_off = __ offset();
3325 
3326   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3327 
3328   // We must save a PC from within the stub as the return PC.
3329   // C code doesn't store the return pc (Z_R14) where we expect the PC,
3330   // so we would run into trouble upon stack walking.
3331   __ get_PC(Z_R1_scratch);
3332 
3333   unsigned int frame_complete = __ offset();
3334 
3335   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
3336 
3337   __ call_VM_leaf(destination, Z_thread, Z_method);
3338 
3339 
3340   // Set an oopmap for the call site.
3341   // We need this not only for callee-saved registers, but also for volatile
3342   // registers that the compiler might be keeping live across a safepoint.
3343 
3344   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
3345 
3346   // clear last_Java_sp
3347   __ reset_last_Java_frame();
3348 
3349   // check for pending exceptions
3350   Label pending;
3351   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3352   __ z_brne(pending);
3353 
3354   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3355   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3356 
3357   // get the returned method
3358   __ get_vm_result_2(Z_method);
3359 
3360   // We are back to the original state on entry and ready to go.
3361   __ z_br(Z_R1_scratch);
3362 
3363   // Pending exception after the safepoint
3364 
3365   __ bind(pending);
3366 
3367   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3368 
3369   // exception pending => remove activation and forward to exception handler
3370 
3371   __ z_lgr(Z_R2, Z_R0); // pending_exception
3372   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3373   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3374   __ z_br(Z_R1_scratch);
3375 
3376   // -------------
3377   // make sure all code is generated
3378   masm->flush();
3379 
3380   // return the blob
3381   // frame_size_words or bytes??
3382   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3383                                        oop_maps, true);
3384 
3385 }
3386 
3387 //------------------------------Montgomery multiplication------------------------
3388 //
3389 
3390 // Subtract 0:b from carry:a. Return carry.
3391 static unsigned long
3392 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3393   unsigned long i, c = 8 * (unsigned long)(len - 1);
3394   __asm__ __volatile__ (
3395     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3396     "LGHI   0, 8               \n" // index increment (for BRXLG)
3397     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3398     "0:                        \n"
3399     "LG     %[c], 0(%[i],%[a]) \n"
3400     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3401     "STG    %[c], 0(%[i],%[a]) \n"
3402     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3403     "SLBGR  %[c], %[c]         \n" // save carry - 1
3404     : [i]"=&a"(i), [c]"+r"(c)
3405     : [a]"a"(a), [b]"a"(b)
3406     : "cc", "memory", "r0", "r1"
3407  );
3408   return carry + c;
3409 }
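
     // For reference, a plain C sketch of the loop above (illustrative only and
     // not part of the build; assumes 64-bit unsigned long, the real work is
     // done by the inline asm):
     //
     //   unsigned long borrow = 0;
     //   for (long k = 0; k < len; k++) {
     //     unsigned long ak = a[k];
     //     unsigned long d  = ak - b[k] - borrow;
     //     borrow = (ak < b[k]) || (ak - b[k] < borrow);
     //     a[k] = d;
     //   }
     //   return carry - borrow;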
3410 
3411 // Multiply (unsigned) Long A by Long B, accumulating the double-
3412 // length result into the accumulator formed of T0, T1, and T2.
3413 inline void MACC(unsigned long A[], long A_ind,
3414                  unsigned long B[], long B_ind,
3415                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3416   long A_si = 8 * A_ind,
3417        B_si = 8 * B_ind;
3418   __asm__ __volatile__ (
3419     "LG     1, 0(%[A_si],%[A]) \n"
3420     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3421     "ALGR   %[T0], 1           \n"
3422     "LGHI   1, 0               \n" // r1 = 0
3423     "ALCGR  %[T1], 0           \n"
3424     "ALCGR  %[T2], 1           \n"
3425     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3426     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3427     : "cc", "r0", "r1"
3428  );
3429 }
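
     // The net effect of the asm above is a 192-bit accumulate:
     //   T2:T1:T0 += (unsigned __int128)A[A_ind] * B[B_ind]
     // Illustrative plain C sketch (not part of the build; assumes __int128 support):
     //   unsigned __int128 p = (unsigned __int128)A[A_ind] * B[B_ind];
     //   unsigned __int128 s = (unsigned __int128)T0 + (unsigned long)p;
     //   T0  = (unsigned long)s;
     //   s   = (unsigned __int128)T1 + (unsigned long)(p >> 64) + (unsigned long)(s >> 64);
     //   T1  = (unsigned long)s;
     //   T2 += (unsigned long)(s >> 64);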
3430 
3431 // As above, but add twice the double-length result into the
3432 // accumulator.
3433 inline void MACC2(unsigned long A[], long A_ind,
3434                   unsigned long B[], long B_ind,
3435                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3436   const unsigned long zero = 0;
3437   long A_si = 8 * A_ind,
3438        B_si = 8 * B_ind;
3439   __asm__ __volatile__ (
3440     "LG     1, 0(%[A_si],%[A]) \n"
3441     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3442     "ALGR   %[T0], 1           \n"
3443     "ALCGR  %[T1], 0           \n"
3444     "ALCGR  %[T2], %[zero]     \n"
3445     "ALGR   %[T0], 1           \n"
3446     "ALCGR  %[T1], 0           \n"
3447     "ALCGR  %[T2], %[zero]     \n"
3448     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3449     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3450     : "cc", "r0", "r1"
3451  );
3452 }
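
     // MACC2 serves the symmetric cross terms a[j] * a[i-j] (with j != i-j) in
     // montgomery_square below; each such product occurs twice in the full
     // square, so it is accumulated twice here.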
3453 
3454 // Fast Montgomery multiplication. The derivation of the algorithm is
3455 // in "A Cryptographic Library for the Motorola DSP56000",
3456 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
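     //
     // With R = 2^(64*len) and inv == -n^-1 mod 2^64 (checked by the assert in the
     // body), the result m satisfies m == a * b * R^-1 (mod n). Choosing
     // m[i] = t0 * inv makes t0 + m[i]*n[0] == 0 mod 2^64, so every iteration
     // shifts a zero low word out of the triple-precision accumulator. The
     // trailing while loop subtracts n until the overflow word t0 is gone, so the
     // result fits in len longwords.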
3457 static void
3458 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3459                     unsigned long m[], unsigned long inv, int len) {
3460   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3461   int i;
3462 
3463   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3464 
3465   for (i = 0; i < len; i++) {
3466     int j;
3467     for (j = 0; j < i; j++) {
3468       MACC(a, j, b, i-j, t0, t1, t2);
3469       MACC(m, j, n, i-j, t0, t1, t2);
3470     }
3471     MACC(a, i, b, 0, t0, t1, t2);
3472     m[i] = t0 * inv;
3473     MACC(m, i, n, 0, t0, t1, t2);
3474 
3475     assert(t0 == 0, "broken Montgomery multiply");
3476 
3477     t0 = t1; t1 = t2; t2 = 0;
3478   }
3479 
3480   for (i = len; i < 2 * len; i++) {
3481     int j;
3482     for (j = i - len + 1; j < len; j++) {
3483       MACC(a, j, b, i-j, t0, t1, t2);
3484       MACC(m, j, n, i-j, t0, t1, t2);
3485     }
3486     m[i-len] = t0;
3487     t0 = t1; t1 = t2; t2 = 0;
3488   }
3489 
3490   while (t0) {
3491     t0 = sub(m, n, t0, len);
3492   }
3493 }
3494 
3495 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3496 // multiplies so it should be up to 25% faster than Montgomery
3497 // multiplication. However, its loop control is more complex and it
3498 // may actually run slower on some machines.
3499 static void
3500 montgomery_square(unsigned long a[], unsigned long n[],
3501                   unsigned long m[], unsigned long inv, int len) {
3502   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3503   int i;
3504 
3505   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3506 
3507   for (i = 0; i < len; i++) {
3508     int j;
3509     int end = (i+1)/2;
3510     for (j = 0; j < end; j++) {
3511       MACC2(a, j, a, i-j, t0, t1, t2);
3512       MACC(m, j, n, i-j, t0, t1, t2);
3513     }
3514     if ((i & 1) == 0) {
3515       MACC(a, j, a, j, t0, t1, t2);
3516     }
3517     for (; j < i; j++) {
3518       MACC(m, j, n, i-j, t0, t1, t2);
3519     }
3520     m[i] = t0 * inv;
3521     MACC(m, i, n, 0, t0, t1, t2);
3522 
3523     assert(t0 == 0, "broken Montgomery square");
3524 
3525     t0 = t1; t1 = t2; t2 = 0;
3526   }
3527 
3528   for (i = len; i < 2*len; i++) {
3529     int start = i-len+1;
3530     int end = start + (len - start)/2;
3531     int j;
3532     for (j = start; j < end; j++) {
3533       MACC2(a, j, a, i-j, t0, t1, t2);
3534       MACC(m, j, n, i-j, t0, t1, t2);
3535     }
3536     if ((i & 1) == 0) {
3537       MACC(a, j, a, j, t0, t1, t2);
3538     }
3539     for (; j < len; j++) {
3540       MACC(m, j, n, i-j, t0, t1, t2);
3541     }
3542     m[i-len] = t0;
3543     t0 = t1; t1 = t2; t2 = 0;
3544   }
3545 
3546   while (t0) {
3547     t0 = sub(m, n, t0, len);
3548   }
3549 }
3550 
3551 // The threshold at which squaring is advantageous was determined
3552 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3553 // Value seems to be ok for other platforms, too.
3554 #define MONTGOMERY_SQUARING_THRESHOLD 64
3555 
3556 // Copy len longwords from s to d, word-swapping as we go. The
3557 // destination array is reversed.
3558 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3559   d += len;
3560   while(len-- > 0) {
3561     d--;
3562     unsigned long s_val = *s;
3563     // Swap words in a longword on little endian machines.
3564 #ifdef VM_LITTLE_ENDIAN
3565      Unimplemented();
3566 #endif
3567     *d = s_val;
3568     s++;
3569   }
3570 }
3571 
3572 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3573                                         jint len, jlong inv,
3574                                         jint *m_ints) {
3575   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3576   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3577   int longwords = len/2;
3578 
3579   // Make very sure we don't use so much space that the stack might
3580   // overflow. 512 jints corresponds to a 16384-bit integer and
3581   // will use a total of 8K bytes of stack space here.
3582   int total_allocation = longwords * sizeof (unsigned long) * 4;
3583   guarantee(total_allocation <= 8192, "must be");
3584   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3585 
3586   // Local scratch arrays
3587   unsigned long
3588     *a = scratch + 0 * longwords,
3589     *b = scratch + 1 * longwords,
3590     *n = scratch + 2 * longwords,
3591     *m = scratch + 3 * longwords;
3592 
3593   reverse_words((unsigned long *)a_ints, a, longwords);
3594   reverse_words((unsigned long *)b_ints, b, longwords);
3595   reverse_words((unsigned long *)n_ints, n, longwords);
3596 
3597   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3598 
3599   reverse_words(m, (unsigned long *)m_ints, longwords);
3600 }
3601 
3602 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3603                                       jint len, jlong inv,
3604                                       jint *m_ints) {
3605   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3606   assert(len % 2 == 0, "array length in montgomery_square must be even");
3607   int longwords = len/2;
3608 
3609   // Make very sure we don't use so much space that the stack might
3610   // overflow. 512 jints corresponds to a 16384-bit integer and
3611   // will use a total of 6K bytes of stack space here.
3612   int total_allocation = longwords * sizeof (unsigned long) * 3;
3613   guarantee(total_allocation <= 8192, "must be");
3614   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3615 
3616   // Local scratch arrays
3617   unsigned long
3618     *a = scratch + 0 * longwords,
3619     *n = scratch + 1 * longwords,
3620     *m = scratch + 2 * longwords;
3621 
3622   reverse_words((unsigned long *)a_ints, a, longwords);
3623   reverse_words((unsigned long *)n_ints, n, longwords);
3624 
3625   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3626     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3627   } else {
3628     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3629   }
3630 
3631   reverse_words(m, (unsigned long *)m_ints, longwords);
3632 }
3633 
3634 extern "C"
3635 int SpinPause() {
3636   return 0;
3637 }