1 /*
   2  * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2019, SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "gc/shared/gcLocker.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "interpreter/interp_masm.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "oops/compiledICHolder.hpp"
  36 #include "registerSaver_s390.hpp"
  37 #include "runtime/safepointMechanism.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/vframeArray.hpp"
  40 #include "utilities/align.hpp"
  41 #include "vmreg_s390.inline.hpp"
  42 #ifdef COMPILER1
  43 #include "c1/c1_Runtime1.hpp"
  44 #endif
  45 #ifdef COMPILER2
  46 #include "opto/ad.hpp"
  47 #include "opto/runtime.hpp"
  48 #endif
  49 
  50 #ifdef PRODUCT
  51 #define __ masm->
  52 #else
  53 #define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
  54 #endif
  55 
  56 #define BLOCK_COMMENT(str) __ block_comment(str)
  57 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  58 
  59 #define RegisterSaver_LiveIntReg(regname) \
  60   { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
  61 
  62 #define RegisterSaver_LiveFloatReg(regname) \
  63   { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
  64 
// Registers which are not saved/restored, but which still have a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  67 #define RegisterSaver_ExcludedIntReg(regname) \
  68   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  69 
// Registers which are not saved/restored, but which still have a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  72 #define RegisterSaver_ExcludedFloatReg(regname) \
  73   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
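
// A rough illustration, assuming reg_size is 8 bytes: because an excluded
// entry still occupies one slot, Z_R3 ends up at the same byte offset within
// the save area in both RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2,
// which is what keeps live_reg_frame_size() identical for the two sets.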
  74 
  75 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  76   // Live registers which get spilled to the stack. Register positions
  77   // in this array correspond directly to the stack layout.
  78   //
  79   // live float registers:
  80   //
  81   RegisterSaver_LiveFloatReg(Z_F0 ),
  82   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  83   RegisterSaver_LiveFloatReg(Z_F2 ),
  84   RegisterSaver_LiveFloatReg(Z_F3 ),
  85   RegisterSaver_LiveFloatReg(Z_F4 ),
  86   RegisterSaver_LiveFloatReg(Z_F5 ),
  87   RegisterSaver_LiveFloatReg(Z_F6 ),
  88   RegisterSaver_LiveFloatReg(Z_F7 ),
  89   RegisterSaver_LiveFloatReg(Z_F8 ),
  90   RegisterSaver_LiveFloatReg(Z_F9 ),
  91   RegisterSaver_LiveFloatReg(Z_F10),
  92   RegisterSaver_LiveFloatReg(Z_F11),
  93   RegisterSaver_LiveFloatReg(Z_F12),
  94   RegisterSaver_LiveFloatReg(Z_F13),
  95   RegisterSaver_LiveFloatReg(Z_F14),
  96   RegisterSaver_LiveFloatReg(Z_F15),
  97   //
  98   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  99   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 100   RegisterSaver_LiveIntReg(Z_R2 ),
 101   RegisterSaver_LiveIntReg(Z_R3 ),
 102   RegisterSaver_LiveIntReg(Z_R4 ),
 103   RegisterSaver_LiveIntReg(Z_R5 ),
 104   RegisterSaver_LiveIntReg(Z_R6 ),
 105   RegisterSaver_LiveIntReg(Z_R7 ),
 106   RegisterSaver_LiveIntReg(Z_R8 ),
 107   RegisterSaver_LiveIntReg(Z_R9 ),
 108   RegisterSaver_LiveIntReg(Z_R10),
 109   RegisterSaver_LiveIntReg(Z_R11),
 110   RegisterSaver_LiveIntReg(Z_R12),
 111   RegisterSaver_LiveIntReg(Z_R13),
 112   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 113   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 114 };
 115 
 116 static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
 117   // Live registers which get spilled to the stack. Register positions
 118   // in this array correspond directly to the stack layout.
 119   //
  // live float registers: All excluded, but they still get a stack slot to keep the frame size the same.
 121   //
 122   RegisterSaver_ExcludedFloatReg(Z_F0 ),
 123   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 124   RegisterSaver_ExcludedFloatReg(Z_F2 ),
 125   RegisterSaver_ExcludedFloatReg(Z_F3 ),
 126   RegisterSaver_ExcludedFloatReg(Z_F4 ),
 127   RegisterSaver_ExcludedFloatReg(Z_F5 ),
 128   RegisterSaver_ExcludedFloatReg(Z_F6 ),
 129   RegisterSaver_ExcludedFloatReg(Z_F7 ),
 130   RegisterSaver_ExcludedFloatReg(Z_F8 ),
 131   RegisterSaver_ExcludedFloatReg(Z_F9 ),
 132   RegisterSaver_ExcludedFloatReg(Z_F10),
 133   RegisterSaver_ExcludedFloatReg(Z_F11),
 134   RegisterSaver_ExcludedFloatReg(Z_F12),
 135   RegisterSaver_ExcludedFloatReg(Z_F13),
 136   RegisterSaver_ExcludedFloatReg(Z_F14),
 137   RegisterSaver_ExcludedFloatReg(Z_F15),
 138   //
 139   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 140   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 141   RegisterSaver_LiveIntReg(Z_R2 ),
 142   RegisterSaver_LiveIntReg(Z_R3 ),
 143   RegisterSaver_LiveIntReg(Z_R4 ),
 144   RegisterSaver_LiveIntReg(Z_R5 ),
 145   RegisterSaver_LiveIntReg(Z_R6 ),
 146   RegisterSaver_LiveIntReg(Z_R7 ),
 147   RegisterSaver_LiveIntReg(Z_R8 ),
 148   RegisterSaver_LiveIntReg(Z_R9 ),
 149   RegisterSaver_LiveIntReg(Z_R10),
 150   RegisterSaver_LiveIntReg(Z_R11),
 151   RegisterSaver_LiveIntReg(Z_R12),
 152   RegisterSaver_LiveIntReg(Z_R13),
 153   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 154   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 155 };
 156 
 157 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
 158   // Live registers which get spilled to the stack. Register positions
 159   // in this array correspond directly to the stack layout.
 160   //
 161   // live float registers:
 162   //
 163   RegisterSaver_LiveFloatReg(Z_F0 ),
 164   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 165   RegisterSaver_LiveFloatReg(Z_F2 ),
 166   RegisterSaver_LiveFloatReg(Z_F3 ),
 167   RegisterSaver_LiveFloatReg(Z_F4 ),
 168   RegisterSaver_LiveFloatReg(Z_F5 ),
 169   RegisterSaver_LiveFloatReg(Z_F6 ),
 170   RegisterSaver_LiveFloatReg(Z_F7 ),
 171   RegisterSaver_LiveFloatReg(Z_F8 ),
 172   RegisterSaver_LiveFloatReg(Z_F9 ),
 173   RegisterSaver_LiveFloatReg(Z_F10),
 174   RegisterSaver_LiveFloatReg(Z_F11),
 175   RegisterSaver_LiveFloatReg(Z_F12),
 176   RegisterSaver_LiveFloatReg(Z_F13),
 177   RegisterSaver_LiveFloatReg(Z_F14),
 178   RegisterSaver_LiveFloatReg(Z_F15),
 179   //
 180   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 181   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 182   RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
 183   RegisterSaver_LiveIntReg(Z_R3 ),
 184   RegisterSaver_LiveIntReg(Z_R4 ),
 185   RegisterSaver_LiveIntReg(Z_R5 ),
 186   RegisterSaver_LiveIntReg(Z_R6 ),
 187   RegisterSaver_LiveIntReg(Z_R7 ),
 188   RegisterSaver_LiveIntReg(Z_R8 ),
 189   RegisterSaver_LiveIntReg(Z_R9 ),
 190   RegisterSaver_LiveIntReg(Z_R10),
 191   RegisterSaver_LiveIntReg(Z_R11),
 192   RegisterSaver_LiveIntReg(Z_R12),
 193   RegisterSaver_LiveIntReg(Z_R13),
 194   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 195   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 196 };
 197 
 198 // Live argument registers which get spilled to the stack.
 199 static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
 200   RegisterSaver_LiveFloatReg(Z_FARG1),
 201   RegisterSaver_LiveFloatReg(Z_FARG2),
 202   RegisterSaver_LiveFloatReg(Z_FARG3),
 203   RegisterSaver_LiveFloatReg(Z_FARG4),
 204   RegisterSaver_LiveIntReg(Z_ARG1),
 205   RegisterSaver_LiveIntReg(Z_ARG2),
 206   RegisterSaver_LiveIntReg(Z_ARG3),
 207   RegisterSaver_LiveIntReg(Z_ARG4),
 208   RegisterSaver_LiveIntReg(Z_ARG5)
 209 };
 210 
 211 static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
 212   // Live registers which get spilled to the stack. Register positions
 213   // in this array correspond directly to the stack layout.
 214   //
 215   // live float registers:
 216   //
 217   RegisterSaver_LiveFloatReg(Z_F0 ),
 218   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 219   RegisterSaver_LiveFloatReg(Z_F2 ),
 220   RegisterSaver_LiveFloatReg(Z_F3 ),
 221   RegisterSaver_LiveFloatReg(Z_F4 ),
 222   RegisterSaver_LiveFloatReg(Z_F5 ),
 223   RegisterSaver_LiveFloatReg(Z_F6 ),
 224   RegisterSaver_LiveFloatReg(Z_F7 ),
 225   // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
 226   // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
 227   // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
 228   // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
 229   // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
 230   // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
 231   // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
 232   // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
 233   //
 234   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 235   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 236   RegisterSaver_LiveIntReg(Z_R2 ),
 237   RegisterSaver_LiveIntReg(Z_R3 ),
 238   RegisterSaver_LiveIntReg(Z_R4 ),
 239   RegisterSaver_LiveIntReg(Z_R5 ),
 240   // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
 241   // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
 242   // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
 243   // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
 244   // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
 245   // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
 246   // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
 247   // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
 248   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 249   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 250 };
 251 
 252 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
 253   int reg_space = -1;
 254   switch (reg_set) {
 255     case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
 256     case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
 257     case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
 258     case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
 259     case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
 260     default: ShouldNotReachHere();
 261   }
 262   return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
 263 }
 264 
 265 
 266 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
 267   return live_reg_save_size(reg_set) + frame::z_abi_160_size;
 268 }
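
// A worked example, assuming reg_size is 8 bytes and frame::z_abi_160_size is
// 160 bytes: RegisterSaver_LiveRegs contains 15 float entries (Z_F0, Z_F2..Z_F15)
// and 12 int entries (Z_R2..Z_R13), i.e. 27 entries in total, so
//   live_reg_save_size(all_registers)  = 27 * 8    = 216 bytes
//   live_reg_frame_size(all_registers) = 216 + 160 = 376 bytes
// The all_registers_except_r2 and all_integer_registers sets have the same
// entry count (excluded entries still occupy a slot), while the volatile and
// argument sets are smaller.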
 269 
 270 
 271 // return_pc: Specify the register that should be stored as the return pc in the current frame.
 272 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
 273   // Record volatile registers as callee-save values in an OopMap so
 274   // their save locations will be propagated to the caller frame's
 275   // RegisterMap during StackFrameStream construction (needed for
 276   // deoptimization; see compiledVFrame::create_stack_value).
 277 
 278   // Calculate frame size.
 279   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 280   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 281   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 282 
 283   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 284   OopMap* map = new OopMap(frame_size_in_slots, 0);
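
  // A small example of the slot arithmetic used below, assuming 8-byte
  // registers and 4-byte VMReg stack slots: a register spilled at byte offset
  // 176 is recorded as stack slot 176 >> 2 == 44 for its first half and as
  // slot (176 + half_reg_size) >> 2 == 45 (vmreg->next()) for its second half.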
 285 
 286   int regstosave_num = 0;
 287   const RegisterSaver::LiveRegType* live_regs = NULL;
 288 
 289   switch (reg_set) {
 290     case all_registers:
 291       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 292       live_regs      = RegisterSaver_LiveRegs;
 293       break;
 294     case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 296       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 297       break;
 298     case all_integer_registers:
 299       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 300       live_regs      = RegisterSaver_LiveIntRegs;
 301       break;
 302     case all_volatile_registers:
 303       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 304       live_regs      = RegisterSaver_LiveVolatileRegs;
 305       break;
 306     case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 308       live_regs      = RegisterSaver_LiveArgRegs;
 309       break;
 310     default: ShouldNotReachHere();
 311   }
 312 
 313   // Save return pc in old frame.
 314   __ save_return_pc(return_pc);
 315 
 316   // Push a new frame (includes stack linkage).
 317   // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
 318   // illegally used to pass parameters by RangeCheckStub::emit_code().
 319   __ push_frame(frame_size_in_bytes, return_pc);
 320   // We have to restore return_pc right away.
 321   // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
 322   // Nobody else knows which register we saved.
 323   __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);
 324 
 325   // Register save area in new frame starts above z_abi_160 area.
 326   int offset = register_save_offset;
 327 
 328   Register first = noreg;
 329   Register last  = noreg;
 330   int      first_offset = -1;
 331   bool     float_spilled = false;
 332 
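  // The loop below coalesces runs of consecutive integer registers into a
  // single store-multiple. With the register sets defined above, Z_R2..Z_R13
  // (or Z_R3..Z_R13 without R2) form one contiguous run, so the whole integer
  // portion is saved by the single z_stmg emitted after the loop.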
 333   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 334     int reg_num  = live_regs[i].reg_num;
 335     int reg_type = live_regs[i].reg_type;
 336 
 337     switch (reg_type) {
 338       case RegisterSaver::int_reg: {
 339         Register reg = as_Register(reg_num);
 340         if (last != reg->predecessor()) {
 341           if (first != noreg) {
 342             __ z_stmg(first, last, first_offset, Z_SP);
 343           }
 344           first = reg;
 345           first_offset = offset;
 346           DEBUG_ONLY(float_spilled = false);
 347         }
 348         last = reg;
 349         assert(last != Z_R0, "r0 would require special treatment");
 350         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 351         break;
 352       }
 353 
 354       case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
 355         continue; // Continue with next loop iteration.
 356 
 357       case RegisterSaver::float_reg: {
 358         FloatRegister freg = as_FloatRegister(reg_num);
 359         __ z_std(freg, offset, Z_SP);
 360         DEBUG_ONLY(float_spilled = true);
 361         break;
 362       }
 363 
 364       default:
 365         ShouldNotReachHere();
 366         break;
 367     }
 368 
 369     // Second set_callee_saved is really a waste but we'll keep things as they were for now
 370     map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
 371     map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
 372   }
 373   assert(first != noreg, "Should spill at least one int reg.");
 374   __ z_stmg(first, last, first_offset, Z_SP);
 375 
 376   // And we're done.
 377   return map;
 378 }
 379 
 380 
// Generate the OopMap (again, regs were saved before).
 382 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
 383   // Calculate frame size.
 384   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 385   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 386   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 387 
 388   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 389   OopMap* map = new OopMap(frame_size_in_slots, 0);
 390 
 391   int regstosave_num = 0;
 392   const RegisterSaver::LiveRegType* live_regs = NULL;
 393 
 394   switch (reg_set) {
 395     case all_registers:
 396       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 397       live_regs      = RegisterSaver_LiveRegs;
 398       break;
 399     case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 401       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 402       break;
 403     case all_integer_registers:
 404       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 405       live_regs      = RegisterSaver_LiveIntRegs;
 406       break;
 407     case all_volatile_registers:
 408       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 409       live_regs      = RegisterSaver_LiveVolatileRegs;
 410       break;
 411     case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 413       live_regs      = RegisterSaver_LiveArgRegs;
 414       break;
 415     default: ShouldNotReachHere();
 416   }
 417 
 418   // Register save area in new frame starts above z_abi_160 area.
 419   int offset = register_save_offset;
 420   for (int i = 0; i < regstosave_num; i++) {
 421     if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
 422       map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
 423       map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
 424     }
 425     offset += reg_size;
 426   }
 427   return map;
 428 }
 429 
 430 
 431 // Pop the current frame and restore all the registers that we saved.
 432 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
 433   int offset;
 434   const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
 435 
 436   Register first = noreg;
 437   Register last = noreg;
 438   int      first_offset = -1;
 439   bool     float_spilled = false;
 440 
 441   int regstosave_num = 0;
 442   const RegisterSaver::LiveRegType* live_regs = NULL;
 443 
 444   switch (reg_set) {
 445     case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 447       live_regs      = RegisterSaver_LiveRegs;
 448       break;
 449     case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 451       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 452       break;
 453     case all_integer_registers:
 454       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 455       live_regs      = RegisterSaver_LiveIntRegs;
 456       break;
 457     case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 459       live_regs      = RegisterSaver_LiveVolatileRegs;
 460       break;
 461     case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 463       live_regs      = RegisterSaver_LiveArgRegs;
 464       break;
 465     default: ShouldNotReachHere();
 466   }
 467 
 468   // Restore all registers (ints and floats).
 469 
 470   // Register save area in new frame starts above z_abi_160 area.
 471   offset = register_save_offset;
 472 
 473   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 474     int reg_num  = live_regs[i].reg_num;
 475     int reg_type = live_regs[i].reg_type;
 476 
 477     switch (reg_type) {
 478       case RegisterSaver::excluded_reg:
 479         continue; // Continue with next loop iteration.
 480 
 481       case RegisterSaver::int_reg: {
 482         Register reg = as_Register(reg_num);
 483         if (last != reg->predecessor()) {
 484           if (first != noreg) {
 485             __ z_lmg(first, last, first_offset, Z_SP);
 486           }
 487           first = reg;
 488           first_offset = offset;
 489           DEBUG_ONLY(float_spilled = false);
 490         }
 491         last = reg;
 492         assert(last != Z_R0, "r0 would require special treatment");
 493         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 494         break;
 495       }
 496 
 497       case RegisterSaver::float_reg: {
 498         FloatRegister freg = as_FloatRegister(reg_num);
 499         __ z_ld(freg, offset, Z_SP);
 500         DEBUG_ONLY(float_spilled = true);
 501         break;
 502       }
 503 
 504       default:
 505         ShouldNotReachHere();
 506     }
 507   }
 508   assert(first != noreg, "Should spill at least one int reg.");
 509   __ z_lmg(first, last, first_offset, Z_SP);
 510 
 511   // Pop the frame.
 512   __ pop_frame();
 513 
  // Restore the return pc.
 515   __ restore_return_pc();
 516 }
 517 
 518 
 519 // Pop the current frame and restore the registers that might be holding a result.
 520 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  int offset;
 523   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 524                                    sizeof(RegisterSaver::LiveRegType);
 525   const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
 526 
 527   // Restore all result registers (ints and floats).
 528   offset = register_save_offset;
 529   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 530     int reg_num = RegisterSaver_LiveRegs[i].reg_num;
 531     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 532     switch (reg_type) {
 533       case RegisterSaver::excluded_reg:
 534         continue; // Continue with next loop iteration.
 535       case RegisterSaver::int_reg: {
 536         if (as_Register(reg_num) == Z_RET) { // int result_reg
 537           __ z_lg(as_Register(reg_num), offset, Z_SP);
 538         }
 539         break;
 540       }
 541       case RegisterSaver::float_reg: {
 542         if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
 543           __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
 544         }
 545         break;
 546       }
 547       default:
 548         ShouldNotReachHere();
 549     }
 550   }
 551 }
 552 
 553 size_t SharedRuntime::trampoline_size() {
 554   return MacroAssembler::load_const_size() + 2;
 555 }
 556 
 557 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 558   // Think about using pc-relative branch.
 559   __ load_const(Z_R1_scratch, destination);
 560   __ z_br(Z_R1_scratch);
 561 }
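
// A sketch of the emitted trampoline, assuming load_const materializes the
// 64-bit destination with an IIHF/IILF pair (12 bytes):
//   iihf  Z_R1_scratch, high32(destination)
//   iilf  Z_R1_scratch, low32(destination)
//   br    Z_R1_scratch      // 2 bytes
// which is consistent with trampoline_size() = load_const_size() + 2.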
 562 
 563 // ---------------------------------------------------------------------------
 564 void SharedRuntime::save_native_result(MacroAssembler * masm,
 565                                        BasicType ret_type,
 566                                        int frame_slots) {
 567   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 568 
 569   switch (ret_type) {
 570     case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
 571     case T_BYTE:
 572     case T_CHAR:
 573     case T_SHORT:
 574     case T_INT:
 575       __ reg2mem_opt(Z_RET, memaddr, false);
 576       break;
 577     case T_OBJECT:   // Save pointer types as long.
 578     case T_ARRAY:
 579     case T_ADDRESS:
 580     case T_VOID:
 581     case T_LONG:
 582       __ reg2mem_opt(Z_RET, memaddr);
 583       break;
 584     case T_FLOAT:
 585       __ freg2mem_opt(Z_FRET, memaddr, false);
 586       break;
 587     case T_DOUBLE:
 588       __ freg2mem_opt(Z_FRET, memaddr);
 589       break;
 590     default:
 591       ShouldNotReachHere();
 592       break;
 593   }
 594 }
 595 
 596 void SharedRuntime::restore_native_result(MacroAssembler *masm,
 597                                           BasicType       ret_type,
 598                                           int             frame_slots) {
 599   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 600 
 601   switch (ret_type) {
 602     case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
 603     case T_BYTE:
 604     case T_CHAR:
 605     case T_SHORT:
 606     case T_INT:
 607       __ mem2reg_opt(Z_RET, memaddr, false);
 608       break;
 609     case T_OBJECT:   // Restore pointer types as long.
 610     case T_ARRAY:
 611     case T_ADDRESS:
 612     case T_VOID:
 613     case T_LONG:
 614       __ mem2reg_opt(Z_RET, memaddr);
 615       break;
 616     case T_FLOAT:
 617       __ mem2freg_opt(Z_FRET, memaddr, false);
 618       break;
 619     case T_DOUBLE:
 620       __ mem2freg_opt(Z_FRET, memaddr);
 621       break;
 622     default:
 623       ShouldNotReachHere();
 624       break;
 625   }
 626 }
 627 
 628 // ---------------------------------------------------------------------------
 629 // Read the array of BasicTypes from a signature, and compute where the
 630 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 631 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 632 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 633 // as framesizes are fixed.
 634 // VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
 636 // up to RegisterImpl::number_of_registers are the 64-bit integer registers.
 637 
 638 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 639 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 640 // units regardless of build.
 641 
 642 // The Java calling convention is a "shifted" version of the C ABI.
 643 // By skipping the first C ABI register we can call non-static jni methods
 644 // with small numbers of arguments without having to shuffle the arguments
 645 // at all. Since we control the java ABI we ought to at least get some
 646 // advantage out of it.
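//
// A worked example, assuming the argument registers used below (Z_R2..Z_R6 for
// integer/oop arguments, Z_F0/Z_F2/Z_F4/Z_F6 for floats): for an instance
// method taking (int, long, float, double), sig_bt is
//   { T_OBJECT, T_INT, T_LONG, T_VOID, T_FLOAT, T_DOUBLE, T_VOID }
// and the assignment becomes
//   receiver -> Z_R2, int -> Z_R3, long -> Z_R4, float -> Z_F0, double -> Z_F2,
// with the T_VOID halves marked bad and no stack slots needed.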
 647 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 648                                            VMRegPair *regs,
 649                                            int total_args_passed,
 650                                            int is_outgoing) {
 651   // c2c calling conventions for compiled-compiled calls.
 652 
 653   // An int/float occupies 1 slot here.
  const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats.
 655   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 656 
 657   const VMReg z_iarg_reg[5] = {
 658     Z_R2->as_VMReg(),
 659     Z_R3->as_VMReg(),
 660     Z_R4->as_VMReg(),
 661     Z_R5->as_VMReg(),
 662     Z_R6->as_VMReg()
 663   };
 664   const VMReg z_farg_reg[4] = {
 665     Z_F0->as_VMReg(),
 666     Z_F2->as_VMReg(),
 667     Z_F4->as_VMReg(),
 668     Z_F6->as_VMReg()
 669   };
 670   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 671   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 672 
 673   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 674   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 675 
 677   int stk = 0;
 678   int ireg = 0;
 679   int freg = 0;
 680 
 681   for (int i = 0; i < total_args_passed; ++i) {
 682     switch (sig_bt[i]) {
 683       case T_BOOLEAN:
 684       case T_CHAR:
 685       case T_BYTE:
 686       case T_SHORT:
 687       case T_INT:
 688         if (ireg < z_num_iarg_registers) {
 689           // Put int/ptr in register.
 690           regs[i].set1(z_iarg_reg[ireg]);
 691           ++ireg;
 692         } else {
 693           // Put int/ptr on stack.
 694           regs[i].set1(VMRegImpl::stack2reg(stk));
 695           stk += inc_stk_for_intfloat;
 696         }
 697         break;
 698       case T_LONG:
 699         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 700         if (ireg < z_num_iarg_registers) {
 701           // Put long in register.
 702           regs[i].set2(z_iarg_reg[ireg]);
 703           ++ireg;
 704         } else {
 705           // Put long on stack and align to 2 slots.
 706           if (stk & 0x1) { ++stk; }
 707           regs[i].set2(VMRegImpl::stack2reg(stk));
 708           stk += inc_stk_for_longdouble;
 709         }
 710         break;
 711       case T_OBJECT:
 712       case T_ARRAY:
 713       case T_ADDRESS:
 714         if (ireg < z_num_iarg_registers) {
 715           // Put ptr in register.
 716           regs[i].set2(z_iarg_reg[ireg]);
 717           ++ireg;
 718         } else {
 719           // Put ptr on stack and align to 2 slots, because
 720           // "64-bit pointers record oop-ishness on 2 aligned adjacent
 721           // registers." (see OopFlow::build_oop_map).
 722           if (stk & 0x1) { ++stk; }
 723           regs[i].set2(VMRegImpl::stack2reg(stk));
 724           stk += inc_stk_for_longdouble;
 725         }
 726         break;
 727       case T_FLOAT:
 728         if (freg < z_num_farg_registers) {
 729           // Put float in register.
 730           regs[i].set1(z_farg_reg[freg]);
 731           ++freg;
 732         } else {
 733           // Put float on stack.
 734           regs[i].set1(VMRegImpl::stack2reg(stk));
 735           stk += inc_stk_for_intfloat;
 736         }
 737         break;
 738       case T_DOUBLE:
 739         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 740         if (freg < z_num_farg_registers) {
 741           // Put double in register.
 742           regs[i].set2(z_farg_reg[freg]);
 743           ++freg;
 744         } else {
 745           // Put double on stack and align to 2 slots.
 746           if (stk & 0x1) { ++stk; }
 747           regs[i].set2(VMRegImpl::stack2reg(stk));
 748           stk += inc_stk_for_longdouble;
 749         }
 750         break;
 751       case T_VOID:
 752         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 753         // Do not count halves.
 754         regs[i].set_bad();
 755         break;
 756       default:
 757         ShouldNotReachHere();
 758     }
 759   }
 760   return align_up(stk, 2);
 761 }
 762 
 763 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 764                                         VMRegPair *regs,
 765                                         VMRegPair *regs2,
 766                                         int total_args_passed) {
 767   assert(regs2 == NULL, "second VMRegPair array not used on this platform");
 768 
 769   // Calling conventions for C runtime calls and calls to JNI native methods.
 770   const VMReg z_iarg_reg[5] = {
 771     Z_R2->as_VMReg(),
 772     Z_R3->as_VMReg(),
 773     Z_R4->as_VMReg(),
 774     Z_R5->as_VMReg(),
 775     Z_R6->as_VMReg()
 776   };
 777   const VMReg z_farg_reg[4] = {
 778     Z_F0->as_VMReg(),
 779     Z_F2->as_VMReg(),
 780     Z_F4->as_VMReg(),
 781     Z_F6->as_VMReg()
 782   };
 783   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 784   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 785 
 786   // Check calling conventions consistency.
 787   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 788   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 789 
 790   // Avoid passing C arguments in the wrong stack slots.
 791 
 792   // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
 793   // 2 such slots, like 64 bit values do.
 794   const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
 795   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 796 
 798   // Leave room for C-compatible ABI
 799   int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
 800   int freg = 0;
 801   int ireg = 0;
 802 
 803   // We put the first 5 arguments into registers and the rest on the
 804   // stack. Float arguments are already in their argument registers
 805   // due to c2c calling conventions (see calling_convention).
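  // A small example of the stack accounting below, assuming 4-byte VMReg
  // slots: once the five integer argument registers are used up, every further
  // int-like or oop argument occupies a full 8-byte pair (set2 at stk,
  // stk += 2), while an excess float is stored right-justified within its pair
  // (set1 at stk + 1, stk += 2).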
 806   for (int i = 0; i < total_args_passed; ++i) {
 807     switch (sig_bt[i]) {
 808       case T_BOOLEAN:
 809       case T_CHAR:
 810       case T_BYTE:
 811       case T_SHORT:
 812       case T_INT:
 813         // Fall through, handle as long.
 814       case T_LONG:
 815       case T_OBJECT:
 816       case T_ARRAY:
 817       case T_ADDRESS:
 818       case T_METADATA:
 819         // Oops are already boxed if required (JNI).
 820         if (ireg < z_num_iarg_registers) {
 821           regs[i].set2(z_iarg_reg[ireg]);
 822           ++ireg;
 823         } else {
 824           regs[i].set2(VMRegImpl::stack2reg(stk));
 825           stk += inc_stk_for_longdouble;
 826         }
 827         break;
 828       case T_FLOAT:
 829         if (freg < z_num_farg_registers) {
 830           regs[i].set1(z_farg_reg[freg]);
 831           ++freg;
 832         } else {
 833           regs[i].set1(VMRegImpl::stack2reg(stk+1));
 834           stk +=  inc_stk_for_intfloat;
 835         }
 836         break;
 837       case T_DOUBLE:
 838         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 839         if (freg < z_num_farg_registers) {
 840           regs[i].set2(z_farg_reg[freg]);
 841           ++freg;
 842         } else {
 843           // Put double on stack.
 844           regs[i].set2(VMRegImpl::stack2reg(stk));
 845           stk += inc_stk_for_longdouble;
 846         }
 847         break;
 848       case T_VOID:
 849         // Do not count halves.
 850         regs[i].set_bad();
 851         break;
 852       default:
 853         ShouldNotReachHere();
 854     }
 855   }
 856   return align_up(stk, 2);
 857 }
 858 
 859 ////////////////////////////////////////////////////////////////////////
 860 //
 861 //  Argument shufflers
 862 //
 863 ////////////////////////////////////////////////////////////////////////
 864 
 865 //----------------------------------------------------------------------
 866 // The java_calling_convention describes stack locations as ideal slots on
 867 // a frame with no abi restrictions. Since we must observe abi restrictions
 868 // (like the placement of the register window) the slots must be biased by
 869 // the following value.
 870 //----------------------------------------------------------------------
 871 static int reg2slot(VMReg r) {
 872   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 873 }
 874 
 875 static int reg2offset(VMReg r) {
 876   return reg2slot(r) * VMRegImpl::stack_slot_size;
 877 }
 878 
 879 static void verify_oop_args(MacroAssembler *masm,
 880                             int total_args_passed,
 881                             const BasicType *sig_bt,
 882                             const VMRegPair *regs) {
 883   if (!VerifyOops) { return; }
 884 
 885   for (int i = 0; i < total_args_passed; i++) {
 886     if (is_reference_type(sig_bt[i])) {
 887       VMReg r = regs[i].first();
 888       assert(r->is_valid(), "bad oop arg");
 889 
 890       if (r->is_stack()) {
 891         __ z_lg(Z_R0_scratch,
 892                 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
 893         __ verify_oop(Z_R0_scratch);
 894       } else {
 895         __ verify_oop(r->as_Register());
 896       }
 897     }
 898   }
 899 }
 900 
 901 static void gen_special_dispatch(MacroAssembler *masm,
 902                                  int total_args_passed,
 903                                  vmIntrinsics::ID special_dispatch,
 904                                  const BasicType *sig_bt,
 905                                  const VMRegPair *regs) {
 906   verify_oop_args(masm, total_args_passed, sig_bt, regs);
 907 
 908   // Now write the args into the outgoing interpreter space.
 909   bool     has_receiver   = false;
 910   Register receiver_reg   = noreg;
 911   int      member_arg_pos = -1;
 912   Register member_reg     = noreg;
 913   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
 914 
 915   if (ref_kind != 0) {
 916     member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
 917     member_reg = Z_R9;                       // Known to be free at this point.
 918     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 919   } else {
 920     guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
 921     has_receiver = true;
 922   }
 923 
 924   if (member_reg != noreg) {
 925     // Load the member_arg into register, if necessary.
 926     assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
 927     assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
 928 
 929     VMReg r = regs[member_arg_pos].first();
 930     assert(r->is_valid(), "bad member arg");
 931 
 932     if (r->is_stack()) {
 933       __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
 934     } else {
 935       // No data motion is needed.
 936       member_reg = r->as_Register();
 937     }
 938   }
 939 
 940   if (has_receiver) {
 941     // Make sure the receiver is loaded into a register.
 942     assert(total_args_passed > 0, "oob");
 943     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 944 
 945     VMReg r = regs[0].first();
 946     assert(r->is_valid(), "bad receiver arg");
 947 
 948     if (r->is_stack()) {
 949       // Porting note: This assumes that compiled calling conventions always
 950       // pass the receiver oop in a register. If this is not true on some
 951       // platform, pick a temp and load the receiver from stack.
 952       assert(false, "receiver always in a register");
 953       receiver_reg = Z_R13;  // Known to be free at this point.
 954       __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
 955     } else {
 956       // No data motion is needed.
 957       receiver_reg = r->as_Register();
 958     }
 959   }
 960 
 961   // Figure out which address we are really jumping to:
 962   MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
 963                                                  receiver_reg, member_reg,
 964                                                  /*for_compiler_entry:*/ true);
 965 }
 966 
 967 ////////////////////////////////////////////////////////////////////////
 968 //
 969 //  Argument shufflers
 970 //
 971 ////////////////////////////////////////////////////////////////////////
 972 
// Is the given vector size (in bytes) bigger than the size saved by default?
// 8-byte registers are saved by default on z/Architecture.
 975 bool SharedRuntime::is_wide_vector(int size) {
 976   // Note, MaxVectorSize == 8 on this platform.
 977   assert(size <= 8, "%d bytes vectors are not supported", size);
 978   return size > 8;
 979 }
 980 
 981 //----------------------------------------------------------------------
 982 // An oop arg. Must pass a handle not the oop itself
 983 //----------------------------------------------------------------------
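// A brief illustration of the handle convention implemented below: the callee
// never receives the oop itself but the address of a stack slot holding the
// oop, so a GC walking the OopMap can still relocate the object. A NULL oop is
// passed as a NULL handle, which is why both paths below clear rHandle when
// the oop turns out to be NULL.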
 984 static void object_move(MacroAssembler *masm,
 985                         OopMap *map,
 986                         int oop_handle_offset,
 987                         int framesize_in_slots,
 988                         VMRegPair src,
 989                         VMRegPair dst,
 990                         bool is_receiver,
 991                         int *receiver_offset) {
 992   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
 993 
 994   assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
 995 
 996   // Must pass a handle. First figure out the location we use as a handle.
 997 
 998   if (src.first()->is_stack()) {
 999     // Oop is already on the stack, put handle on stack or in register
1000     // If handle will be on the stack, use temp reg to calculate it.
1001     Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1002     Label    skip;
1003     int      slot_in_older_frame = reg2slot(src.first());
1004 
1005     guarantee(!is_receiver, "expecting receiver in register");
1006     map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));
1007 
1008     __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
1009     __ load_and_test_long(Z_R0, Address(rHandle));
1010     __ z_brne(skip);
1011     // Use a NULL handle if oop is NULL.
1012     __ clear_reg(rHandle, true, false);
1013     __ bind(skip);
1014 
1015     // Copy handle to the right place (register or stack).
1016     if (dst.first()->is_stack()) {
1017       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1018     } // else
1019       // nothing to do. rHandle uses the correct register
1020   } else {
1021     // Oop is passed in an input register. We must flush it to the stack.
1022     const Register rOop = src.first()->as_Register();
1023     const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1024     int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1025     int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
1026     NearLabel skip;
1027 
1028     if (is_receiver) {
1029       *receiver_offset = oop_slot_offset;
1030     }
1031     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1032 
1033     // Flush Oop to stack, calculate handle.
1034     __ z_stg(rOop, oop_slot_offset, Z_SP);
1035     __ add2reg(rHandle, oop_slot_offset, Z_SP);
1036 
1037     // If Oop == NULL, use a NULL handle.
1038     __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
1039     __ clear_reg(rHandle, true, false);
1040     __ bind(skip);
1041 
1042     // Copy handle to the right place (register or stack).
1043     if (dst.first()->is_stack()) {
1044       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1045     } // else
1046       // nothing to do here, since rHandle = dst.first()->as_Register in this case.
1047   }
1048 }
1049 
1050 //----------------------------------------------------------------------
1051 // A float arg. May have to do float reg to int reg conversion
1052 //----------------------------------------------------------------------
1053 static void float_move(MacroAssembler *masm,
1054                        VMRegPair src,
1055                        VMRegPair dst,
1056                        int framesize_in_slots,
1057                        int workspace_slot_offset) {
1058   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1059   int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1060 
1061   // We do not accept an argument in a VMRegPair to be spread over two slots,
1062   // no matter what physical location (reg or stack) the slots may have.
1063   // We just check for the unaccepted slot to be invalid.
1064   assert(!src.second()->is_valid(), "float in arg spread over two slots");
1065   assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1066 
1067   if (src.first()->is_stack()) {
1068     if (dst.first()->is_stack()) {
1069       // stack -> stack. The easiest of the bunch.
1070       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1071                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1072     } else {
1073       // stack to reg
1074       Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1075       if (dst.first()->is_Register()) {
1076         __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1077       } else {
1078         __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1079       }
1080     }
1081   } else if (src.first()->is_Register()) {
1082     if (dst.first()->is_stack()) {
1083       // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())), false);
1086     } else {
1087       if (dst.first()->is_Register()) {
1088         // gpr -> gpr
1089         __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1090                               src.first()->as_Register(), T_INT);
1091       } else {
1092         if (VM_Version::has_FPSupportEnhancements()) {
1093           // gpr -> fpr. Exploit z10 capability of direct transfer.
1094           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1095         } else {
1096           // gpr -> fpr. Use work space on stack to transfer data.
1097           Address   stackaddr(Z_SP, workspace_offset);
1098 
1099           __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1100           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1101         }
1102       }
1103     }
1104   } else {
1105     if (dst.first()->is_stack()) {
1106       // fpr -> stack
1107       __ freg2mem_opt(src.first()->as_FloatRegister(),
1108                       Address(Z_SP, reg2offset(dst.first())), false);
1109     } else {
1110       if (dst.first()->is_Register()) {
1111         if (VM_Version::has_FPSupportEnhancements()) {
1112           // fpr -> gpr.
1113           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1114         } else {
1115           // fpr -> gpr. Use work space on stack to transfer data.
1116           Address   stackaddr(Z_SP, workspace_offset);
1117 
1118           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1119           __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1120         }
1121       } else {
1122         // fpr -> fpr
1123         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1124                                src.first()->as_FloatRegister(), T_FLOAT);
1125       }
1126     }
1127   }
1128 }
1129 
1130 //----------------------------------------------------------------------
1131 // A double arg. May have to do double reg to long reg conversion
1132 //----------------------------------------------------------------------
1133 static void double_move(MacroAssembler *masm,
1134                         VMRegPair src,
1135                         VMRegPair dst,
1136                         int framesize_in_slots,
1137                         int workspace_slot_offset) {
1138   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1139   int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1140 
1141   // Since src is always a java calling convention we know that the
1142   // src pair is always either all registers or all stack (and aligned?)
1143 
1144   if (src.first()->is_stack()) {
1145     if (dst.first()->is_stack()) {
1146       // stack -> stack. The easiest of the bunch.
1147       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1148                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1149     } else {
1150       // stack to reg
1151       Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1152 
1153       if (dst.first()->is_Register()) {
1154         __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1155       } else {
1156         __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1157       }
1158     }
1159   } else if (src.first()->is_Register()) {
1160     if (dst.first()->is_stack()) {
1161       // gpr -> stack
1162       __ reg2mem_opt(src.first()->as_Register(),
1163                      Address(Z_SP, reg2offset(dst.first())));
1164     } else {
1165       if (dst.first()->is_Register()) {
1166         // gpr -> gpr
1167         __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1168                               src.first()->as_Register(), T_LONG);
1169       } else {
1170         if (VM_Version::has_FPSupportEnhancements()) {
1171           // gpr -> fpr. Exploit z10 capability of direct transfer.
1172           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1173         } else {
1174           // gpr -> fpr. Use work space on stack to transfer data.
1175           Address stackaddr(Z_SP, workspace_offset);
1176           __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1177           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1178         }
1179       }
1180     }
1181   } else {
1182     if (dst.first()->is_stack()) {
1183       // fpr -> stack
1184       __ freg2mem_opt(src.first()->as_FloatRegister(),
1185                       Address(Z_SP, reg2offset(dst.first())));
1186     } else {
1187       if (dst.first()->is_Register()) {
1188         if (VM_Version::has_FPSupportEnhancements()) {
1189           // fpr -> gpr. Exploit z10 capability of direct transfer.
1190           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1191         } else {
1192           // fpr -> gpr. Use work space on stack to transfer data.
1193           Address stackaddr(Z_SP, workspace_offset);
1194 
1195           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1196           __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1197         }
1198       } else {
1199         // fpr -> fpr
1200         // In theory these overlap but the ordering is such that this is likely a nop.
1201         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1202                                src.first()->as_FloatRegister(), T_DOUBLE);
1203       }
1204     }
1205   }
1206 }
1207 
1208 //----------------------------------------------------------------------
1209 // A long arg.
1210 //----------------------------------------------------------------------
1211 static void long_move(MacroAssembler *masm,
1212                       VMRegPair src,
1213                       VMRegPair dst,
1214                       int framesize_in_slots) {
1215   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1216 
1217   if (src.first()->is_stack()) {
1218     if (dst.first()->is_stack()) {
1219       // stack -> stack. The easiest of the bunch.
1220       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1221                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1222     } else {
1223       // stack to reg
1224       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1225       __ mem2reg_opt(dst.first()->as_Register(),
1226                       Address(Z_SP, reg2offset(src.first()) + frame_offset));
1227     }
1228   } else {
1229     // reg to reg
1230     assert(src.first()->is_Register(), "long src value must be in GPR");
1231     if (dst.first()->is_stack()) {
1232       // reg -> stack
1233       __ reg2mem_opt(src.first()->as_Register(),
1234                      Address(Z_SP, reg2offset(dst.first())));
1235     } else {
1236       // reg -> reg
1237       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1238       __ move_reg_if_needed(dst.first()->as_Register(),
1239                             T_LONG, src.first()->as_Register(), T_LONG);
1240     }
1241   }
1242 }
1243 
1244 
1245 //----------------------------------------------------------------------
// An int-like arg.
1247 //----------------------------------------------------------------------
1248 // On z/Architecture we will store integer like items to the stack as 64 bit
1249 // items, according to the z/Architecture ABI, even though Java would only store
1250 // 32 bits for a parameter.
1251 // We do sign extension for all base types. That is ok since the only
1252 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1253 // Sign extension 32->64 bit will thus not affect the value.
1254 //----------------------------------------------------------------------
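// For example, an int argument with value -1 (0xFFFFFFFF) is widened to
// 0xFFFFFFFFFFFFFFFF in its 64-bit slot, while a T_CHAR value such as 0xFFFF
// has its sign bit clear and is therefore unchanged by the widening.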
1255 static void move32_64(MacroAssembler *masm,
1256                       VMRegPair src,
1257                       VMRegPair dst,
1258                       int framesize_in_slots) {
1259   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1260 
1261   if (src.first()->is_stack()) {
1262     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1263     if (dst.first()->is_stack()) {
      // stack -> stack. MVC not possible due to sign extension.
1265       Address firstaddr(Z_SP, reg2offset(dst.first()));
1266       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1267       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1268     } else {
1269       // stack -> reg, sign extended
1270       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1271     }
1272   } else {
1273     if (dst.first()->is_stack()) {
1274       // reg -> stack, sign extended
1275       Address firstaddr(Z_SP, reg2offset(dst.first()));
1276       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1277       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1278     } else {
1279       // reg -> reg, sign extended
1280       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1281     }
1282   }
1283 }
1284 
1285 static void save_or_restore_arguments(MacroAssembler *masm,
1286                                       const int stack_slots,
1287                                       const int total_in_args,
1288                                       const int arg_save_area,
1289                                       OopMap *map,
1290                                       VMRegPair *in_regs,
1291                                       BasicType *in_sig_bt) {
1292 
1293   // If map is non-NULL then the code should store the values,
1294   // otherwise it should load them.
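  // Typical usage (see check_needs_gc_for_critical_native below): the routine
  // is called once with a real OopMap to spill the arguments before a runtime
  // call, and again with map == NULL to reload them afterwards.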
1295   int slot = arg_save_area;
1296   // Handle double words first.
1297   for (int i = 0; i < total_in_args; i++) {
1298     if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
1299       int offset = slot * VMRegImpl::stack_slot_size;
1300       slot += VMRegImpl::slots_per_word;
1301       assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
1302       const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1303       Address   stackaddr(Z_SP, offset);
1304       if (map != NULL) {
1305         __ freg2mem_opt(freg, stackaddr);
1306       } else {
1307         __ mem2freg_opt(freg, stackaddr);
1308       }
1309     } else if (in_regs[i].first()->is_Register() &&
1310                (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
1311       int offset = slot * VMRegImpl::stack_slot_size;
1312       const Register   reg = in_regs[i].first()->as_Register();
1313       if (map != NULL) {
1314         __ z_stg(reg, offset, Z_SP);
1315         if (in_sig_bt[i] == T_ARRAY) {
1316           map->set_oop(VMRegImpl::stack2reg(slot));
1317         }
1318       } else {
1319         __ z_lg(reg, offset, Z_SP);
1320       }
1321       slot += VMRegImpl::slots_per_word;
1322       assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
1323     }
1324   }
1325 
1326   // Save or restore single word registers.
1327   for (int i = 0; i < total_in_args; i++) {
1328     if (in_regs[i].first()->is_Register()) {
1329       int offset = slot * VMRegImpl::stack_slot_size;
1330       // Value lives in an input register. Save or restore it on the stack.
1331       switch (in_sig_bt[i]) {
1332         case T_BOOLEAN:
1333         case T_CHAR:
1334         case T_BYTE:
1335         case T_SHORT:
1336         case T_INT: {
1337           const Register   reg = in_regs[i].first()->as_Register();
1338           Address   stackaddr(Z_SP, offset);
1339           if (map != NULL) {
1340             __ z_st(reg, stackaddr);
1341           } else {
1342             __ z_lgf(reg, stackaddr);
1343           }
1344           slot++;
1345           assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
1346           break;
1347         }
1348         case T_ARRAY:
1349         case T_LONG:
1350           // handled above
1351           break;
1352         case T_OBJECT:
1353         default: ShouldNotReachHere();
1354       }
1355     } else if (in_regs[i].first()->is_FloatRegister()) {
1356       if (in_sig_bt[i] == T_FLOAT) {
1357         int offset = slot * VMRegImpl::stack_slot_size;
1358         slot++;
1359         assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
1360         const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1361         Address   stackaddr(Z_SP, offset);
1362         if (map != NULL) {
1363           __ freg2mem_opt(freg, stackaddr, false);
1364         } else {
1365           __ mem2freg_opt(freg, stackaddr, false);
1366         }
1367       }
1368     } else if (in_regs[i].first()->is_stack() &&
1369                in_sig_bt[i] == T_ARRAY && map != NULL) {
1370       int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1371       map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1372     }
1373   }
1374 }
1375 
1376 // Check GCLocker::needs_gc and enter the runtime if it's true. This
1377 // keeps a new JNI critical region from starting until a GC has been
1378 // forced. Save down any oops in registers and describe them in an OopMap.
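     // Control flow sketch: if the needs_gc byte is zero we fall through to 'cont'.
     // Otherwise the live argument registers are spilled (described by an OopMap),
     // SharedRuntime::block_for_jni_critical(thread) is called, and the arguments
     // are reloaded before execution continues.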
1379 static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
1380                                                 const int stack_slots,
1381                                                 const int total_in_args,
1382                                                 const int arg_save_area,
1383                                                 OopMapSet *oop_maps,
1384                                                 VMRegPair *in_regs,
1385                                                 BasicType *in_sig_bt) {
1386   __ block_comment("check GCLocker::needs_gc");
1387   Label cont;
1388 
1389   // Check GCLocker::_needs_gc flag.
1390   __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
1391   __ z_cli(0, Z_R1_scratch, 0);
1392   __ z_bre(cont);
1393 
1394   // Save down any values that are live in registers and call into the
1395   // runtime to halt for a GC.
1396   OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1397 
1398   save_or_restore_arguments(masm, stack_slots, total_in_args,
1399                             arg_save_area, map, in_regs, in_sig_bt);
1400   address the_pc = __ pc();
1401   __ set_last_Java_frame(Z_SP, noreg);
1402 
1403   __ block_comment("block_for_jni_critical");
1404   __ z_lgr(Z_ARG1, Z_thread);
1405 
1406   address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1407   __ call_c(entry_point);
1408   oop_maps->add_gc_map(__ offset(), map);
1409 
1410   __ reset_last_Java_frame();
1411 
1412   // Reload all the register arguments.
1413   save_or_restore_arguments(masm, stack_slots, total_in_args,
1414                             arg_save_area, NULL, in_regs, in_sig_bt);
1415 
1416   __ bind(cont);
1417 
1418   if (StressCriticalJNINatives) {
1419     // Stress register saving
1420     OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1421     save_or_restore_arguments(masm, stack_slots, total_in_args,
1422                               arg_save_area, map, in_regs, in_sig_bt);
1423 
1424     // Destroy argument registers.
1425     for (int i = 0; i < total_in_args; i++) {
1426       if (in_regs[i].first()->is_Register()) {
1427         // Don't set CC.
1428         __ clear_reg(in_regs[i].first()->as_Register(), true, false);
1429       } else {
1430         if (in_regs[i].first()->is_FloatRegister()) {
1431           FloatRegister fr = in_regs[i].first()->as_FloatRegister();
1432           __ z_lcdbr(fr, fr);
1433         }
1434       }
1435     }
1436 
1437     save_or_restore_arguments(masm, stack_slots, total_in_args,
1438                               arg_save_area, NULL, in_regs, in_sig_bt);
1439   }
1440 }
1441 
1442 static void move_ptr(MacroAssembler *masm,
1443                      VMRegPair src,
1444                      VMRegPair dst,
1445                      int framesize_in_slots) {
1446   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1447 
1448   if (src.first()->is_stack()) {
1449     if (dst.first()->is_stack()) {
1450       // stack to stack
1451       __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1452       __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1453     } else {
1454       // stack to reg
1455       __ mem2reg_opt(dst.first()->as_Register(),
1456                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
1457     }
1458   } else {
1459     if (dst.first()->is_stack()) {
1460       // reg to stack
1461       __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1462     } else {
1463       __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1464     }
1465   }
1466 }
1467 
1468 // Unpack an array argument into a pointer to the body and the length
1469 // if the array is non-null, otherwise pass 0 for both.
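     // E.g., a jbyteArray argument is rewritten into the pair (jint length, jbyte* body).
     // For a NULL array both temporaries stay cleared, so the callee sees (0, NULL).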
1470 static void unpack_array_argument(MacroAssembler *masm,
1471                                    VMRegPair reg,
1472                                    BasicType in_elem_type,
1473                                    VMRegPair body_arg,
1474                                    VMRegPair length_arg,
1475                                    int framesize_in_slots) {
1476   Register tmp_reg = Z_tmp_2;
1477   Register tmp2_reg = Z_tmp_1;
1478 
1479   assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1480          "possible collision");
1481   assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1482          "possible collision");
1483 
1484   // Pass the length, ptr pair.
1485   NearLabel set_out_args;
1486   VMRegPair tmp, tmp2;
1487 
1488   tmp.set_ptr(tmp_reg->as_VMReg());
1489   tmp2.set_ptr(tmp2_reg->as_VMReg());
1490   if (reg.first()->is_stack()) {
1491     // Load the arg up from the stack.
1492     move_ptr(masm, reg, tmp, framesize_in_slots);
1493     reg = tmp;
1494   }
1495 
1496   const Register first = reg.first()->as_Register();
1497 
1498   // Don't set CC, indicate unused result.
1499   (void) __ clear_reg(tmp2_reg, true, false);
1500   if (tmp_reg != first) {
1501     __ clear_reg(tmp_reg, true, false);  // Don't set CC.
1502   }
1503   __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
1504   __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
1505   __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
1506 
1507   __ bind(set_out_args);
1508   move_ptr(masm, tmp, body_arg, framesize_in_slots);
1509   move32_64(masm, tmp2, length_arg, framesize_in_slots);
1510 }
1511 
1512 //----------------------------------------------------------------------
1513 // Wrap a JNI call.
1514 //----------------------------------------------------------------------
1515 #undef USE_RESIZE_FRAME
1516 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1517                                                 const methodHandle& method,
1518                                                 int compile_id,
1519                                                 BasicType *in_sig_bt,
1520                                                 VMRegPair *in_regs,
1521                                                 BasicType ret_type,
1522                                                 address critical_entry) {
1523 #ifdef COMPILER2
1524   int total_in_args = method->size_of_parameters();
1525   if (method->is_method_handle_intrinsic()) {
1526     vmIntrinsics::ID iid = method->intrinsic_id();
1527     intptr_t start = (intptr_t) __ pc();
1528     int vep_offset = ((intptr_t) __ pc()) - start;
1529 
1530     gen_special_dispatch(masm, total_in_args,
1531                          method->intrinsic_id(), in_sig_bt, in_regs);
1532 
1533     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1534 
1535     __ flush();
1536 
1537     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1538 
1539     return nmethod::new_native_nmethod(method,
1540                                        compile_id,
1541                                        masm->code(),
1542                                        vep_offset,
1543                                        frame_complete,
1544                                        stack_slots / VMRegImpl::slots_per_word,
1545                                        in_ByteSize(-1),
1546                                        in_ByteSize(-1),
1547                                        (OopMapSet *) NULL);
1548   }
1549 
1550 
1551   ///////////////////////////////////////////////////////////////////////
1552   //
1553   //  Precalculations before generating any code
1554   //
1555   ///////////////////////////////////////////////////////////////////////
1556 
1557   bool is_critical_native = true;
1558   address native_func = critical_entry;
1559   if (native_func == NULL) {
1560     native_func = method->native_function();
1561     is_critical_native = false;
1562   }
1563   assert(native_func != NULL, "must have function");
1564 
1565   //---------------------------------------------------------------------
1566   // We have received a description of where all the java args are located
1567   // on entry to the wrapper. We need to convert these args to where
1568   // the jni function will expect them. To figure out where they go
1569   // we convert the java signature to a C signature by inserting
1570   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1571   //
1572   // The first hidden argument arg[0] is a pointer to the JNI environment.
1573   // It is generated for every call.
1574   // The second argument arg[1] to the JNI call, which is hidden for static
1575   // methods, is the boxed lock object. For static calls, the lock object
1576   // is the class mirror of the method's holder; its handle is constructed
1577   // further down. For instance calls, the lock is performed on the receiver
1578   // object itself, whose pointer is passed as the first visible argument.
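       // E.g., for 'static native int foo(byte[] a)' the C function is invoked as
       // foo(JNIEnv*, jclass, jbyteArray); for an instance method the jclass slot
       // is the receiver's jobject handle instead.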
1579   //---------------------------------------------------------------------
1580 
1581   // Additionally, on z/Architecture we must convert integers
1582   // to longs in the C signature. We do this in advance in order to have
1583   // no trouble with indexes into the bt-arrays.
1584   // So convert the signature and registers now, and adjust the total number
1585   // of in-arguments accordingly.
1586   bool method_is_static = method->is_static();
1587   int  total_c_args     = total_in_args;
1588 
1589   if (!is_critical_native) {
1590     int n_hidden_args = method_is_static ? 2 : 1;
1591     total_c_args += n_hidden_args;
1592   } else {
1593     // No JNIEnv*, no this*, but unpacked arrays (base+length).
1594     for (int i = 0; i < total_in_args; i++) {
1595       if (in_sig_bt[i] == T_ARRAY) {
1596         total_c_args ++;
1597       }
1598     }
1599   }
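       // E.g., a critical native with Java signature (byte[], int) keeps
       // total_in_args == 2 but gets total_c_args == 3: the array is passed as an
       // (int length, address body) pair (see the conversion below).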
1600 
1601   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1602   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1603   BasicType* in_elem_bt = NULL;
1604 
1605   // Create the signature for the C call:
1606   //   1) add the JNIEnv*
1607   //   2) add the class if the method is static
1608   //   3) copy the rest of the incoming signature (shifted by the number of
1609   //      hidden arguments)
1610 
1611   int argc = 0;
1612   if (!is_critical_native) {
1613     out_sig_bt[argc++] = T_ADDRESS;
1614     if (method->is_static()) {
1615       out_sig_bt[argc++] = T_OBJECT;
1616     }
1617 
1618     for (int i = 0; i < total_in_args; i++) {
1619       out_sig_bt[argc++] = in_sig_bt[i];
1620     }
1621   } else {
1622     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1623     SignatureStream ss(method->signature());
1624     int o = 0;
1625     for (int i = 0; i < total_in_args; i++, o++) {
1626       if (in_sig_bt[i] == T_ARRAY) {
1627         // Arrays are passed as tuples (int, elem*).
1628         Symbol* atype = ss.as_symbol();
1629         const char* at = atype->as_C_string();
1630         if (strlen(at) == 2) {
1631           assert(at[0] == '[', "must be");
1632           switch (at[1]) {
1633             case 'B': in_elem_bt[o]  = T_BYTE; break;
1634             case 'C': in_elem_bt[o]  = T_CHAR; break;
1635             case 'D': in_elem_bt[o]  = T_DOUBLE; break;
1636             case 'F': in_elem_bt[o]  = T_FLOAT; break;
1637             case 'I': in_elem_bt[o]  = T_INT; break;
1638             case 'J': in_elem_bt[o]  = T_LONG; break;
1639             case 'S': in_elem_bt[o]  = T_SHORT; break;
1640             case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
1641             default: ShouldNotReachHere();
1642           }
1643         }
1644       } else {
1645         in_elem_bt[o] = T_VOID;
1646       }
1647       if (in_sig_bt[i] != T_VOID) {
1648         assert(in_sig_bt[i] == ss.type(), "must match");
1649         ss.next();
1650       }
1651     }
1652     assert(total_in_args == o, "must match");
1653 
1654     for (int i = 0; i < total_in_args; i++) {
1655       if (in_sig_bt[i] == T_ARRAY) {
1656         // Arrays are passed as tuples (int, elem*).
1657         out_sig_bt[argc++] = T_INT;
1658         out_sig_bt[argc++] = T_ADDRESS;
1659       } else {
1660         out_sig_bt[argc++] = in_sig_bt[i];
1661       }
1662     }
1663   }
1664 
1665   ///////////////////////////////////////////////////////////////////////
1666   // Now figure out where the args must be stored and how much stack space
1667   // they require (neglecting out_preserve_stack_slots but providing space
1668   // for storing the first five register arguments).
1669   // It's weird, see int_stk_helper.
1670   ///////////////////////////////////////////////////////////////////////
1671 
1672   //---------------------------------------------------------------------
1673   // Compute framesize for the wrapper.
1674   //
1675   // - We need to handlize all oops passed in registers.
1676   // - We must create space for them here that is disjoint from the save area.
1677   // - We always just allocate 5 words for storing down these objects.
1678   //   This allows us to simply record the base and use the Ireg number to
1679   //   decide which slot to use.
1680   // - Note that the reg number used to index the stack slot is the inbound
1681   //   number, not the outbound number.
1682   // - We must shuffle args to match the native convention,
1683   //   and to include var-args space.
1684   //---------------------------------------------------------------------
1685 
1686   //---------------------------------------------------------------------
1687   // Calculate the total number of stack slots we will need:
1688   // - 1) abi requirements
1689   // - 2) outgoing args
1690   // - 3) space for inbound oop handle area
1691   // - 4) space for handlizing a klass if static method
1692   // - 5) space for a lock if synchronized method
1693   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1694   // - 7) filler slots for alignment
1695   //---------------------------------------------------------------------
1696   // Here is what the space we have allocated will look like.
1697   // Depending on USE_RESIZE_FRAME (#undef'd above), we either extend the frame
1698   // we got with our own data area (resize) or push a new frame of that size.
1699   //
1700   // If an offset or pointer name points to a separator line, it is
1701   // assumed that addressing with offset 0 selects storage starting
1702   // at the first byte above the separator line.
1703   //
1704   //
1705   //     ...                   ...
1706   //      | caller's frame      |
1707   // FP-> |---------------------|
1708   //      | filler slots, if any|
1709   //     7| #slots == mult of 2 |
1710   //      |---------------------|
1711   //      | work space          |
1712   //     6| 2 slots = 8 bytes   |
1713   //      |---------------------|
1714   //     5| lock box (if sync)  |
1715   //      |---------------------| <- lock_slot_offset
1716   //     4| klass (if static)   |
1717   //      |---------------------| <- klass_slot_offset
1718   //     3| oopHandle area      |
1719   //      | (save area for      |
1720   //      |  critical natives)  |
1721   //      |                     |
1722   //      |                     |
1723   //      |---------------------| <- oop_handle_offset
1724   //     2| outbound memory     |
1725   //     ...                   ...
1726   //      | based arguments     |
1727   //      |---------------------|
1728   //      | vararg              |
1729   //     ...                   ...
1730   //      | area                |
1731   //      |---------------------| <- out_arg_slot_offset
1732   //     1| out_preserved_slots |
1733   //     ...                   ...
1734   //      | (z_abi spec)        |
1735   // SP-> |---------------------| <- FP_slot_offset (back chain)
1736   //     ...                   ...
1737   //
1738   //---------------------------------------------------------------------
1739 
1740   // *_slot_offset indicates offset from SP in #stack slots
1741   // *_offset      indicates offset from SP in #bytes
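       // E.g., lock_offset == lock_slot_offset * VMRegImpl::stack_slot_size.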
1742 
1743   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1744                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1745 
1746   // Now the space for the inbound oop handle area.
1747   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1748   if (is_critical_native) {
1749     // Critical natives may have to call out so they need a save area
1750     // for register arguments.
1751     int double_slots = 0;
1752     int single_slots = 0;
1753     for (int i = 0; i < total_in_args; i++) {
1754       if (in_regs[i].first()->is_Register()) {
1755         const Register reg = in_regs[i].first()->as_Register();
1756         switch (in_sig_bt[i]) {
1757           case T_BOOLEAN:
1758           case T_BYTE:
1759           case T_SHORT:
1760           case T_CHAR:
1761           case T_INT:
1762           // Fall through.
1763           case T_ARRAY:
1764           case T_LONG: double_slots++; break;
1765           default:  ShouldNotReachHere();
1766         }
1767       } else {
1768         if (in_regs[i].first()->is_FloatRegister()) {
1769           switch (in_sig_bt[i]) {
1770             case T_FLOAT:  single_slots++; break;
1771             case T_DOUBLE: double_slots++; break;
1772             default:  ShouldNotReachHere();
1773           }
1774         }
1775       }
1776     }  // for
1777     total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even.
1778   }
1779 
1780   int oop_handle_slot_offset = stack_slots;
1781   stack_slots += total_save_slots;                                        // 3)
1782 
1783   int klass_slot_offset = 0;
1784   int klass_offset      = -1;
1785   if (method_is_static && !is_critical_native) {                          // 4)
1786     klass_slot_offset  = stack_slots;
1787     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1788     stack_slots       += VMRegImpl::slots_per_word;
1789   }
1790 
1791   int lock_slot_offset = 0;
1792   int lock_offset      = -1;
1793   if (method->is_synchronized()) {                                        // 5)
1794     lock_slot_offset   = stack_slots;
1795     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1796     stack_slots       += VMRegImpl::slots_per_word;
1797   }
1798 
1799   int workspace_slot_offset= stack_slots;                                 // 6)
1800   stack_slots         += 2;
1801 
1802   // Now compute actual number of stack words we need.
1803   // Round to align stack properly.
1804   stack_slots = align_up(stack_slots,                                     // 7)
1805                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1806   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
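       // stack_slots is now a multiple of (frame::alignment_in_bytes / VMRegImpl::stack_slot_size),
       // so frame_size_in_bytes is a multiple of frame::alignment_in_bytes.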
1807 
1808 
1809   ///////////////////////////////////////////////////////////////////////
1810   // Now we can start generating code
1811   ///////////////////////////////////////////////////////////////////////
1812 
1813   unsigned int wrapper_CodeStart  = __ offset();
1814   unsigned int wrapper_UEPStart;
1815   unsigned int wrapper_VEPStart;
1816   unsigned int wrapper_FrameDone;
1817   unsigned int wrapper_CRegsSet;
1818   Label     handle_pending_exception;
1819   Label     ic_miss;
1820 
1821   //---------------------------------------------------------------------
1822   // Unverified entry point (UEP)
1823   //---------------------------------------------------------------------
1824   wrapper_UEPStart = __ offset();
1825 
1826   // check ic: object class <-> cached class
1827   if (!method_is_static) __ nmethod_UEP(ic_miss);
1828   // Fill with nops (alignment of verified entry point).
1829   __ align(CodeEntryAlignment);
1830 
1831   //---------------------------------------------------------------------
1832   // Verified entry point (VEP)
1833   //---------------------------------------------------------------------
1834   wrapper_VEPStart = __ offset();
1835 
1836   if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
1837     Label L_skip_barrier;
1838     Register klass = Z_R1_scratch;
1839     // Notify OOP recorder (don't need the relocation)
1840     AddressLiteral md = __ constant_metadata_address(method->method_holder());
1841     __ load_const_optimized(klass, md.value());
1842     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
1843 
1844     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
1845     __ z_br(klass);
1846 
1847     __ bind(L_skip_barrier);
1848   }
1849 
1850   __ save_return_pc();
1851   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1852 #ifndef USE_RESIZE_FRAME
1853   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1854 #else
1855   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1856                                                           // Just resize the existing one.
1857 #endif
1858 
1859   wrapper_FrameDone = __ offset();
1860 
1861   __ verify_thread();
1862 
1863   // Native nmethod wrappers never take possession of the oop arguments.
1864   // So the caller will gc the arguments.
1865   // The only thing we need an oopMap for is if the call is static.
1866   //
1867   // An OopMap for lock (and class if static), and one for the VM call itself
1868   OopMapSet  *oop_maps        = new OopMapSet();
1869   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1870 
1871   if (is_critical_native) {
1872     check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1873                                        oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1874   }
1875 
1876 
1877   //////////////////////////////////////////////////////////////////////
1878   //
1879   // The Grand Shuffle
1880   //
1881   //////////////////////////////////////////////////////////////////////
1882   //
1883   // We immediately shuffle the arguments so that for any vm call we have
1884   // to make from here on out (sync slow path, jvmti, etc.) we will have
1885   // captured the oops from our caller and have a valid oopMap for them.
1886   //
1887   //--------------------------------------------------------------------
1888   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1889   // (derived from JavaThread* which is in Z_thread) and, if static,
1890   // the class mirror instead of a receiver. This pretty much guarantees that
1891   // register layout will not match. We ignore these extra arguments during
1892   // the shuffle. The shuffle is described by the two calling convention
1893   // vectors we have in our possession. We simply walk the java vector to
1894   // get the source locations and the c vector to get the destinations.
1895   //
1896   // This is a trick. We double the stack slots so we can claim
1897   // the oops in the caller's frame. Since we are sure to have
1898   // more args than the caller, doubling is enough to make
1899   // sure we can capture all the incoming oop args from the caller.
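       // The doubling is why the OopMaps below are created with
       // 'new OopMap(stack_slots * 2, 0)' and why oops that remain in the caller's
       // frame are recorded at 'offset_in_older_frame + stack_slots'
       // (cf. save_or_restore_arguments above).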
1900   //--------------------------------------------------------------------
1901 
1902   // Record sp-based slot for receiver on stack for non-static methods.
1903   int receiver_offset = -1;
1904 
1905   //--------------------------------------------------------------------
1906   // We move the arguments backwards because the floating point register
1907   // destination will always be a register with a greater or equal
1908   // register number, or a stack slot.
1909   //   jix is the index of the incoming Java arguments.
1910   //   cix is the index of the outgoing C arguments.
1911   //--------------------------------------------------------------------
1912 
1913 #ifdef ASSERT
1914   bool reg_destroyed[RegisterImpl::number_of_registers];
1915   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1916   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1917     reg_destroyed[r] = false;
1918   }
1919   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1920     freg_destroyed[f] = false;
1921   }
1922 #endif // ASSERT
1923 
1924   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1925 #ifdef ASSERT
1926     if (in_regs[jix].first()->is_Register()) {
1927       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1928     } else {
1929       if (in_regs[jix].first()->is_FloatRegister()) {
1930         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1931       }
1932     }
1933     if (out_regs[cix].first()->is_Register()) {
1934       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1935     } else {
1936       if (out_regs[cix].first()->is_FloatRegister()) {
1937         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1938       }
1939     }
1940 #endif // ASSERT
1941 
1942     switch (in_sig_bt[jix]) {
1943       // Due to casting, small integers should only occur in pairs with type T_LONG.
1944       case T_BOOLEAN:
1945       case T_CHAR:
1946       case T_BYTE:
1947       case T_SHORT:
1948       case T_INT:
1949         // Move int and do sign extension.
1950         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1951         break;
1952 
1953       case T_LONG :
1954         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1955         break;
1956 
1957       case T_ARRAY:
1958         if (is_critical_native) {
1959           int body_arg = cix;
1960           cix -= 1; // Point to length arg.
1961           unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1962           break;
1963         }
1964         // else fallthrough
1965       case T_OBJECT:
1966         assert(!is_critical_native, "no oop arguments");
1967         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1968                     ((jix == 0) && (!method_is_static)),
1969                     &receiver_offset);
1970         break;
1971       case T_VOID:
1972         break;
1973 
1974       case T_FLOAT:
1975         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1976         break;
1977 
1978       case T_DOUBLE:
1979         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1980         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1981         break;
1982 
1983       case T_ADDRESS:
1984         assert(false, "found T_ADDRESS in java args");
1985         break;
1986 
1987       default:
1988         ShouldNotReachHere();
1989     }
1990   }
1991 
1992   //--------------------------------------------------------------------
1993   // Pre-load a static method's oop into ARG2.
1994   // Used both by locking code and the normal JNI call code.
1995   //--------------------------------------------------------------------
1996   if (method_is_static && !is_critical_native) {
1997     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1998 
1999     // Now handlize the static class mirror in ARG2. It's known not-null.
2000     __ z_stg(Z_ARG2, klass_offset, Z_SP);
2001     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2002     __ add2reg(Z_ARG2, klass_offset, Z_SP);
2003   }
2004 
2005   // Get JNIEnv* which is first argument to native.
2006   if (!is_critical_native) {
2007     __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
2008   }
2009 
2010   //////////////////////////////////////////////////////////////////////
2011   // We have all of the arguments setup at this point.
2012   // We MUST NOT touch any outgoing regs from this point on.
2013   // So if we must call out we must push a new frame.
2014   //////////////////////////////////////////////////////////////////////
2015 
2016 
2017   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
2018   // Both values represent the same position.
2019   __ get_PC(Z_R10);                // PC into register
2020   wrapper_CRegsSet = __ offset();  // and into the variable.
2021 
2022   // Z_R10 now has the pc loaded that we will use when we finally call to native.
2023 
2024   // We use the same pc/oopMap repeatedly when we call out.
2025   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
2026 
2027   // Lock a synchronized method.
2028 
2029   if (method->is_synchronized()) {
2030     assert(!is_critical_native, "unhandled");
2031 
2032     // ATTENTION: args and Z_R10 must be preserved.
2033     Register r_oop  = Z_R11;
2034     Register r_box  = Z_R12;
2035     Register r_tmp1 = Z_R13;
2036     Register r_tmp2 = Z_R7;
2037     Label done;
2038 
2039     // Load the oop for the object or class. Z_ARG2 contains
2040     // either the handlized oop from the incoming arguments or the handlized
2041     // class mirror (if the method is static).
2042     __ z_lg(r_oop, 0, Z_ARG2);
2043 
2044     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
2045     // Get the lock box slot's address.
2046     __ add2reg(r_box, lock_offset, Z_SP);
2047 
2048 #ifdef ASSERT
2049     if (UseBiasedLocking)
2050       // Making the box point to itself will make it clear it went unused
2051       // but also be obviously invalid.
2052       __ z_stg(r_box, 0, r_box);
2053 #endif // ASSERT
2054 
2055     // Try fastpath for locking.
2056     // Fast_lock kills r_tmp1, r_tmp2. (Don't use R1 as temp, won't work!)
2057     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2058     __ z_bre(done);
2059 
2060     //-------------------------------------------------------------------------
2061     // None of the above fast optimizations worked so we have to get into the
2062     // slow case of monitor enter. Inline a special case of call_VM that
2063     // disallows any pending_exception.
2064     //-------------------------------------------------------------------------
2065 
2066     Register oldSP = Z_R11;
2067 
2068     __ z_lgr(oldSP, Z_SP);
2069 
2070     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2071 
2072     // Prepare arguments for call.
2073     __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
2074     __ add2reg(Z_ARG2, lock_offset, oldSP);
2075     __ z_lgr(Z_ARG3, Z_thread);
2076 
2077     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2078 
2079     // Do the call.
2080     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2081     __ call(Z_R1_scratch);
2082 
2083     __ reset_last_Java_frame();
2084 
2085     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2086 #ifdef ASSERT
2087     { Label L;
2088       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2089       __ z_bre(L);
2090       __ stop("no pending exception allowed on exit from IR::monitorenter");
2091       __ bind(L);
2092     }
2093 #endif
2094     __ bind(done);
2095   } // lock for synchronized methods
2096 
2097 
2098   //////////////////////////////////////////////////////////////////////
2099   // Finally just about ready to make the JNI call.
2100   //////////////////////////////////////////////////////////////////////
2101 
2102   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2103   __ set_last_Java_frame(Z_SP, Z_R10);
2104 
2105   // Transition from _thread_in_Java to _thread_in_native.
2106   __ set_thread_state(_thread_in_native);
2107 
2108 
2109   //////////////////////////////////////////////////////////////////////
2110   // This is the JNI call.
2111   //////////////////////////////////////////////////////////////////////
2112 
2113   __ call_c(native_func);
2114 
2115 
2116   //////////////////////////////////////////////////////////////////////
2117   // We have survived the call once we reach here.
2118   //////////////////////////////////////////////////////////////////////
2119 
2120 
2121   //--------------------------------------------------------------------
2122   // Unpack native results.
2123   //--------------------------------------------------------------------
2124   // For int-types, we do any sign-extension required.
2125   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2126   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2127   // blocking or unlocking.
2128   // An OOP result (handle) is done specially in the slow-path code.
2129   //--------------------------------------------------------------------
2130   switch (ret_type) {
2131     case T_VOID:    break;         // Nothing to do!
2132     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
2133     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
2134     case T_LONG:    break;         // Got it where we want it (unless slow-path)
2135     case T_OBJECT:  break;         // Really a handle.
2136                                    // Cannot de-handlize until after reclaiming jvm_lock.
2137     case T_ARRAY:   break;
2138 
2139     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
2140       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
2141       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
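           // E.g., a C result of 2 becomes -2 (sign bit set) and then 1;
           // a result of 0 stays 0.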
2142       break;
2143     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
2144     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2145     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
2146     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
2147 
2148     default:
2149       ShouldNotReachHere();
2150       break;
2151   }
2152 
2153 
2154   // Switch thread to "native transition" state before reading the synchronization state.
2155   // This additional state is necessary because reading and testing the synchronization
2156   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2157   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2158   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
2159   //   - Thread A is resumed to finish this native method, but doesn't block here since it
2160   //     didn't see any synchronization in progress, and escapes.
2161 
2162   // Transition from _thread_in_native to _thread_in_native_trans.
2163   __ set_thread_state(_thread_in_native_trans);
2164 
2165   // Safepoint synchronization
2166   //--------------------------------------------------------------------
2167   // Must we block?
2168   //--------------------------------------------------------------------
2169   // Block, if necessary, before resuming in _thread_in_Java state.
2170   // In order for GC to work, don't clear the last_Java_sp until after blocking.
2171   //--------------------------------------------------------------------
2172   Label after_transition;
2173   {
2174     Label no_block, sync;
2175 
2176     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2177 
2178     // Force this write out before the read below.
2179     __ z_fence();
2180 
2181     __ safepoint_poll(sync, Z_R1);
2182 
2183     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2184     __ z_bre(no_block);
2185 
2186     // Block. Save any potential method result value before the operation and
2187     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2188     // lets us share the oopMap we used when we went native rather than create
2189     // a distinct one for this pc.
2190     //
2191     __ bind(sync);
2192     __ z_acquire();
2193 
2194     address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2195                                              : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2196 
2197     __ call_VM_leaf(entry_point, Z_thread);
2198 
2199     if (is_critical_native) {
2200       restore_native_result(masm, ret_type, workspace_slot_offset);
2201       __ z_bru(after_transition); // No thread state transition here.
2202     }
2203     __ bind(no_block);
2204     restore_native_result(masm, ret_type, workspace_slot_offset);
2205   }
2206 
2207   //--------------------------------------------------------------------
2208   // Thread state is thread_in_native_trans. Any safepoint blocking has
2209   // already happened so we can now change state to _thread_in_Java.
2210   //--------------------------------------------------------------------
2211   // Transition from _thread_in_native_trans to _thread_in_Java.
2212   __ set_thread_state(_thread_in_Java);
2213   __ bind(after_transition);
2214 
2215 
2216   //--------------------------------------------------------------------
2217   // Reguard any pages if necessary.
2218   // Protect native result from being destroyed.
2219   //--------------------------------------------------------------------
2220 
2221   Label no_reguard;
2222 
2223   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2224            JavaThread::stack_guard_yellow_reserved_disabled);
2225 
2226   __ z_bre(no_reguard);
2227 
2228   save_native_result(masm, ret_type, workspace_slot_offset);
2229   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2230   restore_native_result(masm, ret_type, workspace_slot_offset);
2231 
2232   __ bind(no_reguard);
2233 
2234 
2235   // Synchronized methods (slow path only)
2236   // No pending exceptions for now.
2237   //--------------------------------------------------------------------
2238   // Handle possibly pending exception (will unlock if necessary).
2239   // Native result is, if any is live, in Z_FRES or Z_RES.
2240   //--------------------------------------------------------------------
2241   // Unlock
2242   //--------------------------------------------------------------------
2243   if (method->is_synchronized()) {
2244     const Register r_oop        = Z_R11;
2245     const Register r_box        = Z_R12;
2246     const Register r_tmp1       = Z_R13;
2247     const Register r_tmp2       = Z_R7;
2248     Label done;
2249 
2250     // Get unboxed oop of class mirror or object ...
2251     int   offset = method_is_static ? klass_offset : receiver_offset;
2252 
2253     assert(offset != -1, "");
2254     __ z_lg(r_oop, offset, Z_SP);
2255 
2256     // ... and address of lock object box.
2257     __ add2reg(r_box, lock_offset, Z_SP);
2258 
2259     // Try fastpath for unlocking.
2260     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2261     __ z_bre(done);
2262 
2263     // Slow path for unlocking.
2264     // Save and restore any potential method result value around the unlocking operation.
2265     const Register R_exc = Z_R11;
2266 
2267     save_native_result(masm, ret_type, workspace_slot_offset);
2268 
2269     // Must save pending exception around the slow-path VM call. Since it's a
2270     // leaf call, the pending exception (if any) can be kept in a register.
2271     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2272     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2273 
2274     // Must clear pending-exception before re-entering the VM. Since this is
2275     // a leaf call, pending-exception-oop can be safely kept in a register.
2276     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2277 
2278     // Inline a special case of call_VM that disallows any pending_exception.
2279 
2280     // Get locked oop from the handle we passed to jni.
2281     __ z_lg(Z_ARG1, offset, Z_SP);
2282     __ add2reg(Z_ARG2, lock_offset, Z_SP);
2283     __ z_lgr(Z_ARG3, Z_thread);
2284 
2285     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2286 
2287     __ call(Z_R1_scratch);
2288 
2289 #ifdef ASSERT
2290     {
2291       Label L;
2292       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2293       __ z_bre(L);
2294       __ stop("no pending exception allowed on exit from IR::monitorexit");
2295       __ bind(L);
2296     }
2297 #endif
2298 
2299     // check_forward_pending_exception jumps to forward_exception if any pending
2300     // exception is set. The forward_exception routine expects to see the
2301     // exception in pending_exception and not in a register. Kind of clumsy,
2302     // since all folks who branch to forward_exception must have tested
2303     // pending_exception first and hence have it in a register already.
2304     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2305     restore_native_result(masm, ret_type, workspace_slot_offset);
2306     __ z_bru(done);
2307     __ z_illtrap(0x66);
2308 
2309     __ bind(done);
2310   }
2311 
2312 
2313   //--------------------------------------------------------------------
2314   // Clear "last Java frame" SP and PC.
2315   //--------------------------------------------------------------------
2316   __ verify_thread(); // Z_thread must be correct.
2317 
2318   __ reset_last_Java_frame();
2319 
2320   // Unpack oop result, e.g. JNIHandles::resolve result.
2321   if (is_reference_type(ret_type)) {
2322     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2323   }
2324 
2325   if (CheckJNICalls) {
2326     // clear_pending_jni_exception_check
2327     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2328   }
2329 
2330   // Reset handle block.
2331   if (!is_critical_native) {
2332     __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2333     __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2334 
2335     // Check for pending exceptions.
2336     __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2337     __ z_brne(handle_pending_exception);
2338   }
2339 
2340 
2341   //////////////////////////////////////////////////////////////////////
2342   // Return
2343   //////////////////////////////////////////////////////////////////////
2344 
2345 
2346 #ifndef USE_RESIZE_FRAME
2347   __ pop_frame();                     // Pop wrapper frame.
2348 #else
2349   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2350 #endif
2351   __ restore_return_pc();             // This is the way back to the caller.
2352   __ z_br(Z_R14);
2353 
2354 
2355   //////////////////////////////////////////////////////////////////////
2356   // Out-of-line calls to the runtime.
2357   //////////////////////////////////////////////////////////////////////
2358 
2359 
2360   if (!is_critical_native) {
2361 
2362     //---------------------------------------------------------------------
2363     // Handler for pending exceptions (out-of-line).
2364     //---------------------------------------------------------------------
2365     // Since this is a native call, we know the proper exception handler
2366     // is the empty function. We just pop this frame and then jump to
2367     // forward_exception_entry. Z_R14 will contain the native caller's
2368     // return PC.
2369     __ bind(handle_pending_exception);
2370     __ pop_frame();
2371     __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2372     __ restore_return_pc();
2373     __ z_br(Z_R1_scratch);
2374 
2375     //---------------------------------------------------------------------
2376     // Handler for a cache miss (out-of-line)
2377     //---------------------------------------------------------------------
2378     __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2379   }
2380   __ flush();
2381 
2382 
2383   //////////////////////////////////////////////////////////////////////
2384   // end of code generation
2385   //////////////////////////////////////////////////////////////////////
2386 
2387 
2388   nmethod *nm = nmethod::new_native_nmethod(method,
2389                                             compile_id,
2390                                             masm->code(),
2391                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2392                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2393                                             stack_slots / VMRegImpl::slots_per_word,
2394                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2395                                             in_ByteSize(lock_offset),
2396                                             oop_maps);
2397 
2398   if (is_critical_native) {
2399     nm->set_lazy_critical_native(true);
2400   }
2401 
2402   return nm;
2403 #else
2404   ShouldNotReachHere();
2405   return NULL;
2406 #endif // COMPILER2
2407 }
2408 
2409 static address gen_c2i_adapter(MacroAssembler  *masm,
2410                                int total_args_passed,
2411                                int comp_args_on_stack,
2412                                const BasicType *sig_bt,
2413                                const VMRegPair *regs,
2414                                Label &skip_fixup) {
2415   // Before we get into the guts of the C2I adapter, see if we should be here
2416   // at all. We've come from compiled code and are attempting to jump to the
2417   // interpreter, which means the caller made a static call to get here
2418   // (vcalls always get a compiled target if there is one). Check for a
2419   // compiled target. If there is one, we need to patch the caller's call.
2420 
2421   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2422   const Register ientry = Z_R11;
2423   const Register code   = Z_R11;
2424 
2425   address c2i_entrypoint;
2426   Label   patch_callsite;
2427 
2428   // Regular (verified) c2i entry point.
2429   c2i_entrypoint = __ pc();
2430 
2431   // Call patching needed?
2432   __ load_and_test_long(Z_R0_scratch, method_(code));
2433   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2434   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2435 
2436   __ bind(skip_fixup);  // Return point from patch_callsite.
2437 
2438   // Since all args are passed on the stack, total_args_passed*wordSize is the
2439   // space we need. We need an ABI scratch area, but we use the caller's since
2440   // it has already been allocated.
2441 
2442   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2443   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
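       // E.g., total_args_passed == 3 rounds up to 4 slots, giving
       // extraspace == 4 * wordSize + abi_scratch.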
2444   Register  sender_SP   = Z_R10;
2445   Register  value       = Z_R12;
2446 
2447   // Remember the senderSP so we can pop the interpreter arguments off the stack.
2448   // In addition, the frame manager expects initial_caller_sp in Z_R10.
2449   __ z_lgr(sender_SP, Z_SP);
2450 
2451   // This should always fit in 14 bit immediate.
2452   __ resize_frame(-extraspace, Z_R0_scratch);
2453 
2454   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2455   // args. This essentially moves the callers ABI scratch area from the top to the
2456   // bottom of the arg area.
2457 
2458   int st_off =  extraspace - wordSize;
2459 
2460   // Now write the args into the outgoing interpreter space.
2461   for (int i = 0; i < total_args_passed; i++) {
2462     VMReg r_1 = regs[i].first();
2463     VMReg r_2 = regs[i].second();
2464     if (!r_1->is_valid()) {
2465       assert(!r_2->is_valid(), "");
2466       continue;
2467     }
2468     if (r_1->is_stack()) {
2469       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2470       // We must account for it here.
2471       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2472 
2473       if (!r_2->is_valid()) {
2474         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2475       } else {
2476         // longs are given 2 64-bit slots in the interpreter,
2477         // but the data is passed in only 1 slot.
2478         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2479 #ifdef ASSERT
2480           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2481 #endif
2482           st_off -= wordSize;
2483         }
2484         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2485       }
2486     } else {
2487       if (r_1->is_Register()) {
2488         if (!r_2->is_valid()) {
2489           __ z_st(r_1->as_Register(), st_off, Z_SP);
2490         } else {
2491           // longs are given 2 64-bit slots in the interpreter, but the
2492           // data is passed in only 1 slot.
2493           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2494 #ifdef ASSERT
2495             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2496 #endif
2497             st_off -= wordSize;
2498           }
2499           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2500         }
2501       } else {
2502         assert(r_1->is_FloatRegister(), "");
2503         if (!r_2->is_valid()) {
2504           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2505         } else {
2506           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2507           // data is passed in only 1 slot.
2508           // One of these should get known junk...
2509 #ifdef ASSERT
2510           __ z_lzdr(Z_F1);
2511           __ z_std(Z_F1, st_off, Z_SP);
2512 #endif
2513           st_off -= wordSize;
2514           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2515         }
2516       }
2517     }
2518     st_off -= wordSize;
2519   }
2520 
2521 
2522   // Jump to the interpreter just as if interpreter was doing it.
2523   __ add2reg(Z_esp, st_off, Z_SP);
2524 
2525   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2526   __ z_br(ientry);
2527 
2528 
2529   // Prevent illegal entry to out-of-line code.
2530   __ z_illtrap(0x22);
2531 
2532   // Generate out-of-line runtime call to patch caller,
2533   // then continue as interpreted.
2534 
2535   // If you lose the race you go interpreted.
2536   // We don't see any possible endless c2i -> i2c -> c2i ...
2537   // transitions no matter how rare.
2538   __ bind(patch_callsite);
2539 
2540   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2541   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2542   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2543   __ z_bru(skip_fixup);
2544 
2545   // end of out-of-line code
2546 
2547   return c2i_entrypoint;
2548 }
2549 
2550 // On entry, the following registers are set
2551 //
2552 //    Z_thread  r8  - JavaThread*
2553 //    Z_method  r9  - callee's method (method to be invoked)
2554 //    Z_esp     r7  - operand (or expression) stack pointer of caller. One slot above last arg.
2555 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2556 //
2557 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2558                                     int total_args_passed,
2559                                     int comp_args_on_stack,
2560                                     const BasicType *sig_bt,
2561                                     const VMRegPair *regs) {
2562   const Register value = Z_R12;
2563   const Register ld_ptr= Z_esp;
2564 
2565   int ld_offset = total_args_passed * wordSize;
2566 
2567   // Cut-out for having no stack args.
2568   if (comp_args_on_stack) {
2569     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2570     // registers are below. By subtracting stack0, we either get a negative
2571     // number (all values in registers) or the maximum stack slot accessed.
2572     // Convert VMRegImpl (4 byte) stack slots to words.
2573     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2574     // Round up to minimum stack alignment, in wordSize
2575     comp_words_on_stack = align_up(comp_words_on_stack, 2);
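         // E.g., comp_args_on_stack == 3 (4-byte slots) is 12 bytes, which rounds
         // up to 2 words (wordSize == 8); that is already even, so the frame is
         // extended by 2 * wordSize.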
2576 
2577     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2578   }
2579 
2580   // Now generate the shuffle code. Pick up all register args and move the
2581   // rest through register value=Z_R12.
2582   for (int i = 0; i < total_args_passed; i++) {
2583     if (sig_bt[i] == T_VOID) {
2584       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2585       continue;
2586     }
2587 
2588     // Pick up 0, 1 or 2 words from ld_ptr.
2589     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2590            "scrambled load targets?");
2591     VMReg r_1 = regs[i].first();
2592     VMReg r_2 = regs[i].second();
2593     if (!r_1->is_valid()) {
2594       assert(!r_2->is_valid(), "");
2595       continue;
2596     }
2597     if (r_1->is_FloatRegister()) {
2598       if (!r_2->is_valid()) {
2599         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2600         ld_offset -= wordSize;
2601       } else {
2602         // Skip the unused interpreter slot.
2603         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2604         ld_offset -= 2 * wordSize;
2605       }
2606     } else {
2607       if (r_1->is_stack()) {
2608         // Must do a memory to memory move.
2609         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2610 
2611         if (!r_2->is_valid()) {
2612           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2613         } else {
2614           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2615           // data is passed in only 1 slot.
2616           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2617             ld_offset -= wordSize;
2618           }
2619           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2620         }
2621       } else {
2622         if (!r_2->is_valid()) {
2623           // Not sure we need to do this but it shouldn't hurt.
2624           if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
2625             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2626           } else {
2627             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2628           }
2629         } else {
2630           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2631           // data is passed in only 1 slot.
2632           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2633             ld_offset -= wordSize;
2634           }
2635           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2636         }
2637       }
2638       ld_offset -= wordSize;
2639     }
2640   }
2641 
2642   // Jump to the compiled code just as if compiled code was doing it.
2643   // load target address from method oop:
2644   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2645 
2646   // Store method oop into thread->callee_target.
2647   // 6243940: We might end up in handle_wrong_method if
2648   // the callee is deoptimized as we race through here. If that
2649   // happens we don't want to take a safepoint because the
2650   // caller frame will look interpreted and arguments are now
2651   // "compiled" so it is much better to make this transition
2652   // invisible to the stack walking code. Unfortunately, if
2653   // we try and find the callee by normal means a safepoint
2654   // is possible. So we stash the desired callee in the thread
2655   // and the vm will find it there should this case occur.
2656   __ z_stg(Z_method, thread_(callee_target));
2657 
2658   __ z_br(Z_R1_scratch);
2659 }
2660 
2661 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2662                                                             int total_args_passed,
2663                                                             int comp_args_on_stack,
2664                                                             const BasicType *sig_bt,
2665                                                             const VMRegPair *regs,
2666                                                             AdapterFingerPrint* fingerprint) {
2667   __ align(CodeEntryAlignment);
2668   address i2c_entry = __ pc();
2669   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2670 
2671   address c2i_unverified_entry;
2672 
2673   Label skip_fixup;
2674   {
2675     Label ic_miss;
2676     const int klass_offset           = oopDesc::klass_offset_in_bytes();
2677     const int holder_klass_offset    = CompiledICHolder::holder_klass_offset();
2678     const int holder_metadata_offset = CompiledICHolder::holder_metadata_offset();
2679 
2680     // Out-of-line call to ic_miss handler.
2681     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2682 
2683     // Unverified Entry Point UEP
2684     __ align(CodeEntryAlignment);
2685     c2i_unverified_entry = __ pc();
2686 
2687     // Check the pointers.
2688     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2689       __ z_ltgr(Z_ARG1, Z_ARG1);
2690       __ z_bre(ic_miss);
2691     }
2692     __ verify_oop(Z_ARG1);
2693 
2694     // Check ic: object class <-> cached class
2695     // Compress cached class for comparison. That's more efficient.
2696     if (UseCompressedClassPointers) {
2697       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2698       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2699     } else {
2700       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2701     }
2702     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2703 
2704     // This def MUST MATCH code in gen_c2i_adapter!
2705     const Register code = Z_R11;
2706 
2707     __ z_lg(Z_method, holder_metadata_offset, Z_method);
2708     __ load_and_test_long(Z_R0, method_(code));
2709     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2710 
2711     // Fall through to VEP. Duplicates the LTG, but saves a taken branch.
2712   }
2713 
2714   address c2i_entry = __ pc();
2715 
2716   // Class initialization barrier for static methods
2717   address c2i_no_clinit_check_entry = NULL;
2718   if (VM_Version::supports_fast_class_init_checks()) {
2719     Label L_skip_barrier;
2720 
2721     { // Bypass the barrier for non-static methods
2722       __ testbit(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
2723       __ z_bfalse(L_skip_barrier); // non-static
2724     }
2725 
2726     Register klass = Z_R11;
2727     __ load_method_holder(klass, Z_method);
2728     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
2729 
2730     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
2731     __ z_br(klass);
2732 
2733     __ bind(L_skip_barrier);
2734     c2i_no_clinit_check_entry = __ pc();
2735   }
2736 
2737   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2738 
2739   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
2740 }
2741 
2742 // This function returns the adjustment size (in number of words) of a c2i adapter
2743 // activation for use during deoptimization.
2744 //
2745 // Actually, only compiled frames need to be adjusted, but it
2746 // does no harm to adjust entry and interpreter frames, too.
2747 //
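     // A minimal worked example (the values are illustrative, not the actual s390
     // constants): with callee_locals = 10, callee_parameters = 4,
     // Interpreter::stackElementWords = 1, and a parent ijava frame abi of
     // 16 bytes (2 words), the adjustment would be (10 - 4) * 1 + 16 / 8 = 8 words.
     //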
2748 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2749   assert(callee_locals >= callee_parameters,
2750           "test and remove; got more parms than locals");
2751   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2752   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2753          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2754 }
2755 
2756 uint SharedRuntime::out_preserve_stack_slots() {
2757   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2758 }
2759 
2760 //
2761 // Frame generation for deopt and uncommon trap blobs.
2762 //
2763 static void push_skeleton_frame(MacroAssembler* masm,
2764                           /* Unchanged */
2765                           Register frame_sizes_reg,
2766                           Register pcs_reg,
2767                           /* Invalidate */
2768                           Register frame_size_reg,
2769                           Register pc_reg) {
2770   BLOCK_COMMENT("  push_skeleton_frame {");
2771    __ z_lg(pc_reg, 0, pcs_reg);
2772    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2773    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2774    Register fp = pc_reg;
2775    __ push_frame(frame_size_reg, fp);
2776 #ifdef ASSERT
2777    // The magic is required for successfully walking skeletal frames.
2778    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2779    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2780    // Fill other slots that are supposedly not necessary with eye catchers.
2781    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2782    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2783    // The sender_sp of the bottom frame is set before pushing it.
2784    // The sender_sp of non-bottom frames is their caller's top_frame_sp, which
2785    // is unknown here. Luckily it is not needed before the frame is filled in
2786    // layout_activation(); we assert this by setting an eye catcher (see
2787    // comments on sender_sp in frame_s390.hpp).
2788    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2789 #endif // ASSERT
2790   BLOCK_COMMENT("  } push_skeleton_frame");
2791 }
2792 
2793 // Loop through the UnrollBlock info and create new frames.
2794 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2795                             /* read */
2796                             Register unroll_block_reg,
2797                             /* invalidate */
2798                             Register frame_sizes_reg,
2799                             Register number_of_frames_reg,
2800                             Register pcs_reg,
2801                             Register tmp1,
2802                             Register tmp2) {
2803   BLOCK_COMMENT("push_skeleton_frames {");
2804   // _number_of_frames is of type int (deoptimization.hpp).
2805   __ z_lgf(number_of_frames_reg,
2806            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2807   __ z_lg(pcs_reg,
2808           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2809   __ z_lg(frame_sizes_reg,
2810           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2811 
2812   // stack: (caller_of_deoptee, ...).
2813 
2814   // If caller_of_deoptee is a compiled frame, then we extend it to make
2815   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2816   // See also Deoptimization::last_frame_adjust() above.
2817   // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.
2818 
2819   __ z_lgf(Z_R1_scratch,
2820            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2821   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2822   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2823   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2824   // (it is required to find the original pc of caller_of_deoptee if it is marked
2825   // for deoptimization - see nmethod::orig_pc_addr()).
2826   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2827 
2828   // Now push the new interpreter frames.
2829   Label loop, loop_entry;
2830 
2831   // Make sure that there is at least one entry in the array.
2832   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2833   __ asm_assert_ne("array_size must be > 0", 0x205);
2834 
2835   __ z_bru(loop_entry);
2836 
2837   __ bind(loop);
2838 
2839   __ add2reg(frame_sizes_reg, wordSize);
2840   __ add2reg(pcs_reg, wordSize);
2841 
2842   __ bind(loop_entry);
2843 
2844   // Allocate a new frame, fill in the pc.
2845   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2846 
2847   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2848   __ z_brne(loop);
2849 
2850   // Set the top frame's return pc.
2851   __ add2reg(pcs_reg, wordSize);
2852   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2853   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2854   BLOCK_COMMENT("} push_skeleton_frames");
2855 }
2856 
2857 //------------------------------generate_deopt_blob----------------------------
2858 void SharedRuntime::generate_deopt_blob() {
2859   // Allocate space for the code.
2860   ResourceMark rm;
2861   // Setup code generation tools.
2862   CodeBuffer buffer("deopt_blob", 2048, 1024);
2863   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2864   Label exec_mode_initialized;
2865   OopMap* map = NULL;
2866   OopMapSet *oop_maps = new OopMapSet();
2867 
2868   unsigned int start_off = __ offset();
2869   Label cont;
2870 
2871   // --------------------------------------------------------------------------
2872   // Normal entry (non-exception case)
2873   //
2874   // We have been called from the deopt handler of the deoptee.
2875   // Z_R14 points behind the call in the deopt handler. We adjust
2876   // it such that it points to the start of the deopt handler.
2877   // The return_pc has been stored in the frame of the deoptee and
2878   // will replace the address of the deopt_handler in the call
2879   // to Deoptimization::fetch_unroll_info below.
2880   // The (int) cast is necessary, because -((unsigned int)14)
2881   // is an unsigned int.
2882   __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
2883 
2884   const Register   exec_mode_reg = Z_tmp_1;
2885 
2886   // stack: (deoptee, caller of deoptee, ...)
2887 
2888   // Push an "unpack" frame.
2889   // R14 contains the return address pointing into the deoptimized
2890   // nmethod that was valid just before the nmethod was deoptimized.
2891   // Save R14 into the deoptee frame. The `fetch_unroll_info'
2892   // procedure called below will read it from there.
2893   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2894 
2895   // note the entry point.
2896   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2897   __ z_bru(exec_mode_initialized);
2898 
2899 #ifndef COMPILER1
2900   int reexecute_offset = 1; // An odd offset will produce an odd pc, which triggers a hardware trap.
2901 #else
2902   // --------------------------------------------------------------------------
2903   // Reexecute entry
2904   // - Z_R14 = Deopt Handler in nmethod
2905 
2906   int reexecute_offset = __ offset() - start_off;
2907 
2908   // No need to update map, as each call to save_live_registers will produce an identical oopmap.
2909   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2910 
2911   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2912   __ z_bru(exec_mode_initialized);
2913 #endif
2914 
2915 
2916   // --------------------------------------------------------------------------
2917   // Exception entry. We reached here via a branch. Registers on entry:
2918   // - Z_EXC_OOP (Z_ARG1) = exception oop
2919   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2920 
2921   int exception_offset = __ offset() - start_off;
2922 
2923   // All registers are dead at this entry point, except for Z_EXC_OOP and
2924   // Z_EXC_PC, which contain the exception oop and exception pc,
2925   // respectively. Set them in TLS and fall through to the
2926   // unpack_with_exception_in_tls entry point.
2927 
2928   // Store exception oop and pc in thread (location known to GC).
2929   // Need this since the call to "fetch_unroll_info()" may safepoint.
2930   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2931   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2932 
2933   // fall through
2934 
2935   int exception_in_tls_offset = __ offset() - start_off;
2936 
2937   // new implementation because exception oop is now passed in JavaThread
2938 
2939   // Prolog for exception case
2940   // All registers must be preserved because they might be used by LinearScan
2941   // Exception oop and throwing PC are passed in JavaThread.
2942 
2943   // Load the throwing pc from JavaThread and use it as the return address of the current frame.
2944   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2945 
2946   // Save everything in sight.
2947   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2948 
2949   // Now it is safe to overwrite any register
2950 
2951   // Clear the exception pc field in JavaThread
2952   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2953 
2954   // Deopt during an exception.  Save exec mode for unpack_frames.
2955   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2956 
2957 
2958 #ifdef ASSERT
2959   // verify that there is really an exception oop in JavaThread
2960   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2961   __ verify_oop(Z_ARG1);
2962 
2963   // verify that there is no pending exception
2964   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2965                              "must not have pending exception here", __LINE__);
2966 #endif
2967 
2968   // --------------------------------------------------------------------------
2969   // At this point, the live registers are saved and
2970   // the exec_mode_reg has been set up correctly.
2971   __ bind(exec_mode_initialized);
2972 
2973   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2974 
2975   {
2976   const Register unroll_block_reg  = Z_tmp_2;
2977 
2978   // We need to set `last_Java_frame' because `fetch_unroll_info' will
2979   // call `last_Java_frame()'. However, we can't block and no gc will
2980   // occur, so we don't need an oopmap. The value of the pc in the
2981   // frame is not particularly important. It just needs to identify the blob.
2982 
2983   // Don't set last_Java_pc anymore here (it is implicitly NULL then).
2984   // The correct PC is retrieved in pd_last_frame() in that case.
2985   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
2986   // With EscapeAnalysis turned on, this call may safepoint,
2987   // despite being marked as a "leaf call"!
2988   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
2989   // Set an oopmap for the call site. This describes all our saved volatile registers.
2990   int offs = __ offset();
2991   oop_maps->add_gc_map(offs, map);
2992 
2993   __ reset_last_Java_frame();
2994   // save the return value.
2995   __ z_lgr(unroll_block_reg, Z_RET);
2996   // restore the return registers that have been saved
2997   // (among other registers) by save_live_registers(...).
2998   RegisterSaver::restore_result_registers(masm);
2999 
3000   // reload the exec mode from the UnrollBlock (it might have changed)
3001   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
3002 
3003   // In excp_deopt_mode, restore and clear exception oop which we
3004   // stored in the thread during exception entry above. The exception
3005   // oop will be the return value of this stub.
3006   NearLabel skip_restore_excp;
3007   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
3008   __ z_lg(Z_RET, thread_(exception_oop));
3009   __ clear_mem(thread_(exception_oop), 8);
3010   __ bind(skip_restore_excp);
3011 
3012   // remove the "unpack" frame
3013   __ pop_frame();
3014 
3015   // stack: (deoptee, caller of deoptee, ...).
3016 
3017   // pop the deoptee's frame
3018   __ pop_frame();
3019 
3020   // stack: (caller_of_deoptee, ...).
3021 
3022   // loop through the `UnrollBlock' info and create interpreter frames.
3023   push_skeleton_frames(masm, true/*deopt*/,
3024                   unroll_block_reg,
3025                   Z_tmp_3,
3026                   Z_tmp_4,
3027                   Z_ARG5,
3028                   Z_ARG4,
3029                   Z_ARG3);
3030 
3031   // stack: (skeletal interpreter frame, ..., optional skeletal
3032   // interpreter frame, caller of deoptee, ...).
3033   }
3034 
3035   // push an "unpack" frame taking care of float / int return values.
3036   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
3037 
3038   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3039   // skeletal interpreter frame, caller of deoptee, ...).
3040 
3041   // spill live volatile registers since we'll do a call.
3042   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
3043   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
3044 
3045   // Let the unpacker lay out information in the skeletal frames just allocated.
3046   __ get_PC(Z_RET);
3047   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
3048   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
3049                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
3050 
3051   __ reset_last_Java_frame();
3052 
3053   // restore the volatiles saved above.
3054   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
3055   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
3056 
3057   // pop the "unpack" frame.
3058   __ pop_frame();
3059   __ restore_return_pc();
3060 
3061   // stack: (top interpreter frame, ..., optional interpreter frame,
3062   // caller of deoptee, ...).
3063 
3064   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3065   __ restore_bcp();
3066   __ restore_locals();
3067   __ restore_esp();
3068 
3069   // return to the interpreter entry point.
3070   __ z_br(Z_R14);
3071 
3072   // Make sure all code is generated
3073   masm->flush();
3074 
3075   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3076   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3077 }
3078 
3079 
3080 #ifdef COMPILER2
3081 //------------------------------generate_uncommon_trap_blob--------------------
3082 void SharedRuntime::generate_uncommon_trap_blob() {
3083   // Allocate space for the code
3084   ResourceMark rm;
3085   // Setup code generation tools
3086   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3087   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3088 
3089   Register unroll_block_reg = Z_tmp_1;
3090   Register klass_index_reg  = Z_ARG2;
3091   Register unc_trap_reg     = Z_ARG2;
3092 
3093   // stack: (deoptee, caller_of_deoptee, ...).
3094 
3095   // push a dummy "unpack" frame and call
3096   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3097   // vframe array and return the `UnrollBlock' information.
3098 
3099   // save R14 to compiled frame.
3100   __ save_return_pc();
3101   // push the "unpack_frame".
3102   __ push_frame_abi160(0);
3103 
3104   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
3105 
3106   // set the "unpack" frame as last_Java_frame.
3107   // `Deoptimization::uncommon_trap' expects it and considers its
3108   // sender frame as the deoptee frame.
3109   __ get_PC(Z_R1_scratch);
3110   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3111 
3112   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
3113   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
3114   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
3115   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
3116 
3117   __ reset_last_Java_frame();
3118 
3119   // pop the "unpack" frame
3120   __ pop_frame();
3121 
3122   // stack: (deoptee, caller_of_deoptee, ...).
3123 
3124   // save the return value.
3125   __ z_lgr(unroll_block_reg, Z_RET);
3126 
3127   // pop the deoptee frame.
3128   __ pop_frame();
3129 
3130   // stack: (caller_of_deoptee, ...).
3131 
3132 #ifdef ASSERT
3133   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
3134   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
3135   const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
3136 #ifndef VM_LITTLE_ENDIAN
3137   + 3
3138 #endif
3139   ;
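       // unpack_kind is a 4-byte int; on big-endian s390 its least significant
       // byte (the one CLI/CLIY compares against the small enum value) is at
       // offset + 3, hence the endianness adjustment above.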
3140   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
3141     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3142   } else {
3143     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3144   }
3145   __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
3146 #endif
3147 
3148   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
3149 
3150   // Allocate new interpreter frame(s) and possibly resize the caller's frame
3151   // (no more adapters!).
3152   push_skeleton_frames(masm, false/*deopt*/,
3153                   unroll_block_reg,
3154                   Z_tmp_2,
3155                   Z_tmp_3,
3156                   Z_tmp_4,
3157                   Z_ARG5,
3158                   Z_ARG4);
3159 
3160   // stack: (skeletal interpreter frame, ..., optional skeletal
3161   // interpreter frame, (resized) caller of deoptee, ...).
3162 
3163   // push a dummy "unpack" frame taking care of float return values.
3164   // call `Deoptimization::unpack_frames' to lay out information in the
3165   // interpreter frames just created
3166 
3167   // push the "unpack" frame
3168   const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
3169 
3170   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3171   // skeletal interpreter frame, (resized) caller of deoptee, ...).
3172 
3173   // set the "unpack" frame as last_Java_frame
3174   __ get_PC(Z_R1_scratch);
3175   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3176 
3177   // indicate it is the uncommon trap case
3178   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
3179   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3180   // Let the unpacker lay out information in the skeletal frames just allocated.
3181   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
3182 
3183   __ reset_last_Java_frame();
3184   // pop the "unpack" frame
3185   __ pop_frame();
3186   // Restore the return pc from the top interpreter frame.
3187   __ restore_return_pc();
3188 
3189   // stack: (top interpreter frame, ..., optional interpreter frame,
3190   // (resized) caller of deoptee, ...).
3191 
3192   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3193   __ restore_bcp();
3194   __ restore_locals();
3195   __ restore_esp();
3196 
3197   // return to the interpreter entry point
3198   __ z_br(Z_R14);
3199 
3200   masm->flush();
3201   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
3202 }
3203 #endif // COMPILER2
3204 
3205 
3206 //------------------------------generate_handler_blob------
3207 //
3208 // Generate a special Compile2Runtime blob that saves all registers,
3209 // and sets up an oopmap.
3210 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3211   assert(StubRoutines::forward_exception_entry() != NULL,
3212          "must be generated before");
3213 
3214   ResourceMark rm;
3215   OopMapSet *oop_maps = new OopMapSet();
3216   OopMap* map;
3217 
3218   // Allocate space for the code. Setup code generation tools.
3219   CodeBuffer buffer("handler_blob", 2048, 1024);
3220   MacroAssembler* masm = new MacroAssembler(&buffer);
3221 
3222   unsigned int start_off = __ offset();
3223   address call_pc = NULL;
3224   int frame_size_in_bytes;
3225 
3226   bool cause_return = (poll_type == POLL_AT_RETURN);
3227   // Make room for return address (or push it again)
3228   if (!cause_return) {
3229     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
3230   }
3231 
3232   // Save registers, fpu state, and flags
3233   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3234 
3235   if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
3236     // Keep a copy of the return pc to detect if it gets modified.
3237     __ z_lgr(Z_R6, Z_R14);
3238   }
3239 
3240   // The following is basically a call_VM. However, we need the precise
3241   // address of the call in order to generate an oopmap. Hence, we do all the
3242   // work ourselves.
3243   __ set_last_Java_frame(Z_SP, noreg);
3244 
3245   // call into the runtime to handle the safepoint poll
3246   __ call_VM_leaf(call_ptr, Z_thread);
3247 
3248 
3249   // Set an oopmap for the call site. This oopmap will map all
3250   // oop-registers and debug-info registers as callee-saved. This
3251   // will allow deoptimization at this safepoint to find all possible
3252   // debug-info recordings, as well as let GC find all oops.
3253 
3254   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
3255 
3256   Label noException;
3257 
3258   __ reset_last_Java_frame();
3259 
3260   __ load_and_test_long(Z_R1, thread_(pending_exception));
3261   __ z_bre(noException);
3262 
3263   // Pending exception case, used (sporadically) by
3264   // api/java_lang/Thread.State/index#ThreadState et al.
3265   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3266 
3267   // Jump to forward_exception_entry, with the issuing PC in Z_R14
3268   // so it looks like the original nmethod called forward_exception_entry.
3269   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3270   __ z_br(Z_R1_scratch);
3271 
3272   // No exception case
3273   __ bind(noException);
3274 
3275   if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
3276     Label no_adjust;
3277     // If our stashed return pc was modified by the runtime, we avoid touching it.
3278     const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
3279     __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
3280     __ z_brne(no_adjust);
3281 
3282     // Adjust return pc forward to step over the safepoint poll instruction
3283     __ instr_size(Z_R1_scratch, Z_R6);
3284     __ z_agr(Z_R6, Z_R1_scratch);
3285     __ z_stg(Z_R6, offset_of_return_pc, Z_SP);
3286 
3287     __ bind(no_adjust);
3288   }
3289 
3290   // Normal exit, restore registers and exit.
3291   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3292 
3293   __ z_br(Z_R14);
3294 
3295   // Make sure all code is generated
3296   masm->flush();
3297 
3298   // Fill-out other meta info
3299   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3300 }
3301 
3302 
3303 //
3304 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss).
3305 //
3306 // Generate a stub that calls into vm to find out the proper destination
3307 // of a Java call. All the argument registers are live at this point
3308 // but since this is generic code we don't know what they are and the caller
3309 // must do any gc of the args.
3310 //
3311 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3312   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3313 
3314   // allocate space for the code
3315   ResourceMark rm;
3316 
3317   CodeBuffer buffer(name, 1000, 512);
3318   MacroAssembler* masm                = new MacroAssembler(&buffer);
3319 
3320   OopMapSet *oop_maps = new OopMapSet();
3321   OopMap* map = NULL;
3322 
3323   unsigned int start_off = __ offset();
3324 
3325   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3326 
3327   // We must save a PC from within the stub as the return PC.
3328   // C code doesn't store the LR where we expect the PC,
3329   // so we would run into trouble upon stack walking.
3330   __ get_PC(Z_R1_scratch);
3331 
3332   unsigned int frame_complete = __ offset();
3333 
3334   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
3335 
3336   __ call_VM_leaf(destination, Z_thread, Z_method);
3337 
3338 
3339   // Set an oopmap for the call site.
3340   // We need this not only for callee-saved registers, but also for volatile
3341   // registers that the compiler might be keeping live across a safepoint.
3342 
3343   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
3344 
3345   // clear last_Java_sp
3346   __ reset_last_Java_frame();
3347 
3348   // check for pending exceptions
3349   Label pending;
3350   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3351   __ z_brne(pending);
3352 
3353   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3354   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3355 
3356   // get the returned method
3357   __ get_vm_result_2(Z_method);
3358 
3359   // We are back to the original state on entry and ready to go.
3360   __ z_br(Z_R1_scratch);
3361 
3362   // Pending exception after the safepoint
3363 
3364   __ bind(pending);
3365 
3366   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3367 
3368   // exception pending => remove activation and forward to exception handler
3369 
3370   __ z_lgr(Z_R2, Z_R0); // pending_exception
3371   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3372   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3373   __ z_br(Z_R1_scratch);
3374 
3375   // -------------
3376   // make sure all code is generated
3377   masm->flush();
3378 
3379   // return the blob
3380   // frame_size_words or bytes??
3381   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3382                                        oop_maps, true);
3383 
3384 }
3385 
3386 //------------------------------Montgomery multiplication------------------------
3387 //
3388 
3389 // Subtract 0:b from carry:a. Return carry.
3390 static unsigned long
3391 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3392   unsigned long i, c = 8 * (unsigned long)(len - 1);
3393   __asm__ __volatile__ (
3394     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3395     "LGHI   0, 8               \n" // index increment (for BRXLG)
3396     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3397     "0:                        \n"
3398     "LG     %[c], 0(%[i],%[a]) \n"
3399     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3400     "STG    %[c], 0(%[i],%[a]) \n"
3401     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3402     "SLBGR  %[c], %[c]         \n" // save carry - 1
3403     : [i]"=&a"(i), [c]"+r"(c)
3404     : [a]"a"(a), [b]"a"(b)
3405     : "cc", "memory", "r0", "r1"
3406  );
3407   return carry + c;
3408 }
3409 
3410 // Multiply (unsigned) Long A by Long B, accumulating the double-
3411 // length result into the accumulator formed of T0, T1, and T2.
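     // Descriptive note: (T2:T1:T0) += A[A_ind] * B[B_ind]. MLG leaves the 128-bit
     // product in r0 (high) and r1 (low); ALGR adds the low half into T0, ALCGR
     // adds the high half plus carry into T1, and the final ALCGR adds the
     // remaining carry into T2.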
3412 inline void MACC(unsigned long A[], long A_ind,
3413                  unsigned long B[], long B_ind,
3414                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3415   long A_si = 8 * A_ind,
3416        B_si = 8 * B_ind;
3417   __asm__ __volatile__ (
3418     "LG     1, 0(%[A_si],%[A]) \n"
3419     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3420     "ALGR   %[T0], 1           \n"
3421     "LGHI   1, 0               \n" // r1 = 0
3422     "ALCGR  %[T1], 0           \n"
3423     "ALCGR  %[T2], 1           \n"
3424     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3425     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3426     : "cc", "r0", "r1"
3427  );
3428 }
3429 
3430 // As above, but add twice the double-length result into the
3431 // accumulator.
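     // (MACC2 is used by montgomery_square below: each cross product
     // a[j] * a[i-j] with j != i-j occurs twice in the square, so it can be
     // accumulated twice in one step.)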
3432 inline void MACC2(unsigned long A[], long A_ind,
3433                   unsigned long B[], long B_ind,
3434                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3435   const unsigned long zero = 0;
3436   long A_si = 8 * A_ind,
3437        B_si = 8 * B_ind;
3438   __asm__ __volatile__ (
3439     "LG     1, 0(%[A_si],%[A]) \n"
3440     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3441     "ALGR   %[T0], 1           \n"
3442     "ALCGR  %[T1], 0           \n"
3443     "ALCGR  %[T2], %[zero]     \n"
3444     "ALGR   %[T0], 1           \n"
3445     "ALCGR  %[T1], 0           \n"
3446     "ALCGR  %[T2], %[zero]     \n"
3447     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3448     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3449     : "cc", "r0", "r1"
3450  );
3451 }
3452 
3453 // Fast Montgomery multiplication. The derivation of the algorithm is
3454 // in "A Cryptographic Library for the Motorola DSP56000",
3455 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
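     //
     // One Montgomery step in miniature (illustrative numbers only; radix R = 16
     // instead of the 2^(64*len) used below): for n = 13, inv = (-n^-1) mod 16 = 11.
     // Given a full product t = a*b = 63, the reduction computes
     //   m   = ((t mod 16) * inv) mod 16 = (15 * 11) mod 16 = 5
     //   res = (t + m*n) / 16 = (63 + 65) / 16 = 8
     // which equals a*b*R^-1 mod n. The loops below interleave this reduction
     // with the multiplication, one 64-bit word at a time.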
3456 static void
3457 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3458                     unsigned long m[], unsigned long inv, int len) {
3459   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3460   int i;
3461 
3462   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3463 
3464   for (i = 0; i < len; i++) {
3465     int j;
3466     for (j = 0; j < i; j++) {
3467       MACC(a, j, b, i-j, t0, t1, t2);
3468       MACC(m, j, n, i-j, t0, t1, t2);
3469     }
3470     MACC(a, i, b, 0, t0, t1, t2);
3471     m[i] = t0 * inv;
3472     MACC(m, i, n, 0, t0, t1, t2);
3473 
3474     assert(t0 == 0, "broken Montgomery multiply");
3475 
3476     t0 = t1; t1 = t2; t2 = 0;
3477   }
3478 
3479   for (i = len; i < 2 * len; i++) {
3480     int j;
3481     for (j = i - len + 1; j < len; j++) {
3482       MACC(a, j, b, i-j, t0, t1, t2);
3483       MACC(m, j, n, i-j, t0, t1, t2);
3484     }
3485     m[i-len] = t0;
3486     t0 = t1; t1 = t2; t2 = 0;
3487   }
3488 
3489   while (t0) {
3490     t0 = sub(m, n, t0, len);
3491   }
3492 }
3493 
3494 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3495 // multiplies so it should be up to 25% faster than Montgomery
3496 // multiplication. However, its loop control is more complex and it
3497 // may actually run slower on some machines.
3498 static void
3499 montgomery_square(unsigned long a[], unsigned long n[],
3500                   unsigned long m[], unsigned long inv, int len) {
3501   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3502   int i;
3503 
3504   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3505 
3506   for (i = 0; i < len; i++) {
3507     int j;
3508     int end = (i+1)/2;
3509     for (j = 0; j < end; j++) {
3510       MACC2(a, j, a, i-j, t0, t1, t2);
3511       MACC(m, j, n, i-j, t0, t1, t2);
3512     }
3513     if ((i & 1) == 0) {
3514       MACC(a, j, a, j, t0, t1, t2);
3515     }
3516     for (; j < i; j++) {
3517       MACC(m, j, n, i-j, t0, t1, t2);
3518     }
3519     m[i] = t0 * inv;
3520     MACC(m, i, n, 0, t0, t1, t2);
3521 
3522     assert(t0 == 0, "broken Montgomery square");
3523 
3524     t0 = t1; t1 = t2; t2 = 0;
3525   }
3526 
3527   for (i = len; i < 2*len; i++) {
3528     int start = i-len+1;
3529     int end = start + (len - start)/2;
3530     int j;
3531     for (j = start; j < end; j++) {
3532       MACC2(a, j, a, i-j, t0, t1, t2);
3533       MACC(m, j, n, i-j, t0, t1, t2);
3534     }
3535     if ((i & 1) == 0) {
3536       MACC(a, j, a, j, t0, t1, t2);
3537     }
3538     for (; j < len; j++) {
3539       MACC(m, j, n, i-j, t0, t1, t2);
3540     }
3541     m[i-len] = t0;
3542     t0 = t1; t1 = t2; t2 = 0;
3543   }
3544 
3545   while (t0) {
3546     t0 = sub(m, n, t0, len);
3547   }
3548 }
3549 
3550 // The threshold at which squaring is advantageous was determined
3551 // experimentally on an i7-3930K (Sandy Bridge-E) CPU @ 3.5GHz.
3552 // Value seems to be ok for other platforms, too.
3553 #define MONTGOMERY_SQUARING_THRESHOLD 64
3554 
3555 // Copy len longwords from s to d, word-swapping as we go. The
3556 // destination array is reversed.
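     // For example (illustrative): with len = 3 and s = {x, y, z}, d receives
     // {z, y, x}. On big-endian s390 no swap within a longword is needed, hence
     // the Unimplemented() guard for the little-endian case.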
3557 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3558   d += len;
3559   while(len-- > 0) {
3560     d--;
3561     unsigned long s_val = *s;
3562     // Swap words in a longword on little endian machines.
3563 #ifdef VM_LITTLE_ENDIAN
3564      Unimplemented();
3565 #endif
3566     *d = s_val;
3567     s++;
3568   }
3569 }
3570 
3571 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3572                                         jint len, jlong inv,
3573                                         jint *m_ints) {
3574   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3575   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3576   int longwords = len/2;
3577 
3578   // Make very sure we don't use so much space that the stack might overflow.
3579   // 512 jints corresponds to a 16384-bit integer; the four scratch arrays of
3580   // 256 longwords each use a total of 4 * 256 * 8 = 8k bytes of stack space here.
3581   int total_allocation = longwords * sizeof (unsigned long) * 4;
3582   guarantee(total_allocation <= 8192, "must be");
3583   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3584 
3585   // Local scratch arrays
3586   unsigned long
3587     *a = scratch + 0 * longwords,
3588     *b = scratch + 1 * longwords,
3589     *n = scratch + 2 * longwords,
3590     *m = scratch + 3 * longwords;
3591 
3592   reverse_words((unsigned long *)a_ints, a, longwords);
3593   reverse_words((unsigned long *)b_ints, b, longwords);
3594   reverse_words((unsigned long *)n_ints, n, longwords);
3595 
3596   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3597 
3598   reverse_words(m, (unsigned long *)m_ints, longwords);
3599 }
3600 
3601 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3602                                       jint len, jlong inv,
3603                                       jint *m_ints) {
3604   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3605   assert(len % 2 == 0, "array length in montgomery_square must be even");
3606   int longwords = len/2;
3607 
3608   // Make very sure we don't use so much space that the stack might overflow.
3609   // 512 jints corresponds to a 16384-bit integer; the three scratch arrays of
3610   // 256 longwords each use a total of 3 * 256 * 8 = 6k bytes of stack space here.
3611   int total_allocation = longwords * sizeof (unsigned long) * 3;
3612   guarantee(total_allocation <= 8192, "must be");
3613   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3614 
3615   // Local scratch arrays
3616   unsigned long
3617     *a = scratch + 0 * longwords,
3618     *n = scratch + 1 * longwords,
3619     *m = scratch + 2 * longwords;
3620 
3621   reverse_words((unsigned long *)a_ints, a, longwords);
3622   reverse_words((unsigned long *)n_ints, n, longwords);
3623 
3624   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3625     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3626   } else {
3627     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3628   }
3629 
3630   reverse_words(m, (unsigned long *)m_ints, longwords);
3631 }
3632 
3633 extern "C"
3634 int SpinPause() {
3635   return 0;
3636 }