1 /*
   2  * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "interpreter/interp_masm.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "registerSaver_s390.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "utilities/align.hpp"
  39 #include "vmreg_s390.inline.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_Runtime1.hpp"
  42 #endif
  43 #ifdef COMPILER2
  44 #include "opto/ad.hpp"
  45 #include "opto/runtime.hpp"
  46 #endif
  47 
  48 #ifdef PRODUCT
  49 #define __ masm->
  50 #else
  51 #define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
  52 #endif
  53 
  54 #define BLOCK_COMMENT(str) __ block_comment(str)
  55 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  56 
  57 #define RegisterSaver_LiveIntReg(regname) \
  58   { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
  59 
  60 #define RegisterSaver_LiveFloatReg(regname) \
  61   { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
  62 
// Registers which are not saved/restored, but still get a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  65 #define RegisterSaver_ExcludedIntReg(regname) \
  66   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  67 
// Registers which are not saved/restored, but still get a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  70 #define RegisterSaver_ExcludedFloatReg(regname) \
  71   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
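
// For illustration: each macro above expands to one RegisterSaver::LiveRegType
// initializer, so a table entry such as
//   RegisterSaver_LiveIntReg(Z_R2)
// becomes
//   { RegisterSaver::int_reg, Z_R2->encoding(), Z_R2->as_VMReg() }
// i.e. a (save kind, hardware encoding, VMReg) triple describing one frame slot.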
  72 
  73 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  74   // Live registers which get spilled to the stack. Register positions
  75   // in this array correspond directly to the stack layout.
  76   //
  77   // live float registers:
  78   //
  79   RegisterSaver_LiveFloatReg(Z_F0 ),
  80   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  81   RegisterSaver_LiveFloatReg(Z_F2 ),
  82   RegisterSaver_LiveFloatReg(Z_F3 ),
  83   RegisterSaver_LiveFloatReg(Z_F4 ),
  84   RegisterSaver_LiveFloatReg(Z_F5 ),
  85   RegisterSaver_LiveFloatReg(Z_F6 ),
  86   RegisterSaver_LiveFloatReg(Z_F7 ),
  87   RegisterSaver_LiveFloatReg(Z_F8 ),
  88   RegisterSaver_LiveFloatReg(Z_F9 ),
  89   RegisterSaver_LiveFloatReg(Z_F10),
  90   RegisterSaver_LiveFloatReg(Z_F11),
  91   RegisterSaver_LiveFloatReg(Z_F12),
  92   RegisterSaver_LiveFloatReg(Z_F13),
  93   RegisterSaver_LiveFloatReg(Z_F14),
  94   RegisterSaver_LiveFloatReg(Z_F15),
  95   //
  96   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  97   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  98   RegisterSaver_LiveIntReg(Z_R2 ),
  99   RegisterSaver_LiveIntReg(Z_R3 ),
 100   RegisterSaver_LiveIntReg(Z_R4 ),
 101   RegisterSaver_LiveIntReg(Z_R5 ),
 102   RegisterSaver_LiveIntReg(Z_R6 ),
 103   RegisterSaver_LiveIntReg(Z_R7 ),
 104   RegisterSaver_LiveIntReg(Z_R8 ),
 105   RegisterSaver_LiveIntReg(Z_R9 ),
 106   RegisterSaver_LiveIntReg(Z_R10),
 107   RegisterSaver_LiveIntReg(Z_R11),
 108   RegisterSaver_LiveIntReg(Z_R12),
 109   RegisterSaver_LiveIntReg(Z_R13),
 110   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 111   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 112 };
 113 
 114 static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
 115   // Live registers which get spilled to the stack. Register positions
 116   // in this array correspond directly to the stack layout.
 117   //
  // live float registers: all excluded, but they still get a stack slot to keep the frame size the same.
 119   //
 120   RegisterSaver_ExcludedFloatReg(Z_F0 ),
 121   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 122   RegisterSaver_ExcludedFloatReg(Z_F2 ),
 123   RegisterSaver_ExcludedFloatReg(Z_F3 ),
 124   RegisterSaver_ExcludedFloatReg(Z_F4 ),
 125   RegisterSaver_ExcludedFloatReg(Z_F5 ),
 126   RegisterSaver_ExcludedFloatReg(Z_F6 ),
 127   RegisterSaver_ExcludedFloatReg(Z_F7 ),
 128   RegisterSaver_ExcludedFloatReg(Z_F8 ),
 129   RegisterSaver_ExcludedFloatReg(Z_F9 ),
 130   RegisterSaver_ExcludedFloatReg(Z_F10),
 131   RegisterSaver_ExcludedFloatReg(Z_F11),
 132   RegisterSaver_ExcludedFloatReg(Z_F12),
 133   RegisterSaver_ExcludedFloatReg(Z_F13),
 134   RegisterSaver_ExcludedFloatReg(Z_F14),
 135   RegisterSaver_ExcludedFloatReg(Z_F15),
 136   //
 137   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 138   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 139   RegisterSaver_LiveIntReg(Z_R2 ),
 140   RegisterSaver_LiveIntReg(Z_R3 ),
 141   RegisterSaver_LiveIntReg(Z_R4 ),
 142   RegisterSaver_LiveIntReg(Z_R5 ),
 143   RegisterSaver_LiveIntReg(Z_R6 ),
 144   RegisterSaver_LiveIntReg(Z_R7 ),
 145   RegisterSaver_LiveIntReg(Z_R8 ),
 146   RegisterSaver_LiveIntReg(Z_R9 ),
 147   RegisterSaver_LiveIntReg(Z_R10),
 148   RegisterSaver_LiveIntReg(Z_R11),
 149   RegisterSaver_LiveIntReg(Z_R12),
 150   RegisterSaver_LiveIntReg(Z_R13),
 151   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 152   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 153 };
 154 
 155 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
 156   // Live registers which get spilled to the stack. Register positions
 157   // in this array correspond directly to the stack layout.
 158   //
 159   // live float registers:
 160   //
 161   RegisterSaver_LiveFloatReg(Z_F0 ),
 162   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 163   RegisterSaver_LiveFloatReg(Z_F2 ),
 164   RegisterSaver_LiveFloatReg(Z_F3 ),
 165   RegisterSaver_LiveFloatReg(Z_F4 ),
 166   RegisterSaver_LiveFloatReg(Z_F5 ),
 167   RegisterSaver_LiveFloatReg(Z_F6 ),
 168   RegisterSaver_LiveFloatReg(Z_F7 ),
 169   RegisterSaver_LiveFloatReg(Z_F8 ),
 170   RegisterSaver_LiveFloatReg(Z_F9 ),
 171   RegisterSaver_LiveFloatReg(Z_F10),
 172   RegisterSaver_LiveFloatReg(Z_F11),
 173   RegisterSaver_LiveFloatReg(Z_F12),
 174   RegisterSaver_LiveFloatReg(Z_F13),
 175   RegisterSaver_LiveFloatReg(Z_F14),
 176   RegisterSaver_LiveFloatReg(Z_F15),
 177   //
 178   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 179   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 180   RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
 181   RegisterSaver_LiveIntReg(Z_R3 ),
 182   RegisterSaver_LiveIntReg(Z_R4 ),
 183   RegisterSaver_LiveIntReg(Z_R5 ),
 184   RegisterSaver_LiveIntReg(Z_R6 ),
 185   RegisterSaver_LiveIntReg(Z_R7 ),
 186   RegisterSaver_LiveIntReg(Z_R8 ),
 187   RegisterSaver_LiveIntReg(Z_R9 ),
 188   RegisterSaver_LiveIntReg(Z_R10),
 189   RegisterSaver_LiveIntReg(Z_R11),
 190   RegisterSaver_LiveIntReg(Z_R12),
 191   RegisterSaver_LiveIntReg(Z_R13),
 192   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 193   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 194 };
 195 
 196 // Live argument registers which get spilled to the stack.
 197 static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
 198   RegisterSaver_LiveFloatReg(Z_FARG1),
 199   RegisterSaver_LiveFloatReg(Z_FARG2),
 200   RegisterSaver_LiveFloatReg(Z_FARG3),
 201   RegisterSaver_LiveFloatReg(Z_FARG4),
 202   RegisterSaver_LiveIntReg(Z_ARG1),
 203   RegisterSaver_LiveIntReg(Z_ARG2),
 204   RegisterSaver_LiveIntReg(Z_ARG3),
 205   RegisterSaver_LiveIntReg(Z_ARG4),
 206   RegisterSaver_LiveIntReg(Z_ARG5)
 207 };
 208 
 209 static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
 210   // Live registers which get spilled to the stack. Register positions
 211   // in this array correspond directly to the stack layout.
 212   //
 213   // live float registers:
 214   //
 215   RegisterSaver_LiveFloatReg(Z_F0 ),
 216   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 217   RegisterSaver_LiveFloatReg(Z_F2 ),
 218   RegisterSaver_LiveFloatReg(Z_F3 ),
 219   RegisterSaver_LiveFloatReg(Z_F4 ),
 220   RegisterSaver_LiveFloatReg(Z_F5 ),
 221   RegisterSaver_LiveFloatReg(Z_F6 ),
 222   RegisterSaver_LiveFloatReg(Z_F7 ),
 223   // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
 224   // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
 225   // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
 226   // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
 227   // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
 228   // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
 229   // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
 230   // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
 231   //
 232   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 233   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 234   RegisterSaver_LiveIntReg(Z_R2 ),
 235   RegisterSaver_LiveIntReg(Z_R3 ),
 236   RegisterSaver_LiveIntReg(Z_R4 ),
 237   RegisterSaver_LiveIntReg(Z_R5 ),
 238   // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
 239   // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
 240   // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
 241   // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
 242   // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
 243   // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
 244   // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
 245   // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
 246   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 247   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 248 };
 249 
 250 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
 251   int reg_space = -1;
 252   switch (reg_set) {
 253     case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
 254     case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
 255     case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
 256     case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
 257     case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
 258     default: ShouldNotReachHere();
 259   }
 260   return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
 261 }
 262 
 263 
 264 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
 265   return live_reg_save_size(reg_set) + frame::z_abi_160_size;
 266 }
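
// Worked example (a sketch, assuming reg_size == 8 bytes and frame::z_abi_160_size == 160 bytes):
// RegisterSaver_LiveRegs holds 15 float + 12 integer entries = 27 slots, so
//   live_reg_save_size(all_registers)  == 27 * 8   == 216 bytes
//   live_reg_frame_size(all_registers) == 216 + 160 == 376 bytes.
// RegisterSaver_LiveIntRegs and RegisterSaver_LiveRegsWithoutR2 keep the same 27 entries
// (partly excluded), which is what gives them the same frame size.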
 267 
 268 
 269 // return_pc: Specify the register that should be stored as the return pc in the current frame.
 270 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
 271   // Record volatile registers as callee-save values in an OopMap so
 272   // their save locations will be propagated to the caller frame's
 273   // RegisterMap during StackFrameStream construction (needed for
 274   // deoptimization; see compiledVFrame::create_stack_value).
 275 
 276   // Calculate frame size.
 277   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 278   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 279   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 280 
 281   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 282   OopMap* map = new OopMap(frame_size_in_slots, 0);
 283 
 284   int regstosave_num = 0;
 285   const RegisterSaver::LiveRegType* live_regs = NULL;
 286 
 287   switch (reg_set) {
 288     case all_registers:
 289       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 290       live_regs      = RegisterSaver_LiveRegs;
 291       break;
 292     case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 294       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 295       break;
 296     case all_integer_registers:
 297       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 298       live_regs      = RegisterSaver_LiveIntRegs;
 299       break;
 300     case all_volatile_registers:
 301       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 302       live_regs      = RegisterSaver_LiveVolatileRegs;
 303       break;
 304     case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 306       live_regs      = RegisterSaver_LiveArgRegs;
 307       break;
 308     default: ShouldNotReachHere();
 309   }
 310 
 311   // Save return pc in old frame.
 312   __ save_return_pc(return_pc);
 313 
 314   // Push a new frame (includes stack linkage).
 315   // use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
 316   // illegally used to pass parameters (SAPJVM extension) by RangeCheckStub::emit_code().
 317   __ push_frame(frame_size_in_bytes, return_pc);
 318   // We have to restore return_pc right away.
 319   // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
 320   // Nobody else knows which register we saved.
 321   __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);
 322 
 323   // Register save area in new frame starts above z_abi_160 area.
 324   int offset = register_save_offset;
 325 
 326   Register first = noreg;
 327   Register last  = noreg;
 328   int      first_offset = -1;
 329   bool     float_spilled = false;
 330 
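  // The loop below batches runs of consecutive integer registers so they can be stored
  // with a single STMG (store multiple) instead of one store per register. For
  // RegisterSaver_LiveRegs (Z_R2..Z_R13 with no gaps) this degenerates to a single
  //   z_stmg(Z_R2, Z_R13, first_offset, Z_SP)
  // emitted after the loop, while float registers are stored individually via STD.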
 331   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 332     int reg_num  = live_regs[i].reg_num;
 333     int reg_type = live_regs[i].reg_type;
 334 
 335     switch (reg_type) {
 336       case RegisterSaver::int_reg: {
 337         Register reg = as_Register(reg_num);
 338         if (last != reg->predecessor()) {
 339           if (first != noreg) {
 340             __ z_stmg(first, last, first_offset, Z_SP);
 341           }
 342           first = reg;
 343           first_offset = offset;
 344           DEBUG_ONLY(float_spilled = false);
 345         }
 346         last = reg;
 347         assert(last != Z_R0, "r0 would require special treatment");
 348         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 349         break;
 350       }
 351 
 352       case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
 353         continue; // Continue with next loop iteration.
 354 
 355       case RegisterSaver::float_reg: {
 356         FloatRegister freg = as_FloatRegister(reg_num);
 357         __ z_std(freg, offset, Z_SP);
 358         DEBUG_ONLY(float_spilled = true);
 359         break;
 360       }
 361 
 362       default:
 363         ShouldNotReachHere();
 364         break;
 365     }
 366 
    // The second set_callee_saved call is redundant, but we keep things as they were for now.
 368     map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
 369     map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
 370   }
 371   assert(first != noreg, "Should spill at least one int reg.");
 372   __ z_stmg(first, last, first_offset, Z_SP);
 373 
 374   // And we're done.
 375   return map;
 376 }
 377 
 378 
// Generate the OopMap (again, regs were saved before).
 380 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
 381   // Calculate frame size.
 382   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 383   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 384   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 385 
 386   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 387   OopMap* map = new OopMap(frame_size_in_slots, 0);
 388 
 389   int regstosave_num = 0;
 390   const RegisterSaver::LiveRegType* live_regs = NULL;
 391 
 392   switch (reg_set) {
 393     case all_registers:
 394       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 395       live_regs      = RegisterSaver_LiveRegs;
 396       break;
 397     case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 399       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 400       break;
 401     case all_integer_registers:
 402       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 403       live_regs      = RegisterSaver_LiveIntRegs;
 404       break;
 405     case all_volatile_registers:
 406       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 407       live_regs      = RegisterSaver_LiveVolatileRegs;
 408       break;
 409     case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 411       live_regs      = RegisterSaver_LiveArgRegs;
 412       break;
 413     default: ShouldNotReachHere();
 414   }
 415 
 416   // Register save area in new frame starts above z_abi_160 area.
 417   int offset = register_save_offset;
 418   for (int i = 0; i < regstosave_num; i++) {
 419     if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
 420       map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
 421       map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
 422     }
 423     offset += reg_size;
 424   }
 425   return map;
 426 }
 427 
 428 
 429 // Pop the current frame and restore all the registers that we saved.
 430 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
 431   int offset;
 432   const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
 433 
 434   Register first = noreg;
 435   Register last = noreg;
 436   int      first_offset = -1;
 437   bool     float_spilled = false;
 438 
 439   int regstosave_num = 0;
 440   const RegisterSaver::LiveRegType* live_regs = NULL;
 441 
 442   switch (reg_set) {
 443     case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 445       live_regs      = RegisterSaver_LiveRegs;
 446       break;
 447     case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 449       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 450       break;
 451     case all_integer_registers:
 452       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 453       live_regs      = RegisterSaver_LiveIntRegs;
 454       break;
 455     case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 457       live_regs      = RegisterSaver_LiveVolatileRegs;
 458       break;
 459     case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 461       live_regs      = RegisterSaver_LiveArgRegs;
 462       break;
 463     default: ShouldNotReachHere();
 464   }
 465 
 466   // Restore all registers (ints and floats).
 467 
 468   // Register save area in new frame starts above z_abi_160 area.
 469   offset = register_save_offset;
 470 
 471   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 472     int reg_num  = live_regs[i].reg_num;
 473     int reg_type = live_regs[i].reg_type;
 474 
 475     switch (reg_type) {
 476       case RegisterSaver::excluded_reg:
 477         continue; // Continue with next loop iteration.
 478 
 479       case RegisterSaver::int_reg: {
 480         Register reg = as_Register(reg_num);
 481         if (last != reg->predecessor()) {
 482           if (first != noreg) {
 483             __ z_lmg(first, last, first_offset, Z_SP);
 484           }
 485           first = reg;
 486           first_offset = offset;
 487           DEBUG_ONLY(float_spilled = false);
 488         }
 489         last = reg;
 490         assert(last != Z_R0, "r0 would require special treatment");
 491         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 492         break;
 493       }
 494 
 495       case RegisterSaver::float_reg: {
 496         FloatRegister freg = as_FloatRegister(reg_num);
 497         __ z_ld(freg, offset, Z_SP);
 498         DEBUG_ONLY(float_spilled = true);
 499         break;
 500       }
 501 
 502       default:
 503         ShouldNotReachHere();
 504     }
 505   }
 506   assert(first != noreg, "Should spill at least one int reg.");
 507   __ z_lmg(first, last, first_offset, Z_SP);
 508 
 509   // Pop the frame.
 510   __ pop_frame();
 511 
  // Restore the return pc.
 513   __ restore_return_pc();
 514 }
 515 
 516 
 517 // Pop the current frame and restore the registers that might be holding a result.
 518 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  int offset;
 521   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 522                                    sizeof(RegisterSaver::LiveRegType);
 523   const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
 524 
 525   // Restore all result registers (ints and floats).
 526   offset = register_save_offset;
 527   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 528     int reg_num = RegisterSaver_LiveRegs[i].reg_num;
 529     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 530     switch (reg_type) {
 531       case RegisterSaver::excluded_reg:
 532         continue; // Continue with next loop iteration.
 533       case RegisterSaver::int_reg: {
 534         if (as_Register(reg_num) == Z_RET) { // int result_reg
 535           __ z_lg(as_Register(reg_num), offset, Z_SP);
 536         }
 537         break;
 538       }
 539       case RegisterSaver::float_reg: {
 540         if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
 541           __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
 542         }
 543         break;
 544       }
 545       default:
 546         ShouldNotReachHere();
 547     }
 548   }
 549 }
 550 
 551 size_t SharedRuntime::trampoline_size() {
 552   return MacroAssembler::load_const_size() + 2;
 553 }
 554 
 555 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 556   // Think about using pc-relative branch.
 557   __ load_const(Z_R1_scratch, destination);
 558   __ z_br(Z_R1_scratch);
 559 }
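
// Note: the trampoline above is a fixed-size sequence: a 64-bit constant load of the
// destination into Z_R1_scratch followed by a 2-byte BR, which is why trampoline_size()
// reports MacroAssembler::load_const_size() + 2.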
 560 
 561 // ---------------------------------------------------------------------------
 562 void SharedRuntime::save_native_result(MacroAssembler * masm,
 563                                        BasicType ret_type,
 564                                        int frame_slots) {
 565   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 566 
 567   switch (ret_type) {
 568     case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
 569     case T_BYTE:
 570     case T_CHAR:
 571     case T_SHORT:
 572     case T_INT:
 573       __ reg2mem_opt(Z_RET, memaddr, false);
 574       break;
 575     case T_OBJECT:   // Save pointer types as long.
 576     case T_ARRAY:
 577     case T_ADDRESS:
 578     case T_VOID:
 579     case T_LONG:
 580       __ reg2mem_opt(Z_RET, memaddr);
 581       break;
 582     case T_FLOAT:
 583       __ freg2mem_opt(Z_FRET, memaddr, false);
 584       break;
 585     case T_DOUBLE:
 586       __ freg2mem_opt(Z_FRET, memaddr);
 587       break;
 588   }
 589 }
 590 
 591 void SharedRuntime::restore_native_result(MacroAssembler *masm,
 592                                           BasicType       ret_type,
 593                                           int             frame_slots) {
 594   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 595 
 596   switch (ret_type) {
 597     case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
 598     case T_BYTE:
 599     case T_CHAR:
 600     case T_SHORT:
 601     case T_INT:
 602       __ mem2reg_opt(Z_RET, memaddr, false);
 603       break;
 604     case T_OBJECT:   // Restore pointer types as long.
 605     case T_ARRAY:
 606     case T_ADDRESS:
 607     case T_VOID:
 608     case T_LONG:
 609       __ mem2reg_opt(Z_RET, memaddr);
 610       break;
 611     case T_FLOAT:
 612       __ mem2freg_opt(Z_FRET, memaddr, false);
 613       break;
 614     case T_DOUBLE:
 615       __ mem2freg_opt(Z_FRET, memaddr);
 616       break;
 617   }
 618 }
 619 
 620 // ---------------------------------------------------------------------------
 621 // Read the array of BasicTypes from a signature, and compute where the
 622 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 623 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 624 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 625 // as framesizes are fixed.
 626 // VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
 628 // up to RegisterImpl::number_of_registers are the 64-bit integer registers.
 629 
 630 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 631 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 632 // units regardless of build.
 633 
 634 // The Java calling convention is a "shifted" version of the C ABI.
 635 // By skipping the first C ABI register we can call non-static jni methods
 636 // with small numbers of arguments without having to shuffle the arguments
 637 // at all. Since we control the java ABI we ought to at least get some
 638 // advantage out of it.
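//
// Illustrative example: for a signature flattened to
//   sig_bt = { T_INT, T_LONG, T_VOID, T_OBJECT, T_FLOAT, T_DOUBLE, T_VOID }
// the loop below assigns
//   T_INT -> Z_R2, T_LONG -> Z_R3, T_OBJECT -> Z_R4, T_FLOAT -> Z_F0, T_DOUBLE -> Z_F2,
// and only after Z_R2..Z_R6 resp. Z_F0/Z_F2/Z_F4/Z_F6 are exhausted do arguments spill
// into 32-bit stack slots (2-slot aligned for longs, doubles, and oops).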
 639 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 640                                            VMRegPair *regs,
 641                                            int total_args_passed,
 642                                            int is_outgoing) {
 643   // c2c calling conventions for compiled-compiled calls.
 644 
 645   // An int/float occupies 1 slot here.
  const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats.
 647   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 648 
 649   const VMReg z_iarg_reg[5] = {
 650     Z_R2->as_VMReg(),
 651     Z_R3->as_VMReg(),
 652     Z_R4->as_VMReg(),
 653     Z_R5->as_VMReg(),
 654     Z_R6->as_VMReg()
 655   };
 656   const VMReg z_farg_reg[4] = {
 657     Z_F0->as_VMReg(),
 658     Z_F2->as_VMReg(),
 659     Z_F4->as_VMReg(),
 660     Z_F6->as_VMReg()
 661   };
 662   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 663   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 664 
 665   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 666   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 667 
 669   int stk = 0;
 670   int ireg = 0;
 671   int freg = 0;
 672 
 673   for (int i = 0; i < total_args_passed; ++i) {
 674     switch (sig_bt[i]) {
 675       case T_BOOLEAN:
 676       case T_CHAR:
 677       case T_BYTE:
 678       case T_SHORT:
 679       case T_INT:
 680         if (ireg < z_num_iarg_registers) {
 681           // Put int/ptr in register.
 682           regs[i].set1(z_iarg_reg[ireg]);
 683           ++ireg;
 684         } else {
 685           // Put int/ptr on stack.
 686           regs[i].set1(VMRegImpl::stack2reg(stk));
 687           stk += inc_stk_for_intfloat;
 688         }
 689         break;
 690       case T_LONG:
 691         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 692         if (ireg < z_num_iarg_registers) {
 693           // Put long in register.
 694           regs[i].set2(z_iarg_reg[ireg]);
 695           ++ireg;
 696         } else {
 697           // Put long on stack and align to 2 slots.
 698           if (stk & 0x1) { ++stk; }
 699           regs[i].set2(VMRegImpl::stack2reg(stk));
 700           stk += inc_stk_for_longdouble;
 701         }
 702         break;
 703       case T_OBJECT:
 704       case T_ARRAY:
 705       case T_ADDRESS:
 706         if (ireg < z_num_iarg_registers) {
 707           // Put ptr in register.
 708           regs[i].set2(z_iarg_reg[ireg]);
 709           ++ireg;
 710         } else {
 711           // Put ptr on stack and align to 2 slots, because
 712           // "64-bit pointers record oop-ishness on 2 aligned adjacent
 713           // registers." (see OopFlow::build_oop_map).
 714           if (stk & 0x1) { ++stk; }
 715           regs[i].set2(VMRegImpl::stack2reg(stk));
 716           stk += inc_stk_for_longdouble;
 717         }
 718         break;
 719       case T_FLOAT:
 720         if (freg < z_num_farg_registers) {
 721           // Put float in register.
 722           regs[i].set1(z_farg_reg[freg]);
 723           ++freg;
 724         } else {
 725           // Put float on stack.
 726           regs[i].set1(VMRegImpl::stack2reg(stk));
 727           stk += inc_stk_for_intfloat;
 728         }
 729         break;
 730       case T_DOUBLE:
 731         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 732         if (freg < z_num_farg_registers) {
 733           // Put double in register.
 734           regs[i].set2(z_farg_reg[freg]);
 735           ++freg;
 736         } else {
 737           // Put double on stack and align to 2 slots.
 738           if (stk & 0x1) { ++stk; }
 739           regs[i].set2(VMRegImpl::stack2reg(stk));
 740           stk += inc_stk_for_longdouble;
 741         }
 742         break;
 743       case T_VOID:
 744         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 745         // Do not count halves.
 746         regs[i].set_bad();
 747         break;
 748       default:
 749         ShouldNotReachHere();
 750     }
 751   }
 752   return align_up(stk, 2);
 753 }
 754 
 755 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 756                                         VMRegPair *regs,
 757                                         VMRegPair *regs2,
 758                                         int total_args_passed) {
 759   assert(regs2 == NULL, "second VMRegPair array not used on this platform");
 760 
 761   // Calling conventions for C runtime calls and calls to JNI native methods.
 762   const VMReg z_iarg_reg[5] = {
 763     Z_R2->as_VMReg(),
 764     Z_R3->as_VMReg(),
 765     Z_R4->as_VMReg(),
 766     Z_R5->as_VMReg(),
 767     Z_R6->as_VMReg()
 768   };
 769   const VMReg z_farg_reg[4] = {
 770     Z_F0->as_VMReg(),
 771     Z_F2->as_VMReg(),
 772     Z_F4->as_VMReg(),
 773     Z_F6->as_VMReg()
 774   };
 775   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 776   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 777 
 778   // Check calling conventions consistency.
 779   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 780   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 781 
 782   // Avoid passing C arguments in the wrong stack slots.
 783 
 784   // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
 785   // 2 such slots, like 64 bit values do.
 786   const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
 787   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 788 
 790   // Leave room for C-compatible ABI
 791   int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
 792   int freg = 0;
 793   int ireg = 0;
 794 
 795   // We put the first 5 arguments into registers and the rest on the
 796   // stack. Float arguments are already in their argument registers
 797   // due to c2c calling conventions (see calling_convention).
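  // Example (sketch): for a hypothetical native entry like  f(JNIEnv*, jobject, jint, jlong, jfloat)
  // this assigns  T_ADDRESS -> Z_R2, T_OBJECT -> Z_R3, T_INT -> Z_R4, T_LONG -> Z_R5,
  // T_FLOAT -> Z_F0; a fifth integer-like argument would still get Z_R6, and anything
  // beyond that lands in 8-byte stack slots starting at the biased 'stk' index computed above.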
 798   for (int i = 0; i < total_args_passed; ++i) {
 799     switch (sig_bt[i]) {
 800       case T_BOOLEAN:
 801       case T_CHAR:
 802       case T_BYTE:
 803       case T_SHORT:
 804       case T_INT:
 805         // Fall through, handle as long.
 806       case T_LONG:
 807       case T_OBJECT:
 808       case T_ARRAY:
 809       case T_ADDRESS:
 810       case T_METADATA:
 811         // Oops are already boxed if required (JNI).
 812         if (ireg < z_num_iarg_registers) {
 813           regs[i].set2(z_iarg_reg[ireg]);
 814           ++ireg;
 815         } else {
 816           regs[i].set2(VMRegImpl::stack2reg(stk));
 817           stk += inc_stk_for_longdouble;
 818         }
 819         break;
 820       case T_FLOAT:
 821         if (freg < z_num_farg_registers) {
 822           regs[i].set1(z_farg_reg[freg]);
 823           ++freg;
 824         } else {
 825           regs[i].set1(VMRegImpl::stack2reg(stk+1));
          stk += inc_stk_for_intfloat;
 827         }
 828         break;
 829       case T_DOUBLE:
 830         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 831         if (freg < z_num_farg_registers) {
 832           regs[i].set2(z_farg_reg[freg]);
 833           ++freg;
 834         } else {
 835           // Put double on stack.
 836           regs[i].set2(VMRegImpl::stack2reg(stk));
 837           stk += inc_stk_for_longdouble;
 838         }
 839         break;
 840       case T_VOID:
 841         // Do not count halves.
 842         regs[i].set_bad();
 843         break;
 844       default:
 845         ShouldNotReachHere();
 846     }
 847   }
 848   return align_up(stk, 2);
 849 }
 850 
 851 ////////////////////////////////////////////////////////////////////////
 852 //
 853 //  Argument shufflers
 854 //
 855 ////////////////////////////////////////////////////////////////////////
 856 
 857 //----------------------------------------------------------------------
 858 // The java_calling_convention describes stack locations as ideal slots on
 859 // a frame with no abi restrictions. Since we must observe abi restrictions
 860 // (like the placement of the register window) the slots must be biased by
 861 // the following value.
 862 //----------------------------------------------------------------------
 863 static int reg2slot(VMReg r) {
 864   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 865 }
 866 
 867 static int reg2offset(VMReg r) {
 868   return reg2slot(r) * VMRegImpl::stack_slot_size;
 869 }
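
// Worked example (with a hypothetical bias): if out_preserve_stack_slots() were 16
// 4-byte slots, an argument placed in ideal slot 3 by the Java calling convention
// would map to reg2slot() == 3 + 16 == 19 and reg2offset() == 19 * 4 == 76 bytes
// above the stack pointer of the frame being addressed.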
 870 
 871 static void verify_oop_args(MacroAssembler *masm,
 872                             int total_args_passed,
 873                             const BasicType *sig_bt,
 874                             const VMRegPair *regs) {
 875   if (!VerifyOops) { return; }
 876 
 877   for (int i = 0; i < total_args_passed; i++) {
 878     if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
 879       VMReg r = regs[i].first();
 880       assert(r->is_valid(), "bad oop arg");
 881 
 882       if (r->is_stack()) {
 883         __ z_lg(Z_R0_scratch,
 884                 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
 885         __ verify_oop(Z_R0_scratch);
 886       } else {
 887         __ verify_oop(r->as_Register());
 888       }
 889     }
 890   }
 891 }
 892 
 893 static void gen_special_dispatch(MacroAssembler *masm,
 894                                  int total_args_passed,
 895                                  vmIntrinsics::ID special_dispatch,
 896                                  const BasicType *sig_bt,
 897                                  const VMRegPair *regs) {
 898   verify_oop_args(masm, total_args_passed, sig_bt, regs);
 899 
 900   // Now write the args into the outgoing interpreter space.
 901   bool     has_receiver   = false;
 902   Register receiver_reg   = noreg;
 903   int      member_arg_pos = -1;
 904   Register member_reg     = noreg;
 905   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
 906 
 907   if (ref_kind != 0) {
 908     member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
 909     member_reg = Z_R9;                       // Known to be free at this point.
 910     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 911   } else {
 912     guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
 913     has_receiver = true;
 914   }
 915 
 916   if (member_reg != noreg) {
 917     // Load the member_arg into register, if necessary.
 918     assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
 919     assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
 920 
 921     VMReg r = regs[member_arg_pos].first();
 922     assert(r->is_valid(), "bad member arg");
 923 
 924     if (r->is_stack()) {
 925       __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
 926     } else {
 927       // No data motion is needed.
 928       member_reg = r->as_Register();
 929     }
 930   }
 931 
 932   if (has_receiver) {
 933     // Make sure the receiver is loaded into a register.
 934     assert(total_args_passed > 0, "oob");
 935     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 936 
 937     VMReg r = regs[0].first();
 938     assert(r->is_valid(), "bad receiver arg");
 939 
 940     if (r->is_stack()) {
 941       // Porting note: This assumes that compiled calling conventions always
 942       // pass the receiver oop in a register. If this is not true on some
 943       // platform, pick a temp and load the receiver from stack.
 944       assert(false, "receiver always in a register");
 945       receiver_reg = Z_R13;  // Known to be free at this point.
 946       __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
 947     } else {
 948       // No data motion is needed.
 949       receiver_reg = r->as_Register();
 950     }
 951   }
 952 
 953   // Figure out which address we are really jumping to:
 954   MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
 955                                                  receiver_reg, member_reg,
 956                                                  /*for_compiler_entry:*/ true);
 957 }
 958 
 964 
// Is the size of a vector (in bytes) bigger than the size saved by default?
// 8-byte registers are saved by default on z/Architecture.
 967 bool SharedRuntime::is_wide_vector(int size) {
 968   // Note, MaxVectorSize == 8 on this platform.
 969   assert(size <= 8, "%d bytes vectors are not supported", size);
 970   return size > 8;
 971 }
 972 
 973 //----------------------------------------------------------------------
 974 // An oop arg. Must pass a handle not the oop itself
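//
// In both branches below, the "handle" passed to native code is just the address of a
// stack slot containing the oop (or a NULL handle when the oop itself is NULL): oops
// already on the caller's stack reuse their existing slot, while oops arriving in
// registers are first spilled into the oop_handle area of the current frame; in either
// case the slot holding the oop is recorded in the OopMap.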
 975 //----------------------------------------------------------------------
 976 static void object_move(MacroAssembler *masm,
 977                         OopMap *map,
 978                         int oop_handle_offset,
 979                         int framesize_in_slots,
 980                         VMRegPair src,
 981                         VMRegPair dst,
 982                         bool is_receiver,
 983                         int *receiver_offset) {
 984   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
 985 
 986   assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
 987 
 988   // Must pass a handle. First figure out the location we use as a handle.
 989 
 990   if (src.first()->is_stack()) {
 991     // Oop is already on the stack, put handle on stack or in register
 992     // If handle will be on the stack, use temp reg to calculate it.
 993     Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
 994     Label    skip;
 995     int      slot_in_older_frame = reg2slot(src.first());
 996 
 997     guarantee(!is_receiver, "expecting receiver in register");
 998     map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));
 999 
1000     __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
1001     __ load_and_test_long(Z_R0, Address(rHandle));
1002     __ z_brne(skip);
1003     // Use a NULL handle if oop is NULL.
1004     __ clear_reg(rHandle, true, false);
1005     __ bind(skip);
1006 
1007     // Copy handle to the right place (register or stack).
1008     if (dst.first()->is_stack()) {
1009       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1010     } // else
1011       // nothing to do. rHandle uses the correct register
1012   } else {
1013     // Oop is passed in an input register. We must flush it to the stack.
1014     const Register rOop = src.first()->as_Register();
1015     const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1016     int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1017     int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
1018     NearLabel skip;
1019 
1020     if (is_receiver) {
1021       *receiver_offset = oop_slot_offset;
1022     }
1023     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1024 
1025     // Flush Oop to stack, calculate handle.
1026     __ z_stg(rOop, oop_slot_offset, Z_SP);
1027     __ add2reg(rHandle, oop_slot_offset, Z_SP);
1028 
1029     // If Oop == NULL, use a NULL handle.
1030     __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
1031     __ clear_reg(rHandle, true, false);
1032     __ bind(skip);
1033 
1034     // Copy handle to the right place (register or stack).
1035     if (dst.first()->is_stack()) {
1036       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1037     } // else
1038       // nothing to do here, since rHandle = dst.first()->as_Register in this case.
1039   }
1040 }
1041 
1042 //----------------------------------------------------------------------
1043 // A float arg. May have to do float reg to int reg conversion
1044 //----------------------------------------------------------------------
1045 static void float_move(MacroAssembler *masm,
1046                        VMRegPair src,
1047                        VMRegPair dst,
1048                        int framesize_in_slots,
1049                        int workspace_slot_offset) {
1050   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1051   int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1052 
1053   // We do not accept an argument in a VMRegPair to be spread over two slots,
1054   // no matter what physical location (reg or stack) the slots may have.
1055   // We just check for the unaccepted slot to be invalid.
1056   assert(!src.second()->is_valid(), "float in arg spread over two slots");
1057   assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1058 
1059   if (src.first()->is_stack()) {
1060     if (dst.first()->is_stack()) {
1061       // stack -> stack. The easiest of the bunch.
1062       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1063                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1064     } else {
1065       // stack to reg
1066       Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1067       if (dst.first()->is_Register()) {
1068         __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1069       } else {
1070         __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1071       }
1072     }
1073   } else if (src.first()->is_Register()) {
1074     if (dst.first()->is_stack()) {
1075       // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())), false);
1078     } else {
1079       if (dst.first()->is_Register()) {
1080         // gpr -> gpr
1081         __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1082                               src.first()->as_Register(), T_INT);
1083       } else {
1084         if (VM_Version::has_FPSupportEnhancements()) {
1085           // gpr -> fpr. Exploit z10 capability of direct transfer.
1086           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1087         } else {
1088           // gpr -> fpr. Use work space on stack to transfer data.
1089           Address   stackaddr(Z_SP, workspace_offset);
1090 
1091           __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1092           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1093         }
1094       }
1095     }
1096   } else {
1097     if (dst.first()->is_stack()) {
1098       // fpr -> stack
1099       __ freg2mem_opt(src.first()->as_FloatRegister(),
1100                       Address(Z_SP, reg2offset(dst.first())), false);
1101     } else {
1102       if (dst.first()->is_Register()) {
1103         if (VM_Version::has_FPSupportEnhancements()) {
1104           // fpr -> gpr.
1105           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1106         } else {
1107           // fpr -> gpr. Use work space on stack to transfer data.
1108           Address   stackaddr(Z_SP, workspace_offset);
1109 
1110           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1111           __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1112         }
1113       } else {
1114         // fpr -> fpr
1115         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1116                                src.first()->as_FloatRegister(), T_FLOAT);
1117       }
1118     }
1119   }
1120 }
1121 
1122 //----------------------------------------------------------------------
1123 // A double arg. May have to do double reg to long reg conversion
1124 //----------------------------------------------------------------------
1125 static void double_move(MacroAssembler *masm,
1126                         VMRegPair src,
1127                         VMRegPair dst,
1128                         int framesize_in_slots,
1129                         int workspace_slot_offset) {
1130   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1131   int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1132 
1133   // Since src is always a java calling convention we know that the
1134   // src pair is always either all registers or all stack (and aligned?)
1135 
1136   if (src.first()->is_stack()) {
1137     if (dst.first()->is_stack()) {
1138       // stack -> stack. The easiest of the bunch.
1139       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1140                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1141     } else {
1142       // stack to reg
1143       Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1144 
1145       if (dst.first()->is_Register()) {
1146         __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1147       } else {
1148         __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1149       }
1150     }
1151   } else if (src.first()->is_Register()) {
1152     if (dst.first()->is_stack()) {
1153       // gpr -> stack
1154       __ reg2mem_opt(src.first()->as_Register(),
1155                      Address(Z_SP, reg2offset(dst.first())));
1156     } else {
1157       if (dst.first()->is_Register()) {
1158         // gpr -> gpr
1159         __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1160                               src.first()->as_Register(), T_LONG);
1161       } else {
1162         if (VM_Version::has_FPSupportEnhancements()) {
1163           // gpr -> fpr. Exploit z10 capability of direct transfer.
1164           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1165         } else {
1166           // gpr -> fpr. Use work space on stack to transfer data.
1167           Address stackaddr(Z_SP, workspace_offset);
1168           __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1169           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1170         }
1171       }
1172     }
1173   } else {
1174     if (dst.first()->is_stack()) {
1175       // fpr -> stack
1176       __ freg2mem_opt(src.first()->as_FloatRegister(),
1177                       Address(Z_SP, reg2offset(dst.first())));
1178     } else {
1179       if (dst.first()->is_Register()) {
1180         if (VM_Version::has_FPSupportEnhancements()) {
1181           // fpr -> gpr. Exploit z10 capability of direct transfer.
1182           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1183         } else {
1184           // fpr -> gpr. Use work space on stack to transfer data.
1185           Address stackaddr(Z_SP, workspace_offset);
1186 
1187           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1188           __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1189         }
1190       } else {
1191         // fpr -> fpr
1192         // In theory these overlap but the ordering is such that this is likely a nop.
1193         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1194                                src.first()->as_FloatRegister(), T_DOUBLE);
1195       }
1196     }
1197   }
1198 }
1199 
1200 //----------------------------------------------------------------------
1201 // A long arg.
1202 //----------------------------------------------------------------------
1203 static void long_move(MacroAssembler *masm,
1204                       VMRegPair src,
1205                       VMRegPair dst,
1206                       int framesize_in_slots) {
1207   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1208 
1209   if (src.first()->is_stack()) {
1210     if (dst.first()->is_stack()) {
1211       // stack -> stack. The easiest of the bunch.
1212       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1213                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1214     } else {
1215       // stack to reg
1216       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1217       __ mem2reg_opt(dst.first()->as_Register(),
1218                       Address(Z_SP, reg2offset(src.first()) + frame_offset));
1219     }
1220   } else {
1221     // reg to reg
1222     assert(src.first()->is_Register(), "long src value must be in GPR");
1223     if (dst.first()->is_stack()) {
1224       // reg -> stack
1225       __ reg2mem_opt(src.first()->as_Register(),
1226                      Address(Z_SP, reg2offset(dst.first())));
1227     } else {
1228       // reg -> reg
1229       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1230       __ move_reg_if_needed(dst.first()->as_Register(),
1231                             T_LONG, src.first()->as_Register(), T_LONG);
1232     }
1233   }
1234 }
1235 
1236 
1237 //----------------------------------------------------------------------
1238 // A int-like arg.
1239 //----------------------------------------------------------------------
// On z/Architecture we will store integer-like items to the stack as 64 bit
1241 // items, according to the z/Architecture ABI, even though Java would only store
1242 // 32 bits for a parameter.
1243 // We do sign extension for all base types. That is ok since the only
1244 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1245 // Sign extension 32->64 bit will thus not affect the value.
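//
// Example: an int argument of -1 (0xFFFFFFFF) is widened by the sign-extending
// move (LGFR or a sign-extending load) to 0xFFFFFFFFFFFFFFFF before being stored or
// passed on, while a T_CHAR value such as 0x0041 has bit 31 clear and is therefore
// unchanged by the sign extension, as argued above.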
1246 //----------------------------------------------------------------------
1247 static void move32_64(MacroAssembler *masm,
1248                       VMRegPair src,
1249                       VMRegPair dst,
1250                       int framesize_in_slots) {
1251   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1252 
1253   if (src.first()->is_stack()) {
1254     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1255     if (dst.first()->is_stack()) {
      // stack -> stack. MVC not possible due to sign extension.
1257       Address firstaddr(Z_SP, reg2offset(dst.first()));
1258       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1259       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1260     } else {
1261       // stack -> reg, sign extended
1262       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1263     }
1264   } else {
1265     if (dst.first()->is_stack()) {
1266       // reg -> stack, sign extended
1267       Address firstaddr(Z_SP, reg2offset(dst.first()));
1268       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1269       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1270     } else {
1271       // reg -> reg, sign extended
1272       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1273     }
1274   }
1275 }
1276 
1277 static void save_or_restore_arguments(MacroAssembler *masm,
1278                                       const int stack_slots,
1279                                       const int total_in_args,
1280                                       const int arg_save_area,
1281                                       OopMap *map,
1282                                       VMRegPair *in_regs,
1283                                       BasicType *in_sig_bt) {
1284 
1285   // If map is non-NULL then the code should store the values,
1286   // otherwise it should load them.
1287   int slot = arg_save_area;
1288   // Handle double words first.
1289   for (int i = 0; i < total_in_args; i++) {
1290     if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
1291       int offset = slot * VMRegImpl::stack_slot_size;
1292       slot += VMRegImpl::slots_per_word;
1293       assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
1294       const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1295       Address   stackaddr(Z_SP, offset);
1296       if (map != NULL) {
1297         __ freg2mem_opt(freg, stackaddr);
1298       } else {
1299         __ mem2freg_opt(freg, stackaddr);
1300       }
1301     } else if (in_regs[i].first()->is_Register() &&
1302                (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
1303       int offset = slot * VMRegImpl::stack_slot_size;
1304       const Register   reg = in_regs[i].first()->as_Register();
1305       if (map != NULL) {
1306         __ z_stg(reg, offset, Z_SP);
1307         if (in_sig_bt[i] == T_ARRAY) {
1308           map->set_oop(VMRegImpl::stack2reg(slot));
1309         }
1310       } else {
1311         __ z_lg(reg, offset, Z_SP);
1312       }
1313       slot += VMRegImpl::slots_per_word;
1314       assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
1315     }
1316   }
1317 
1318   // Save or restore single word registers.
1319   for (int i = 0; i < total_in_args; i++) {
1320     if (in_regs[i].first()->is_Register()) {
1321       int offset = slot * VMRegImpl::stack_slot_size;
1322       // Value lives in an input register. Save it on stack.
1323       switch (in_sig_bt[i]) {
1324         case T_BOOLEAN:
1325         case T_CHAR:
1326         case T_BYTE:
1327         case T_SHORT:
1328         case T_INT: {
1329           const Register   reg = in_regs[i].first()->as_Register();
1330           Address   stackaddr(Z_SP, offset);
1331           if (map != NULL) {
1332             __ z_st(reg, stackaddr);
1333           } else {
1334             __ z_lgf(reg, stackaddr);
1335           }
1336           slot++;
1337           assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
1338           break;
1339         }
1340         case T_ARRAY:
1341         case T_LONG:
1342           // handled above
1343           break;
1344         case T_OBJECT:
1345         default: ShouldNotReachHere();
1346       }
1347     } else if (in_regs[i].first()->is_FloatRegister()) {
1348       if (in_sig_bt[i] == T_FLOAT) {
1349         int offset = slot * VMRegImpl::stack_slot_size;
1350         slot++;
1351         assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
1352         const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1353         Address   stackaddr(Z_SP, offset);
1354         if (map != NULL) {
1355           __ freg2mem_opt(freg, stackaddr, false);
1356         } else {
1357           __ mem2freg_opt(freg, stackaddr, false);
1358         }
1359       }
1360     } else if (in_regs[i].first()->is_stack() &&
1361                in_sig_bt[i] == T_ARRAY && map != NULL) {
1362       int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1363       map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1364     }
1365   }
1366 }
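// Usage note: the critical-native path below calls save_or_restore_arguments in
// pairs: first with a non-NULL map to spill the live argument registers and record
// any T_ARRAY oops in the OopMap, then with map == NULL to reload them once the
// runtime call has returned.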
1367 
1368 // Check GCLocker::needs_gc and enter the runtime if it's true. This
1369 // keeps a new JNI critical region from starting until a GC has been
1370 // forced. Save down any oops in registers and describe them in an OopMap.
1371 static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
1372                                                 const int stack_slots,
1373                                                 const int total_in_args,
1374                                                 const int arg_save_area,
1375                                                 OopMapSet *oop_maps,
1376                                                 VMRegPair *in_regs,
1377                                                 BasicType *in_sig_bt) {
1378   __ block_comment("check GCLocker::needs_gc");
1379   Label cont;
1380 
1381   // Check GCLocker::_needs_gc flag.
1382   __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
1383   __ z_cli(0, Z_R1_scratch, 0);
1384   __ z_bre(cont);
1385 
1386   // Save down any values that are live in registers and call into the
1387   // runtime to halt for a GC.
1388   OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1389 
1390   save_or_restore_arguments(masm, stack_slots, total_in_args,
1391                             arg_save_area, map, in_regs, in_sig_bt);
1392   address the_pc = __ pc();
1393   __ set_last_Java_frame(Z_SP, noreg);
1394 
1395   __ block_comment("block_for_jni_critical");
1396   __ z_lgr(Z_ARG1, Z_thread);
1397 
1398   address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1399   __ call_c(entry_point);
1400   oop_maps->add_gc_map(__ offset(), map);
1401 
1402   __ reset_last_Java_frame();
1403 
1404   // Reload all the register arguments.
1405   save_or_restore_arguments(masm, stack_slots, total_in_args,
1406                             arg_save_area, NULL, in_regs, in_sig_bt);
1407 
1408   __ bind(cont);
1409 
1410   if (StressCriticalJNINatives) {
1411     // Stress register saving
1412     OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1413     save_or_restore_arguments(masm, stack_slots, total_in_args,
1414                               arg_save_area, map, in_regs, in_sig_bt);
1415 
1416     // Destroy argument registers.
1417     for (int i = 0; i < total_in_args; i++) {
1418       if (in_regs[i].first()->is_Register()) {
1419         // Don't set CC.
1420         __ clear_reg(in_regs[i].first()->as_Register(), true, false);
1421       } else {
1422         if (in_regs[i].first()->is_FloatRegister()) {
1423           FloatRegister fr = in_regs[i].first()->as_FloatRegister();
1424           __ z_lcdbr(fr, fr);
1425         }
1426       }
1427     }
1428 
1429     save_or_restore_arguments(masm, stack_slots, total_in_args,
1430                               arg_save_area, NULL, in_regs, in_sig_bt);
1431   }
1432 }
1433 
1434 static void move_ptr(MacroAssembler *masm,
1435                      VMRegPair src,
1436                      VMRegPair dst,
1437                      int framesize_in_slots) {
1438   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1439 
1440   if (src.first()->is_stack()) {
1441     if (dst.first()->is_stack()) {
1442       // stack to stack
1443       __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1444       __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1445     } else {
1446       // stack to reg
1447       __ mem2reg_opt(dst.first()->as_Register(),
1448                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
1449     }
1450   } else {
1451     if (dst.first()->is_stack()) {
1452       // reg to stack
1453       __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1454     } else {
1455       __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1456     }
1457   }
1458 }
1459 
1460 // Unpack an array argument into a pointer to the body and the length
1461 // if the array is non-null, otherwise pass 0 for both.
1462 static void unpack_array_argument(MacroAssembler *masm,
1463                                    VMRegPair reg,
1464                                    BasicType in_elem_type,
1465                                    VMRegPair body_arg,
1466                                    VMRegPair length_arg,
1467                                    int framesize_in_slots) {
1468   Register tmp_reg = Z_tmp_2;
1469   Register tmp2_reg = Z_tmp_1;
1470 
1471   assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1472          "possible collision");
1473   assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1474          "possible collision");
1475 
1476   // Pass the length, ptr pair.
1477   NearLabel set_out_args;
1478   VMRegPair tmp, tmp2;
1479 
1480   tmp.set_ptr(tmp_reg->as_VMReg());
1481   tmp2.set_ptr(tmp2_reg->as_VMReg());
1482   if (reg.first()->is_stack()) {
1483     // Load the arg up from the stack.
1484     move_ptr(masm, reg, tmp, framesize_in_slots);
1485     reg = tmp;
1486   }
1487 
1488   const Register first = reg.first()->as_Register();
1489 
1490   // Don't set CC, indicate unused result.
1491   (void) __ clear_reg(tmp2_reg, true, false);
1492   if (tmp_reg != first) {
1493     __ clear_reg(tmp_reg, true, false);  // Don't set CC.
1494   }
1495   __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
1496   __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
1497   __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
1498 
1499   __ bind(set_out_args);
1500   move_ptr(masm, tmp, body_arg, framesize_in_slots);
1501   move32_64(masm, tmp2, length_arg, framesize_in_slots);
1502 }
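// For illustration (not part of this file): a critical native taking a byte[] thus
// receives the pair (jint length, jbyte* body). A non-null array yields its length
// and a pointer to its first element; a null array is passed as (0, NULL), which is
// what the clear_reg/compare sequence above arranges.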
1503 
1504 //----------------------------------------------------------------------
1505 // Wrap a JNI call.
1506 //----------------------------------------------------------------------
1507 #undef USE_RESIZE_FRAME
1508 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1509                                                 const methodHandle& method,
1510                                                 int compile_id,
1511                                                 BasicType *in_sig_bt,
1512                                                 VMRegPair *in_regs,
1513                                                 BasicType ret_type) {
1514 #ifdef COMPILER2
1515   int total_in_args = method->size_of_parameters();
1516   if (method->is_method_handle_intrinsic()) {
1517     vmIntrinsics::ID iid = method->intrinsic_id();
1518     intptr_t start = (intptr_t) __ pc();
1519     int vep_offset = ((intptr_t) __ pc()) - start;
1520 
1521     gen_special_dispatch(masm, total_in_args,
1522                          method->intrinsic_id(), in_sig_bt, in_regs);
1523 
1524     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1525 
1526     __ flush();
1527 
1528     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1529 
1530     return nmethod::new_native_nmethod(method,
1531                                        compile_id,
1532                                        masm->code(),
1533                                        vep_offset,
1534                                        frame_complete,
1535                                        stack_slots / VMRegImpl::slots_per_word,
1536                                        in_ByteSize(-1),
1537                                        in_ByteSize(-1),
1538                                        (OopMapSet *) NULL);
1539   }
1540 
1541 
1542   ///////////////////////////////////////////////////////////////////////
1543   //
1544   //  Precalculations before generating any code
1545   //
1546   ///////////////////////////////////////////////////////////////////////
1547 
1548   bool is_critical_native = true;
1549   address native_func = method->critical_native_function();
1550   if (native_func == NULL) {
1551     native_func = method->native_function();
1552     is_critical_native = false;
1553   }
1554   assert(native_func != NULL, "must have function");
1555 
1556   //---------------------------------------------------------------------
1557   // We have received a description of where all the java args are located
1558   // on entry to the wrapper. We need to convert these args to where
1559   // the jni function will expect them. To figure out where they go
1560   // we convert the java signature to a C signature by inserting
1561   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1562   //
1563   // The first hidden argument arg[0] is a pointer to the JNI environment.
1564   // It is generated for every call.
1565   // The second argument arg[1] to the JNI call, which is added only for static
1566   // methods, is the handlized class mirror of the method's holder. For static
1567   // calls, this mirror also serves as the lock object. For instance
1568   // calls, the lock is performed on the receiver object itself, the pointer of
1569   // which is passed as the first visible argument.
1570   //---------------------------------------------------------------------
1571 
1572   // Additionally, on z/Architecture we must convert integers
1573   // to longs in the C signature. We do this in advance in order to have
1574   // no trouble with indexes into the bt-arrays.
1575   // So convert the signature and registers now, and adjust the total number
1576   // of in-arguments accordingly.
1577   bool method_is_static = method->is_static();
1578   int  total_c_args     = total_in_args;
1579 
1580   if (!is_critical_native) {
1581     int n_hidden_args = method_is_static ? 2 : 1;
1582     total_c_args += n_hidden_args;
1583   } else {
1584     // No JNIEnv*, no this*, but unpacked arrays (base+length).
1585     for (int i = 0; i < total_in_args; i++) {
1586       if (in_sig_bt[i] == T_ARRAY) {
1587         total_c_args ++;
1588       }
1589     }
1590   }
1591 
1592   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1593   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1594   BasicType* in_elem_bt = NULL;
1595 
1596   // Create the signature for the C call:
1597   //   1) add the JNIEnv*
1598   //   2) add the class if the method is static
1599   //   3) copy the rest of the incoming signature (shifted by the number of
1600   //      hidden arguments)
1601 
1602   int argc = 0;
1603   if (!is_critical_native) {
1604     out_sig_bt[argc++] = T_ADDRESS;
1605     if (method->is_static()) {
1606       out_sig_bt[argc++] = T_OBJECT;
1607     }
1608 
1609     for (int i = 0; i < total_in_args; i++) {
1610       out_sig_bt[argc++] = in_sig_bt[i];
1611     }
1612   } else {
1613     Thread* THREAD = Thread::current();
1614     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1615     SignatureStream ss(method->signature());
1616     int o = 0;
1617     for (int i = 0; i < total_in_args; i++, o++) {
1618       if (in_sig_bt[i] == T_ARRAY) {
1619         // Arrays are passed as tuples (int, elem*).
1620         Symbol* atype = ss.as_symbol(CHECK_NULL);
1621         const char* at = atype->as_C_string();
1622         if (strlen(at) == 2) {
1623           assert(at[0] == '[', "must be");
1624           switch (at[1]) {
1625             case 'B': in_elem_bt[o]  = T_BYTE; break;
1626             case 'C': in_elem_bt[o]  = T_CHAR; break;
1627             case 'D': in_elem_bt[o]  = T_DOUBLE; break;
1628             case 'F': in_elem_bt[o]  = T_FLOAT; break;
1629             case 'I': in_elem_bt[o]  = T_INT; break;
1630             case 'J': in_elem_bt[o]  = T_LONG; break;
1631             case 'S': in_elem_bt[o]  = T_SHORT; break;
1632             case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
1633             default: ShouldNotReachHere();
1634           }
1635         }
1636       } else {
1637         in_elem_bt[o] = T_VOID;
1638       }
1639       if (in_sig_bt[i] != T_VOID) {
1640         assert(in_sig_bt[i] == ss.type(), "must match");
1641         ss.next();
1642       }
1643     }
1644     assert(total_in_args == o, "must match");
1645 
1646     for (int i = 0; i < total_in_args; i++) {
1647       if (in_sig_bt[i] == T_ARRAY) {
1648         // Arrays are passed as tuples (int, elem*).
1649         out_sig_bt[argc++] = T_INT;
1650         out_sig_bt[argc++] = T_ADDRESS;
1651       } else {
1652         out_sig_bt[argc++] = in_sig_bt[i];
1653       }
1654     }
1655   }
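  // Example (illustrative only): for a static native 'void m(byte[])' the arrays
  // built above are
  //   regular JNI:     out_sig_bt = { T_ADDRESS /*JNIEnv* */, T_OBJECT /*class*/, T_ARRAY }
  //   critical native: out_sig_bt = { T_INT /*length*/, T_ADDRESS /*body*/ }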
1656 
1657   ///////////////////////////////////////////////////////////////////////
1658   // Now figure out where the args must be stored and how much stack space
1659   // they require (neglecting out_preserve_stack_slots but providing space
1660   // for storing the first five register arguments).
1661   // It's weird, see int_stk_helper.
1662   ///////////////////////////////////////////////////////////////////////
1663 
1664   //---------------------------------------------------------------------
1665   // Compute framesize for the wrapper.
1666   //
1667   // - We need to handlize all oops passed in registers.
1668   // - We must create space for them here that is disjoint from the save area.
1669   // - We always just allocate 5 words for storing down these objects.
1670   //   This allows us to simply record the base and use the Ireg number to
1671   //   decide which slot to use.
1672   // - Note that the reg number used to index the stack slot is the inbound
1673   //   number, not the outbound number.
1674   // - We must shuffle args to match the native convention,
1675   //   and to include var-args space.
1676   //---------------------------------------------------------------------
1677 
1678   //---------------------------------------------------------------------
1679   // Calculate the total number of stack slots we will need:
1680   // - 1) abi requirements
1681   // - 2) outgoing args
1682   // - 3) space for inbound oop handle area
1683   // - 4) space for handlizing a klass if static method
1684   // - 5) space for a lock if synchronized method
1685   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1686   // - 7) filler slots for alignment
1687   //---------------------------------------------------------------------
1688   // Here is what the space we have allocated will look like.
1689   // With USE_RESIZE_FRAME defined we would not create a new stack frame,
1690   // but just extend the one we got with our own data area.
1691   //
1692   // If an offset or pointer name points to a separator line, it is
1693   // assumed that addressing with offset 0 selects storage starting
1694   // at the first byte above the separator line.
1695   //
1696   //
1697   //     ...                   ...
1698   //      | caller's frame      |
1699   // FP-> |---------------------|
1700   //      | filler slots, if any|
1701   //     7| #slots == mult of 2 |
1702   //      |---------------------|
1703   //      | work space          |
1704   //     6| 2 slots = 8 bytes   |
1705   //      |---------------------|
1706   //     5| lock box (if sync)  |
1707   //      |---------------------| <- lock_slot_offset
1708   //     4| klass (if static)   |
1709   //      |---------------------| <- klass_slot_offset
1710   //     3| oopHandle area      |
1711   //      | (save area for      |
1712   //      |  critical natives)  |
1713   //      |                     |
1714   //      |                     |
1715   //      |---------------------| <- oop_handle_offset
1716   //     2| outbound memory     |
1717   //     ...                   ...
1718   //      | based arguments     |
1719   //      |---------------------|
1720   //      | vararg              |
1721   //     ...                   ...
1722   //      | area                |
1723   //      |---------------------| <- out_arg_slot_offset
1724   //     1| out_preserved_slots |
1725   //     ...                   ...
1726   //      | (z_abi spec)        |
1727   // SP-> |---------------------| <- FP_slot_offset (back chain)
1728   //     ...                   ...
1729   //
1730   //---------------------------------------------------------------------
1731 
1732   // *_slot_offset indicates offset from SP in #stack slots
1733   // *_offset      indicates offset from SP in #bytes
1734 
1735   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1736                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1737 
1738   // Now the space for the inbound oop handle area.
1739   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1740   if (is_critical_native) {
1741     // Critical natives may have to call out so they need a save area
1742     // for register arguments.
1743     int double_slots = 0;
1744     int single_slots = 0;
1745     for (int i = 0; i < total_in_args; i++) {
1746       if (in_regs[i].first()->is_Register()) {
1747         const Register reg = in_regs[i].first()->as_Register();
1748         switch (in_sig_bt[i]) {
1749           case T_BOOLEAN:
1750           case T_BYTE:
1751           case T_SHORT:
1752           case T_CHAR:
1753           case T_INT:
1754           // Fall through.
1755           case T_ARRAY:
1756           case T_LONG: double_slots++; break;
1757           default:  ShouldNotReachHere();
1758         }
1759       } else {
1760         if (in_regs[i].first()->is_FloatRegister()) {
1761           switch (in_sig_bt[i]) {
1762             case T_FLOAT:  single_slots++; break;
1763             case T_DOUBLE: double_slots++; break;
1764             default:  ShouldNotReachHere();
1765           }
1766         }
1767       }
1768     }  // for
1769     total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even.
1770   }
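  // Example (illustrative only): two T_DOUBLE arguments in float registers plus one
  // T_FLOAT argument give double_slots == 2 and single_slots == 1, hence
  // total_save_slots = 2*2 + align_up(1, 2) = 6 slots for the save area.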
1771 
1772   int oop_handle_slot_offset = stack_slots;
1773   stack_slots += total_save_slots;                                        // 3)
1774 
1775   int klass_slot_offset = 0;
1776   int klass_offset      = -1;
1777   if (method_is_static && !is_critical_native) {                          // 4)
1778     klass_slot_offset  = stack_slots;
1779     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1780     stack_slots       += VMRegImpl::slots_per_word;
1781   }
1782 
1783   int lock_slot_offset = 0;
1784   int lock_offset      = -1;
1785   if (method->is_synchronized()) {                                        // 5)
1786     lock_slot_offset   = stack_slots;
1787     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1788     stack_slots       += VMRegImpl::slots_per_word;
1789   }
1790 
1791   int workspace_slot_offset = stack_slots;                                // 6)
1792   stack_slots         += 2;
1793 
1794   // Now compute actual number of stack words we need.
1795   // Round to align stack properly.
1796   stack_slots = align_up(stack_slots,                                     // 7)
1797                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1798   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
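  // Assuming the usual 8-byte z/Architecture stack alignment (frame::alignment_in_bytes == 8)
  // and 4-byte stack slots, this rounds stack_slots up to an even count, so
  // frame_size_in_bytes is a multiple of 8.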
1799 
1800 
1801   ///////////////////////////////////////////////////////////////////////
1802   // Now we can start generating code
1803   ///////////////////////////////////////////////////////////////////////
1804 
1805   unsigned int wrapper_CodeStart  = __ offset();
1806   unsigned int wrapper_UEPStart;
1807   unsigned int wrapper_VEPStart;
1808   unsigned int wrapper_FrameDone;
1809   unsigned int wrapper_CRegsSet;
1810   Label     handle_pending_exception;
1811   Label     ic_miss;
1812 
1813   //---------------------------------------------------------------------
1814   // Unverified entry point (UEP)
1815   //---------------------------------------------------------------------
1816   wrapper_UEPStart = __ offset();
1817 
1818   // check ic: object class <-> cached class
1819   if (!method_is_static) __ nmethod_UEP(ic_miss);
1820   // Fill with nops (alignment of verified entry point).
1821   __ align(CodeEntryAlignment);
1822 
1823   //---------------------------------------------------------------------
1824   // Verified entry point (VEP)
1825   //---------------------------------------------------------------------
1826   wrapper_VEPStart = __ offset();
1827 
1828   __ save_return_pc();
1829   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1830 #ifndef USE_RESIZE_FRAME
1831   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1832 #else
1833   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1834                                                           // Just resize the existing one.
1835 #endif
1836 
1837   wrapper_FrameDone = __ offset();
1838 
1839   __ verify_thread();
1840 
1841   // Native nmethod wrappers never take possession of the oop arguments.
1842   // So the caller will GC the arguments.
1843   // The only thing we need an oopMap for is if the call is static.
1844   //
1845   // An OopMap for lock (and class if static), and one for the VM call itself
1846   OopMapSet  *oop_maps        = new OopMapSet();
1847   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1848 
1849   if (is_critical_native) {
1850     check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1851                                        oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1852   }
1853 
1854 
1855   //////////////////////////////////////////////////////////////////////
1856   //
1857   // The Grand Shuffle
1858   //
1859   //////////////////////////////////////////////////////////////////////
1860   //
1861   // We immediately shuffle the arguments so that for any vm call we have
1862   // to make from here on out (sync slow path, jvmti, etc.) we will have
1863   // captured the oops from our caller and have a valid oopMap for them.
1864   //
1865   //--------------------------------------------------------------------
1866   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1867   // (derived from JavaThread* which is in Z_thread) and, if static,
1868   // the class mirror instead of a receiver. This pretty much guarantees that
1869   // register layout will not match. We ignore these extra arguments during
1870   // the shuffle. The shuffle is described by the two calling convention
1871   // vectors we have in our possession. We simply walk the java vector to
1872   // get the source locations and the c vector to get the destinations.
1873   //
1874   // This is a trick. We double the stack slots so we can claim
1875   // the oops in the caller's frame. Since we are sure to have
1876   // more args than the caller, doubling is enough to make
1877   // sure we can capture all the incoming oop args from the caller.
1878   //--------------------------------------------------------------------
1879 
1880   // Record sp-based slot for receiver on stack for non-static methods.
1881   int receiver_offset = -1;
1882 
1883   //--------------------------------------------------------------------
1884   // We move the arguments backwards because the floating point registers
1885   // destination will always be to a register with a greater or equal
1886   // register number or the stack.
1887   //   jix is the index of the incoming Java arguments.
1888   //   cix is the index of the outgoing C arguments.
1889   //--------------------------------------------------------------------
1890 
1891 #ifdef ASSERT
1892   bool reg_destroyed[RegisterImpl::number_of_registers];
1893   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1894   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1895     reg_destroyed[r] = false;
1896   }
1897   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1898     freg_destroyed[f] = false;
1899   }
1900 #endif // ASSERT
1901 
1902   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1903 #ifdef ASSERT
1904     if (in_regs[jix].first()->is_Register()) {
1905       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1906     } else {
1907       if (in_regs[jix].first()->is_FloatRegister()) {
1908         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1909       }
1910     }
1911     if (out_regs[cix].first()->is_Register()) {
1912       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1913     } else {
1914       if (out_regs[cix].first()->is_FloatRegister()) {
1915         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1916       }
1917     }
1918 #endif // ASSERT
1919 
1920     switch (in_sig_bt[jix]) {
1921       // Due to casting, small integers should only occur in pairs with type T_LONG.
1922       case T_BOOLEAN:
1923       case T_CHAR:
1924       case T_BYTE:
1925       case T_SHORT:
1926       case T_INT:
1927         // Move int and do sign extension.
1928         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1929         break;
1930 
1931       case T_LONG :
1932         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1933         break;
1934 
1935       case T_ARRAY:
1936         if (is_critical_native) {
1937           int body_arg = cix;
1938           cix -= 1; // Point to length arg.
1939           unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1940           break;
1941         }
1942         // else fallthrough
1943       case T_OBJECT:
1944         assert(!is_critical_native, "no oop arguments");
1945         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1946                     ((jix == 0) && (!method_is_static)),
1947                     &receiver_offset);
1948         break;
1949       case T_VOID:
1950         break;
1951 
1952       case T_FLOAT:
1953         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1954         break;
1955 
1956       case T_DOUBLE:
1957         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1958         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1959         break;
1960 
1961       case T_ADDRESS:
1962         assert(false, "found T_ADDRESS in java args");
1963         break;
1964 
1965       default:
1966         ShouldNotReachHere();
1967     }
1968   }
1969 
1970   //--------------------------------------------------------------------
1971   // Pre-load a static method's oop into ARG2.
1972   // Used both by locking code and the normal JNI call code.
1973   //--------------------------------------------------------------------
1974   if (method_is_static && !is_critical_native) {
1975     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1976 
1977     // Now handlize the static class mirror in ARG2. It's known not-null.
1978     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1979     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1980     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1981   }
1982 
1983   // Get JNIEnv* which is first argument to native.
1984   if (!is_critical_native) {
1985     __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1986   }
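  // At this point Z_ARG1 holds the address of the thread-local JNI environment,
  // which the native function receives as its JNIEnv* parameter (regular natives
  // only; critical natives get no JNIEnv*).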
1987 
1988   //////////////////////////////////////////////////////////////////////
1989   // We have all of the arguments setup at this point.
1990   // We MUST NOT touch any outgoing regs from this point on.
1991   // So if we must call out we must push a new frame.
1992   //////////////////////////////////////////////////////////////////////
1993 
1994 
1995   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1996   // Both values represent the same position.
1997   __ get_PC(Z_R10);                // PC into register
1998   wrapper_CRegsSet = __ offset();  // and into the variable.
1999 
2000   // Z_R10 now has the pc loaded that we will use when we finally call to native.
2001 
2002   // We use the same pc/oopMap repeatedly when we call out.
2003   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
2004 
2005   // Lock a synchronized method.
2006 
2007   if (method->is_synchronized()) {
2008     assert(!is_critical_native, "unhandled");
2009 
2010     // ATTENTION: args and Z_R10 must be preserved.
2011     Register r_oop  = Z_R11;
2012     Register r_box  = Z_R12;
2013     Register r_tmp1 = Z_R13;
2014     Register r_tmp2 = Z_R7;
2015     Label done;
2016 
2017     // Load the oop for the object or class. R_carg2_classorobject contains
2018     // either the handlized oop from the incoming arguments or the handlized
2019     // class mirror (if the method is static).
2020     __ z_lg(r_oop, 0, Z_ARG2);
2021 
2022     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
2023     // Get the lock box slot's address.
2024     __ add2reg(r_box, lock_offset, Z_SP);
2025 
2026 #ifdef ASSERT
2027     // Making the box point to itself will make it clear it went unused
2028     // but also be obviously invalid.
2029     if (UseBiasedLocking)
2030       __ z_stg(r_box, 0, r_box);
2031 #endif // ASSERT
2032 
2033     // Try fastpath for locking.
2034     // Fast_lock kills r_tmp1, r_tmp2. (Don't use R1 as temp, won't work!)
2035     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2036     __ z_bre(done);
2037 
2038     //-------------------------------------------------------------------------
2039     // None of the above fast optimizations worked so we have to get into the
2040     // slow case of monitor enter. Inline a special case of call_VM that
2041     // disallows any pending_exception.
2042     //-------------------------------------------------------------------------
2043 
2044     Register oldSP = Z_R11;
2045 
2046     __ z_lgr(oldSP, Z_SP);
2047 
2048     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2049 
2050     // Prepare arguments for call.
2051     __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
2052     __ add2reg(Z_ARG2, lock_offset, oldSP);
2053     __ z_lgr(Z_ARG3, Z_thread);
2054 
2055     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2056 
2057     // Do the call.
2058     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2059     __ call(Z_R1_scratch);
2060 
2061     __ reset_last_Java_frame();
2062 
2063     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2064 #ifdef ASSERT
2065     { Label L;
2066       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2067       __ z_bre(L);
2068       __ stop("no pending exception allowed on exit from IR::monitorenter");
2069       __ bind(L);
2070     }
2071 #endif
2072     __ bind(done);
2073   } // lock for synchronized methods
2074 
2075 
2076   //////////////////////////////////////////////////////////////////////
2077   // Finally just about ready to make the JNI call.
2078   //////////////////////////////////////////////////////////////////////
2079 
2080   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2081   __ set_last_Java_frame(Z_SP, Z_R10);
2082 
2083   // Transition from _thread_in_Java to _thread_in_native.
2084   __ set_thread_state(_thread_in_native);
2085 
2086 
2087   //////////////////////////////////////////////////////////////////////
2088   // This is the JNI call.
2089   //////////////////////////////////////////////////////////////////////
2090 
2091   __ call_c(native_func);
2092 
2093 
2094   //////////////////////////////////////////////////////////////////////
2095   // We have survived the call once we reach here.
2096   //////////////////////////////////////////////////////////////////////
2097 
2098 
2099   //--------------------------------------------------------------------
2100   // Unpack native results.
2101   //--------------------------------------------------------------------
2102   // For int-types, we do any needed sign-extension required.
2103   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2104   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2105   // blocking or unlocking.
2106   // An OOP result (handle) is done specially in the slow-path code.
2107   //--------------------------------------------------------------------
2108   switch (ret_type) {
2109     case T_VOID:    break;         // Nothing to do!
2110     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
2111     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
2112     case T_LONG:    break;         // Got it where we want it (unless slow-path)
2113     case T_OBJECT:  break;         // Really a handle.
2114                                    // Cannot de-handlize until after reclaiming jvm_lock.
2115     case T_ARRAY:   break;
2116 
2117     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
2118       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
2119       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
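      // Worked example: Z_RET == 0x100 -> z_lngfr makes it negative (sign bit set),
      // z_srlg by 63 then yields 1; Z_RET == 0 stays 0 and yields 0.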
2120       break;
2121     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
2122     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2123     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
2124     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
2125 
2126     default:
2127       ShouldNotReachHere();
2128       break;
2129   }
2130 
2131 
2132   // Switch thread to "native transition" state before reading the synchronization state.
2133   // This additional state is necessary because reading and testing the synchronization
2134   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2135   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2136   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
2137   //   - Thread A is resumed to finish this native method, but doesn't block here since it
2138   //     didn't see any synchronization in progress, and escapes.
2139 
2140   // Transition from _thread_in_native to _thread_in_native_trans.
2141   __ set_thread_state(_thread_in_native_trans);
2142 
2143   // Safepoint synchronization
2144   //--------------------------------------------------------------------
2145   // Must we block?
2146   //--------------------------------------------------------------------
2147   // Block, if necessary, before resuming in _thread_in_Java state.
2148   // In order for GC to work, don't clear the last_Java_sp until after blocking.
2149   //--------------------------------------------------------------------
2150   Label after_transition;
2151   {
2152     Label no_block, sync;
2153 
2154     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2155 
2156     if (os::is_MP()) {
2157       if (UseMembar) {
2158         // Force this write out before the read below.
2159         __ z_fence();
2160       } else {
2161         // Write serialization page so VM thread can do a pseudo remote membar.
2162         // We use the current thread pointer to calculate a thread specific
2163         // offset to write to within the page. This minimizes bus traffic
2164         // due to cache line collision.
2165         __ serialize_memory(Z_thread, Z_R1, Z_R2);
2166       }
2167     }
2168     __ safepoint_poll(sync, Z_R1);
2169 
2170     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2171     __ z_bre(no_block);
2172 
2173     // Block. Save any potential method result value before the operation and
2174     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2175     // lets us share the oopMap we used when we went native rather than create
2176     // a distinct one for this pc.
2177     //
2178     __ bind(sync);
2179     __ z_acquire();
2180 
2181     address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2182                                              : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2183 
2184     __ call_VM_leaf(entry_point, Z_thread);
2185 
2186     if (is_critical_native) {
2187       restore_native_result(masm, ret_type, workspace_slot_offset);
2188       __ z_bru(after_transition); // No thread state transition here.
2189     }
2190     __ bind(no_block);
2191     restore_native_result(masm, ret_type, workspace_slot_offset);
2192   }
2193 
2194   //--------------------------------------------------------------------
2195   // Thread state is thread_in_native_trans. Any safepoint blocking has
2196   // already happened so we can now change state to _thread_in_Java.
2197   //--------------------------------------------------------------------
2198   // Transition from _thread_in_native_trans to _thread_in_Java.
2199   __ set_thread_state(_thread_in_Java);
2200   __ bind(after_transition);
2201 
2202 
2203   //--------------------------------------------------------------------
2204   // Reguard any pages if necessary.
2205   // Protect native result from being destroyed.
2206   //--------------------------------------------------------------------
2207 
2208   Label no_reguard;
2209 
2210   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2211            JavaThread::stack_guard_yellow_reserved_disabled);
2212 
2213   __ z_bre(no_reguard);
2214 
2215   save_native_result(masm, ret_type, workspace_slot_offset);
2216   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2217   restore_native_result(masm, ret_type, workspace_slot_offset);
2218 
2219   __ bind(no_reguard);
2220 
2221 
2222   // Synchronized methods (slow path only)
2223   // No pending exceptions for now.
2224   //--------------------------------------------------------------------
2225   // Handle possibly pending exception (will unlock if necessary).
2226   // Native result is, if any is live, in Z_FRES or Z_RES.
2227   //--------------------------------------------------------------------
2228   // Unlock
2229   //--------------------------------------------------------------------
2230   if (method->is_synchronized()) {
2231     const Register r_oop        = Z_R11;
2232     const Register r_box        = Z_R12;
2233     const Register r_tmp1       = Z_R13;
2234     const Register r_tmp2       = Z_R7;
2235     Label done;
2236 
2237     // Get unboxed oop of class mirror or object ...
2238     int   offset = method_is_static ? klass_offset : receiver_offset;
2239 
2240     assert(offset != -1, "");
2241     __ z_lg(r_oop, offset, Z_SP);
2242 
2243     // ... and address of lock object box.
2244     __ add2reg(r_box, lock_offset, Z_SP);
2245 
2246     // Try fastpath for unlocking.
2247     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2248     __ z_bre(done);
2249 
2250     // Slow path for unlocking.
2251     // Save and restore any potential method result value around the unlocking operation.
2252     const Register R_exc = Z_R11;
2253 
2254     save_native_result(masm, ret_type, workspace_slot_offset);
2255 
2256     // Must save pending exception around the slow-path VM call. Since it's a
2257     // leaf call, the pending exception (if any) can be kept in a register.
2258     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2259     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2260 
2261     // Must clear pending-exception before re-entering the VM. Since this is
2262     // a leaf call, pending-exception-oop can be safely kept in a register.
2263     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2264 
2265     // Inline a special case of call_VM that disallows any pending_exception.
2266 
2267     // Get locked oop from the handle we passed to jni.
2268     __ z_lg(Z_ARG1, offset, Z_SP);
2269     __ add2reg(Z_ARG2, lock_offset, Z_SP);
2270     __ z_lgr(Z_ARG3, Z_thread);
2271 
2272     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2273 
2274     __ call(Z_R1_scratch);
2275 
2276 #ifdef ASSERT
2277     {
2278       Label L;
2279       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2280       __ z_bre(L);
2281       __ stop("no pending exception allowed on exit from IR::monitorexit");
2282       __ bind(L);
2283     }
2284 #endif
2285 
2286     // Check_forward_pending_exception jump to forward_exception if any pending
2287     // exception is set. The forward_exception routine expects to see the
2288     // exception in pending_exception and not in a register. Kind of clumsy,
2289     // since all folks who branch to forward_exception must have tested
2290     // pending_exception first and hence have it in a register already.
2291     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2292     restore_native_result(masm, ret_type, workspace_slot_offset);
2293     __ z_bru(done);
2294     __ z_illtrap(0x66);
2295 
2296     __ bind(done);
2297   }
2298 
2299 
2300   //--------------------------------------------------------------------
2301   // Clear "last Java frame" SP and PC.
2302   //--------------------------------------------------------------------
2303   __ verify_thread(); // Z_thread must be correct.
2304 
2305   __ reset_last_Java_frame();
2306 
2307   // Unpack oop result, e.g. JNIHandles::resolve result.
2308   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2309     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2310   }
2311 
2312   if (CheckJNICalls) {
2313     // clear_pending_jni_exception_check
2314     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2315   }
2316 
2317   // Reset handle block.
2318   if (!is_critical_native) {
2319     __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2320     __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2321 
2322     // Check for pending exceptions.
2323     __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2324     __ z_brne(handle_pending_exception);
2325   }
2326 
2327 
2328   //////////////////////////////////////////////////////////////////////
2329   // Return
2330   //////////////////////////////////////////////////////////////////////
2331 
2332 
2333 #ifndef USE_RESIZE_FRAME
2334   __ pop_frame();                     // Pop wrapper frame.
2335 #else
2336   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2337 #endif
2338   __ restore_return_pc();             // This is the way back to the caller.
2339   __ z_br(Z_R14);
2340 
2341 
2342   //////////////////////////////////////////////////////////////////////
2343   // Out-of-line calls to the runtime.
2344   //////////////////////////////////////////////////////////////////////
2345 
2346 
2347   if (!is_critical_native) {
2348 
2349     //---------------------------------------------------------------------
2350     // Handler for pending exceptions (out-of-line).
2351     //---------------------------------------------------------------------
2352     // Since this is a native call, we know the proper exception handler
2353     // is the empty function. We just pop this frame and then jump to
2354     // forward_exception_entry. Z_R14 will contain the native caller's
2355     // return PC.
2356     __ bind(handle_pending_exception);
2357     __ pop_frame();
2358     __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2359     __ restore_return_pc();
2360     __ z_br(Z_R1_scratch);
2361 
2362     //---------------------------------------------------------------------
2363     // Handler for a cache miss (out-of-line)
2364     //---------------------------------------------------------------------
2365     __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2366   }
2367   __ flush();
2368 
2369 
2370   //////////////////////////////////////////////////////////////////////
2371   // end of code generation
2372   //////////////////////////////////////////////////////////////////////
2373 
2374 
2375   nmethod *nm = nmethod::new_native_nmethod(method,
2376                                             compile_id,
2377                                             masm->code(),
2378                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2379                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2380                                             stack_slots / VMRegImpl::slots_per_word,
2381                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2382                                             in_ByteSize(lock_offset),
2383                                             oop_maps);
2384 
2385   if (is_critical_native) {
2386     nm->set_lazy_critical_native(true);
2387   }
2388 
2389   return nm;
2390 #else
2391   ShouldNotReachHere();
2392   return NULL;
2393 #endif // COMPILER2
2394 }
2395 
2396 static address gen_c2i_adapter(MacroAssembler  *masm,
2397                                int total_args_passed,
2398                                int comp_args_on_stack,
2399                                const BasicType *sig_bt,
2400                                const VMRegPair *regs,
2401                                Label &skip_fixup) {
2402   // Before we get into the guts of the C2I adapter, see if we should be here
2403   // at all. We've come from compiled code and are attempting to jump to the
2404   // interpreter, which means the caller made a static call to get here
2405   // (vcalls always get a compiled target if there is one). Check for a
2406   // compiled target. If there is one, we need to patch the caller's call.
2407 
2408   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2409   const Register ientry = Z_R11;
2410   const Register code   = Z_R11;
2411 
2412   address c2i_entrypoint;
2413   Label   patch_callsite;
2414 
2415   // Regular (verified) c2i entry point.
2416   c2i_entrypoint = __ pc();
2417 
2418   // Call patching needed?
2419   __ load_and_test_long(Z_R0_scratch, method_(code));
2420   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2421   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2422 
2423   __ bind(skip_fixup);  // Return point from patch_callsite.
2424 
2425   // Since all args are passed on the stack, total_args_passed*wordSize is the
2426   // space we need. We also need an ABI scratch area, but we use the caller's since
2427   // it has already been allocated.
2428 
2429   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2430   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2431   Register  sender_SP   = Z_R10;
2432   Register  value       = Z_R12;
2433 
2434   // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2435   // In addition, frame manager expects initial_caller_sp in Z_R10.
2436   __ z_lgr(sender_SP, Z_SP);
2437 
2438   // This should always fit into a 14-bit immediate.
2439   __ resize_frame(-extraspace, Z_R0_scratch);
2440 
2441   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2442   // args. This essentially moves the caller's ABI scratch area from the top to the
2443   // bottom of the arg area.
2444 
2445   int st_off =  extraspace - wordSize;
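  // Illustrative example (assuming wordSize == 8): with total_args_passed == 3,
  // extraspace = align_up(3, 2) * 8 + abi_scratch = 32 + abi_scratch bytes. The first
  // argument is written at st_off = extraspace - 8, and st_off decreases by one word
  // per argument (by two words for the T_LONG/T_DOUBLE cases below).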
2446 
2447   // Now write the args into the outgoing interpreter space.
2448   for (int i = 0; i < total_args_passed; i++) {
2449     VMReg r_1 = regs[i].first();
2450     VMReg r_2 = regs[i].second();
2451     if (!r_1->is_valid()) {
2452       assert(!r_2->is_valid(), "");
2453       continue;
2454     }
2455     if (r_1->is_stack()) {
2456       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2457       // We must account for it here.
2458       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2459 
2460       if (!r_2->is_valid()) {
2461         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2462       } else {
2463         // longs are given 2 64-bit slots in the interpreter,
2464         // but the data is passed in only 1 slot.
2465         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2466 #ifdef ASSERT
2467           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2468 #endif
2469           st_off -= wordSize;
2470         }
2471         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2472       }
2473     } else {
2474       if (r_1->is_Register()) {
2475         if (!r_2->is_valid()) {
2476           __ z_st(r_1->as_Register(), st_off, Z_SP);
2477         } else {
2478           // longs are given 2 64-bit slots in the interpreter, but the
2479           // data is passed in only 1 slot.
2480           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2481 #ifdef ASSERT
2482             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2483 #endif
2484             st_off -= wordSize;
2485           }
2486           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2487         }
2488       } else {
2489         assert(r_1->is_FloatRegister(), "");
2490         if (!r_2->is_valid()) {
2491           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2492         } else {
2493           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2494           // data is passed in only 1 slot.
2495           // One of these should get known junk...
2496 #ifdef ASSERT
2497           __ z_lzdr(Z_F1);
2498           __ z_std(Z_F1, st_off, Z_SP);
2499 #endif
2500           st_off -= wordSize;
2501           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2502         }
2503       }
2504     }
2505     st_off -= wordSize;
2506   }
2507 
2508 
2509   // Jump to the interpreter just as if the interpreter were doing it.
2510   __ add2reg(Z_esp, st_off, Z_SP);
2511 
2512   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2513   __ z_br(ientry);
2514 
2515 
2516   // Prevent illegal entry to out-of-line code.
2517   __ z_illtrap(0x22);
2518 
2519   // Generate out-of-line runtime call to patch caller,
2520   // then continue as interpreted.
2521 
2522   // If you lose the race you go interpreted.
2523   // We don't see any possible endless c2i -> i2c -> c2i ...
2524   // transitions, no matter how rare.
2525   __ bind(patch_callsite);
2526 
2527   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2528   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2529   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2530   __ z_bru(skip_fixup);
2531 
2532   // end of out-of-line code
2533 
2534   return c2i_entrypoint;
2535 }
2536 
2537 // On entry, the following registers are set
2538 //
2539 //    Z_thread  r8  - JavaThread*
2540 //    Z_method  r9  - callee's method (method to be invoked)
2541 //    Z_esp     r7  - operand (or expression) stack pointer of caller; one slot above the last arg.
2542 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2543 //
2544 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2545                                     int total_args_passed,
2546                                     int comp_args_on_stack,
2547                                     const BasicType *sig_bt,
2548                                     const VMRegPair *regs) {
2549   const Register value = Z_R12;
2550   const Register ld_ptr= Z_esp;
2551 
2552   int ld_offset = total_args_passed * wordSize;
2553 
2554   // Cut-out for having no stack args.
2555   if (comp_args_on_stack) {
2556     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2557     // registers are below. By subtracting stack0, we either get a negative
2558     // number (all values in registers) or the maximum stack slot accessed.
2559     // Convert VMRegImpl (4 byte) stack slots to words.
2560     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2561     // Round up to minimum stack alignment, in wordSize units.
2562     comp_words_on_stack = align_up(comp_words_on_stack, 2);
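    // Example (illustrative only): comp_args_on_stack == 3 slots -> 3 * 4 = 12 bytes,
    // rounded up to 16 bytes = 2 words, already a multiple of 2, so the frame is
    // extended by 16 bytes below.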
2563 
2564     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2565   }
2566 
2567   // Now generate the shuffle code. Pick up all register args and move the
2568   // rest through register value=Z_R12.
2569   for (int i = 0; i < total_args_passed; i++) {
2570     if (sig_bt[i] == T_VOID) {
2571       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2572       continue;
2573     }
2574 
2575     // Pick up 0, 1 or 2 words from ld_ptr.
2576     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2577            "scrambled load targets?");
2578     VMReg r_1 = regs[i].first();
2579     VMReg r_2 = regs[i].second();
2580     if (!r_1->is_valid()) {
2581       assert(!r_2->is_valid(), "");
2582       continue;
2583     }
2584     if (r_1->is_FloatRegister()) {
2585       if (!r_2->is_valid()) {
2586         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2587         ld_offset -= wordSize;
2588       } else {
2589         // Skip the unused interpreter slot.
2590         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2591         ld_offset -= 2 * wordSize;
2592       }
2593     } else {
2594       if (r_1->is_stack()) {
2595         // Must do a memory to memory move.
2596         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2597 
2598         if (!r_2->is_valid()) {
2599           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2600         } else {
2601           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2602           // data is passed in only 1 slot.
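               // As in the float case above, the data lives in the slot at the
               // smaller offset, so step past the unused slot first.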
2603           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2604             ld_offset -= wordSize;
2605           }
2606           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2607         }
2608       } else {
2609         if (!r_2->is_valid()) {
2610           // Not sure we need to do this but it shouldn't hurt.
2611           if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
2612             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2613           } else {
2614             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2615           }
2616         } else {
2617           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2618           // data is passed in only 1 slot.
2619           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2620             ld_offset -= wordSize;
2621           }
2622           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2623         }
2624       }
2625       ld_offset -= wordSize;
2626     }
2627   }
2628 
2629   // Jump to the compiled code just as if compiled code was doing it.
2630   // load target address from method oop:
2631   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2632 
2633   // Store method oop into thread->callee_target.
2634   // 6243940: We might end up in handle_wrong_method if
2635   // the callee is deoptimized as we race through here. If that
2636   // happens, we don't want to take a safepoint, because the
2637   // caller frame will look interpreted and the arguments are now
2638   // "compiled", so it is much better to make this transition
2639   // invisible to the stack walking code. Unfortunately, if
2640   // we try to find the callee by normal means, a safepoint
2641   // is possible. So we stash the desired callee in the thread
2642   // and the VM will find it there should this case occur.
2643   __ z_stg(Z_method, thread_(callee_target));
2644 
2645   __ z_br(Z_R1_scratch);
2646 }
2647 
2648 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2649                                                             int total_args_passed,
2650                                                             int comp_args_on_stack,
2651                                                             const BasicType *sig_bt,
2652                                                             const VMRegPair *regs,
2653                                                             AdapterFingerPrint* fingerprint) {
2654   __ align(CodeEntryAlignment);
2655   address i2c_entry = __ pc();
2656   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2657 
2658   address c2i_unverified_entry;
2659 
2660   Label skip_fixup;
2661   {
2662     Label ic_miss;
2663     const int klass_offset           = oopDesc::klass_offset_in_bytes();
2664     const int holder_klass_offset    = CompiledICHolder::holder_klass_offset();
2665     const int holder_metadata_offset = CompiledICHolder::holder_metadata_offset();
2666 
2667     // Out-of-line call to ic_miss handler.
2668     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2669 
2670     // Unverified Entry Point (UEP)
2671     __ align(CodeEntryAlignment);
2672     c2i_unverified_entry = __ pc();
2673 
2674     // Check the pointers.
2675     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2676       __ z_ltgr(Z_ARG1, Z_ARG1);
2677       __ z_bre(ic_miss);
2678     }
2679     __ verify_oop(Z_ARG1);
2680 
2681     // Check ic: object class <-> cached class
2682     // Compress cached class for comparison. That's more efficient.
2683     if (UseCompressedClassPointers) {
2684       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2685       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2686     } else {
2687       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2688     }
2689     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2690 
2691     // This def MUST MATCH code in gen_c2i_adapter!
2692     const Register code = Z_R11;
2693 
2694     __ z_lg(Z_method, holder_metadata_offset, Z_method);
2695     __ load_and_test_long(Z_R0, method_(code));
2696     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2697 
2698     // Fall through to the VEP. Duplicates the LTG, but saves a taken branch.
2699   }
2700 
2701   address c2i_entry;
2702   c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2703 
2704   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
2705 }
2706 
2707 // This function returns the adjustment size (in number of words) to a c2i adapter
2708 // activation for use during deoptimization.
2709 //
2710 // Actually only compiled frames need to be adjusted, but it
2711 // does no harm to adjust entry and interpreter frames, too.
2712 //
2713 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2714   assert(callee_locals >= callee_parameters,
2715           "test and remove; got more parms than locals");
2716   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
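       // Example (sizes in words): with 2 parameters, 5 locals, and one word
       // per stack element, the caller's frame is extended by 3 words plus
       // the parent ABI area.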
2717   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2718          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2719 }
2720 
2721 uint SharedRuntime::out_preserve_stack_slots() {
2722   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2723 }
2724 
2725 //
2726 // Frame generation for deopt and uncommon trap blobs.
2727 //
2728 static void push_skeleton_frame(MacroAssembler* masm,
2729                           /* Unchanged */
2730                           Register frame_sizes_reg,
2731                           Register pcs_reg,
2732                           /* Invalidate */
2733                           Register frame_size_reg,
2734                           Register pc_reg) {
2735   BLOCK_COMMENT("  push_skeleton_frame {");
2736    __ z_lg(pc_reg, 0, pcs_reg);
2737    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2738    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2739    Register fp = pc_reg;
2740    __ push_frame(frame_size_reg, fp);
2741 #ifdef ASSERT
2742    // The magic is required for successfully walking skeletal frames.
2743    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2744    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2745    // Fill other slots that are supposedly not necessary with eye catchers.
2746    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2747    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2748    // The sender_sp of the bottom frame is set before pushing it.
2749    // The sender_sp of non-bottom frames is their caller's top_frame_sp, which
2750    // is unknown here. Luckily it is not needed before the frame gets filled in
2751    // by layout_activation(); we assert this by setting an eye catcher (see
2752    // comments on sender_sp in frame_s390.hpp).
2753    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2754 #endif // ASSERT
2755   BLOCK_COMMENT("  } push_skeleton_frame");
2756 }
2757 
2758 // Loop through the UnrollBlock info and create new frames.
2759 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2760                             /* read */
2761                             Register unroll_block_reg,
2762                             /* invalidate */
2763                             Register frame_sizes_reg,
2764                             Register number_of_frames_reg,
2765                             Register pcs_reg,
2766                             Register tmp1,
2767                             Register tmp2) {
2768   BLOCK_COMMENT("push_skeleton_frames {");
2769   // _number_of_frames is of type int (deoptimization.hpp).
2770   __ z_lgf(number_of_frames_reg,
2771            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2772   __ z_lg(pcs_reg,
2773           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2774   __ z_lg(frame_sizes_reg,
2775           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2776 
2777   // stack: (caller_of_deoptee, ...).
2778 
2779   // If caller_of_deoptee is a compiled frame, then we extend it to make
2780   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2781   // See also Deoptimization::last_frame_adjust() above.
2782   // Note: entry and interpreted frames are adjusted, too. But this does no harm.
2783 
2784   __ z_lgf(Z_R1_scratch,
2785            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2786   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2787   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2788   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2789   // (it is required to find the original pc of caller_of_deoptee if it is marked
2790   // for deoptimization - see nmethod::orig_pc_addr()).
2791   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2792 
2793   // Now push the new interpreter frames.
2794   Label loop, loop_entry;
2795 
2796   // Make sure that there is at least one entry in the array.
2797   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2798   __ asm_assert_ne("array_size must be > 0", 0x205);
2799 
2800   __ z_bru(loop_entry);
2801 
2802   __ bind(loop);
2803 
2804   __ add2reg(frame_sizes_reg, wordSize);
2805   __ add2reg(pcs_reg, wordSize);
2806 
2807   __ bind(loop_entry);
2808 
2809   // Allocate a new frame, fill in the pc.
2810   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2811 
2812   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2813   __ z_brne(loop);
2814 
2815   // Set the top frame's return pc.
2816   __ add2reg(pcs_reg, wordSize);
2817   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2818   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2819   BLOCK_COMMENT("} push_skeleton_frames");
2820 }
2821 
2822 //------------------------------generate_deopt_blob----------------------------
2823 void SharedRuntime::generate_deopt_blob() {
2824   // Allocate space for the code.
2825   ResourceMark rm;
2826   // Setup code generation tools.
2827   CodeBuffer buffer("deopt_blob", 2048, 1024);
2828   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2829   Label exec_mode_initialized;
2830   OopMap* map = NULL;
2831   OopMapSet *oop_maps = new OopMapSet();
2832 
2833   unsigned int start_off = __ offset();
2834   Label cont;
2835 
2836   // --------------------------------------------------------------------------
2837   // Normal entry (non-exception case)
2838   //
2839   // We have been called from the deopt handler of the deoptee.
2840   // Z_R14 points behind the call in the deopt handler. We adjust
2841   // it such that it points to the start of the deopt handler.
2842   // The return_pc has been stored in the frame of the deoptee and
2843   // will replace the address of the deopt_handler in the call
2844   // to Deoptimization::fetch_unroll_info below.
2845   // The (int) cast is necessary because negating an unsigned value,
2846   // e.g. -((unsigned int)14), yields an unsigned int again.
2847   __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
2848 
2849   const Register   exec_mode_reg = Z_tmp_1;
2850 
2851   // stack: (deoptee, caller of deoptee, ...)
2852 
2853   // Push an "unpack" frame.
2854   // R14 contains the return address pointing into the deoptimized
2855   // nmethod that was valid just before the nmethod was deoptimized.
2856   // Save R14 into the deoptee frame. The `fetch_unroll_info'
2857   // procedure called below will read it from there.
2858   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2859 
2860   // note the entry point.
2861   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2862   __ z_bru(exec_mode_initialized);
2863 
2864 #ifndef COMPILER1
2865   int reexecute_offset = 1; // An odd offset will produce an odd pc, which triggers a hardware trap.
2866 #else
2867   // --------------------------------------------------------------------------
2868   // Reexecute entry
2869   // - Z_R14 = Deopt Handler in nmethod
2870 
2871   int reexecute_offset = __ offset() - start_off;
2872 
2873   // No need to update the map, as each call to save_live_registers will produce an identical oopmap.
2874   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2875 
2876   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2877   __ z_bru(exec_mode_initialized);
2878 #endif
2879 
2880 
2881   // --------------------------------------------------------------------------
2882   // Exception entry. We reached here via a branch. Registers on entry:
2883   // - Z_EXC_OOP (Z_ARG1) = exception oop
2884   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2885 
2886   int exception_offset = __ offset() - start_off;
2887 
2888   // All registers are dead at this entry point, except for Z_EXC_OOP and
2889   // Z_EXC_PC, which contain the exception oop and exception pc,
2890   // respectively. Set them in TLS and fall through to the
2891   // unpack_with_exception_in_tls entry point.
2892 
2893   // Store exception oop and pc in thread (location known to GC).
2894   // Need this since the call to "fetch_unroll_info()" may safepoint.
2895   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2896   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2897 
2898   // fall through
2899 
2900   int exception_in_tls_offset = __ offset() - start_off;
2901 
2902   // New implementation: the exception oop is now passed in JavaThread.
2903 
2904   // Prolog for the exception case.
2905   // All registers must be preserved because they might be used by LinearScan.
2906   // Exception oop and throwing PC are passed in JavaThread.
2907 
2908   // Load the throwing pc from JavaThread and use it as the return address of the current frame.
2909   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2910 
2911   // Save everything in sight.
2912   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2913 
2914   // Now it is safe to overwrite any register
2915 
2916   // Clear the exception pc field in JavaThread
2917   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2918 
2919   // Deopt during an exception.  Save exec mode for unpack_frames.
2920   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2921 
2922 
2923 #ifdef ASSERT
2924   // verify that there is really an exception oop in JavaThread
2925   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2926   __ verify_oop(Z_ARG1);
2927 
2928   // verify that there is no pending exception
2929   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2930                              "must not have pending exception here", __LINE__);
2931 #endif
2932 
2933   // --------------------------------------------------------------------------
2934   // At this point, the live registers are saved and
2935   // the exec_mode_reg has been set up correctly.
2936   __ bind(exec_mode_initialized);
2937 
2938   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2939 
2940   {
2941   const Register unroll_block_reg  = Z_tmp_2;
2942 
2943   // We need to set `last_Java_frame' because `fetch_unroll_info' will
2944   // call `last_Java_frame()'. However, we can't block and no GC will
2945   // occur, so we don't need an oopmap. The value of the pc in the
2946   // frame is not particularly important. It just needs to identify the blob.
2947 
2948   // Don't set last_Java_pc here anymore (it is implicitly NULL then).
2949   // The correct PC is retrieved in pd_last_frame() in that case.
2950   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
2951   // With EscapeAnalysis turned on, this call may safepoint,
2952   // despite being marked as a "leaf call"!
2953   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
2954   // Set an oopmap for the call site. This describes all our saved volatile registers.
2955   int offs = __ offset();
2956   oop_maps->add_gc_map(offs, map);
2957 
2958   __ reset_last_Java_frame();
2959   // save the return value.
2960   __ z_lgr(unroll_block_reg, Z_RET);
2961   // restore the return registers that have been saved
2962   // (among other registers) by save_live_registers(...).
2963   RegisterSaver::restore_result_registers(masm);
2964 
2965   // reload the exec mode from the UnrollBlock (it might have changed)
2966   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2967 
2968   // In excp_deopt_mode, restore and clear exception oop which we
2969   // stored in the thread during exception entry above. The exception
2970   // oop will be the return value of this stub.
2971   NearLabel skip_restore_excp;
2972   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
2973   __ z_lg(Z_RET, thread_(exception_oop));
2974   __ clear_mem(thread_(exception_oop), 8);
2975   __ bind(skip_restore_excp);
2976 
2977   // remove the "unpack" frame
2978   __ pop_frame();
2979 
2980   // stack: (deoptee, caller of deoptee, ...).
2981 
2982   // pop the deoptee's frame
2983   __ pop_frame();
2984 
2985   // stack: (caller_of_deoptee, ...).
2986 
2987   // loop through the `UnrollBlock' info and create interpreter frames.
2988   push_skeleton_frames(masm, true/*deopt*/,
2989                   unroll_block_reg,
2990                   Z_tmp_3,
2991                   Z_tmp_4,
2992                   Z_ARG5,
2993                   Z_ARG4,
2994                   Z_ARG3);
2995 
2996   // stack: (skeletal interpreter frame, ..., optional skeletal
2997   // interpreter frame, caller of deoptee, ...).
2998   }
2999 
3000   // push an "unpack" frame taking care of float / int return values.
3001   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
3002 
3003   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3004   // skeletal interpreter frame, caller of deoptee, ...).
3005 
3006   // spill live volatile registers since we'll do a call.
3007   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
3008   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
3009 
3010   // Let the unpacker lay out the information in the skeletal frames just allocated.
3011   __ get_PC(Z_RET);
3012   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
3013   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
3014                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
3015 
3016   __ reset_last_Java_frame();
3017 
3018   // restore the volatiles saved above.
3019   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
3020   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
3021 
3022   // pop the "unpack" frame.
3023   __ pop_frame();
3024   __ restore_return_pc();
3025 
3026   // stack: (top interpreter frame, ..., optional interpreter frame,
3027   // caller of deoptee, ...).
3028 
3029   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3030   __ restore_bcp();
3031   __ restore_locals();
3032   __ restore_esp();
3033 
3034   // return to the interpreter entry point.
3035   __ z_br(Z_R14);
3036 
3037   // Make sure all code is generated
3038   masm->flush();
3039 
3040   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3041   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3042 }
3043 
3044 
3045 #ifdef COMPILER2
3046 //------------------------------generate_uncommon_trap_blob--------------------
3047 void SharedRuntime::generate_uncommon_trap_blob() {
3048   // Allocate space for the code
3049   ResourceMark rm;
3050   // Setup code generation tools
3051   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3052   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3053 
3054   Register unroll_block_reg = Z_tmp_1;
3055   Register klass_index_reg  = Z_ARG2;
3056   Register unc_trap_reg     = Z_ARG2;
3057 
3058   // stack: (deoptee, caller_of_deoptee, ...).
3059 
3060   // push a dummy "unpack" frame and call
3061   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3062   // vframe array and return the `UnrollBlock' information.
3063 
3064   // save R14 to compiled frame.
3065   __ save_return_pc();
3066   // push the "unpack_frame".
3067   __ push_frame_abi160(0);
3068 
3069   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
3070 
3071   // set the "unpack" frame as last_Java_frame.
3072   // `Deoptimization::uncommon_trap' expects it and considers its
3073   // sender frame as the deoptee frame.
3074   __ get_PC(Z_R1_scratch);
3075   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3076 
3077   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
3078   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
3079   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
3080   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
3081 
3082   __ reset_last_Java_frame();
3083 
3084   // pop the "unpack" frame
3085   __ pop_frame();
3086 
3087   // stack: (deoptee, caller_of_deoptee, ...).
3088 
3089   // save the return value.
3090   __ z_lgr(unroll_block_reg, Z_RET);
3091 
3092   // pop the deoptee frame.
3093   __ pop_frame();
3094 
3095   // stack: (caller_of_deoptee, ...).
3096 
3097 #ifdef ASSERT
3098   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
3099   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
3100   const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
3101 #ifndef VM_LITTLE_ENDIAN
3102   + 3
3103 #endif
3104   ;
3105   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
3106     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3107   } else {
3108     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3109   }
3110   __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
3111 #endif
3112 
3113   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
3114 
3115   // Allocate new interpreter frame(s) and possibly resize the caller's frame
3116   // (no more adapters!).
3117   push_skeleton_frames(masm, false/*deopt*/,
3118                   unroll_block_reg,
3119                   Z_tmp_2,
3120                   Z_tmp_3,
3121                   Z_tmp_4,
3122                   Z_ARG5,
3123                   Z_ARG4);
3124 
3125   // stack: (skeletal interpreter frame, ..., optional skeletal
3126   // interpreter frame, (resized) caller of deoptee, ...).
3127 
3128   // Push a dummy "unpack" frame, taking care of float return values, and
3129   // call `Deoptimization::unpack_frames' to lay out the information in the
3130   // interpreter frames just created.
3131 
3132   // push the "unpack" frame
3133   const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
3134 
3135   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3136   // skeletal interpreter frame, (resized) caller of deoptee, ...).
3137 
3138   // set the "unpack" frame as last_Java_frame
3139   __ get_PC(Z_R1_scratch);
3140   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3141 
3142   // indicate it is the uncommon trap case
3143   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
3144   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3145   // Let the unpacker lay out the information in the skeletal frames just allocated.
3146   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
3147 
3148   __ reset_last_Java_frame();
3149   // pop the "unpack" frame
3150   __ pop_frame();
3151   // Restore the return pc (R14) from the top interpreter frame.
3152   __ restore_return_pc();
3153 
3154   // stack: (top interpreter frame, ..., optional interpreter frame,
3155   // (resized) caller of deoptee, ...).
3156 
3157   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3158   __ restore_bcp();
3159   __ restore_locals();
3160   __ restore_esp();
3161 
3162   // return to the interpreter entry point
3163   __ z_br(Z_R14);
3164 
3165   masm->flush();
3166   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
3167 }
3168 #endif // COMPILER2
3169 
3170 
3171 //------------------------------generate_handler_blob------
3172 //
3173 // Generate a special Compile2Runtime blob that saves all registers
3174 // and sets up the oopmap.
3175 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3176   assert(StubRoutines::forward_exception_entry() != NULL,
3177          "must be generated before");
3178 
3179   ResourceMark rm;
3180   OopMapSet *oop_maps = new OopMapSet();
3181   OopMap* map;
3182 
3183   // Allocate space for the code. Setup code generation tools.
3184   CodeBuffer buffer("handler_blob", 2048, 1024);
3185   MacroAssembler* masm = new MacroAssembler(&buffer);
3186 
3187   unsigned int start_off = __ offset();
3188   address call_pc = NULL;
3189   int frame_size_in_bytes;
3190 
3191   bool cause_return = (poll_type == POLL_AT_RETURN);
3192   // Make room for return address (or push it again)
3193   if (!cause_return) {
3194     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
3195   }
3196 
3197   // Save registers, fpu state, and flags
3198   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3199 
3200   if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
3201     // Keep a copy of the return pc to detect if it gets modified.
3202     __ z_lgr(Z_R6, Z_R14);
3203   }
3204 
3205   // The following is basically a call_VM. However, we need the precise
3206   // address of the call in order to generate an oopmap. Hence, we do all the
3207   // work ourselves.
3208   __ set_last_Java_frame(Z_SP, noreg);
3209 
3210   // call into the runtime to handle the safepoint poll
3211   __ call_VM_leaf(call_ptr, Z_thread);
3212 
3213 
3214   // Set an oopmap for the call site. This oopmap will map all
3215   // oop-registers and debug-info registers as callee-saved. This
3216   // will allow deoptimization at this safepoint to find all possible
3217   // debug-info recordings, as well as let GC find all oops.
3218 
3219   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
3220 
3221   Label noException;
3222 
3223   __ reset_last_Java_frame();
3224 
3225   __ load_and_test_long(Z_R1, thread_(pending_exception));
3226   __ z_bre(noException);
3227 
3228   // Pending exception case, used (sporadically) by
3229   // api/java_lang/Thread.State/index#ThreadState et al.
3230   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3231 
3232   // Jump to forward_exception_entry, with the issuing PC in Z_R14
3233   // so it looks like the original nmethod called forward_exception_entry.
3234   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3235   __ z_br(Z_R1_scratch);
3236 
3237   // No exception case
3238   __ bind(noException);
3239 
3240   if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
3241     Label no_adjust;
3242     // If our stashed return pc was modified by the runtime, we avoid touching it.
3243     const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
3244     __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
3245     __ z_brne(no_adjust);
3246 
3247     // Adjust return pc forward to step over the safepoint poll instruction
3248     __ instr_size(Z_R1_scratch, Z_R6);
3249     __ z_agr(Z_R6, Z_R1_scratch);
3250     __ z_stg(Z_R6, offset_of_return_pc, Z_SP);
3251 
3252     __ bind(no_adjust);
3253   }
3254 
3255   // Normal exit, restore registers and exit.
3256   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3257 
3258   __ z_br(Z_R14);
3259 
3260   // Make sure all code is generated
3261   masm->flush();
3262 
3263   // Fill-out other meta info
3264   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3265 }
3266 
3267 
3268 //
3269 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3270 //
3271 // Generate a stub that calls into the VM to find out the proper destination
3272 // of a Java call. All the argument registers are live at this point,
3273 // but since this is generic code we don't know what they are and the caller
3274 // must do any GC of the args.
3275 //
3276 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3277   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3278 
3279   // allocate space for the code
3280   ResourceMark rm;
3281 
3282   CodeBuffer buffer(name, 1000, 512);
3283   MacroAssembler* masm                = new MacroAssembler(&buffer);
3284 
3285   OopMapSet *oop_maps = new OopMapSet();
3286   OopMap* map = NULL;
3287 
3288   unsigned int start_off = __ offset();
3289 
3290   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3291 
3292   // We must save a PC from within the stub as the return PC.
3293   // C code doesn't store the return pc (R14) where we expect the PC,
3294   // so we would run into trouble upon stack walking.
3295   __ get_PC(Z_R1_scratch);
3296 
3297   unsigned int frame_complete = __ offset();
3298 
3299   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
3300 
3301   __ call_VM_leaf(destination, Z_thread, Z_method);
3302 
3303 
3304   // Set an oopmap for the call site.
3305   // We need this not only for callee-saved registers, but also for volatile
3306   // registers that the compiler might be keeping live across a safepoint.
3307 
3308   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
3309 
3310   // clear last_Java_sp
3311   __ reset_last_Java_frame();
3312 
3313   // check for pending exceptions
3314   Label pending;
3315   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3316   __ z_brne(pending);
3317 
3318   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3319   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3320 
3321   // get the returned method
3322   __ get_vm_result_2(Z_method);
3323 
3324   // We are back to the original state on entry and ready to go.
3325   __ z_br(Z_R1_scratch);
3326 
3327   // Pending exception after the safepoint
3328 
3329   __ bind(pending);
3330 
3331   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3332 
3333   // exception pending => remove activation and forward to exception handler
3334 
3335   __ z_lgr(Z_R2, Z_R0); // pending_exception
3336   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3337   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3338   __ z_br(Z_R1_scratch);
3339 
3340   // -------------
3341   // make sure all code is generated
3342   masm->flush();
3343 
3344   // Return the blob.
3345   // Note: the frame size is given in words.
3346   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3347                                        oop_maps, true);
3348 
3349 }
3350 
3351 //------------------------------Montgomery multiplication------------------------
3352 //
3353 
3354 // Subtract 0:b from carry:a. Return carry.
3355 static unsigned long
3356 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3357   unsigned long i, c = 8 * (unsigned long)(len - 1);
3358   __asm__ __volatile__ (
3359     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3360     "LGHI   0, 8               \n" // index increment (for BRXLG)
3361     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3362     "0:                        \n"
3363     "LG     %[c], 0(%[i],%[a]) \n"
3364     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3365     "STG    %[c], 0(%[i],%[a]) \n"
3366     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3367     "SLBGR  %[c], %[c]         \n" // save carry - 1
3368     : [i]"=&a"(i), [c]"+r"(c)
3369     : [a]"a"(a), [b]"a"(b)
3370     : "cc", "memory", "r0", "r1"
3371  );
3372   return carry + c;
3373 }
3374 
3375 // Multiply (unsigned) Long A by Long B, accumulating the double-
3376 // length result into the accumulator formed of T0, T1, and T2.
3377 inline void MACC(unsigned long A[], long A_ind,
3378                  unsigned long B[], long B_ind,
3379                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3380   long A_si = 8 * A_ind,
3381        B_si = 8 * B_ind;
3382   __asm__ __volatile__ (
3383     "LG     1, 0(%[A_si],%[A]) \n"
3384     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3385     "ALGR   %[T0], 1           \n"
3386     "LGHI   1, 0               \n" // r1 = 0
3387     "ALCGR  %[T1], 0           \n"
3388     "ALCGR  %[T2], 1           \n"
3389     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3390     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3391     : "cc", "r0", "r1"
3392  );
3393 }
3394 
3395 // As above, but add twice the double-length result into the
3396 // accumulator.
3397 inline void MACC2(unsigned long A[], long A_ind,
3398                   unsigned long B[], long B_ind,
3399                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3400   const unsigned long zero = 0;
3401   long A_si = 8 * A_ind,
3402        B_si = 8 * B_ind;
3403   __asm__ __volatile__ (
3404     "LG     1, 0(%[A_si],%[A]) \n"
3405     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3406     "ALGR   %[T0], 1           \n"
3407     "ALCGR  %[T1], 0           \n"
3408     "ALCGR  %[T2], %[zero]     \n"
3409     "ALGR   %[T0], 1           \n"
3410     "ALCGR  %[T1], 0           \n"
3411     "ALCGR  %[T2], %[zero]     \n"
3412     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3413     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3414     : "cc", "r0", "r1"
3415  );
3416 }
3417 
3418 // Fast Montgomery multiplication. The derivation of the algorithm is
3419 // in "A Cryptographic Library for the Motorola DSP56000,
3420 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3421 static void
3422 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3423                     unsigned long m[], unsigned long inv, int len) {
3424   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3425   int i;
3426 
3427   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3428 
3429   for (i = 0; i < len; i++) {
3430     int j;
3431     for (j = 0; j < i; j++) {
3432       MACC(a, j, b, i-j, t0, t1, t2);
3433       MACC(m, j, n, i-j, t0, t1, t2);
3434     }
3435     MACC(a, i, b, 0, t0, t1, t2);
3436     m[i] = t0 * inv;
3437     MACC(m, i, n, 0, t0, t1, t2);
3438 
3439     assert(t0 == 0, "broken Montgomery multiply");
3440 
3441     t0 = t1; t1 = t2; t2 = 0;
3442   }
3443 
3444   for (i = len; i < 2 * len; i++) {
3445     int j;
3446     for (j = i - len + 1; j < len; j++) {
3447       MACC(a, j, b, i-j, t0, t1, t2);
3448       MACC(m, j, n, i-j, t0, t1, t2);
3449     }
3450     m[i-len] = t0;
3451     t0 = t1; t1 = t2; t2 = 0;
3452   }
3453 
3454   while (t0) {
3455     t0 = sub(m, n, t0, len);
3456   }
3457 }
3458 
3459 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3460 // multiplies so it should be up to 25% faster than Montgomery
3461 // multiplication. However, its loop control is more complex and it
3462 // may actually run slower on some machines.
3463 static void
3464 montgomery_square(unsigned long a[], unsigned long n[],
3465                   unsigned long m[], unsigned long inv, int len) {
3466   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3467   int i;
3468 
3469   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3470 
3471   for (i = 0; i < len; i++) {
3472     int j;
3473     int end = (i+1)/2;
3474     for (j = 0; j < end; j++) {
3475       MACC2(a, j, a, i-j, t0, t1, t2);
3476       MACC(m, j, n, i-j, t0, t1, t2);
3477     }
3478     if ((i & 1) == 0) {
3479       MACC(a, j, a, j, t0, t1, t2);
3480     }
3481     for (; j < i; j++) {
3482       MACC(m, j, n, i-j, t0, t1, t2);
3483     }
3484     m[i] = t0 * inv;
3485     MACC(m, i, n, 0, t0, t1, t2);
3486 
3487     assert(t0 == 0, "broken Montgomery square");
3488 
3489     t0 = t1; t1 = t2; t2 = 0;
3490   }
3491 
3492   for (i = len; i < 2*len; i++) {
3493     int start = i-len+1;
3494     int end = start + (len - start)/2;
3495     int j;
3496     for (j = start; j < end; j++) {
3497       MACC2(a, j, a, i-j, t0, t1, t2);
3498       MACC(m, j, n, i-j, t0, t1, t2);
3499     }
3500     if ((i & 1) == 0) {
3501       MACC(a, j, a, j, t0, t1, t2);
3502     }
3503     for (; j < len; j++) {
3504       MACC(m, j, n, i-j, t0, t1, t2);
3505     }
3506     m[i-len] = t0;
3507     t0 = t1; t1 = t2; t2 = 0;
3508   }
3509 
3510   while (t0) {
3511     t0 = sub(m, n, t0, len);
3512   }
3513 }
3514 
3515 // The threshold at which squaring is advantageous was determined
3516 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3517 // Value seems to be ok for other platforms, too.
3518 #define MONTGOMERY_SQUARING_THRESHOLD 64
3519 
3520 // Copy len longwords from s to d, word-swapping as we go. The
3521 // destination array is reversed.
3522 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3523   d += len;
3524   while(len-- > 0) {
3525     d--;
3526     unsigned long s_val = *s;
3527     // Swap words in a longword on little endian machines.
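         // (s390 is big-endian, so no swapping is actually needed here.)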
3528 #ifdef VM_LITTLE_ENDIAN
3529      Unimplemented();
3530 #endif
3531     *d = s_val;
3532     s++;
3533   }
3534 }
3535 
3536 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3537                                         jint len, jlong inv,
3538                                         jint *m_ints) {
3539   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3540   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3541   int longwords = len/2;
3542 
3543   // Make very sure we don't use so much space that the stack might
3544   // overflow. 512 jints correspond to a 16384-bit integer and
3545   // will use a total of 8K bytes of stack space here.
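       // (512 jints = 256 longwords; 4 arrays * 256 longwords * 8 bytes = 8192 bytes.)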
3546   int total_allocation = longwords * sizeof (unsigned long) * 4;
3547   guarantee(total_allocation <= 8192, "must be");
3548   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3549 
3550   // Local scratch arrays
3551   unsigned long
3552     *a = scratch + 0 * longwords,
3553     *b = scratch + 1 * longwords,
3554     *n = scratch + 2 * longwords,
3555     *m = scratch + 3 * longwords;
3556 
3557   reverse_words((unsigned long *)a_ints, a, longwords);
3558   reverse_words((unsigned long *)b_ints, b, longwords);
3559   reverse_words((unsigned long *)n_ints, n, longwords);
3560 
3561   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3562 
3563   reverse_words(m, (unsigned long *)m_ints, longwords);
3564 }
3565 
3566 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3567                                       jint len, jlong inv,
3568                                       jint *m_ints) {
3569   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3570   assert(len % 2 == 0, "array length in montgomery_square must be even");
3571   int longwords = len/2;
3572 
3573   // Make very sure we don't use so much space that the stack might
3574   // overflow. 512 jints correspond to a 16384-bit integer and
3575   // will use a total of 6K bytes of stack space here.
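       // (512 jints = 256 longwords; 3 arrays * 256 longwords * 8 bytes = 6144 bytes.)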
3576   int total_allocation = longwords * sizeof (unsigned long) * 3;
3577   guarantee(total_allocation <= 8192, "must be");
3578   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3579 
3580   // Local scratch arrays
3581   unsigned long
3582     *a = scratch + 0 * longwords,
3583     *n = scratch + 1 * longwords,
3584     *m = scratch + 2 * longwords;
3585 
3586   reverse_words((unsigned long *)a_ints, a, longwords);
3587   reverse_words((unsigned long *)n_ints, n, longwords);
3588 
3589   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3590     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3591   } else {
3592     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3593   }
3594 
3595   reverse_words(m, (unsigned long *)m_ints, longwords);
3596 }
3597 
3598 extern "C"
3599 int SpinPause() {
3600   return 0;
3601 }