1 /*
   2  * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "interpreter/interp_masm.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "registerSaver_s390.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "utilities/align.hpp"
  39 #include "vmreg_s390.inline.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_Runtime1.hpp"
  42 #endif
  43 #ifdef COMPILER2
  44 #include "opto/ad.hpp"
  45 #include "opto/runtime.hpp"
  46 #endif
  47 
  48 #ifdef PRODUCT
  49 #define __ masm->
  50 #else
  51 #define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
  52 #endif
  53 
  54 #define BLOCK_COMMENT(str) __ block_comment(str)
  55 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  56 
  57 #define RegisterSaver_LiveIntReg(regname) \
  58   { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
  59 
  60 #define RegisterSaver_LiveFloatReg(regname) \
  61   { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
  62 
  63 // Registers which are not saved/restored, but still have a frame slot.
  64 // Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  65 #define RegisterSaver_ExcludedIntReg(regname) \
  66   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  67 
  68 // Registers which are not saved/restored, but still have a frame slot.
  69 // Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  70 #define RegisterSaver_ExcludedFloatReg(regname) \
  71   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  72 
  73 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  74   // Live registers which get spilled to the stack. Register positions
  75   // in this array correspond directly to the stack layout.
  76   //
  77   // live float registers:
  78   //
  79   RegisterSaver_LiveFloatReg(Z_F0 ),
  80   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  81   RegisterSaver_LiveFloatReg(Z_F2 ),
  82   RegisterSaver_LiveFloatReg(Z_F3 ),
  83   RegisterSaver_LiveFloatReg(Z_F4 ),
  84   RegisterSaver_LiveFloatReg(Z_F5 ),
  85   RegisterSaver_LiveFloatReg(Z_F6 ),
  86   RegisterSaver_LiveFloatReg(Z_F7 ),
  87   RegisterSaver_LiveFloatReg(Z_F8 ),
  88   RegisterSaver_LiveFloatReg(Z_F9 ),
  89   RegisterSaver_LiveFloatReg(Z_F10),
  90   RegisterSaver_LiveFloatReg(Z_F11),
  91   RegisterSaver_LiveFloatReg(Z_F12),
  92   RegisterSaver_LiveFloatReg(Z_F13),
  93   RegisterSaver_LiveFloatReg(Z_F14),
  94   RegisterSaver_LiveFloatReg(Z_F15),
  95   //
  96   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  97   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  98   RegisterSaver_LiveIntReg(Z_R2 ),
  99   RegisterSaver_LiveIntReg(Z_R3 ),
 100   RegisterSaver_LiveIntReg(Z_R4 ),
 101   RegisterSaver_LiveIntReg(Z_R5 ),
 102   RegisterSaver_LiveIntReg(Z_R6 ),
 103   RegisterSaver_LiveIntReg(Z_R7 ),
 104   RegisterSaver_LiveIntReg(Z_R8 ),
 105   RegisterSaver_LiveIntReg(Z_R9 ),
 106   RegisterSaver_LiveIntReg(Z_R10),
 107   RegisterSaver_LiveIntReg(Z_R11),
 108   RegisterSaver_LiveIntReg(Z_R12),
 109   RegisterSaver_LiveIntReg(Z_R13),
 110   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 111   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 112 };
 113 
 114 static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
 115   // Live registers which get spilled to the stack. Register positions
 116   // in this array correspond directly to the stack layout.
 117   //
 118   // live float registers: all excluded, but they still get a stack slot to keep the frame size the same.
 119   //
 120   RegisterSaver_ExcludedFloatReg(Z_F0 ),
 121   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 122   RegisterSaver_ExcludedFloatReg(Z_F2 ),
 123   RegisterSaver_ExcludedFloatReg(Z_F3 ),
 124   RegisterSaver_ExcludedFloatReg(Z_F4 ),
 125   RegisterSaver_ExcludedFloatReg(Z_F5 ),
 126   RegisterSaver_ExcludedFloatReg(Z_F6 ),
 127   RegisterSaver_ExcludedFloatReg(Z_F7 ),
 128   RegisterSaver_ExcludedFloatReg(Z_F8 ),
 129   RegisterSaver_ExcludedFloatReg(Z_F9 ),
 130   RegisterSaver_ExcludedFloatReg(Z_F10),
 131   RegisterSaver_ExcludedFloatReg(Z_F11),
 132   RegisterSaver_ExcludedFloatReg(Z_F12),
 133   RegisterSaver_ExcludedFloatReg(Z_F13),
 134   RegisterSaver_ExcludedFloatReg(Z_F14),
 135   RegisterSaver_ExcludedFloatReg(Z_F15),
 136   //
 137   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 138   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 139   RegisterSaver_LiveIntReg(Z_R2 ),
 140   RegisterSaver_LiveIntReg(Z_R3 ),
 141   RegisterSaver_LiveIntReg(Z_R4 ),
 142   RegisterSaver_LiveIntReg(Z_R5 ),
 143   RegisterSaver_LiveIntReg(Z_R6 ),
 144   RegisterSaver_LiveIntReg(Z_R7 ),
 145   RegisterSaver_LiveIntReg(Z_R8 ),
 146   RegisterSaver_LiveIntReg(Z_R9 ),
 147   RegisterSaver_LiveIntReg(Z_R10),
 148   RegisterSaver_LiveIntReg(Z_R11),
 149   RegisterSaver_LiveIntReg(Z_R12),
 150   RegisterSaver_LiveIntReg(Z_R13),
 151   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 152   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 153 };
 154 
 155 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
 156   // Live registers which get spilled to the stack. Register positions
 157   // in this array correspond directly to the stack layout.
 158   //
 159   // live float registers:
 160   //
 161   RegisterSaver_LiveFloatReg(Z_F0 ),
 162   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 163   RegisterSaver_LiveFloatReg(Z_F2 ),
 164   RegisterSaver_LiveFloatReg(Z_F3 ),
 165   RegisterSaver_LiveFloatReg(Z_F4 ),
 166   RegisterSaver_LiveFloatReg(Z_F5 ),
 167   RegisterSaver_LiveFloatReg(Z_F6 ),
 168   RegisterSaver_LiveFloatReg(Z_F7 ),
 169   RegisterSaver_LiveFloatReg(Z_F8 ),
 170   RegisterSaver_LiveFloatReg(Z_F9 ),
 171   RegisterSaver_LiveFloatReg(Z_F10),
 172   RegisterSaver_LiveFloatReg(Z_F11),
 173   RegisterSaver_LiveFloatReg(Z_F12),
 174   RegisterSaver_LiveFloatReg(Z_F13),
 175   RegisterSaver_LiveFloatReg(Z_F14),
 176   RegisterSaver_LiveFloatReg(Z_F15),
 177   //
 178   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 179   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 180   RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
 181   RegisterSaver_LiveIntReg(Z_R3 ),
 182   RegisterSaver_LiveIntReg(Z_R4 ),
 183   RegisterSaver_LiveIntReg(Z_R5 ),
 184   RegisterSaver_LiveIntReg(Z_R6 ),
 185   RegisterSaver_LiveIntReg(Z_R7 ),
 186   RegisterSaver_LiveIntReg(Z_R8 ),
 187   RegisterSaver_LiveIntReg(Z_R9 ),
 188   RegisterSaver_LiveIntReg(Z_R10),
 189   RegisterSaver_LiveIntReg(Z_R11),
 190   RegisterSaver_LiveIntReg(Z_R12),
 191   RegisterSaver_LiveIntReg(Z_R13),
 192   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 193   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 194 };
 195 
 196 // Live argument registers which get spilled to the stack.
 197 static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
 198   RegisterSaver_LiveFloatReg(Z_FARG1),
 199   RegisterSaver_LiveFloatReg(Z_FARG2),
 200   RegisterSaver_LiveFloatReg(Z_FARG3),
 201   RegisterSaver_LiveFloatReg(Z_FARG4),
 202   RegisterSaver_LiveIntReg(Z_ARG1),
 203   RegisterSaver_LiveIntReg(Z_ARG2),
 204   RegisterSaver_LiveIntReg(Z_ARG3),
 205   RegisterSaver_LiveIntReg(Z_ARG4),
 206   RegisterSaver_LiveIntReg(Z_ARG5)
 207 };
 208 
 209 static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
 210   // Live registers which get spilled to the stack. Register positions
 211   // in this array correspond directly to the stack layout.
 212   //
 213   // live float registers:
 214   //
 215   RegisterSaver_LiveFloatReg(Z_F0 ),
 216   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 217   RegisterSaver_LiveFloatReg(Z_F2 ),
 218   RegisterSaver_LiveFloatReg(Z_F3 ),
 219   RegisterSaver_LiveFloatReg(Z_F4 ),
 220   RegisterSaver_LiveFloatReg(Z_F5 ),
 221   RegisterSaver_LiveFloatReg(Z_F6 ),
 222   RegisterSaver_LiveFloatReg(Z_F7 ),
 223   // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
 224   // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
 225   // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
 226   // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
 227   // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
 228   // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
 229   // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
 230   // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
 231   //
 232   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 233   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 234   RegisterSaver_LiveIntReg(Z_R2 ),
 235   RegisterSaver_LiveIntReg(Z_R3 ),
 236   RegisterSaver_LiveIntReg(Z_R4 ),
 237   RegisterSaver_LiveIntReg(Z_R5 ),
 238   // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
 239   // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
 240   // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
 241   // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
 242   // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
 243   // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
 244   // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
 245   // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
 246   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 247   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 248 };
 249 
 250 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
 251   int reg_space = -1;
 252   switch (reg_set) {
 253     case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
 254     case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
 255     case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
 256     case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
 257     case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
 258     default: ShouldNotReachHere();
 259   }
 260   return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
 261 }
 262 
 263 
 264 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
 265   return live_reg_save_size(reg_set) + frame::z_abi_160_size;
 266 }
 267 
 268 
 269 // return_pc: Specify the register that should be stored as the return pc in the current frame.
 270 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
 271   // Record volatile registers as callee-save values in an OopMap so
 272   // their save locations will be propagated to the caller frame's
 273   // RegisterMap during StackFrameStream construction (needed for
 274   // deoptimization; see compiledVFrame::create_stack_value).
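       // Resulting frame layout (sketch; offsets are relative to the new Z_SP
       // established by push_frame() below):
       //   [0 .. z_abi_160_size)                      ABI / stack linkage area
       //   [register_save_offset .. frame_size_in_bytes) one reg_size slot per
       //                                              entry of the selected live register array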
 275 
 276   // Calculate frame size.
 277   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 278   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 279   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 280 
 281   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 282   OopMap* map = new OopMap(frame_size_in_slots, 0);
 283 
 284   int regstosave_num = 0;
 285   const RegisterSaver::LiveRegType* live_regs = NULL;
 286 
 287   switch (reg_set) {
 288     case all_registers:
 289       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 290       live_regs      = RegisterSaver_LiveRegs;
 291       break;
 292     case all_registers_except_r2:
 293       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 294       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 295       break;
 296     case all_integer_registers:
 297       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 298       live_regs      = RegisterSaver_LiveIntRegs;
 299       break;
 300     case all_volatile_registers:
 301       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 302       live_regs      = RegisterSaver_LiveVolatileRegs;
 303       break;
 304     case arg_registers:
 305       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 306       live_regs      = RegisterSaver_LiveArgRegs;
 307       break;
 308     default: ShouldNotReachHere();
 309   }
 310 
 311   // Save return pc in old frame.
 312   __ save_return_pc(return_pc);
 313 
 314   // Push a new frame (includes stack linkage).
 315   __ push_frame(frame_size_in_bytes);
 316 
 317   // Register save area in new frame starts above z_abi_160 area.
 318   int offset = register_save_offset;
 319 
 320   Register first = noreg;
 321   Register last  = noreg;
 322   int      first_offset = -1;
 323   bool     float_spilled = false;
 324 
 325   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 326     int reg_num  = live_regs[i].reg_num;
 327     int reg_type = live_regs[i].reg_type;
 328 
 329     switch (reg_type) {
 330       case RegisterSaver::int_reg: {
 331         Register reg = as_Register(reg_num);
 332         if (last != reg->predecessor()) {
 333           if (first != noreg) {
 334             __ z_stmg(first, last, first_offset, Z_SP);
 335           }
 336           first = reg;
 337           first_offset = offset;
 338           DEBUG_ONLY(float_spilled = false);
 339         }
 340         last = reg;
 341         assert(last != Z_R0, "r0 would require special treatment");
 342         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 343         break;
 344       }
 345 
 346       case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
 347         continue; // Continue with next loop iteration.
 348 
 349       case RegisterSaver::float_reg: {
 350         FloatRegister freg = as_FloatRegister(reg_num);
 351         __ z_std(freg, offset, Z_SP);
 352         DEBUG_ONLY(float_spilled = true);
 353         break;
 354       }
 355 
 356       default:
 357         ShouldNotReachHere();
 358         break;
 359     }
 360 
 361     // Second set_callee_saved is really a waste but we'll keep things as they were for now
 362     map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
 363     map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
 364   }
 365   assert(first != noreg, "Should spill at least one int reg.");
 366   __ z_stmg(first, last, first_offset, Z_SP);
 367 
 368   // And we're done.
 369   return map;
 370 }
 371 
 372 
 373 // Generate the OopMap (again, regs were saved before).
 374 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
 375   // Calculate frame size.
 376   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 377   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 378   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 379 
 380   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 381   OopMap* map = new OopMap(frame_size_in_slots, 0);
 382 
 383   int regstosave_num = 0;
 384   const RegisterSaver::LiveRegType* live_regs = NULL;
 385 
 386   switch (reg_set) {
 387     case all_registers:
 388       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 389       live_regs      = RegisterSaver_LiveRegs;
 390       break;
 391     case all_registers_except_r2:
 392       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 393       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 394       break;
 395     case all_integer_registers:
 396       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 397       live_regs      = RegisterSaver_LiveIntRegs;
 398       break;
 399     case all_volatile_registers:
 400       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 401       live_regs      = RegisterSaver_LiveVolatileRegs;
 402       break;
 403     case arg_registers:
 404       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 405       live_regs      = RegisterSaver_LiveArgRegs;
 406       break;
 407     default: ShouldNotReachHere();
 408   }
 409 
 410   // Register save area in new frame starts above z_abi_160 area.
 411   int offset = register_save_offset;
 412   for (int i = 0; i < regstosave_num; i++) {
 413     if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
 414       map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
 415       map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
 416     }
 417     offset += reg_size;
 418   }
 419   return map;
 420 }
 421 
 422 
 423 // Pop the current frame and restore all the registers that we saved.
 424 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
 425   int offset;
 426   const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
 427 
 428   Register first = noreg;
 429   Register last = noreg;
 430   int      first_offset = -1;
 431   bool     float_spilled = false;
 432 
 433   int regstosave_num = 0;
 434   const RegisterSaver::LiveRegType* live_regs = NULL;
 435 
 436   switch (reg_set) {
 437     case all_registers:
 438       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 439       live_regs      = RegisterSaver_LiveRegs;
 440       break;
 441     case all_registers_except_r2:
 442       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
 443       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 444       break;
 445     case all_integer_registers:
 446       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 447       live_regs      = RegisterSaver_LiveIntRegs;
 448       break;
 449     case all_volatile_registers:
 450       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 451       live_regs      = RegisterSaver_LiveVolatileRegs;
 452       break;
 453     case arg_registers:
 454       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
 455       live_regs      = RegisterSaver_LiveArgRegs;
 456       break;
 457     default: ShouldNotReachHere();
 458   }
 459 
 460   // Restore all registers (ints and floats).
 461 
 462   // Register save area in new frame starts above z_abi_160 area.
 463   offset = register_save_offset;
 464 
 465   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 466     int reg_num  = live_regs[i].reg_num;
 467     int reg_type = live_regs[i].reg_type;
 468 
 469     switch (reg_type) {
 470       case RegisterSaver::excluded_reg:
 471         continue; // Continue with next loop iteration.
 472 
 473       case RegisterSaver::int_reg: {
 474         Register reg = as_Register(reg_num);
 475         if (last != reg->predecessor()) {
 476           if (first != noreg) {
 477             __ z_lmg(first, last, first_offset, Z_SP);
 478           }
 479           first = reg;
 480           first_offset = offset;
 481           DEBUG_ONLY(float_spilled = false);
 482         }
 483         last = reg;
 484         assert(last != Z_R0, "r0 would require special treatment");
 485         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 486         break;
 487       }
 488 
 489       case RegisterSaver::float_reg: {
 490         FloatRegister freg = as_FloatRegister(reg_num);
 491         __ z_ld(freg, offset, Z_SP);
 492         DEBUG_ONLY(float_spilled = true);
 493         break;
 494       }
 495 
 496       default:
 497         ShouldNotReachHere();
 498     }
 499   }
 500   assert(first != noreg, "Should restore at least one int reg.");
 501   __ z_lmg(first, last, first_offset, Z_SP);
 502 
 503   // Pop the frame.
 504   __ pop_frame();
 505 
 506   // Restore the return pc.
 507   __ restore_return_pc();
 508 }
 509 
 510 
 511 // Pop the current frame and restore the registers that might be holding a result.
 512 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 514   int offset;
 515   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 516                                    sizeof(RegisterSaver::LiveRegType);
 517   const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
 518 
 519   // Restore all result registers (ints and floats).
 520   offset = register_save_offset;
 521   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 522     int reg_num = RegisterSaver_LiveRegs[i].reg_num;
 523     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 524     switch (reg_type) {
 525       case RegisterSaver::excluded_reg:
 526         continue; // Continue with next loop iteration.
 527       case RegisterSaver::int_reg: {
 528         if (as_Register(reg_num) == Z_RET) { // int result_reg
 529           __ z_lg(as_Register(reg_num), offset, Z_SP);
 530         }
 531         break;
 532       }
 533       case RegisterSaver::float_reg: {
 534         if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
 535           __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
 536         }
 537         break;
 538       }
 539       default:
 540         ShouldNotReachHere();
 541     }
 542   }
 543 }
 544 
 545 #if INCLUDE_CDS
 546 size_t SharedRuntime::trampoline_size() {
 547   return MacroAssembler::load_const_size() + 2;
 548 }
 549 
 550 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 551   // Think about using pc-relative branch.
 552   __ load_const(Z_R1_scratch, destination);
 553   __ z_br(Z_R1_scratch);
 554 }
 555 #endif
 556 
 557 // ---------------------------------------------------------------------------
 558 void SharedRuntime::save_native_result(MacroAssembler * masm,
 559                                        BasicType ret_type,
 560                                        int frame_slots) {
 561   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 562 
 563   switch (ret_type) {
 564     case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
 565     case T_BYTE:
 566     case T_CHAR:
 567     case T_SHORT:
 568     case T_INT:
 569       __ reg2mem_opt(Z_RET, memaddr, false);
 570       break;
 571     case T_OBJECT:   // Save pointer types as long.
 572     case T_ARRAY:
 573     case T_ADDRESS:
 574     case T_VOID:
 575     case T_LONG:
 576       __ reg2mem_opt(Z_RET, memaddr);
 577       break;
 578     case T_FLOAT:
 579       __ freg2mem_opt(Z_FRET, memaddr, false);
 580       break;
 581     case T_DOUBLE:
 582       __ freg2mem_opt(Z_FRET, memaddr);
 583       break;
 584   }
 585 }
 586 
 587 void SharedRuntime::restore_native_result(MacroAssembler *masm,
 588                                           BasicType       ret_type,
 589                                           int             frame_slots) {
 590   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 591 
 592   switch (ret_type) {
 593     case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
 594     case T_BYTE:
 595     case T_CHAR:
 596     case T_SHORT:
 597     case T_INT:
 598       __ mem2reg_opt(Z_RET, memaddr, false);
 599       break;
 600     case T_OBJECT:   // Restore pointer types as long.
 601     case T_ARRAY:
 602     case T_ADDRESS:
 603     case T_VOID:
 604     case T_LONG:
 605       __ mem2reg_opt(Z_RET, memaddr);
 606       break;
 607     case T_FLOAT:
 608       __ mem2freg_opt(Z_FRET, memaddr, false);
 609       break;
 610     case T_DOUBLE:
 611       __ mem2freg_opt(Z_FRET, memaddr);
 612       break;
 613   }
 614 }
 615 
 616 // ---------------------------------------------------------------------------
 617 // Read the array of BasicTypes from a signature, and compute where the
 618 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 619 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 620 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 621 // as framesizes are fixed.
 622 // VMRegImpl::stack0 refers to the first slot 0(sp).
 623 // VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
 624 // up to RegisterImpl::number_of_registers are the 64-bit integer registers.
 625 
 626 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 627 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 628 // units regardless of build.
 629 
 630 // The Java calling convention is a "shifted" version of the C ABI.
 631 // By skipping the first C ABI register we can call non-static jni methods
 632 // with small numbers of arguments without having to shuffle the arguments
 633 // at all. Since we control the java ABI we ought to at least get some
 634 // advantage out of it.
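     // Illustrative example (hypothetical signature): for (int, long, Object, float, double)
     // the loop below assigns
     //   int    -> Z_R2 (z_iarg_reg[0], set1)
     //   long   -> Z_R3 (z_iarg_reg[1], set2)
     //   Object -> Z_R4 (z_iarg_reg[2], set2)
     //   float  -> Z_F0 (z_farg_reg[0], set1)
     //   double -> Z_F2 (z_farg_reg[1], set2)
     // and the trailing T_VOID halves of long/double are set_bad() without
     // consuming a register or stack slot.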
 635 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 636                                            VMRegPair *regs,
 637                                            int total_args_passed,
 638                                            int is_outgoing) {
 639   // c2c calling conventions for compiled-compiled calls.
 640 
 641   // An int/float occupies 1 slot here.
 642   const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats.
 643   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 644 
 645   const VMReg z_iarg_reg[5] = {
 646     Z_R2->as_VMReg(),
 647     Z_R3->as_VMReg(),
 648     Z_R4->as_VMReg(),
 649     Z_R5->as_VMReg(),
 650     Z_R6->as_VMReg()
 651   };
 652   const VMReg z_farg_reg[4] = {
 653     Z_F0->as_VMReg(),
 654     Z_F2->as_VMReg(),
 655     Z_F4->as_VMReg(),
 656     Z_F6->as_VMReg()
 657   };
 658   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 659   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 660 
 661   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 662   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 663 
 665   int stk = 0;
 666   int ireg = 0;
 667   int freg = 0;
 668 
 669   for (int i = 0; i < total_args_passed; ++i) {
 670     switch (sig_bt[i]) {
 671       case T_BOOLEAN:
 672       case T_CHAR:
 673       case T_BYTE:
 674       case T_SHORT:
 675       case T_INT:
 676         if (ireg < z_num_iarg_registers) {
 677           // Put int/ptr in register.
 678           regs[i].set1(z_iarg_reg[ireg]);
 679           ++ireg;
 680         } else {
 681           // Put int/ptr on stack.
 682           regs[i].set1(VMRegImpl::stack2reg(stk));
 683           stk += inc_stk_for_intfloat;
 684         }
 685         break;
 686       case T_LONG:
 687         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 688         if (ireg < z_num_iarg_registers) {
 689           // Put long in register.
 690           regs[i].set2(z_iarg_reg[ireg]);
 691           ++ireg;
 692         } else {
 693           // Put long on stack and align to 2 slots.
 694           if (stk & 0x1) { ++stk; }
 695           regs[i].set2(VMRegImpl::stack2reg(stk));
 696           stk += inc_stk_for_longdouble;
 697         }
 698         break;
 699       case T_OBJECT:
 700       case T_ARRAY:
 701       case T_ADDRESS:
 702         if (ireg < z_num_iarg_registers) {
 703           // Put ptr in register.
 704           regs[i].set2(z_iarg_reg[ireg]);
 705           ++ireg;
 706         } else {
 707           // Put ptr on stack and align to 2 slots, because
 708           // "64-bit pointers record oop-ishness on 2 aligned adjacent
 709           // registers." (see OopFlow::build_oop_map).
 710           if (stk & 0x1) { ++stk; }
 711           regs[i].set2(VMRegImpl::stack2reg(stk));
 712           stk += inc_stk_for_longdouble;
 713         }
 714         break;
 715       case T_FLOAT:
 716         if (freg < z_num_farg_registers) {
 717           // Put float in register.
 718           regs[i].set1(z_farg_reg[freg]);
 719           ++freg;
 720         } else {
 721           // Put float on stack.
 722           regs[i].set1(VMRegImpl::stack2reg(stk));
 723           stk += inc_stk_for_intfloat;
 724         }
 725         break;
 726       case T_DOUBLE:
 727         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 728         if (freg < z_num_farg_registers) {
 729           // Put double in register.
 730           regs[i].set2(z_farg_reg[freg]);
 731           ++freg;
 732         } else {
 733           // Put double on stack and align to 2 slots.
 734           if (stk & 0x1) { ++stk; }
 735           regs[i].set2(VMRegImpl::stack2reg(stk));
 736           stk += inc_stk_for_longdouble;
 737         }
 738         break;
 739       case T_VOID:
 740         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 741         // Do not count halves.
 742         regs[i].set_bad();
 743         break;
 744       default:
 745         ShouldNotReachHere();
 746     }
 747   }
 748   return align_up(stk, 2);
 749 }
 750 
 751 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 752                                         VMRegPair *regs,
 753                                         VMRegPair *regs2,
 754                                         int total_args_passed) {
 755   assert(regs2 == NULL, "second VMRegPair array not used on this platform");
 756 
 757   // Calling conventions for C runtime calls and calls to JNI native methods.
 758   const VMReg z_iarg_reg[5] = {
 759     Z_R2->as_VMReg(),
 760     Z_R3->as_VMReg(),
 761     Z_R4->as_VMReg(),
 762     Z_R5->as_VMReg(),
 763     Z_R6->as_VMReg()
 764   };
 765   const VMReg z_farg_reg[4] = {
 766     Z_F0->as_VMReg(),
 767     Z_F2->as_VMReg(),
 768     Z_F4->as_VMReg(),
 769     Z_F6->as_VMReg()
 770   };
 771   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 772   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 773 
 774   // Check calling conventions consistency.
 775   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 776   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 777 
 778   // Avoid passing C arguments in the wrong stack slots.
 779 
 780   // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
 781   // 2 such slots, like 64 bit values do.
 782   const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
 783   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 784 
 786   // Leave room for C-compatible ABI
 787   int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
 788   int freg = 0;
 789   int ireg = 0;
 790 
 791   // We put the first 5 arguments into registers and the rest on the
 792   // stack. Float arguments are already in their argument registers
 793   // due to c2c calling conventions (see calling_convention).
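       // Illustrative example (hypothetical case): with seven T_INT arguments,
       // the first five go to Z_R2..Z_R6; the remaining two each get a set2()
       // stack location, advancing 'stk' by inc_stk_for_longdouble slots.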
 794   for (int i = 0; i < total_args_passed; ++i) {
 795     switch (sig_bt[i]) {
 796       case T_BOOLEAN:
 797       case T_CHAR:
 798       case T_BYTE:
 799       case T_SHORT:
 800       case T_INT:
 801         // Fall through, handle as long.
 802       case T_LONG:
 803       case T_OBJECT:
 804       case T_ARRAY:
 805       case T_ADDRESS:
 806       case T_METADATA:
 807         // Oops are already boxed if required (JNI).
 808         if (ireg < z_num_iarg_registers) {
 809           regs[i].set2(z_iarg_reg[ireg]);
 810           ++ireg;
 811         } else {
 812           regs[i].set2(VMRegImpl::stack2reg(stk));
 813           stk += inc_stk_for_longdouble;
 814         }
 815         break;
 816       case T_FLOAT:
 817         if (freg < z_num_farg_registers) {
 818           regs[i].set1(z_farg_reg[freg]);
 819           ++freg;
 820         } else {
 821           regs[i].set1(VMRegImpl::stack2reg(stk+1));
 822           stk +=  inc_stk_for_intfloat;
 823         }
 824         break;
 825       case T_DOUBLE:
 826         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 827         if (freg < z_num_farg_registers) {
 828           regs[i].set2(z_farg_reg[freg]);
 829           ++freg;
 830         } else {
 831           // Put double on stack.
 832           regs[i].set2(VMRegImpl::stack2reg(stk));
 833           stk += inc_stk_for_longdouble;
 834         }
 835         break;
 836       case T_VOID:
 837         // Do not count halves.
 838         regs[i].set_bad();
 839         break;
 840       default:
 841         ShouldNotReachHere();
 842     }
 843   }
 844   return align_up(stk, 2);
 845 }
 846 
 847 ////////////////////////////////////////////////////////////////////////
 848 //
 849 //  Argument shufflers
 850 //
 851 ////////////////////////////////////////////////////////////////////////
 852 
 853 //----------------------------------------------------------------------
 854 // The java_calling_convention describes stack locations as ideal slots on
 855 // a frame with no abi restrictions. Since we must observe abi restrictions
 856 // (like the placement of the register window) the slots must be biased by
 857 // the following value.
 858 //----------------------------------------------------------------------
 859 static int reg2slot(VMReg r) {
 860   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 861 }
 862 
 863 static int reg2offset(VMReg r) {
 864   return reg2slot(r) * VMRegImpl::stack_slot_size;
 865 }
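     // Illustrative example (hypothetical values): if out_preserve_stack_slots()
     // returned 2, a VMReg in stack slot 3 would map to slot 5 via reg2slot()
     // and to byte offset 5 * VMRegImpl::stack_slot_size via reg2offset().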
 866 
 867 static void verify_oop_args(MacroAssembler *masm,
 868                             int total_args_passed,
 869                             const BasicType *sig_bt,
 870                             const VMRegPair *regs) {
 871   if (!VerifyOops) { return; }
 872 
 873   for (int i = 0; i < total_args_passed; i++) {
 874     if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
 875       VMReg r = regs[i].first();
 876       assert(r->is_valid(), "bad oop arg");
 877 
 878       if (r->is_stack()) {
 879         __ z_lg(Z_R0_scratch,
 880                 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
 881         __ verify_oop(Z_R0_scratch);
 882       } else {
 883         __ verify_oop(r->as_Register());
 884       }
 885     }
 886   }
 887 }
 888 
 889 static void gen_special_dispatch(MacroAssembler *masm,
 890                                  int total_args_passed,
 891                                  vmIntrinsics::ID special_dispatch,
 892                                  const BasicType *sig_bt,
 893                                  const VMRegPair *regs) {
 894   verify_oop_args(masm, total_args_passed, sig_bt, regs);
 895 
 896   // Now write the args into the outgoing interpreter space.
 897   bool     has_receiver   = false;
 898   Register receiver_reg   = noreg;
 899   int      member_arg_pos = -1;
 900   Register member_reg     = noreg;
 901   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
 902 
 903   if (ref_kind != 0) {
 904     member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
 905     member_reg = Z_R9;                       // Known to be free at this point.
 906     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 907   } else {
 908     guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
 909     has_receiver = true;
 910   }
 911 
 912   if (member_reg != noreg) {
 913     // Load the member_arg into register, if necessary.
 914     assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
 915     assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
 916 
 917     VMReg r = regs[member_arg_pos].first();
 918     assert(r->is_valid(), "bad member arg");
 919 
 920     if (r->is_stack()) {
 921       __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
 922     } else {
 923       // No data motion is needed.
 924       member_reg = r->as_Register();
 925     }
 926   }
 927 
 928   if (has_receiver) {
 929     // Make sure the receiver is loaded into a register.
 930     assert(total_args_passed > 0, "oob");
 931     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 932 
 933     VMReg r = regs[0].first();
 934     assert(r->is_valid(), "bad receiver arg");
 935 
 936     if (r->is_stack()) {
 937       // Porting note: This assumes that compiled calling conventions always
 938       // pass the receiver oop in a register. If this is not true on some
 939       // platform, pick a temp and load the receiver from stack.
 940       assert(false, "receiver always in a register");
 941       receiver_reg = Z_R13;  // Known to be free at this point.
 942       __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
 943     } else {
 944       // No data motion is needed.
 945       receiver_reg = r->as_Register();
 946     }
 947   }
 948 
 949   // Figure out which address we are really jumping to:
 950   MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
 951                                                  receiver_reg, member_reg,
 952                                                  /*for_compiler_entry:*/ true);
 953 }
 954 
 955 ////////////////////////////////////////////////////////////////////////
 956 //
 957 //  Argument shufflers
 958 //
 959 ////////////////////////////////////////////////////////////////////////
 960 
 961 // Is the given vector size (in bytes) bigger than the size saved by default?
 962 // 8-byte registers are saved by default on z/Architecture.
 963 bool SharedRuntime::is_wide_vector(int size) {
 964   // Note, MaxVectorSize == 8 on this platform.
 965   assert(size <= 8, "%d bytes vectors are not supported", size);
 966   return size > 8;
 967 }
 968 
 969 //----------------------------------------------------------------------
 970 // An oop arg. Must pass a handle not the oop itself
 971 //----------------------------------------------------------------------
 972 static void object_move(MacroAssembler *masm,
 973                         OopMap *map,
 974                         int oop_handle_offset,
 975                         int framesize_in_slots,
 976                         VMRegPair src,
 977                         VMRegPair dst,
 978                         bool is_receiver,
 979                         int *receiver_offset) {
 980   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
 981 
 982   assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
 983 
 984   // Must pass a handle. First figure out the location we use as a handle.
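       // (The "handle" is simply the address of the stack slot that holds the
       //  oop; a NULL oop is passed as a NULL handle, see the clear_reg() calls
       //  in both branches below.)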
 985 
 986   if (src.first()->is_stack()) {
 987     // Oop is already on the stack, put handle on stack or in register
 988     // If handle will be on the stack, use temp reg to calculate it.
 989     Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
 990     Label    skip;
 991     int      slot_in_older_frame = reg2slot(src.first());
 992 
 993     guarantee(!is_receiver, "expecting receiver in register");
 994     map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));
 995 
 996     __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
 997     __ load_and_test_long(Z_R0, Address(rHandle));
 998     __ z_brne(skip);
 999     // Use a NULL handle if oop is NULL.
1000     __ clear_reg(rHandle, true, false);
1001     __ bind(skip);
1002 
1003     // Copy handle to the right place (register or stack).
1004     if (dst.first()->is_stack()) {
1005       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1006     } // else
1007       // nothing to do. rHandle uses the correct register
1008   } else {
1009     // Oop is passed in an input register. We must flush it to the stack.
1010     const Register rOop = src.first()->as_Register();
1011     const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1012     int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1013     int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
1014     NearLabel skip;
1015 
1016     if (is_receiver) {
1017       *receiver_offset = oop_slot_offset;
1018     }
1019     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1020 
1021     // Flush Oop to stack, calculate handle.
1022     __ z_stg(rOop, oop_slot_offset, Z_SP);
1023     __ add2reg(rHandle, oop_slot_offset, Z_SP);
1024 
1025     // If Oop == NULL, use a NULL handle.
1026     __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
1027     __ clear_reg(rHandle, true, false);
1028     __ bind(skip);
1029 
1030     // Copy handle to the right place (register or stack).
1031     if (dst.first()->is_stack()) {
1032       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1033     } // else
1034       // nothing to do here, since rHandle = dst.first()->as_Register in this case.
1035   }
1036 }
1037 
1038 //----------------------------------------------------------------------
1039 // A float arg. May have to do float reg to int reg conversion
1040 //----------------------------------------------------------------------
1041 static void float_move(MacroAssembler *masm,
1042                        VMRegPair src,
1043                        VMRegPair dst,
1044                        int framesize_in_slots,
1045                        int workspace_slot_offset) {
1046   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1047   int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1048 
1049   // We do not accept an argument in a VMRegPair to be spread over two slots,
1050   // no matter what physical location (reg or stack) the slots may have.
1051   // We just check for the unaccepted slot to be invalid.
1052   assert(!src.second()->is_valid(), "float in arg spread over two slots");
1053   assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1054 
1055   if (src.first()->is_stack()) {
1056     if (dst.first()->is_stack()) {
1057       // stack -> stack. The easiest of the bunch.
1058       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1059                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1060     } else {
1061       // stack to reg
1062       Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1063       if (dst.first()->is_Register()) {
1064         __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1065       } else {
1066         __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1067       }
1068     }
1069   } else if (src.first()->is_Register()) {
1070     if (dst.first()->is_stack()) {
1071       // gpr -> stack
1072       __ reg2mem_opt(src.first()->as_Register(),
1073                      Address(Z_SP, reg2offset(dst.first())), false);
1074     } else {
1075       if (dst.first()->is_Register()) {
1076         // gpr -> gpr
1077         __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1078                               src.first()->as_Register(), T_INT);
1079       } else {
1080         if (VM_Version::has_FPSupportEnhancements()) {
1081           // gpr -> fpr. Exploit z10 capability of direct transfer.
1082           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1083         } else {
1084           // gpr -> fpr. Use work space on stack to transfer data.
1085           Address   stackaddr(Z_SP, workspace_offset);
1086 
1087           __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1088           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1089         }
1090       }
1091     }
1092   } else {
1093     if (dst.first()->is_stack()) {
1094       // fpr -> stack
1095       __ freg2mem_opt(src.first()->as_FloatRegister(),
1096                       Address(Z_SP, reg2offset(dst.first())), false);
1097     } else {
1098       if (dst.first()->is_Register()) {
1099         if (VM_Version::has_FPSupportEnhancements()) {
1100           // fpr -> gpr.
1101           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1102         } else {
1103           // fpr -> gpr. Use work space on stack to transfer data.
1104           Address   stackaddr(Z_SP, workspace_offset);
1105 
1106           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1107           __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1108         }
1109       } else {
1110         // fpr -> fpr
1111         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1112                                src.first()->as_FloatRegister(), T_FLOAT);
1113       }
1114     }
1115   }
1116 }
1117 
1118 //----------------------------------------------------------------------
1119 // A double arg. May have to do double reg to long reg conversion
1120 //----------------------------------------------------------------------
1121 static void double_move(MacroAssembler *masm,
1122                         VMRegPair src,
1123                         VMRegPair dst,
1124                         int framesize_in_slots,
1125                         int workspace_slot_offset) {
1126   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1127   int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1128 
1129   // Since src is always a java calling convention we know that the
1130   // src pair is always either all registers or all stack (and aligned?)
1131 
1132   if (src.first()->is_stack()) {
1133     if (dst.first()->is_stack()) {
1134       // stack -> stack. The easiest of the bunch.
1135       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1136                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1137     } else {
1138       // stack to reg
1139       Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1140 
1141       if (dst.first()->is_Register()) {
1142         __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1143       } else {
1144         __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1145       }
1146     }
1147   } else if (src.first()->is_Register()) {
1148     if (dst.first()->is_stack()) {
1149       // gpr -> stack
1150       __ reg2mem_opt(src.first()->as_Register(),
1151                      Address(Z_SP, reg2offset(dst.first())));
1152     } else {
1153       if (dst.first()->is_Register()) {
1154         // gpr -> gpr
1155         __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1156                               src.first()->as_Register(), T_LONG);
1157       } else {
1158         if (VM_Version::has_FPSupportEnhancements()) {
1159           // gpr -> fpr. Exploit z10 capability of direct transfer.
1160           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1161         } else {
1162           // gpr -> fpr. Use work space on stack to transfer data.
1163           Address stackaddr(Z_SP, workspace_offset);
1164           __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1165           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1166         }
1167       }
1168     }
1169   } else {
1170     if (dst.first()->is_stack()) {
1171       // fpr -> stack
1172       __ freg2mem_opt(src.first()->as_FloatRegister(),
1173                       Address(Z_SP, reg2offset(dst.first())));
1174     } else {
1175       if (dst.first()->is_Register()) {
1176         if (VM_Version::has_FPSupportEnhancements()) {
1177           // fpr -> gpr. Exploit z10 capability of direct transfer.
1178           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1179         } else {
1180           // fpr -> gpr. Use work space on stack to transfer data.
1181           Address stackaddr(Z_SP, workspace_offset);
1182 
1183           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1184           __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1185         }
1186       } else {
1187         // fpr -> fpr
1188         // In theory these overlap but the ordering is such that this is likely a nop.
1189         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1190                                src.first()->as_FloatRegister(), T_DOUBLE);
1191       }
1192     }
1193   }
1194 }
1195 
1196 //----------------------------------------------------------------------
1197 // A long arg.
1198 //----------------------------------------------------------------------
1199 static void long_move(MacroAssembler *masm,
1200                       VMRegPair src,
1201                       VMRegPair dst,
1202                       int framesize_in_slots) {
1203   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1204 
1205   if (src.first()->is_stack()) {
1206     if (dst.first()->is_stack()) {
1207       // stack -> stack. The easiest of the bunch.
1208       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1209                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1210     } else {
1211       // stack to reg
1212       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1213       __ mem2reg_opt(dst.first()->as_Register(),
1214                       Address(Z_SP, reg2offset(src.first()) + frame_offset));
1215     }
1216   } else {
1217     // reg to reg
1218     assert(src.first()->is_Register(), "long src value must be in GPR");
1219     if (dst.first()->is_stack()) {
1220       // reg -> stack
1221       __ reg2mem_opt(src.first()->as_Register(),
1222                      Address(Z_SP, reg2offset(dst.first())));
1223     } else {
1224       // reg -> reg
1225       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1226       __ move_reg_if_needed(dst.first()->as_Register(),
1227                             T_LONG, src.first()->as_Register(), T_LONG);
1228     }
1229   }
1230 }
1231 
1232 
1233 //----------------------------------------------------------------------
1234 // An int-like arg.
1235 //----------------------------------------------------------------------
1236 // On z/Architecture we will store integer like items to the stack as 64 bit
1237 // items, according to the z/Architecture ABI, even though Java would only store
1238 // 32 bits for a parameter.
1239 // We do sign extension for all base types. That is ok since the only
1240 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1241 // Sign extension 32->64 bit will thus not affect the value.
1242 //----------------------------------------------------------------------
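     // Worked example: a T_CHAR value 0xFFFF arrives as int 0x0000FFFF; since
     // bit 31 is zero, the 32->64 bit sign extension (LGFR / signed loads below)
     // yields 0x000000000000FFFF, i.e. the value is preserved.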
1243 static void move32_64(MacroAssembler *masm,
1244                       VMRegPair src,
1245                       VMRegPair dst,
1246                       int framesize_in_slots) {
1247   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1248 
1249   if (src.first()->is_stack()) {
1250     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1251     if (dst.first()->is_stack()) {
1252       // stack -> stack. MVC not possible due to sign extension.
1253       Address firstaddr(Z_SP, reg2offset(dst.first()));
1254       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1255       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1256     } else {
1257       // stack -> reg, sign extended
1258       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1259     }
1260   } else {
1261     if (dst.first()->is_stack()) {
1262       // reg -> stack, sign extended
1263       Address firstaddr(Z_SP, reg2offset(dst.first()));
1264       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1265       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1266     } else {
1267       // reg -> reg, sign extended
1268       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1269     }
1270   }
1271 }
1272 
1273 static void save_or_restore_arguments(MacroAssembler *masm,
1274                                       const int stack_slots,
1275                                       const int total_in_args,
1276                                       const int arg_save_area,
1277                                       OopMap *map,
1278                                       VMRegPair *in_regs,
1279                                       BasicType *in_sig_bt) {
1280 
1281   // If map is non-NULL then the code should store the values,
1282   // otherwise it should load them.
1283   int slot = arg_save_area;
1284   // Handle double words first.
1285   for (int i = 0; i < total_in_args; i++) {
1286     if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
1287       int offset = slot * VMRegImpl::stack_slot_size;
1288       slot += VMRegImpl::slots_per_word;
1289       assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
1290       const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1291       Address   stackaddr(Z_SP, offset);
1292       if (map != NULL) {
1293         __ freg2mem_opt(freg, stackaddr);
1294       } else {
1295         __ mem2freg_opt(freg, stackaddr);
1296       }
1297     } else if (in_regs[i].first()->is_Register() &&
1298                (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
1299       int offset = slot * VMRegImpl::stack_slot_size;
1300       const Register   reg = in_regs[i].first()->as_Register();
1301       if (map != NULL) {
1302         __ z_stg(reg, offset, Z_SP);
1303         if (in_sig_bt[i] == T_ARRAY) {
1304           map->set_oop(VMRegImpl::stack2reg(slot));
1305         }
1306       } else {
1307         __ z_lg(reg, offset, Z_SP);
1308         slot += VMRegImpl::slots_per_word;
1309         assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
1310       }
1311     }
1312   }
1313 
1314   // Save or restore single word registers.
1315   for (int i = 0; i < total_in_args; i++) {
1316     if (in_regs[i].first()->is_FloatRegister()) {
1317       if (in_sig_bt[i] == T_FLOAT) {
1318         int offset = slot * VMRegImpl::stack_slot_size;
1319         slot++;
1320         assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
1321         const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1322         Address   stackaddr(Z_SP, offset);
1323         if (map != NULL) {
1324           __ freg2mem_opt(freg, stackaddr, false);
1325         } else {
1326           __ mem2freg_opt(freg, stackaddr, false);
1327         }
1328       }
1329     } else if (in_regs[i].first()->is_stack() &&
1330                in_sig_bt[i] == T_ARRAY && map != NULL) {
1331       int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1332       map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1333     }
1334   }
1335 }
1336 
1337 // Check GCLocker::needs_gc and enter the runtime if it's true. This
1338 // keeps a new JNI critical region from starting until a GC has been
1339 // forced. Save down any oops in registers and describe them in an OopMap.
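// Note: GCLocker::needs_gc() is set when a GC had to be postponed because
// threads were inside JNI critical regions. Until that GC has run, a thread
// that wants to enter a new critical region must block in the runtime
// (SharedRuntime::block_for_jni_critical, called below).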
1340 static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
1341                                                 const int stack_slots,
1342                                                 const int total_in_args,
1343                                                 const int arg_save_area,
1344                                                 OopMapSet *oop_maps,
1345                                                 VMRegPair *in_regs,
1346                                                 BasicType *in_sig_bt) {
1347   __ block_comment("check GCLocker::needs_gc");
1348   Label cont;
1349 
1350   // Check GCLocker::_needs_gc flag.
1351   __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
1352   __ z_cli(0, Z_R1_scratch, 0);
1353   __ z_bre(cont);
1354 
1355   // Save down any values that are live in registers and call into the
1356   // runtime to halt for a GC.
1357   OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1358 
1359   save_or_restore_arguments(masm, stack_slots, total_in_args,
1360                             arg_save_area, map, in_regs, in_sig_bt);
1361   address the_pc = __ pc();
1362   __ set_last_Java_frame(Z_SP, noreg);
1363 
1364   __ block_comment("block_for_jni_critical");
1365   __ z_lgr(Z_ARG1, Z_thread);
1366 
1367   address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1368   __ call_c(entry_point);
1369   oop_maps->add_gc_map(__ offset(), map);
1370 
1371   __ reset_last_Java_frame();
1372 
1373   // Reload all the register arguments.
1374   save_or_restore_arguments(masm, stack_slots, total_in_args,
1375                             arg_save_area, NULL, in_regs, in_sig_bt);
1376 
1377   __ bind(cont);
1378 
1379   if (StressCriticalJNINatives) {
1380     // Stress register saving
1381     OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1382     save_or_restore_arguments(masm, stack_slots, total_in_args,
1383                               arg_save_area, map, in_regs, in_sig_bt);
1384 
1385     // Destroy argument registers.
1386     for (int i = 0; i < total_in_args; i++) {
1387       if (in_regs[i].first()->is_Register()) {
1388         // Don't set CC.
1389         __ clear_reg(in_regs[i].first()->as_Register(), true, false);
1390       } else {
1391         if (in_regs[i].first()->is_FloatRegister()) {
1392           FloatRegister fr = in_regs[i].first()->as_FloatRegister();
1393           __ z_lcdbr(fr, fr);
1394         }
1395       }
1396     }
1397 
1398     save_or_restore_arguments(masm, stack_slots, total_in_args,
1399                               arg_save_area, NULL, in_regs, in_sig_bt);
1400   }
1401 }
1402 
1403 static void move_ptr(MacroAssembler *masm,
1404                      VMRegPair src,
1405                      VMRegPair dst,
1406                      int framesize_in_slots) {
1407   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1408 
1409   if (src.first()->is_stack()) {
1410     if (dst.first()->is_stack()) {
1411       // stack to stack
1412       __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1413       __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1414     } else {
1415       // stack to reg
1416       __ mem2reg_opt(dst.first()->as_Register(),
1417                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
1418     }
1419   } else {
1420     if (dst.first()->is_stack()) {
1421       // reg to stack
1422       __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1423     } else {
1424       __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1425     }
1426   }
1427 }
1428 
1429 // Unpack an array argument into a pointer to the body and the length
1430 // if the array is non-null, otherwise pass 0 for both.
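// For example (illustration only), a Java byte[] parameter reaches the critical
// native as the pair (jint length, jbyte* body); if the incoming array oop is
// NULL, both the length and the body pointer are passed as 0.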
1431 static void unpack_array_argument(MacroAssembler *masm,
1432                                    VMRegPair reg,
1433                                    BasicType in_elem_type,
1434                                    VMRegPair body_arg,
1435                                    VMRegPair length_arg,
1436                                    int framesize_in_slots) {
1437   Register tmp_reg = Z_tmp_2;
1438   Register tmp2_reg = Z_tmp_1;
1439 
1440   assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1441          "possible collision");
1442   assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1443          "possible collision");
1444 
1445   // Pass the length, ptr pair.
1446   NearLabel set_out_args;
1447   VMRegPair tmp, tmp2;
1448 
1449   tmp.set_ptr(tmp_reg->as_VMReg());
1450   tmp2.set_ptr(tmp2_reg->as_VMReg());
1451   if (reg.first()->is_stack()) {
1452     // Load the arg up from the stack.
1453     move_ptr(masm, reg, tmp, framesize_in_slots);
1454     reg = tmp;
1455   }
1456 
1457   const Register first = reg.first()->as_Register();
1458 
1459   // Don't set CC, indicate unused result.
1460   (void) __ clear_reg(tmp2_reg, true, false);
1461   if (tmp_reg != first) {
1462     __ clear_reg(tmp_reg, true, false);  // Don't set CC.
1463   }
1464   __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
1465   __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
1466   __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
1467 
1468   __ bind(set_out_args);
1469   move_ptr(masm, tmp, body_arg, framesize_in_slots);
1470   move32_64(masm, tmp2, length_arg, framesize_in_slots);
1471 }
1472 
1473 //----------------------------------------------------------------------
1474 // Wrap a JNI call.
1475 //----------------------------------------------------------------------
1476 #undef USE_RESIZE_FRAME
1477 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1478                                                 const methodHandle& method,
1479                                                 int compile_id,
1480                                                 BasicType *in_sig_bt,
1481                                                 VMRegPair *in_regs,
1482                                                 BasicType ret_type) {
1483 #ifdef COMPILER2
1484   int total_in_args = method->size_of_parameters();
1485   if (method->is_method_handle_intrinsic()) {
1486     vmIntrinsics::ID iid = method->intrinsic_id();
1487     intptr_t start = (intptr_t) __ pc();
1488     int vep_offset = ((intptr_t) __ pc()) - start;
1489 
1490     gen_special_dispatch(masm, total_in_args,
1491                          method->intrinsic_id(), in_sig_bt, in_regs);
1492 
1493     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1494 
1495     __ flush();
1496 
1497     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1498 
1499     return nmethod::new_native_nmethod(method,
1500                                        compile_id,
1501                                        masm->code(),
1502                                        vep_offset,
1503                                        frame_complete,
1504                                        stack_slots / VMRegImpl::slots_per_word,
1505                                        in_ByteSize(-1),
1506                                        in_ByteSize(-1),
1507                                        (OopMapSet *) NULL);
1508   }
1509 
1510 
1511   ///////////////////////////////////////////////////////////////////////
1512   //
1513   //  Precalculations before generating any code
1514   //
1515   ///////////////////////////////////////////////////////////////////////
1516 
1517   bool is_critical_native = true;
1518   address native_func = method->critical_native_function();
1519   if (native_func == NULL) {
1520     native_func = method->native_function();
1521     is_critical_native = false;
1522   }
1523   assert(native_func != NULL, "must have function");
1524 
1525   //---------------------------------------------------------------------
1526   // We have received a description of where all the java args are located
1527   // on entry to the wrapper. We need to convert these args to where
1528   // the jni function will expect them. To figure out where they go
1529   // we convert the java signature to a C signature by inserting
1530   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1531   //
1532   // The first hidden argument arg[0] is a pointer to the JNI environment.
1533   // It is generated for every call.
1534   // The second argument arg[1] to the JNI call, which is hidden for static
1535   // methods, is the boxed lock object. For static calls, the lock object
1536   // is the class mirror of the method's holder; its handle is constructed
1537   // here. For instance calls, the lock is performed on the object itself,
1538   // the pointer of which is passed as the first visible argument.
1539   //---------------------------------------------------------------------
1540 
1541   // Additionally, on z/Architecture we must convert integers
1542   // to longs in the C signature. We do this in advance in order to have
1543   // no trouble with indexes into the bt-arrays.
1544   // So convert the signature and registers now, and adjust the total number
1545   // of in-arguments accordingly.
1546   bool method_is_static = method->is_static();
1547   int  total_c_args     = total_in_args;
1548 
1549   if (!is_critical_native) {
1550     int n_hidden_args = method_is_static ? 2 : 1;
1551     total_c_args += n_hidden_args;
1552   } else {
1553     // No JNIEnv*, no this*, but unpacked arrays (base+length).
1554     for (int i = 0; i < total_in_args; i++) {
1555       if (in_sig_bt[i] == T_ARRAY) {
1556         total_c_args ++;
1557       }
1558     }
1559   }
1560 
1561   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1562   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1563   BasicType* in_elem_bt = NULL;
1564 
1565   // Create the signature for the C call:
1566   //   1) add the JNIEnv*
1567   //   2) add the class if the method is static
1568   //   3) copy the rest of the incoming signature (shifted by the number of
1569   //      hidden arguments)
1570 
1571   int argc = 0;
1572   if (!is_critical_native) {
1573     out_sig_bt[argc++] = T_ADDRESS;
1574     if (method->is_static()) {
1575       out_sig_bt[argc++] = T_OBJECT;
1576     }
1577 
1578     for (int i = 0; i < total_in_args; i++) {
1579       out_sig_bt[argc++] = in_sig_bt[i];
1580     }
1581   } else {
1582     Thread* THREAD = Thread::current();
1583     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1584     SignatureStream ss(method->signature());
1585     int o = 0;
1586     for (int i = 0; i < total_in_args; i++, o++) {
1587       if (in_sig_bt[i] == T_ARRAY) {
1588         // Arrays are passed as tuples (int, elem*).
1589         Symbol* atype = ss.as_symbol(CHECK_NULL);
1590         const char* at = atype->as_C_string();
1591         if (strlen(at) == 2) {
1592           assert(at[0] == '[', "must be");
1593           switch (at[1]) {
1594             case 'B': in_elem_bt[o]  = T_BYTE; break;
1595             case 'C': in_elem_bt[o]  = T_CHAR; break;
1596             case 'D': in_elem_bt[o]  = T_DOUBLE; break;
1597             case 'F': in_elem_bt[o]  = T_FLOAT; break;
1598             case 'I': in_elem_bt[o]  = T_INT; break;
1599             case 'J': in_elem_bt[o]  = T_LONG; break;
1600             case 'S': in_elem_bt[o]  = T_SHORT; break;
1601             case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
1602             default: ShouldNotReachHere();
1603           }
1604         }
1605       } else {
1606         in_elem_bt[o] = T_VOID;
1607       }
1608       if (in_sig_bt[i] != T_VOID) {
1609         assert(in_sig_bt[i] == ss.type(), "must match");
1610         ss.next();
1611       }
1612     }
1613     assert(total_in_args == o, "must match");
1614 
1615     for (int i = 0; i < total_in_args; i++) {
1616       if (in_sig_bt[i] == T_ARRAY) {
1617         // Arrays are passed as tuples (int, elem*).
1618         out_sig_bt[argc++] = T_INT;
1619         out_sig_bt[argc++] = T_ADDRESS;
1620       } else {
1621         out_sig_bt[argc++] = in_sig_bt[i];
1622       }
1623     }
1624   }
1625 
1626   ///////////////////////////////////////////////////////////////////////
1627   // Now figure out where the args must be stored and how much stack space
1628   // they require (neglecting out_preserve_stack_slots but providing space
1629   // for storing the first five register arguments).
1630   // It's weird, see int_stk_helper.
1631   ///////////////////////////////////////////////////////////////////////
1632 
1633   //---------------------------------------------------------------------
1634   // Compute framesize for the wrapper.
1635   //
1636   // - We need to handlize all oops passed in registers.
1637   // - We must create space for them here that is disjoint from the save area.
1638   // - We always just allocate 5 words for storing down these objects.
1639   //   This allows us to simply record the base and use the Ireg number to
1640   //   decide which slot to use.
1641   // - Note that the reg number used to index the stack slot is the inbound
1642   //   number, not the outbound number.
1643   // - We must shuffle args to match the native convention,
1644   //   and to include var-args space.
1645   //---------------------------------------------------------------------
1646 
1647   //---------------------------------------------------------------------
1648   // Calculate the total number of stack slots we will need:
1649   // - 1) abi requirements
1650   // - 2) outgoing args
1651   // - 3) space for inbound oop handle area
1652   // - 4) space for handlizing a klass if static method
1653   // - 5) space for a lock if synchronized method
1654   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1655   // - 7) filler slots for alignment
1656   //---------------------------------------------------------------------
1657   // Here is what the space we have allocated will look like.
1658   // If USE_RESIZE_FRAME is defined, we do not create a new stack frame
1659   // but just extend the one we got with our own data area.
1660   //
1661   // If an offset or pointer name points to a separator line, it is
1662   // assumed that addressing with offset 0 selects storage starting
1663   // at the first byte above the separator line.
1664   //
1665   //
1666   //     ...                   ...
1667   //      | caller's frame      |
1668   // FP-> |---------------------|
1669   //      | filler slots, if any|
1670   //     7| #slots == mult of 2 |
1671   //      |---------------------|
1672   //      | work space          |
1673   //     6| 2 slots = 8 bytes   |
1674   //      |---------------------|
1675   //     5| lock box (if sync)  |
1676   //      |---------------------| <- lock_slot_offset
1677   //     4| klass (if static)   |
1678   //      |---------------------| <- klass_slot_offset
1679   //     3| oopHandle area      |
1680   //      | (save area for      |
1681   //      |  critical natives)  |
1682   //      |                     |
1683   //      |                     |
1684   //      |---------------------| <- oop_handle_offset
1685   //     2| outbound memory     |
1686   //     ...                   ...
1687   //      | based arguments     |
1688   //      |---------------------|
1689   //      | vararg              |
1690   //     ...                   ...
1691   //      | area                |
1692   //      |---------------------| <- out_arg_slot_offset
1693   //     1| out_preserved_slots |
1694   //     ...                   ...
1695   //      | (z_abi spec)        |
1696   // SP-> |---------------------| <- FP_slot_offset (back chain)
1697   //     ...                   ...
1698   //
1699   //---------------------------------------------------------------------
1700 
1701   // *_slot_offset indicates offset from SP in #stack slots
1702   // *_offset      indicates offset from SP in #bytes
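  // (Conversion, as used for klass_offset and lock_offset below:
  //  byte_offset = slot_offset * VMRegImpl::stack_slot_size.)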
1703 
1704   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1705                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1706 
1707   // Now the space for the inbound oop handle area.
1708   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1709   if (is_critical_native) {
1710     // Critical natives may have to call out so they need a save area
1711     // for register arguments.
1712     int double_slots = 0;
1713     int single_slots = 0;
1714     for (int i = 0; i < total_in_args; i++) {
1715       if (in_regs[i].first()->is_Register()) {
1716         const Register reg = in_regs[i].first()->as_Register();
1717         switch (in_sig_bt[i]) {
1718           case T_BOOLEAN:
1719           case T_BYTE:
1720           case T_SHORT:
1721           case T_CHAR:
1722           case T_INT:
1723           // Fall through.
1724           case T_ARRAY:
1725           case T_LONG: double_slots++; break;
1726           default:  ShouldNotReachHere();
1727         }
1728       } else {
1729         if (in_regs[i].first()->is_FloatRegister()) {
1730           switch (in_sig_bt[i]) {
1731             case T_FLOAT:  single_slots++; break;
1732             case T_DOUBLE: double_slots++; break;
1733             default:  ShouldNotReachHere();
1734           }
1735         }
1736       }
1737     }  // for
1738     total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even.
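    // Example (illustration only): three T_LONG/T_ARRAY register args and one
    // T_FLOAT register arg would need 3*2 + align_up(1, 2) = 8 save slots.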
1739   }
1740 
1741   int oop_handle_slot_offset = stack_slots;
1742   stack_slots += total_save_slots;                                        // 3)
1743 
1744   int klass_slot_offset = 0;
1745   int klass_offset      = -1;
1746   if (method_is_static && !is_critical_native) {                          // 4)
1747     klass_slot_offset  = stack_slots;
1748     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1749     stack_slots       += VMRegImpl::slots_per_word;
1750   }
1751 
1752   int lock_slot_offset = 0;
1753   int lock_offset      = -1;
1754   if (method->is_synchronized()) {                                        // 5)
1755     lock_slot_offset   = stack_slots;
1756     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1757     stack_slots       += VMRegImpl::slots_per_word;
1758   }
1759 
1760   int workspace_slot_offset = stack_slots;                                // 6)
1761   stack_slots         += 2;
1762 
1763   // Now compute actual number of stack words we need.
1764   // Round to align stack properly.
1765   stack_slots = align_up(stack_slots,                                     // 7)
1766                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1767   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
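  // Illustration: with 4-byte stack slots and 8-byte frame alignment,
  // stack_slots is rounded up to an even count, so frame_size_in_bytes
  // is always a multiple of 8.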
1768 
1769 
1770   ///////////////////////////////////////////////////////////////////////
1771   // Now we can start generating code
1772   ///////////////////////////////////////////////////////////////////////
1773 
1774   unsigned int wrapper_CodeStart  = __ offset();
1775   unsigned int wrapper_UEPStart;
1776   unsigned int wrapper_VEPStart;
1777   unsigned int wrapper_FrameDone;
1778   unsigned int wrapper_CRegsSet;
1779   Label     handle_pending_exception;
1780   Label     ic_miss;
1781 
1782   //---------------------------------------------------------------------
1783   // Unverified entry point (UEP)
1784   //---------------------------------------------------------------------
1785   wrapper_UEPStart = __ offset();
1786 
1787   // check ic: object class <-> cached class
1788   if (!method_is_static) __ nmethod_UEP(ic_miss);
1789   // Fill with nops (alignment of verified entry point).
1790   __ align(CodeEntryAlignment);
1791 
1792   //---------------------------------------------------------------------
1793   // Verified entry point (VEP)
1794   //---------------------------------------------------------------------
1795   wrapper_VEPStart = __ offset();
1796 
1797   __ save_return_pc();
1798   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1799 #ifndef USE_RESIZE_FRAME
1800   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1801 #else
1802   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1803                                                           // Just resize the existing one.
1804 #endif
1805 
1806   wrapper_FrameDone = __ offset();
1807 
1808   __ verify_thread();
1809 
1810   // Native nmethod wrappers never take possession of the oop arguments.
1811   // So the caller will gc the arguments.
1812   // The only thing we need an oopMap for is if the call is static.
1813   //
1814   // An OopMap for lock (and class if static), and one for the VM call itself
1815   OopMapSet  *oop_maps        = new OopMapSet();
1816   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1817 
1818   if (is_critical_native) {
1819     check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1820                                        oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1821   }
1822 
1823 
1824   //////////////////////////////////////////////////////////////////////
1825   //
1826   // The Grand Shuffle
1827   //
1828   //////////////////////////////////////////////////////////////////////
1829   //
1830   // We immediately shuffle the arguments so that for any vm call we have
1831   // to make from here on out (sync slow path, jvmti, etc.) we will have
1832   // captured the oops from our caller and have a valid oopMap for them.
1833   //
1834   //--------------------------------------------------------------------
1835   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1836   // (derived from JavaThread* which is in Z_thread) and, if static,
1837   // the class mirror instead of a receiver. This pretty much guarantees that
1838   // register layout will not match. We ignore these extra arguments during
1839   // the shuffle. The shuffle is described by the two calling convention
1840   // vectors we have in our possession. We simply walk the java vector to
1841   // get the source locations and the c vector to get the destinations.
1842   //
1843   // This is a trick. We double the stack slots so we can claim
1844   // the oops in the caller's frame. Since we are sure to have
1845   // more args than the caller, doubling is enough to make
1846   // sure we can capture all the incoming oop args from the caller.
1847   //--------------------------------------------------------------------
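  // (The doubled slot count is why the OopMaps in this wrapper are created
  //  with 'stack_slots * 2' frame slots.)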
1848 
1849   // Record sp-based slot for receiver on stack for non-static methods.
1850   int receiver_offset = -1;
1851 
1852   //--------------------------------------------------------------------
1853   // We move the arguments backwards because the floating point register
1854   // destination will always be a register with a greater or equal
1855   // register number, or a stack slot.
1856   //   jix is the index of the incoming Java arguments.
1857   //   cix is the index of the outgoing C arguments.
1858   //--------------------------------------------------------------------
1859 
1860 #ifdef ASSERT
1861   bool reg_destroyed[RegisterImpl::number_of_registers];
1862   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1863   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1864     reg_destroyed[r] = false;
1865   }
1866   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1867     freg_destroyed[f] = false;
1868   }
1869 #endif // ASSERT
1870 
1871   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1872 #ifdef ASSERT
1873     if (in_regs[jix].first()->is_Register()) {
1874       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1875     } else {
1876       if (in_regs[jix].first()->is_FloatRegister()) {
1877         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1878       }
1879     }
1880     if (out_regs[cix].first()->is_Register()) {
1881       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1882     } else {
1883       if (out_regs[cix].first()->is_FloatRegister()) {
1884         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1885       }
1886     }
1887 #endif // ASSERT
1888 
1889     switch (in_sig_bt[jix]) {
1890       // Due to casting, small integers should only occur in pairs with type T_LONG.
1891       case T_BOOLEAN:
1892       case T_CHAR:
1893       case T_BYTE:
1894       case T_SHORT:
1895       case T_INT:
1896         // Move int and do sign extension.
1897         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1898         break;
1899 
1900       case T_LONG :
1901         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1902         break;
1903 
1904       case T_ARRAY:
1905         if (is_critical_native) {
1906           int body_arg = cix;
1907           cix -= 2; // Point to length arg.
1908           unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1909           break;
1910         }
1911         // else fallthrough
1912       case T_OBJECT:
1913         assert(!is_critical_native, "no oop arguments");
1914         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1915                     ((jix == 0) && (!method_is_static)),
1916                     &receiver_offset);
1917         break;
1918       case T_VOID:
1919         break;
1920 
1921       case T_FLOAT:
1922         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1923         break;
1924 
1925       case T_DOUBLE:
1926         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1927         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1928         break;
1929 
1930       case T_ADDRESS:
1931         assert(false, "found T_ADDRESS in java args");
1932         break;
1933 
1934       default:
1935         ShouldNotReachHere();
1936     }
1937   }
1938 
1939   //--------------------------------------------------------------------
1940   // Pre-load a static method's oop into ARG2.
1941   // Used both by locking code and the normal JNI call code.
1942   //--------------------------------------------------------------------
1943   if (method_is_static && !is_critical_native) {
1944     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1945 
1946     // Now handlize the static class mirror in ARG2. It's known not-null.
1947     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1948     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1949     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1950   }
1951 
1952   // Get JNIEnv* which is first argument to native.
1953   if (!is_critical_native) {
1954     __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1955   }
1956 
1957   //////////////////////////////////////////////////////////////////////
1958   // We have all of the arguments setup at this point.
1959   // We MUST NOT touch any outgoing regs from this point on.
1960   // So if we must call out we must push a new frame.
1961   //////////////////////////////////////////////////////////////////////
1962 
1963 
1964   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1965   // Both values represent the same position.
1966   __ get_PC(Z_R10);                // PC into register
1967   wrapper_CRegsSet = __ offset();  // and into the variable.
1968 
1969   // Z_R10 now has the pc loaded that we will use when we finally call to native.
1970 
1971   // We use the same pc/oopMap repeatedly when we call out.
1972   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1973 
1974   // Lock a synchronized method.
1975 
1976   if (method->is_synchronized()) {
1977     assert(!is_critical_native, "unhandled");
1978 
1979     // ATTENTION: args and Z_R10 must be preserved.
1980     Register r_oop  = Z_R11;
1981     Register r_box  = Z_R12;
1982     Register r_tmp1 = Z_R13;
1983     Register r_tmp2 = Z_R7;
1984     Label done;
1985 
1986     // Load the oop for the object or class. R_carg2_classorobject contains
1987     // either the handlized oop from the incoming arguments or the handlized
1988     // class mirror (if the method is static).
1989     __ z_lg(r_oop, 0, Z_ARG2);
1990 
1991     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1992     // Get the lock box slot's address.
1993     __ add2reg(r_box, lock_offset, Z_SP);
1994 
1995 #ifdef ASSERT
1996     if (UseBiasedLocking)
1997       // Making the box point to itself will make it clear it went unused
1998       // but also be obviously invalid.
1999       __ z_stg(r_box, 0, r_box);
2000 #endif // ASSERT
2001 
2002     // Try fastpath for locking.
2003     // Fast_lock kills r_tmp1, r_tmp2. (Don't use R1 as temp, won't work!)
2004     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2005     __ z_bre(done);
2006 
2007     //-------------------------------------------------------------------------
2008     // None of the above fast optimizations worked so we have to get into the
2009     // slow case of monitor enter. Inline a special case of call_VM that
2010     // disallows any pending_exception.
2011     //-------------------------------------------------------------------------
2012 
2013     Register oldSP = Z_R11;
2014 
2015     __ z_lgr(oldSP, Z_SP);
2016 
2017     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2018 
2019     // Prepare arguments for call.
2020     __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
2021     __ add2reg(Z_ARG2, lock_offset, oldSP);
2022     __ z_lgr(Z_ARG3, Z_thread);
2023 
2024     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2025 
2026     // Do the call.
2027     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2028     __ call(Z_R1_scratch);
2029 
2030     __ reset_last_Java_frame();
2031 
2032     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2033 #ifdef ASSERT
2034     { Label L;
2035       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2036       __ z_bre(L);
2037       __ stop("no pending exception allowed on exit from IR::monitorenter");
2038       __ bind(L);
2039     }
2040 #endif
2041     __ bind(done);
2042   } // lock for synchronized methods
2043 
2044 
2045   //////////////////////////////////////////////////////////////////////
2046   // Finally just about ready to make the JNI call.
2047   //////////////////////////////////////////////////////////////////////
2048 
2049   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2050   __ set_last_Java_frame(Z_SP, Z_R10);
2051 
2052   // Transition from _thread_in_Java to _thread_in_native.
2053   __ set_thread_state(_thread_in_native);
2054 
2055 
2056   //////////////////////////////////////////////////////////////////////
2057   // This is the JNI call.
2058   //////////////////////////////////////////////////////////////////////
2059 
2060   __ call_c(native_func);
2061 
2062 
2063   //////////////////////////////////////////////////////////////////////
2064   // We have survived the call once we reach here.
2065   //////////////////////////////////////////////////////////////////////
2066 
2067 
2068   //--------------------------------------------------------------------
2069   // Unpack native results.
2070   //--------------------------------------------------------------------
2071   // For int-types, we do any needed sign-extension required.
2072   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2073   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2074   // blocking or unlocking.
2075   // An OOP result (handle) is done specially in the slow-path code.
2076   //--------------------------------------------------------------------
2077   switch (ret_type) {
2078     case T_VOID:    break;         // Nothing to do!
2079     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
2080     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
2081     case T_LONG:    break;         // Got it where we want it (unless slow-path)
2082     case T_OBJECT:  break;         // Really a handle.
2083                                    // Cannot de-handlize until after reclaiming jvm_lock.
2084     case T_ARRAY:   break;
2085 
2086     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
2087       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
2088       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
2089       break;
2090     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
2091     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2092     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
2093     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
2094 
2095     default:
2096       ShouldNotReachHere();
2097       break;
2098   }
2099 
2100 
2101   // Switch thread to "native transition" state before reading the synchronization state.
2102   // This additional state is necessary because reading and testing the synchronization
2103   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2104   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2105   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
2106   //   - Thread A is resumed to finish this native method, but doesn't block here since it
2107   //     didn't see any synchronization in progress, and escapes.
2108 
2109   // Transition from _thread_in_native to _thread_in_native_trans.
2110   __ set_thread_state(_thread_in_native_trans);
2111 
2112   // Safepoint synchronization
2113   //--------------------------------------------------------------------
2114   // Must we block?
2115   //--------------------------------------------------------------------
2116   // Block, if necessary, before resuming in _thread_in_Java state.
2117   // In order for GC to work, don't clear the last_Java_sp until after blocking.
2118   //--------------------------------------------------------------------
2119   Label after_transition;
2120   {
2121     Label no_block, sync;
2122 
2123     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2124 
2125     if (os::is_MP()) {
2126       if (UseMembar) {
2127         // Force this write out before the read below.
2128         __ z_fence();
2129       } else {
2130         // Write serialization page so VM thread can do a pseudo remote membar.
2131         // We use the current thread pointer to calculate a thread specific
2132         // offset to write to within the page. This minimizes bus traffic
2133         // due to cache line collision.
2134         __ serialize_memory(Z_thread, Z_R1, Z_R2);
2135       }
2136     }
2137     __ generate_safepoint_check(sync, Z_R1, true);
2138 
2139     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2140     __ z_bre(no_block);
2141 
2142     // Block. Save any potential method result value before the operation and
2143     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2144     // lets us share the oopMap we used when we went native rather than create
2145     // a distinct one for this pc.
2146     //
2147     __ bind(sync);
2148     __ z_acquire();
2149 
2150     address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2151                                              : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2152 
2153     __ call_VM_leaf(entry_point, Z_thread);
2154 
2155     if (is_critical_native) {
2156       restore_native_result(masm, ret_type, workspace_slot_offset);
2157       __ z_bru(after_transition); // No thread state transition here.
2158     }
2159     __ bind(no_block);
2160     restore_native_result(masm, ret_type, workspace_slot_offset);
2161   }
2162 
2163   //--------------------------------------------------------------------
2164   // Thread state is thread_in_native_trans. Any safepoint blocking has
2165   // already happened so we can now change state to _thread_in_Java.
2166   //--------------------------------------------------------------------
2167   // Transition from _thread_in_native_trans to _thread_in_Java.
2168   __ set_thread_state(_thread_in_Java);
2169   __ bind(after_transition);
2170 
2171 
2172   //--------------------------------------------------------------------
2173   // Reguard any pages if necessary.
2174   // Protect native result from being destroyed.
2175   //--------------------------------------------------------------------
2176 
2177   Label no_reguard;
2178 
2179   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2180            JavaThread::stack_guard_yellow_reserved_disabled);
2181 
2182   __ z_bre(no_reguard);
2183 
2184   save_native_result(masm, ret_type, workspace_slot_offset);
2185   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2186   restore_native_result(masm, ret_type, workspace_slot_offset);
2187 
2188   __ bind(no_reguard);
2189 
2190 
2191   // Synchronized methods (slow path only)
2192   // No pending exceptions for now.
2193   //--------------------------------------------------------------------
2194   // Handle possibly pending exception (will unlock if necessary).
2195   // The native result, if any is live, is in Z_FRES or Z_RES.
2196   //--------------------------------------------------------------------
2197   // Unlock
2198   //--------------------------------------------------------------------
2199   if (method->is_synchronized()) {
2200     const Register r_oop        = Z_R11;
2201     const Register r_box        = Z_R12;
2202     const Register r_tmp1       = Z_R13;
2203     const Register r_tmp2       = Z_R7;
2204     Label done;
2205 
2206     // Get unboxed oop of class mirror or object ...
2207     int   offset = method_is_static ? klass_offset : receiver_offset;
2208 
2209     assert(offset != -1, "");
2210     __ z_lg(r_oop, offset, Z_SP);
2211 
2212     // ... and address of lock object box.
2213     __ add2reg(r_box, lock_offset, Z_SP);
2214 
2215     // Try fastpath for unlocking.
2216     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2217     __ z_bre(done);
2218 
2219     // Slow path for unlocking.
2220     // Save and restore any potential method result value around the unlocking operation.
2221     const Register R_exc = Z_R11;
2222 
2223     save_native_result(masm, ret_type, workspace_slot_offset);
2224 
2225     // Must save pending exception around the slow-path VM call. Since it's a
2226     // leaf call, the pending exception (if any) can be kept in a register.
2227     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2228     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2229 
2230     // Must clear pending-exception before re-entering the VM. Since this is
2231     // a leaf call, pending-exception-oop can be safely kept in a register.
2232     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2233 
2234     // Inline a special case of call_VM that disallows any pending_exception.
2235 
2236     // Get locked oop from the handle we passed to jni.
2237     __ z_lg(Z_ARG1, offset, Z_SP);
2238     __ add2reg(Z_ARG2, lock_offset, Z_SP);
2239     __ z_lgr(Z_ARG3, Z_thread);
2240 
2241     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2242 
2243     __ call(Z_R1_scratch);
2244 
2245 #ifdef ASSERT
2246     {
2247       Label L;
2248       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2249       __ z_bre(L);
2250       __ stop("no pending exception allowed on exit from IR::monitorexit");
2251       __ bind(L);
2252     }
2253 #endif
2254 
2255     // Check_forward_pending_exception jumps to forward_exception if any pending
2256     // exception is set. The forward_exception routine expects to see the
2257     // exception in pending_exception and not in a register. Kind of clumsy,
2258     // since all folks who branch to forward_exception must have tested
2259     // pending_exception first and hence have it in a register already.
2260     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2261     restore_native_result(masm, ret_type, workspace_slot_offset);
2262     __ z_bru(done);
2263     __ z_illtrap(0x66);
2264 
2265     __ bind(done);
2266   }
2267 
2268 
2269   //--------------------------------------------------------------------
2270   // Clear "last Java frame" SP and PC.
2271   //--------------------------------------------------------------------
2272   __ verify_thread(); // Z_thread must be correct.
2273 
2274   __ reset_last_Java_frame();
2275 
2276   // Unpack oop result, e.g. JNIHandles::resolve result.
2277   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2278     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2279   }
2280 
2281   if (CheckJNICalls) {
2282     // clear_pending_jni_exception_check
2283     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2284   }
2285 
2286   // Reset handle block.
2287   if (!is_critical_native) {
2288     __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2289     __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2290 
2291     // Check for pending exceptions.
2292     __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2293     __ z_brne(handle_pending_exception);
2294   }
2295 
2296 
2297   //////////////////////////////////////////////////////////////////////
2298   // Return
2299   //////////////////////////////////////////////////////////////////////
2300 
2301 
2302 #ifndef USE_RESIZE_FRAME
2303   __ pop_frame();                     // Pop wrapper frame.
2304 #else
2305   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2306 #endif
2307   __ restore_return_pc();             // This is the way back to the caller.
2308   __ z_br(Z_R14);
2309 
2310 
2311   //////////////////////////////////////////////////////////////////////
2312   // Out-of-line calls to the runtime.
2313   //////////////////////////////////////////////////////////////////////
2314 
2315 
2316   if (!is_critical_native) {
2317 
2318     //---------------------------------------------------------------------
2319     // Handler for pending exceptions (out-of-line).
2320     //---------------------------------------------------------------------
2321     // Since this is a native call, we know the proper exception handler
2322     // is the empty function. We just pop this frame and then jump to
2323     // forward_exception_entry. Z_R14 will contain the native caller's
2324     // return PC.
2325     __ bind(handle_pending_exception);
2326     __ pop_frame();
2327     __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2328     __ restore_return_pc();
2329     __ z_br(Z_R1_scratch);
2330 
2331     //---------------------------------------------------------------------
2332     // Handler for a cache miss (out-of-line)
2333     //---------------------------------------------------------------------
2334     __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2335   }
2336   __ flush();
2337 
2338 
2339   //////////////////////////////////////////////////////////////////////
2340   // end of code generation
2341   //////////////////////////////////////////////////////////////////////
2342 
2343 
2344   nmethod *nm = nmethod::new_native_nmethod(method,
2345                                             compile_id,
2346                                             masm->code(),
2347                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2348                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2349                                             stack_slots / VMRegImpl::slots_per_word,
2350                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2351                                             in_ByteSize(lock_offset),
2352                                             oop_maps);
2353 
2354   if (is_critical_native) {
2355     nm->set_lazy_critical_native(true);
2356   }
2357 
2358   return nm;
2359 #else
2360   ShouldNotReachHere();
2361   return NULL;
2362 #endif // COMPILER2
2363 }
2364 
2365 static address gen_c2i_adapter(MacroAssembler  *masm,
2366                                int total_args_passed,
2367                                int comp_args_on_stack,
2368                                const BasicType *sig_bt,
2369                                const VMRegPair *regs,
2370                                Label &skip_fixup) {
2371   // Before we get into the guts of the C2I adapter, see if we should be here
2372   // at all. We've come from compiled code and are attempting to jump to the
2373   // interpreter, which means the caller made a static call to get here
2374   // (vcalls always get a compiled target if there is one). Check for a
2375   // compiled target. If there is one, we need to patch the caller's call.
2376 
2377   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2378   const Register ientry = Z_R11;
2379   const Register code   = Z_R11;
2380 
2381   address c2i_entrypoint;
2382   Label   patch_callsite;
2383 
2384   // Regular (verified) c2i entry point.
2385   c2i_entrypoint = __ pc();
2386 
2387   // Call patching needed?
2388   __ load_and_test_long(Z_R0_scratch, method_(code));
2389   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2390   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2391 
2392   __ bind(skip_fixup);  // Return point from patch_callsite.
2393 
2394   // Since all args are passed on the stack, total_args_passed*wordSize is the
2395   // space we need. We need ABI scratch area but we use the caller's since
2396   // it has already been allocated.
2397 
2398   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2399   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
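  // Illustration: with wordSize == 8 and total_args_passed == 5, the arg area
  // is align_up(5, 2) * 8 = 48 bytes, plus the ABI scratch area.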
2400   Register  sender_SP   = Z_R10;
2401   Register  value       = Z_R12;
2402 
2403   // Remember the senderSP so we can pop the interpreter arguments off the stack.
2404   // In addition, the frame manager expects initial_caller_sp in Z_R10.
2405   __ z_lgr(sender_SP, Z_SP);
2406 
2407   // This should always fit in 14 bit immediate.
2408   __ resize_frame(-extraspace, Z_R0_scratch);
2409 
2410   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2411   // args. This essentially moves the callers ABI scratch area from the top to the
2412   // bottom of the arg area.
2413 
2414   int st_off =  extraspace - wordSize;
2415 
2416   // Now write the args into the outgoing interpreter space.
2417   for (int i = 0; i < total_args_passed; i++) {
2418     VMReg r_1 = regs[i].first();
2419     VMReg r_2 = regs[i].second();
2420     if (!r_1->is_valid()) {
2421       assert(!r_2->is_valid(), "");
2422       continue;
2423     }
2424     if (r_1->is_stack()) {
2425       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2426       // We must account for it here.
2427       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2428 
2429       if (!r_2->is_valid()) {
2430         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2431       } else {
2432         // longs are given 2 64-bit slots in the interpreter,
2433         // but the data is passed in only 1 slot.
2434         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2435 #ifdef ASSERT
2436           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2437 #endif
2438           st_off -= wordSize;
2439         }
2440         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2441       }
2442     } else {
2443       if (r_1->is_Register()) {
2444         if (!r_2->is_valid()) {
2445           __ z_st(r_1->as_Register(), st_off, Z_SP);
2446         } else {
2447           // longs are given 2 64-bit slots in the interpreter, but the
2448           // data is passed in only 1 slot.
2449           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2450 #ifdef ASSERT
2451             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2452 #endif
2453             st_off -= wordSize;
2454           }
2455           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2456         }
2457       } else {
2458         assert(r_1->is_FloatRegister(), "");
2459         if (!r_2->is_valid()) {
2460           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2461         } else {
2462           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2463           // data is passed in only 1 slot.
2464           // One of these should get known junk...
2465 #ifdef ASSERT
2466           __ z_lzdr(Z_F1);
2467           __ z_std(Z_F1, st_off, Z_SP);
2468 #endif
2469           st_off-=wordSize;
2470           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2471         }
2472       }
2473     }
2474     st_off -= wordSize;
2475   }
2476 
2477 
2478   // Jump to the interpreter just as if interpreter was doing it.
2479   __ add2reg(Z_esp, st_off, Z_SP);
2480 
2481   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2482   __ z_br(ientry);
2483 
2484 
2485   // Prevent illegal entry to out-of-line code.
2486   __ z_illtrap(0x22);
2487 
2488   // Generate out-of-line runtime call to patch caller,
2489   // then continue as interpreted.
2490 
2491   // IF you lose the race you go interpreted.
2492   // We don't see any possible endless c2i -> i2c -> c2i ...
2493   // transitions no matter how rare.
2494   __ bind(patch_callsite);
2495 
2496   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2497   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2498   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2499   __ z_bru(skip_fixup);
2500 
2501   // end of out-of-line code
2502 
2503   return c2i_entrypoint;
2504 }
2505 
2506 // On entry, the following registers are set
2507 //
2508 //    Z_thread  r8  - JavaThread*
2509 //    Z_method  r9  - callee's method (method to be invoked)
2510 //    Z_esp     r7  - operand (or expression) stack pointer of caller. One slot above last arg.
2511 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2512 //
2513 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2514                                     int total_args_passed,
2515                                     int comp_args_on_stack,
2516                                     const BasicType *sig_bt,
2517                                     const VMRegPair *regs) {
2518   const Register value = Z_R12;
2519   const Register ld_ptr= Z_esp;
2520 
2521   int ld_offset = total_args_passed * wordSize;
2522 
2523   // Cut-out for having no stack args.
2524   if (comp_args_on_stack) {
2525     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2526     // registers are below. By subtracting stack0, we either get a negative
2527     // number (all values in registers) or the maximum stack slot accessed.
2528     // Convert VMRegImpl (4 byte) stack slots to words.
2529     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2530     // Round up to minimum stack alignment, in wordSize.
2531     comp_words_on_stack = align_up(comp_words_on_stack, 2);
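    // Illustration: comp_args_on_stack == 3 (4-byte slots) yields
    // align_up(3*4, wordSize) >> LogBytesPerWord = 2 words, which already
    // satisfies the 2-word alignment.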
2532 
2533     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2534   }
2535 
2536   // Now generate the shuffle code. Pick up all register args and move the
2537   // rest through register value=Z_R12.
2538   for (int i = 0; i < total_args_passed; i++) {
2539     if (sig_bt[i] == T_VOID) {
2540       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2541       continue;
2542     }
2543 
2544     // Pick up 0, 1 or 2 words from ld_ptr.
2545     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2546            "scrambled load targets?");
2547     VMReg r_1 = regs[i].first();
2548     VMReg r_2 = regs[i].second();
2549     if (!r_1->is_valid()) {
2550       assert(!r_2->is_valid(), "");
2551       continue;
2552     }
2553     if (r_1->is_FloatRegister()) {
2554       if (!r_2->is_valid()) {
2555         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2556         ld_offset-=wordSize;
2557       } else {
2558         // Skip the unused interpreter slot.
2559         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2560         ld_offset -= 2 * wordSize;
2561       }
2562     } else {
2563       if (r_1->is_stack()) {
2564         // Must do a memory to memory move.
2565         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2566 
2567         if (!r_2->is_valid()) {
2568           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2569         } else {
2570           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2571           // data is passed in only 1 slot.
2572           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2573             ld_offset -= wordSize;
2574           }
2575           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2576         }
2577       } else {
2578         if (!r_2->is_valid()) {
2579           // Not sure we need to do this but it shouldn't hurt.
2580           if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
2581             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2582           } else {
2583             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2584           }
2585         } else {
2586           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2587           // data is passed in only 1 slot.
2588           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2589             ld_offset -= wordSize;
2590           }
2591           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2592         }
2593       }
2594       ld_offset -= wordSize;
2595     }
2596   }
2597 
2598   // Jump to the compiled code just as if compiled code was doing it.
2599   // load target address from method oop:
2600   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2601 
2602   // Store method oop into thread->callee_target.
2603   // 6243940: We might end up in handle_wrong_method if
2604   // the callee is deoptimized as we race through here. If that
2605   // happens we don't want to take a safepoint, because the
2606   // caller frame will look interpreted and arguments are now
2607   // "compiled", so it is much better to make this transition
2608   // invisible to the stack walking code. Unfortunately, if
2609   // we try to find the callee by normal means, a safepoint
2610   // is possible. So we stash the desired callee in the thread,
2611   // and the VM will find it there should this case occur.
2612   __ z_stg(Z_method, thread_(callee_target));
2613 
2614   __ z_br(Z_R1_scratch);
2615 }
2616 
2617 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2618                                                             int total_args_passed,
2619                                                             int comp_args_on_stack,
2620                                                             const BasicType *sig_bt,
2621                                                             const VMRegPair *regs,
2622                                                             AdapterFingerPrint* fingerprint) {
2623   __ align(CodeEntryAlignment);
2624   address i2c_entry = __ pc();
2625   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2626 
2627   address c2i_unverified_entry;
2628 
2629   Label skip_fixup;
2630   {
2631     Label ic_miss;
2632     const int klass_offset         = oopDesc::klass_offset_in_bytes();
2633     const int holder_klass_offset  = CompiledICHolder::holder_klass_offset();
2634     const int holder_method_offset = CompiledICHolder::holder_method_offset();
2635 
2636     // Out-of-line call to ic_miss handler.
2637     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2638 
2639     // Unverified Entry Point (UEP).
2640     __ align(CodeEntryAlignment);
2641     c2i_unverified_entry = __ pc();
2642 
2643     // Check the pointers.
2644     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2645       __ z_ltgr(Z_ARG1, Z_ARG1);
2646       __ z_bre(ic_miss);
2647     }
2648     __ verify_oop(Z_ARG1);
2649 
2650     // Check ic: object class <-> cached class
2651     // Compress cached class for comparison. That's more efficient.
2652     if (UseCompressedClassPointers) {
2653       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2654       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2655     } else {
2656       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2657     }
2658     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2659 
2660     // This def MUST MATCH code in gen_c2i_adapter!
2661     const Register code = Z_R11;
2662 
2663     __ z_lg(Z_method, holder_method_offset, Z_method);
2664     __ load_and_test_long(Z_R0, method_(code));
2665     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2666 
2667     // Fall through to VEP. Duplicates the LTG, but avoids a taken branch.
2668   }
2669 
2670   address c2i_entry;
2671   c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2672 
2673   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
2674 }
2675 
2676 // This function returns the adjustment size (in number of words) applied to a
2677 // c2i adapter activation for use during deoptimization.
2678 //
2679 // Actually only compiled frames need to be adjusted, but it
2680 // does no harm to adjust entry and interpreter frames, too.
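     //
     // Illustrative example: with 2 parameters and 5 locals, the frame grows by
     // 3 stack element words plus frame::z_parent_ijava_frame_abi_size converted
     // from bytes to words (see the formula below).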
2681 //
2682 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2683   assert(callee_locals >= callee_parameters,
2684           "test and remove; got more parms than locals");
2685   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2686   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2687          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2688 }
2689 
2690 uint SharedRuntime::out_preserve_stack_slots() {
2691   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2692 }
2693 
2694 //
2695 // Frame generation for deopt and uncommon trap blobs.
2696 //
2697 static void push_skeleton_frame(MacroAssembler* masm,
2698                           /* Unchanged */
2699                           Register frame_sizes_reg,
2700                           Register pcs_reg,
2701                           /* Invalidate */
2702                           Register frame_size_reg,
2703                           Register pc_reg) {
2704   BLOCK_COMMENT("  push_skeleton_frame {");
2705    __ z_lg(pc_reg, 0, pcs_reg);
2706    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2707    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2708    Register fp = pc_reg;
2709    __ push_frame(frame_size_reg, fp);
2710 #ifdef ASSERT
2711    // The magic is required for successfully walking skeletal frames.
2712    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2713    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2714    // Fill other slots that are supposedly not necessary with eye catchers.
2715    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2716    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2717    // The sender_sp of the bottom frame is set before pushing it.
2718    // The sender_sp of non-bottom frames is their caller's top_frame_sp, which
2719    // is unknown here. Luckily it is not needed before filling the frame in
2720    // layout_activation(); we assert this by setting an eye catcher (see
2721    // comments on sender_sp in frame_s390.hpp).
2722    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2723 #endif // ASSERT
2724   BLOCK_COMMENT("  } push_skeleton_frame");
2725 }
2726 
2727 // Loop through the UnrollBlock info and create new frames.
2728 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2729                             /* read */
2730                             Register unroll_block_reg,
2731                             /* invalidate */
2732                             Register frame_sizes_reg,
2733                             Register number_of_frames_reg,
2734                             Register pcs_reg,
2735                             Register tmp1,
2736                             Register tmp2) {
2737   BLOCK_COMMENT("push_skeleton_frames {");
2738   // _number_of_frames is of type int (deoptimization.hpp).
2739   __ z_lgf(number_of_frames_reg,
2740            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2741   __ z_lg(pcs_reg,
2742           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2743   __ z_lg(frame_sizes_reg,
2744           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2745 
2746   // stack: (caller_of_deoptee, ...).
2747 
2748   // If caller_of_deoptee is a compiled frame, then we extend it to make
2749   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2750   // See also Deoptimization::last_frame_adjust() above.
2751   // Note: entry and interpreted frames are adjusted, too. But this does no harm.
2752 
2753   __ z_lgf(Z_R1_scratch,
2754            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2755   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2756   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2757   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2758   // (it is required to find the original pc of caller_of_deoptee if it is marked
2759   // for deoptimization - see nmethod::orig_pc_addr()).
2760   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2761 
2762   // Now push the new interpreter frames.
2763   Label loop, loop_entry;
2764 
2765   // Make sure that there is at least one entry in the array.
2766   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2767   __ asm_assert_ne("array_size must be > 0", 0x205);
2768 
2769   __ z_bru(loop_entry);
2770 
2771   __ bind(loop);
2772 
2773   __ add2reg(frame_sizes_reg, wordSize);
2774   __ add2reg(pcs_reg, wordSize);
2775 
2776   __ bind(loop_entry);
2777 
2778   // Allocate a new frame, fill in the pc.
2779   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2780 
2781   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2782   __ z_brne(loop);
2783 
2784   // Set the top frame's return pc.
2785   __ add2reg(pcs_reg, wordSize);
2786   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2787   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2788   BLOCK_COMMENT("} push_skeleton_frames");
2789 }
2790 
2791 //------------------------------generate_deopt_blob----------------------------
2792 void SharedRuntime::generate_deopt_blob() {
2793   // Allocate space for the code.
2794   ResourceMark rm;
2795   // Setup code generation tools.
2796   CodeBuffer buffer("deopt_blob", 2048, 1024);
2797   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2798   Label exec_mode_initialized;
2799   OopMap* map = NULL;
2800   OopMapSet *oop_maps = new OopMapSet();
2801 
2802   unsigned int start_off = __ offset();
2803   Label cont;
2804 
2805   // --------------------------------------------------------------------------
2806   // Normal entry (non-exception case)
2807   //
2808   // We have been called from the deopt handler of the deoptee.
2809   // Z_R14 points behind the call in the deopt handler. We adjust
2810   // it such that it points to the start of the deopt handler.
2811   // The return_pc has been stored in the frame of the deoptee and
2812   // will replace the address of the deopt_handler in the call
2813   // to Deoptimization::fetch_unroll_info below.
2814   // The (int) cast is necessary, because negating an unsigned value
2815   // (e.g. -((unsigned int)14)) still yields an unsigned int.
2816   __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
2817 
2818   const Register   exec_mode_reg = Z_tmp_1;
2819 
2820   // stack: (deoptee, caller of deoptee, ...)
2821 
2822   // Push an "unpack" frame.
2823   // Z_R14 contains the return address pointing into the deoptimized
2824   // nmethod that was valid just before the nmethod was deoptimized.
2825   // Save Z_R14 in the deoptee frame; the `fetch_unroll_info'
2826   // procedure called below will read it from there.
2827   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2828 
2829   // note the entry point.
2830   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2831   __ z_bru(exec_mode_initialized);
2832 
2833 #ifndef COMPILER1
2834   int reexecute_offset = 1; // odd offset will produce odd pc, which triggers a hardware trap
2835 #else
2836   // --------------------------------------------------------------------------
2837   // Reexecute entry
2838   // - Z_R14 = Deopt Handler in nmethod
2839 
2840   int reexecute_offset = __ offset() - start_off;
2841 
2842   // No need to update map, as each call to save_live_registers produces an identical oopmap.
2843   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2844 
2845   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2846   __ z_bru(exec_mode_initialized);
2847 #endif
2848 
2849 
2850   // --------------------------------------------------------------------------
2851   // Exception entry. We reached here via a branch. Registers on entry:
2852   // - Z_EXC_OOP (Z_ARG1) = exception oop
2853   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2854 
2855   int exception_offset = __ offset() - start_off;
2856 
2857   // All registers are dead at this entry point, except for Z_EXC_OOP and
2858   // Z_EXC_PC, which contain the exception oop and exception pc,
2859   // respectively. Set them in TLS and fall through to the
2860   // unpack_with_exception_in_tls entry point.
2861 
2862   // Store exception oop and pc in thread (location known to GC).
2863   // Need this since the call to "fetch_unroll_info()" may safepoint.
2864   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2865   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2866 
2867   // fall through
2868 
2869   int exception_in_tls_offset = __ offset() - start_off;
2870 
2871   // New implementation: the exception oop is now passed in the JavaThread.
2872 
2873   // Prolog for the exception case.
2874   // All registers must be preserved because they might be used by LinearScan.
2875   // Exception oop and throwing PC are passed in the JavaThread.
2876 
2877   // Load the throwing pc from the JavaThread and use it as the return address of the current frame.
2878   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2879 
2880   // Save everything in sight.
2881   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2882 
2883   // Now it is safe to overwrite any register
2884 
2885   // Clear the exception pc field in JavaThread
2886   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2887 
2888   // Deopt during an exception.  Save exec mode for unpack_frames.
2889   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2890 
2891 
2892 #ifdef ASSERT
2893   // verify that there is really an exception oop in JavaThread
2894   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2895   __ verify_oop(Z_ARG1);
2896 
2897   // verify that there is no pending exception
2898   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2899                              "must not have pending exception here", __LINE__);
2900 #endif
2901 
2902   // --------------------------------------------------------------------------
2903   // At this point, the live registers are saved and
2904   // the exec_mode_reg has been set up correctly.
2905   __ bind(exec_mode_initialized);
2906 
2907   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2908 
2909   {
2910   const Register unroll_block_reg  = Z_tmp_2;
2911 
2912   // We need to set `last_Java_frame' because `fetch_unroll_info' will
2913   // call `last_Java_frame()'. However, we can't block and no GC will
2914   // occur, so we don't need an oopmap. The value of the pc in the
2915   // frame is not particularly important; it just needs to identify the blob.
2916 
2917   // Don't set last_Java_pc here anymore (it is then implicitly NULL);
2918   // the correct PC is retrieved in pd_last_frame() in that case.
2919   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
2920   // With EscapeAnalysis turned on, this call may safepoint
2921   // despite being marked as a "leaf call"!
2922   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
2923   // Set an oopmap for the call site; this describes all our saved volatile registers.
2924   int offs = __ offset();
2925   oop_maps->add_gc_map(offs, map);
2926 
2927   __ reset_last_Java_frame();
2928   // save the return value.
2929   __ z_lgr(unroll_block_reg, Z_RET);
2930   // restore the return registers that have been saved
2931   // (among other registers) by save_live_registers(...).
2932   RegisterSaver::restore_result_registers(masm);
2933 
2934   // reload the exec mode from the UnrollBlock (it might have changed)
2935   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2936 
2937   // In excp_deopt_mode, restore and clear exception oop which we
2938   // stored in the thread during exception entry above. The exception
2939   // oop will be the return value of this stub.
2940   NearLabel skip_restore_excp;
2941   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
2942   __ z_lg(Z_RET, thread_(exception_oop));
2943   __ clear_mem(thread_(exception_oop), 8);
2944   __ bind(skip_restore_excp);
2945 
2946   // remove the "unpack" frame
2947   __ pop_frame();
2948 
2949   // stack: (deoptee, caller of deoptee, ...).
2950 
2951   // pop the deoptee's frame
2952   __ pop_frame();
2953 
2954   // stack: (caller_of_deoptee, ...).
2955 
2956   // loop through the `UnrollBlock' info and create interpreter frames.
2957   push_skeleton_frames(masm, true/*deopt*/,
2958                   unroll_block_reg,
2959                   Z_tmp_3,
2960                   Z_tmp_4,
2961                   Z_ARG5,
2962                   Z_ARG4,
2963                   Z_ARG3);
2964 
2965   // stack: (skeletal interpreter frame, ..., optional skeletal
2966   // interpreter frame, caller of deoptee, ...).
2967   }
2968 
2969   // push an "unpack" frame taking care of float / int return values.
2970   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
2971 
2972   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2973   // skeletal interpreter frame, caller of deoptee, ...).
2974 
2975   // spill live volatile registers since we'll do a call.
2976   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2977   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2978 
2979   // Let the unpacker lay out information in the skeletal frames just allocated.
2980   __ get_PC(Z_RET);
2981   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
2982   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
2983                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
2984 
2985   __ reset_last_Java_frame();
2986 
2987   // restore the volatiles saved above.
2988   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2989   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2990 
2991   // pop the "unpack" frame.
2992   __ pop_frame();
2993   __ restore_return_pc();
2994 
2995   // stack: (top interpreter frame, ..., optional interpreter frame,
2996   // caller of deoptee, ...).
2997 
2998   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2999   __ restore_bcp();
3000   __ restore_locals();
3001   __ restore_esp();
3002 
3003   // return to the interpreter entry point.
3004   __ z_br(Z_R14);
3005 
3006   // Make sure all code is generated
3007   masm->flush();
3008 
3009   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3010   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3011 }
3012 
3013 
3014 #ifdef COMPILER2
3015 //------------------------------generate_uncommon_trap_blob--------------------
3016 void SharedRuntime::generate_uncommon_trap_blob() {
3017   // Allocate space for the code
3018   ResourceMark rm;
3019   // Setup code generation tools
3020   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3021   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3022 
3023   Register unroll_block_reg = Z_tmp_1;
3024   Register klass_index_reg  = Z_ARG2;
3025   Register unc_trap_reg     = Z_ARG2;
3026 
3027   // stack: (deoptee, caller_of_deoptee, ...).
3028 
3029   // push a dummy "unpack" frame and call
3030   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3031   // vframe array and return the `UnrollBlock' information.
3032 
3033   // save R14 to compiled frame.
3034   __ save_return_pc();
3035   // push the "unpack_frame".
3036   __ push_frame_abi160(0);
3037 
3038   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
3039 
3040   // set the "unpack" frame as last_Java_frame.
3041   // `Deoptimization::uncommon_trap' expects it and considers its
3042   // sender frame as the deoptee frame.
3043   __ get_PC(Z_R1_scratch);
3044   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3045 
3046   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
3047   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
3048   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
3049   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
3050 
3051   __ reset_last_Java_frame();
3052 
3053   // pop the "unpack" frame
3054   __ pop_frame();
3055 
3056   // stack: (deoptee, caller_of_deoptee, ...).
3057 
3058   // save the return value.
3059   __ z_lgr(unroll_block_reg, Z_RET);
3060 
3061   // pop the deoptee frame.
3062   __ pop_frame();
3063 
3064   // stack: (caller_of_deoptee, ...).
3065 
3066 #ifdef ASSERT
3067   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
3068   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
3069   const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
3070 #ifndef VM_LITTLE_ENDIAN
3071   + 3
3072 #endif
3073   ;
3074   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
3075     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3076   } else {
3077     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3078   }
3079   __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
3080 #endif
3081 
3082   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
3083 
3084   // allocate new interpreter frame(s) and possibly resize the caller's frame
3085   // (no more adapters!).
3086   push_skeleton_frames(masm, false/*deopt*/,
3087                   unroll_block_reg,
3088                   Z_tmp_2,
3089                   Z_tmp_3,
3090                   Z_tmp_4,
3091                   Z_ARG5,
3092                   Z_ARG4);
3093 
3094   // stack: (skeletal interpreter frame, ..., optional skeletal
3095   // interpreter frame, (resized) caller of deoptee, ...).
3096 
3097   // Push a dummy "unpack" frame taking care of float return values.
3098   // Call `Deoptimization::unpack_frames' to lay out information in the
3099   // interpreter frames just created.
3100 
3101   // push the "unpack" frame
3102   const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
3103 
3104   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3105   // skeletal interpreter frame, (resized) caller of deoptee, ...).
3106 
3107   // set the "unpack" frame as last_Java_frame
3108   __ get_PC(Z_R1_scratch);
3109   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3110 
3111   // indicate it is the uncommon trap case
3112   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
3113   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3114   // Let the unpacker lay out information in the skeletal frames just allocated.
3115   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
3116 
3117   __ reset_last_Java_frame();
3118   // pop the "unpack" frame
3119   __ pop_frame();
3120   // restore LR from top interpreter frame
3121   __ restore_return_pc();
3122 
3123   // stack: (top interpreter frame, ..., optional interpreter frame,
3124   // (resized) caller of deoptee, ...).
3125 
3126   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3127   __ restore_bcp();
3128   __ restore_locals();
3129   __ restore_esp();
3130 
3131   // return to the interpreter entry point
3132   __ z_br(Z_R14);
3133 
3134   masm->flush();
3135   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
3136 }
3137 #endif // COMPILER2
3138 
3139 
3140 //------------------------------generate_handler_blob------
3141 //
3142 // Generate a special Compile2Runtime blob that saves all registers,
3143 // and sets up an oopmap.
3144 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3145   assert(StubRoutines::forward_exception_entry() != NULL,
3146          "must be generated before");
3147 
3148   ResourceMark rm;
3149   OopMapSet *oop_maps = new OopMapSet();
3150   OopMap* map;
3151 
3152   // Allocate space for the code. Setup code generation tools.
3153   CodeBuffer buffer("handler_blob", 2048, 1024);
3154   MacroAssembler* masm = new MacroAssembler(&buffer);
3155 
3156   unsigned int start_off = __ offset();
3157   address call_pc = NULL;
3158   int frame_size_in_bytes;
3159 
3160   bool cause_return = (poll_type == POLL_AT_RETURN);
3161   // Make room for return address (or push it again)
3162   if (!cause_return)
3163     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
3164 
3165   // Save registers, fpu state, and flags
3166   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3167 
3168   // The following is basically a call_VM. However, we need the precise
3169   // address of the call in order to generate an oopmap. Hence, we do all the
3170   // work ourselves.
3171   __ set_last_Java_frame(Z_SP, noreg);
3172 
3173   // call into the runtime to handle the safepoint poll
3174   __ call_VM_leaf(call_ptr, Z_thread);
3175 
3176 
3177   // Set an oopmap for the call site. This oopmap will map all
3178   // oop-registers and debug-info registers as callee-saved. This
3179   // will allow deoptimization at this safepoint to find all possible
3180   // debug-info recordings, as well as let GC find all oops.
3181 
3182   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
3183 
3184   Label noException;
3185 
3186   __ reset_last_Java_frame();
3187 
3188   __ load_and_test_long(Z_R1, thread_(pending_exception));
3189   __ z_bre(noException);
3190 
3191   // Pending exception case, used (sporadically) by
3192   // api/java_lang/Thread.State/index#ThreadState et al.
3193   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3194 
3195   // Jump to forward_exception_entry, with the issuing PC in Z_R14
3196   // so it looks like the original nmethod called forward_exception_entry.
3197   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3198   __ z_br(Z_R1_scratch);
3199 
3200   // No exception case
3201   __ bind(noException);
3202 
3203   // Normal exit, restore registers and exit.
3204   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3205 
3206   __ z_br(Z_R14);
3207 
3208   // Make sure all code is generated
3209   masm->flush();
3210 
3211   // Fill-out other meta info
3212   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3213 }
3214 
3215 
3216 //
3217 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3218 //
3219 // Generate a stub that calls into vm to find out the proper destination
3220 // of a Java call. All the argument registers are live at this point,
3221 // but since this is generic code we don't know what they are, and the caller
3222 // must do any GC of the args.
3223 //
3224 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3225   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3226 
3227   // allocate space for the code
3228   ResourceMark rm;
3229 
3230   CodeBuffer buffer(name, 1000, 512);
3231   MacroAssembler* masm                = new MacroAssembler(&buffer);
3232 
3233   OopMapSet *oop_maps = new OopMapSet();
3234   OopMap* map = NULL;
3235 
3236   unsigned int start_off = __ offset();
3237 
3238   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3239 
3240   // We must save a PC from within the stub as the return PC.
3241   // C code doesn't store the return address (LR) where we expect the PC,
3242   // so we would run into trouble upon stack walking.
3243   __ get_PC(Z_R1_scratch);
3244 
3245   unsigned int frame_complete = __ offset();
3246 
3247   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
3248 
3249   __ call_VM_leaf(destination, Z_thread, Z_method);
3250 
3251 
3252   // Set an oopmap for the call site.
3253   // We need this not only for callee-saved registers, but also for volatile
3254   // registers that the compiler might be keeping live across a safepoint.
3255 
3256   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
3257 
3258   // clear last_Java_sp
3259   __ reset_last_Java_frame();
3260 
3261   // check for pending exceptions
3262   Label pending;
3263   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3264   __ z_brne(pending);
3265 
3266   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3267   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3268 
3269   // get the returned method
3270   __ get_vm_result_2(Z_method);
3271 
3272   // We are back to the original state on entry and ready to go.
3273   __ z_br(Z_R1_scratch);
3274 
3275   // Pending exception after the safepoint
3276 
3277   __ bind(pending);
3278 
3279   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3280 
3281   // exception pending => remove activation and forward to exception handler
3282 
3283   __ z_lgr(Z_R2, Z_R0); // pending_exception
3284   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3285   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3286   __ z_br(Z_R1_scratch);
3287 
3288   // -------------
3289   // make sure all code is generated
3290   masm->flush();
3291 
3292   // return the blob
3293   // frame_size_words or bytes??
3294   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3295                                        oop_maps, true);
3296 
3297 }
3298 
3299 //------------------------------Montgomery multiplication------------------------
3300 //
3301 
3302 // Subtract 0:b from carry:a. Return carry.
3303 static unsigned long
3304 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3305   unsigned long i, c = 8 * (unsigned long)(len - 1);
3306   __asm__ __volatile__ (
3307     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3308     "LGHI   0, 8               \n" // index increment (for BRXLG)
3309     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3310     "0:                        \n"
3311     "LG     %[c], 0(%[i],%[a]) \n"
3312     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3313     "STG    %[c], 0(%[i],%[a]) \n"
3314     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3315     "SLBGR  %[c], %[c]         \n" // save carry - 1
3316     : [i]"=&a"(i), [c]"+r"(c)
3317     : [a]"a"(a), [b]"a"(b)
3318     : "cc", "memory", "r0", "r1"
3319  );
3320   return carry + c;
3321 }
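     // For reference, a portable sketch of what the inline assembly above
     // computes (illustrative only, not used by the VM):
     //
     //   unsigned long borrow = 0;
     //   for (long k = 0; k < len; k++) {
     //     unsigned long ak = a[k];
     //     unsigned long t  = ak - b[k];
     //     unsigned long b_out = (ak < b[k]) || (t < borrow); // borrow out of this word
     //     a[k]   = t - borrow;
     //     borrow = b_out;
     //   }
     //   return carry - borrow;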
3322 
3323 // Multiply (unsigned) Long A by Long B, accumulating the double-
3324 // length result into the accumulator formed of T0, T1, and T2.
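     // In effect (sketch of the intent): (T2:T1:T0) += A[A_ind] * B[B_ind],
     // where MLG forms the 128-bit product in the even/odd register pair
     // r0/r1 and the add-logical-with-carry chain folds it into T0..T2.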
3325 inline void MACC(unsigned long A[], long A_ind,
3326                  unsigned long B[], long B_ind,
3327                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3328   long A_si = 8 * A_ind,
3329        B_si = 8 * B_ind;
3330   __asm__ __volatile__ (
3331     "LG     1, 0(%[A_si],%[A]) \n"
3332     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3333     "ALGR   %[T0], 1           \n"
3334     "LGHI   1, 0               \n" // r1 = 0
3335     "ALCGR  %[T1], 0           \n"
3336     "ALCGR  %[T2], 1           \n"
3337     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3338     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3339     : "cc", "r0", "r1"
3340  );
3341 }
3342 
3343 // As above, but add twice the double-length result into the
3344 // accumulator.
3345 inline void MACC2(unsigned long A[], long A_ind,
3346                   unsigned long B[], long B_ind,
3347                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3348   const unsigned long zero = 0;
3349   long A_si = 8 * A_ind,
3350        B_si = 8 * B_ind;
3351   __asm__ __volatile__ (
3352     "LG     1, 0(%[A_si],%[A]) \n"
3353     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3354     "ALGR   %[T0], 1           \n"
3355     "ALCGR  %[T1], 0           \n"
3356     "ALCGR  %[T2], %[zero]     \n"
3357     "ALGR   %[T0], 1           \n"
3358     "ALCGR  %[T1], 0           \n"
3359     "ALCGR  %[T2], %[zero]     \n"
3360     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3361     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3362     : "cc", "r0", "r1"
3363  );
3364 }
3365 
3366 // Fast Montgomery multiplication. The derivation of the algorithm is
3367 // in "A Cryptographic Library for the Motorola DSP56000,
3368 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
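     //
     // In the usual Montgomery sense: a, b, n, and m are arrays of len 64-bit
     // words, least significant word first (see reverse_words() below); inv is
     // -n[0]^-1 mod 2^64 (checked by the assert); the result written to m is
     // a * b * R^-1 mod n, where R = 2^(64*len).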
3369 static void
3370 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3371                     unsigned long m[], unsigned long inv, int len) {
3372   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3373   int i;
3374 
3375   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3376 
3377   for (i = 0; i < len; i++) {
3378     int j;
3379     for (j = 0; j < i; j++) {
3380       MACC(a, j, b, i-j, t0, t1, t2);
3381       MACC(m, j, n, i-j, t0, t1, t2);
3382     }
3383     MACC(a, i, b, 0, t0, t1, t2);
3384     m[i] = t0 * inv;
3385     MACC(m, i, n, 0, t0, t1, t2);
3386 
3387     assert(t0 == 0, "broken Montgomery multiply");
3388 
3389     t0 = t1; t1 = t2; t2 = 0;
3390   }
3391 
3392   for (i = len; i < 2 * len; i++) {
3393     int j;
3394     for (j = i - len + 1; j < len; j++) {
3395       MACC(a, j, b, i-j, t0, t1, t2);
3396       MACC(m, j, n, i-j, t0, t1, t2);
3397     }
3398     m[i-len] = t0;
3399     t0 = t1; t1 = t2; t2 = 0;
3400   }
3401 
3402   while (t0) {
3403     t0 = sub(m, n, t0, len);
3404   }
3405 }
3406 
3407 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3408 // multiplies so it should be up to 25% faster than Montgomery
3409 // multiplication. However, its loop control is more complex and it
3410 // may actually run slower on some machines.
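     // Column i collects each cross term a[j]*a[i-j] (j < i-j) twice via MACC2;
     // when i is even, the single square term a[i/2]*a[i/2] is added once via MACC.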
3411 static void
3412 montgomery_square(unsigned long a[], unsigned long n[],
3413                   unsigned long m[], unsigned long inv, int len) {
3414   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3415   int i;
3416 
3417   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3418 
3419   for (i = 0; i < len; i++) {
3420     int j;
3421     int end = (i+1)/2;
3422     for (j = 0; j < end; j++) {
3423       MACC2(a, j, a, i-j, t0, t1, t2);
3424       MACC(m, j, n, i-j, t0, t1, t2);
3425     }
3426     if ((i & 1) == 0) {
3427       MACC(a, j, a, j, t0, t1, t2);
3428     }
3429     for (; j < i; j++) {
3430       MACC(m, j, n, i-j, t0, t1, t2);
3431     }
3432     m[i] = t0 * inv;
3433     MACC(m, i, n, 0, t0, t1, t2);
3434 
3435     assert(t0 == 0, "broken Montgomery square");
3436 
3437     t0 = t1; t1 = t2; t2 = 0;
3438   }
3439 
3440   for (i = len; i < 2*len; i++) {
3441     int start = i-len+1;
3442     int end = start + (len - start)/2;
3443     int j;
3444     for (j = start; j < end; j++) {
3445       MACC2(a, j, a, i-j, t0, t1, t2);
3446       MACC(m, j, n, i-j, t0, t1, t2);
3447     }
3448     if ((i & 1) == 0) {
3449       MACC(a, j, a, j, t0, t1, t2);
3450     }
3451     for (; j < len; j++) {
3452       MACC(m, j, n, i-j, t0, t1, t2);
3453     }
3454     m[i-len] = t0;
3455     t0 = t1; t1 = t2; t2 = 0;
3456   }
3457 
3458   while (t0) {
3459     t0 = sub(m, n, t0, len);
3460   }
3461 }
3462 
3463 // The threshold at which squaring is advantageous was determined
3464 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3465 // Value seems to be ok for other platforms, too.
3466 #define MONTGOMERY_SQUARING_THRESHOLD 64
3467 
3468 // Copy len longwords from s to d, word-swapping as we go. The
3469 // destination array is reversed.
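     // E.g., for len == 3: d[2] = s[0], d[1] = s[1], d[0] = s[2]. On big-endian
     // s390 no intra-longword swap is needed; the little-endian path is
     // unimplemented.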
3470 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3471   d += len;
3472   while(len-- > 0) {
3473     d--;
3474     unsigned long s_val = *s;
3475     // Swap words in a longword on little endian machines.
3476 #ifdef VM_LITTLE_ENDIAN
3477      Unimplemented();
3478 #endif
3479     *d = s_val;
3480     s++;
3481   }
3482 }
3483 
3484 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3485                                         jint len, jlong inv,
3486                                         jint *m_ints) {
3487   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3488   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3489   int longwords = len/2;
3490 
3491   // Make very sure we don't use so much space that the stack might
3492   // overflow. 512 jints correspond to a 16384-bit integer and
3493   // will use a total of 8K bytes of stack space here.
3494   int total_allocation = longwords * sizeof (unsigned long) * 4;
3495   guarantee(total_allocation <= 8192, "must be");
3496   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3497 
3498   // Local scratch arrays
3499   unsigned long
3500     *a = scratch + 0 * longwords,
3501     *b = scratch + 1 * longwords,
3502     *n = scratch + 2 * longwords,
3503     *m = scratch + 3 * longwords;
3504 
3505   reverse_words((unsigned long *)a_ints, a, longwords);
3506   reverse_words((unsigned long *)b_ints, b, longwords);
3507   reverse_words((unsigned long *)n_ints, n, longwords);
3508 
3509   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3510 
3511   reverse_words(m, (unsigned long *)m_ints, longwords);
3512 }
3513 
3514 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3515                                       jint len, jlong inv,
3516                                       jint *m_ints) {
3517   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3518   assert(len % 2 == 0, "array length in montgomery_square must be even");
3519   int longwords = len/2;
3520 
3521   // Make very sure we don't use so much space that the stack might
3522   // overflow. 512 jints correspond to a 16384-bit integer and
3523   // will use a total of 6K bytes of stack space here.
3524   int total_allocation = longwords * sizeof (unsigned long) * 3;
3525   guarantee(total_allocation <= 8192, "must be");
3526   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3527 
3528   // Local scratch arrays
3529   unsigned long
3530     *a = scratch + 0 * longwords,
3531     *n = scratch + 1 * longwords,
3532     *m = scratch + 2 * longwords;
3533 
3534   reverse_words((unsigned long *)a_ints, a, longwords);
3535   reverse_words((unsigned long *)n_ints, n, longwords);
3536 
3537   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3538     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3539   } else {
3540     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3541   }
3542 
3543   reverse_words(m, (unsigned long *)m_ints, longwords);
3544 }
3545 
3546 extern "C"
3547 int SpinPause() {
3548   return 0;
3549 }