1 /*
   2  * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "interpreter/interp_masm.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "registerSaver_s390.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "vmreg_s390.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #ifdef COMPILER2
  43 #include "opto/ad.hpp"
  44 #include "opto/runtime.hpp"
  45 #endif
  46 
  47 #ifdef PRODUCT
  48 #define __ masm->
  49 #else
  50 #define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
  51 #endif
  52 
  53 #define BLOCK_COMMENT(str) __ block_comment(str)
  54 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  55 
  56 #define RegisterSaver_LiveIntReg(regname) \
  57   { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
  58 
  59 #define RegisterSaver_LiveFloatReg(regname) \
  60   { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
  61 
  62 // Registers which are not saved/restored, but still they have got a frame slot.
  63 // Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2
  64 #define RegisterSaver_ExcludedIntReg(regname) \
  65   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  66 
  67 // Registers which are not saved/restored, but still they have got a frame slot.
  68 // Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  69 #define RegisterSaver_ExcludedFloatReg(regname) \
  70   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  71 
  72 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  73   // Live registers which get spilled to the stack. Register positions
  74   // in this array correspond directly to the stack layout.
  75   //
  76   // live float registers:
  77   //
  78   RegisterSaver_LiveFloatReg(Z_F0 ),
  79   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  80   RegisterSaver_LiveFloatReg(Z_F2 ),
  81   RegisterSaver_LiveFloatReg(Z_F3 ),
  82   RegisterSaver_LiveFloatReg(Z_F4 ),
  83   RegisterSaver_LiveFloatReg(Z_F5 ),
  84   RegisterSaver_LiveFloatReg(Z_F6 ),
  85   RegisterSaver_LiveFloatReg(Z_F7 ),
  86   RegisterSaver_LiveFloatReg(Z_F8 ),
  87   RegisterSaver_LiveFloatReg(Z_F9 ),
  88   RegisterSaver_LiveFloatReg(Z_F10),
  89   RegisterSaver_LiveFloatReg(Z_F11),
  90   RegisterSaver_LiveFloatReg(Z_F12),
  91   RegisterSaver_LiveFloatReg(Z_F13),
  92   RegisterSaver_LiveFloatReg(Z_F14),
  93   RegisterSaver_LiveFloatReg(Z_F15),
  94   //
  95   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  96   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  97   RegisterSaver_LiveIntReg(Z_R2 ),
  98   RegisterSaver_LiveIntReg(Z_R3 ),
  99   RegisterSaver_LiveIntReg(Z_R4 ),
 100   RegisterSaver_LiveIntReg(Z_R5 ),
 101   RegisterSaver_LiveIntReg(Z_R6 ),
 102   RegisterSaver_LiveIntReg(Z_R7 ),
 103   RegisterSaver_LiveIntReg(Z_R8 ),
 104   RegisterSaver_LiveIntReg(Z_R9 ),
 105   RegisterSaver_LiveIntReg(Z_R10),
 106   RegisterSaver_LiveIntReg(Z_R11),
 107   RegisterSaver_LiveIntReg(Z_R12),
 108   RegisterSaver_LiveIntReg(Z_R13),
 109   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 110   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 111 };
 112 
 113 static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
 114   // Live registers which get spilled to the stack. Register positions
 115   // in this array correspond directly to the stack layout.
 116   //
 117   // live float registers: All excluded, but still they get a stack slot to get same frame size.
 118   //
 119   RegisterSaver_ExcludedFloatReg(Z_F0 ),
 120   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 121   RegisterSaver_ExcludedFloatReg(Z_F2 ),
 122   RegisterSaver_ExcludedFloatReg(Z_F3 ),
 123   RegisterSaver_ExcludedFloatReg(Z_F4 ),
 124   RegisterSaver_ExcludedFloatReg(Z_F5 ),
 125   RegisterSaver_ExcludedFloatReg(Z_F6 ),
 126   RegisterSaver_ExcludedFloatReg(Z_F7 ),
 127   RegisterSaver_ExcludedFloatReg(Z_F8 ),
 128   RegisterSaver_ExcludedFloatReg(Z_F9 ),
 129   RegisterSaver_ExcludedFloatReg(Z_F10),
 130   RegisterSaver_ExcludedFloatReg(Z_F11),
 131   RegisterSaver_ExcludedFloatReg(Z_F12),
 132   RegisterSaver_ExcludedFloatReg(Z_F13),
 133   RegisterSaver_ExcludedFloatReg(Z_F14),
 134   RegisterSaver_ExcludedFloatReg(Z_F15),
 135   //
 136   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 137   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 138   RegisterSaver_LiveIntReg(Z_R2 ),
 139   RegisterSaver_LiveIntReg(Z_R3 ),
 140   RegisterSaver_LiveIntReg(Z_R4 ),
 141   RegisterSaver_LiveIntReg(Z_R5 ),
 142   RegisterSaver_LiveIntReg(Z_R6 ),
 143   RegisterSaver_LiveIntReg(Z_R7 ),
 144   RegisterSaver_LiveIntReg(Z_R8 ),
 145   RegisterSaver_LiveIntReg(Z_R9 ),
 146   RegisterSaver_LiveIntReg(Z_R10),
 147   RegisterSaver_LiveIntReg(Z_R11),
 148   RegisterSaver_LiveIntReg(Z_R12),
 149   RegisterSaver_LiveIntReg(Z_R13),
 150   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 151   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 152 };
 153 
 154 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
 155   // Live registers which get spilled to the stack. Register positions
 156   // in this array correspond directly to the stack layout.
 157   //
 158   // live float registers:
 159   //
 160   RegisterSaver_LiveFloatReg(Z_F0 ),
 161   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 162   RegisterSaver_LiveFloatReg(Z_F2 ),
 163   RegisterSaver_LiveFloatReg(Z_F3 ),
 164   RegisterSaver_LiveFloatReg(Z_F4 ),
 165   RegisterSaver_LiveFloatReg(Z_F5 ),
 166   RegisterSaver_LiveFloatReg(Z_F6 ),
 167   RegisterSaver_LiveFloatReg(Z_F7 ),
 168   RegisterSaver_LiveFloatReg(Z_F8 ),
 169   RegisterSaver_LiveFloatReg(Z_F9 ),
 170   RegisterSaver_LiveFloatReg(Z_F10),
 171   RegisterSaver_LiveFloatReg(Z_F11),
 172   RegisterSaver_LiveFloatReg(Z_F12),
 173   RegisterSaver_LiveFloatReg(Z_F13),
 174   RegisterSaver_LiveFloatReg(Z_F14),
 175   RegisterSaver_LiveFloatReg(Z_F15),
 176   //
 177   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 178   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 179   RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
 180   RegisterSaver_LiveIntReg(Z_R3 ),
 181   RegisterSaver_LiveIntReg(Z_R4 ),
 182   RegisterSaver_LiveIntReg(Z_R5 ),
 183   RegisterSaver_LiveIntReg(Z_R6 ),
 184   RegisterSaver_LiveIntReg(Z_R7 ),
 185   RegisterSaver_LiveIntReg(Z_R8 ),
 186   RegisterSaver_LiveIntReg(Z_R9 ),
 187   RegisterSaver_LiveIntReg(Z_R10),
 188   RegisterSaver_LiveIntReg(Z_R11),
 189   RegisterSaver_LiveIntReg(Z_R12),
 190   RegisterSaver_LiveIntReg(Z_R13),
 191   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 192   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 193 };
 194 
 195 // Live argument registers which get spilled to the stack.
 196 static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
 197   RegisterSaver_LiveFloatReg(Z_FARG1),
 198   RegisterSaver_LiveFloatReg(Z_FARG2),
 199   RegisterSaver_LiveFloatReg(Z_FARG3),
 200   RegisterSaver_LiveFloatReg(Z_FARG4),
 201   RegisterSaver_LiveIntReg(Z_ARG1),
 202   RegisterSaver_LiveIntReg(Z_ARG2),
 203   RegisterSaver_LiveIntReg(Z_ARG3),
 204   RegisterSaver_LiveIntReg(Z_ARG4),
 205   RegisterSaver_LiveIntReg(Z_ARG5)
 206 };
 207 
 208 static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
 209   // Live registers which get spilled to the stack. Register positions
 210   // in this array correspond directly to the stack layout.
 211   //
 212   // live float registers:
 213   //
 214   RegisterSaver_LiveFloatReg(Z_F0 ),
 215   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 216   RegisterSaver_LiveFloatReg(Z_F2 ),
 217   RegisterSaver_LiveFloatReg(Z_F3 ),
 218   RegisterSaver_LiveFloatReg(Z_F4 ),
 219   RegisterSaver_LiveFloatReg(Z_F5 ),
 220   RegisterSaver_LiveFloatReg(Z_F6 ),
 221   RegisterSaver_LiveFloatReg(Z_F7 ),
 222   // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
 223   // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
 224   // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
 225   // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
 226   // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
 227   // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
 228   // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
 229   // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
 230   //
 231   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 232   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 233   RegisterSaver_LiveIntReg(Z_R2 ),
 234   RegisterSaver_LiveIntReg(Z_R3 ),
 235   RegisterSaver_LiveIntReg(Z_R4 ),
 236   RegisterSaver_LiveIntReg(Z_R5 ),
 237   // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
 238   // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
 239   // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
 240   // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
 241   // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
 242   // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
 243   // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
 244   // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
 245   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 246   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 247 };
 248 
 249 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
 250   int reg_space = -1;
 251   switch (reg_set) {
 252     case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
 253     case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
 254     case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
 255     case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
 256     case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
 257     default: ShouldNotReachHere();
 258   }
 259   return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
 260 }
 261 
 262 
 263 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
 264   return live_reg_save_size(reg_set) + frame::z_abi_160_size;
 265 }
 266 
 267 
 268 // return_pc: Specify the register that should be stored as the return pc in the current frame.
 269 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
 270   // Record volatile registers as callee-save values in an OopMap so
 271   // their save locations will be propagated to the caller frame's
 272   // RegisterMap during StackFrameStream construction (needed for
 273   // deoptimization; see compiledVFrame::create_stack_value).
 274 
 275   // Calculate frame size.
 276   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 277   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 278   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 279 
 280   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 281   OopMap* map = new OopMap(frame_size_in_slots, 0);
 282 
 283   int regstosave_num = 0;
 284   const RegisterSaver::LiveRegType* live_regs = NULL;
 285 
 286   switch (reg_set) {
 287     case all_registers:
 288       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 289       live_regs      = RegisterSaver_LiveRegs;
 290       break;
 291     case all_registers_except_r2:
 292       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
 293       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 294       break;
 295     case all_integer_registers:
 296       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 297       live_regs      = RegisterSaver_LiveIntRegs;
 298       break;
 299     case all_volatile_registers:
 300       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 301       live_regs      = RegisterSaver_LiveVolatileRegs;
 302       break;
 303     case arg_registers:
 304       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
 305       live_regs      = RegisterSaver_LiveArgRegs;
 306       break;
 307     default: ShouldNotReachHere();
 308   }
 309 
 310   // Save return pc in old frame.
 311   __ save_return_pc(return_pc);
 312 
 313   // Push a new frame (includes stack linkage).
 314   __ push_frame(frame_size_in_bytes);
 315 
 316   // Register save area in new frame starts above z_abi_160 area.
 317   int offset = register_save_offset;
 318 
 319   Register first = noreg;
 320   Register last  = noreg;
 321   int      first_offset = -1;
 322   bool     float_spilled = false;
 323 
 324   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 325     int reg_num  = live_regs[i].reg_num;
 326     int reg_type = live_regs[i].reg_type;
 327 
 328     switch (reg_type) {
 329       case RegisterSaver::int_reg: {
 330         Register reg = as_Register(reg_num);
 331         if (last != reg->predecessor()) {
 332           if (first != noreg) {
 333             __ z_stmg(first, last, first_offset, Z_SP);
 334           }
 335           first = reg;
 336           first_offset = offset;
 337           DEBUG_ONLY(float_spilled = false);
 338         }
 339         last = reg;
 340         assert(last != Z_R0, "r0 would require special treatment");
 341         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 342         break;
 343       }
 344 
 345       case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
 346         continue; // Continue with next loop iteration.
 347 
 348       case RegisterSaver::float_reg: {
 349         FloatRegister freg = as_FloatRegister(reg_num);
 350         __ z_std(freg, offset, Z_SP);
 351         DEBUG_ONLY(float_spilled = true);
 352         break;
 353       }
 354 
 355       default:
 356         ShouldNotReachHere();
 357         break;
 358     }
 359 
 360     // Second set_callee_saved is really a waste but we'll keep things as they were for now
 361     map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
 362     map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
 363   }
 364   assert(first != noreg, "Should spill at least one int reg.");
 365   __ z_stmg(first, last, first_offset, Z_SP);
 366 
 367   // And we're done.
 368   return map;
 369 }
 370 
 371 
 372 // Generate the OopMap (again, regs where saved before).
 373 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
 374   // Calculate frame size.
 375   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 376   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 377   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 378 
 379   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 380   OopMap* map = new OopMap(frame_size_in_slots, 0);
 381 
 382   int regstosave_num = 0;
 383   const RegisterSaver::LiveRegType* live_regs = NULL;
 384 
 385   switch (reg_set) {
 386     case all_registers:
 387       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 388       live_regs      = RegisterSaver_LiveRegs;
 389       break;
 390     case all_registers_except_r2:
 391       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
 392       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 393       break;
 394     case all_integer_registers:
 395       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 396       live_regs      = RegisterSaver_LiveIntRegs;
 397       break;
 398     case all_volatile_registers:
 399       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 400       live_regs      = RegisterSaver_LiveVolatileRegs;
 401       break;
 402     case arg_registers:
 403       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
 404       live_regs      = RegisterSaver_LiveArgRegs;
 405       break;
 406     default: ShouldNotReachHere();
 407   }
 408 
 409   // Register save area in new frame starts above z_abi_160 area.
 410   int offset = register_save_offset;
 411   for (int i = 0; i < regstosave_num; i++) {
 412     if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
 413       map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
 414       map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
 415     }
 416     offset += reg_size;
 417   }
 418   return map;
 419 }
 420 
 421 
 422 // Pop the current frame and restore all the registers that we saved.
 423 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
 424   int offset;
 425   const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
 426 
 427   Register first = noreg;
 428   Register last = noreg;
 429   int      first_offset = -1;
 430   bool     float_spilled = false;
 431 
 432   int regstosave_num = 0;
 433   const RegisterSaver::LiveRegType* live_regs = NULL;
 434 
 435   switch (reg_set) {
 436     case all_registers:
 437       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
 438       live_regs      = RegisterSaver_LiveRegs;
 439       break;
 440     case all_registers_except_r2:
 441       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
 442       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 443       break;
 444     case all_integer_registers:
 445       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 446       live_regs      = RegisterSaver_LiveIntRegs;
 447       break;
 448     case all_volatile_registers:
 449       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
 450       live_regs      = RegisterSaver_LiveVolatileRegs;
 451       break;
 452     case arg_registers:
 453       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
 454       live_regs      = RegisterSaver_LiveArgRegs;
 455       break;
 456     default: ShouldNotReachHere();
 457   }
 458 
 459   // Restore all registers (ints and floats).
 460 
 461   // Register save area in new frame starts above z_abi_160 area.
 462   offset = register_save_offset;
 463 
 464   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 465     int reg_num  = live_regs[i].reg_num;
 466     int reg_type = live_regs[i].reg_type;
 467 
 468     switch (reg_type) {
 469       case RegisterSaver::excluded_reg:
 470         continue; // Continue with next loop iteration.
 471 
 472       case RegisterSaver::int_reg: {
 473         Register reg = as_Register(reg_num);
 474         if (last != reg->predecessor()) {
 475           if (first != noreg) {
 476             __ z_lmg(first, last, first_offset, Z_SP);
 477           }
 478           first = reg;
 479           first_offset = offset;
 480           DEBUG_ONLY(float_spilled = false);
 481         }
 482         last = reg;
 483         assert(last != Z_R0, "r0 would require special treatment");
 484         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 485         break;
 486       }
 487 
 488       case RegisterSaver::float_reg: {
 489         FloatRegister freg = as_FloatRegister(reg_num);
 490         __ z_ld(freg, offset, Z_SP);
 491         DEBUG_ONLY(float_spilled = true);
 492         break;
 493       }
 494 
 495       default:
 496         ShouldNotReachHere();
 497     }
 498   }
 499   assert(first != noreg, "Should spill at least one int reg.");
 500   __ z_lmg(first, last, first_offset, Z_SP);
 501 
 502   // Pop the frame.
 503   __ pop_frame();
 504 
 505   // Restore the flags.
 506   __ restore_return_pc();
 507 }
 508 
 509 
 510 // Pop the current frame and restore the registers that might be holding a result.
 511 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 512   int i;
 513   int offset;
 514   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 515                                    sizeof(RegisterSaver::LiveRegType);
 516   const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
 517 
 518   // Restore all result registers (ints and floats).
 519   offset = register_save_offset;
 520   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 521     int reg_num = RegisterSaver_LiveRegs[i].reg_num;
 522     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 523     switch (reg_type) {
 524       case RegisterSaver::excluded_reg:
 525         continue; // Continue with next loop iteration.
 526       case RegisterSaver::int_reg: {
 527         if (as_Register(reg_num) == Z_RET) { // int result_reg
 528           __ z_lg(as_Register(reg_num), offset, Z_SP);
 529         }
 530         break;
 531       }
 532       case RegisterSaver::float_reg: {
 533         if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
 534           __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
 535         }
 536         break;
 537       }
 538       default:
 539         ShouldNotReachHere();
 540     }
 541   }
 542 }
 543 
 544 #if INCLUDE_CDS
 545 size_t SharedRuntime::trampoline_size() {
 546   return MacroAssembler::load_const_size() + 2;
 547 }
 548 
 549 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 550   // Think about using pc-relative branch.
 551   __ load_const(Z_R1_scratch, destination);
 552   __ z_br(Z_R1_scratch);
 553 }
 554 #endif
 555 
 556 // ---------------------------------------------------------------------------
 557 void SharedRuntime::save_native_result(MacroAssembler * masm,
 558                                        BasicType ret_type,
 559                                        int frame_slots) {
 560   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 561 
 562   switch (ret_type) {
 563     case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
 564     case T_BYTE:
 565     case T_CHAR:
 566     case T_SHORT:
 567     case T_INT:
 568       __ reg2mem_opt(Z_RET, memaddr, false);
 569       break;
 570     case T_OBJECT:   // Save pointer types as long.
 571     case T_ARRAY:
 572     case T_ADDRESS:
 573     case T_VOID:
 574     case T_LONG:
 575       __ reg2mem_opt(Z_RET, memaddr);
 576       break;
 577     case T_FLOAT:
 578       __ freg2mem_opt(Z_FRET, memaddr, false);
 579       break;
 580     case T_DOUBLE:
 581       __ freg2mem_opt(Z_FRET, memaddr);
 582       break;
 583   }
 584 }
 585 
 586 void SharedRuntime::restore_native_result(MacroAssembler *masm,
 587                                           BasicType       ret_type,
 588                                           int             frame_slots) {
 589   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 590 
 591   switch (ret_type) {
 592     case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
 593     case T_BYTE:
 594     case T_CHAR:
 595     case T_SHORT:
 596     case T_INT:
 597       __ mem2reg_opt(Z_RET, memaddr, false);
 598       break;
 599     case T_OBJECT:   // Restore pointer types as long.
 600     case T_ARRAY:
 601     case T_ADDRESS:
 602     case T_VOID:
 603     case T_LONG:
 604       __ mem2reg_opt(Z_RET, memaddr);
 605       break;
 606     case T_FLOAT:
 607       __ mem2freg_opt(Z_FRET, memaddr, false);
 608       break;
 609     case T_DOUBLE:
 610       __ mem2freg_opt(Z_FRET, memaddr);
 611       break;
 612   }
 613 }
 614 
 615 // ---------------------------------------------------------------------------
 616 // Read the array of BasicTypes from a signature, and compute where the
 617 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 618 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 619 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 620 // as framesizes are fixed.
 621 // VMRegImpl::stack0 refers to the first slot 0(sp).
 622 // VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Registers
 623 // up to RegisterImpl::number_of_registers are the 64-bit integer registers.
 624 
 625 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 626 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 627 // units regardless of build.
 628 
 629 // The Java calling convention is a "shifted" version of the C ABI.
 630 // By skipping the first C ABI register we can call non-static jni methods
 631 // with small numbers of arguments without having to shuffle the arguments
 632 // at all. Since we control the java ABI we ought to at least get some
 633 // advantage out of it.
 634 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 635                                            VMRegPair *regs,
 636                                            int total_args_passed,
 637                                            int is_outgoing) {
 638   // c2c calling conventions for compiled-compiled calls.
 639 
 640   // An int/float occupies 1 slot here.
 641   const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats.
 642   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 643 
 644   const VMReg z_iarg_reg[5] = {
 645     Z_R2->as_VMReg(),
 646     Z_R3->as_VMReg(),
 647     Z_R4->as_VMReg(),
 648     Z_R5->as_VMReg(),
 649     Z_R6->as_VMReg()
 650   };
 651   const VMReg z_farg_reg[4] = {
 652     Z_F0->as_VMReg(),
 653     Z_F2->as_VMReg(),
 654     Z_F4->as_VMReg(),
 655     Z_F6->as_VMReg()
 656   };
 657   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 658   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 659 
 660   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 661   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 662 
 663   int i;
 664   int stk = 0;
 665   int ireg = 0;
 666   int freg = 0;
 667 
 668   for (int i = 0; i < total_args_passed; ++i) {
 669     switch (sig_bt[i]) {
 670       case T_BOOLEAN:
 671       case T_CHAR:
 672       case T_BYTE:
 673       case T_SHORT:
 674       case T_INT:
 675         if (ireg < z_num_iarg_registers) {
 676           // Put int/ptr in register.
 677           regs[i].set1(z_iarg_reg[ireg]);
 678           ++ireg;
 679         } else {
 680           // Put int/ptr on stack.
 681           regs[i].set1(VMRegImpl::stack2reg(stk));
 682           stk += inc_stk_for_intfloat;
 683         }
 684         break;
 685       case T_LONG:
 686         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 687         if (ireg < z_num_iarg_registers) {
 688           // Put long in register.
 689           regs[i].set2(z_iarg_reg[ireg]);
 690           ++ireg;
 691         } else {
 692           // Put long on stack and align to 2 slots.
 693           if (stk & 0x1) { ++stk; }
 694           regs[i].set2(VMRegImpl::stack2reg(stk));
 695           stk += inc_stk_for_longdouble;
 696         }
 697         break;
 698       case T_OBJECT:
 699       case T_ARRAY:
 700       case T_ADDRESS:
 701         if (ireg < z_num_iarg_registers) {
 702           // Put ptr in register.
 703           regs[i].set2(z_iarg_reg[ireg]);
 704           ++ireg;
 705         } else {
 706           // Put ptr on stack and align to 2 slots, because
 707           // "64-bit pointers record oop-ishness on 2 aligned adjacent
 708           // registers." (see OopFlow::build_oop_map).
 709           if (stk & 0x1) { ++stk; }
 710           regs[i].set2(VMRegImpl::stack2reg(stk));
 711           stk += inc_stk_for_longdouble;
 712         }
 713         break;
 714       case T_FLOAT:
 715         if (freg < z_num_farg_registers) {
 716           // Put float in register.
 717           regs[i].set1(z_farg_reg[freg]);
 718           ++freg;
 719         } else {
 720           // Put float on stack.
 721           regs[i].set1(VMRegImpl::stack2reg(stk));
 722           stk += inc_stk_for_intfloat;
 723         }
 724         break;
 725       case T_DOUBLE:
 726         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 727         if (freg < z_num_farg_registers) {
 728           // Put double in register.
 729           regs[i].set2(z_farg_reg[freg]);
 730           ++freg;
 731         } else {
 732           // Put double on stack and align to 2 slots.
 733           if (stk & 0x1) { ++stk; }
 734           regs[i].set2(VMRegImpl::stack2reg(stk));
 735           stk += inc_stk_for_longdouble;
 736         }
 737         break;
 738       case T_VOID:
 739         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 740         // Do not count halves.
 741         regs[i].set_bad();
 742         break;
 743       default:
 744         ShouldNotReachHere();
 745     }
 746   }
 747   return round_to(stk, 2);
 748 }
 749 
 750 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 751                                         VMRegPair *regs,
 752                                         VMRegPair *regs2,
 753                                         int total_args_passed) {
 754   assert(regs2 == NULL, "second VMRegPair array not used on this platform");
 755 
 756   // Calling conventions for C runtime calls and calls to JNI native methods.
 757   const VMReg z_iarg_reg[5] = {
 758     Z_R2->as_VMReg(),
 759     Z_R3->as_VMReg(),
 760     Z_R4->as_VMReg(),
 761     Z_R5->as_VMReg(),
 762     Z_R6->as_VMReg()
 763   };
 764   const VMReg z_farg_reg[4] = {
 765     Z_F0->as_VMReg(),
 766     Z_F2->as_VMReg(),
 767     Z_F4->as_VMReg(),
 768     Z_F6->as_VMReg()
 769   };
 770   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 771   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 772 
 773   // Check calling conventions consistency.
 774   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 775   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 776 
 777   // Avoid passing C arguments in the wrong stack slots.
 778 
 779   // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
 780   // 2 such slots, like 64 bit values do.
 781   const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
 782   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 783 
 784   int i;
 785   // Leave room for C-compatible ABI
 786   int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
 787   int freg = 0;
 788   int ireg = 0;
 789 
 790   // We put the first 5 arguments into registers and the rest on the
 791   // stack. Float arguments are already in their argument registers
 792   // due to c2c calling conventions (see calling_convention).
 793   for (int i = 0; i < total_args_passed; ++i) {
 794     switch (sig_bt[i]) {
 795       case T_BOOLEAN:
 796       case T_CHAR:
 797       case T_BYTE:
 798       case T_SHORT:
 799       case T_INT:
 800         // Fall through, handle as long.
 801       case T_LONG:
 802       case T_OBJECT:
 803       case T_ARRAY:
 804       case T_ADDRESS:
 805       case T_METADATA:
 806         // Oops are already boxed if required (JNI).
 807         if (ireg < z_num_iarg_registers) {
 808           regs[i].set2(z_iarg_reg[ireg]);
 809           ++ireg;
 810         } else {
 811           regs[i].set2(VMRegImpl::stack2reg(stk));
 812           stk += inc_stk_for_longdouble;
 813         }
 814         break;
 815       case T_FLOAT:
 816         if (freg < z_num_farg_registers) {
 817           regs[i].set1(z_farg_reg[freg]);
 818           ++freg;
 819         } else {
 820           regs[i].set1(VMRegImpl::stack2reg(stk+1));
 821           stk +=  inc_stk_for_intfloat;
 822         }
 823         break;
 824       case T_DOUBLE:
 825         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 826         if (freg < z_num_farg_registers) {
 827           regs[i].set2(z_farg_reg[freg]);
 828           ++freg;
 829         } else {
 830           // Put double on stack.
 831           regs[i].set2(VMRegImpl::stack2reg(stk));
 832           stk += inc_stk_for_longdouble;
 833         }
 834         break;
 835       case T_VOID:
 836         // Do not count halves.
 837         regs[i].set_bad();
 838         break;
 839       default:
 840         ShouldNotReachHere();
 841     }
 842   }
 843   return round_to(stk, 2);
 844 }
 845 
 846 ////////////////////////////////////////////////////////////////////////
 847 //
 848 //  Argument shufflers
 849 //
 850 ////////////////////////////////////////////////////////////////////////
 851 
 852 //----------------------------------------------------------------------
 853 // The java_calling_convention describes stack locations as ideal slots on
 854 // a frame with no abi restrictions. Since we must observe abi restrictions
 855 // (like the placement of the register window) the slots must be biased by
 856 // the following value.
 857 //----------------------------------------------------------------------
 858 static int reg2slot(VMReg r) {
 859   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 860 }
 861 
 862 static int reg2offset(VMReg r) {
 863   return reg2slot(r) * VMRegImpl::stack_slot_size;
 864 }
 865 
 866 static void verify_oop_args(MacroAssembler *masm,
 867                             int total_args_passed,
 868                             const BasicType *sig_bt,
 869                             const VMRegPair *regs) {
 870   if (!VerifyOops) { return; }
 871 
 872   for (int i = 0; i < total_args_passed; i++) {
 873     if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
 874       VMReg r = regs[i].first();
 875       assert(r->is_valid(), "bad oop arg");
 876 
 877       if (r->is_stack()) {
 878         __ z_lg(Z_R0_scratch,
 879                 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
 880         __ verify_oop(Z_R0_scratch);
 881       } else {
 882         __ verify_oop(r->as_Register());
 883       }
 884     }
 885   }
 886 }
 887 
 888 static void gen_special_dispatch(MacroAssembler *masm,
 889                                  int total_args_passed,
 890                                  vmIntrinsics::ID special_dispatch,
 891                                  const BasicType *sig_bt,
 892                                  const VMRegPair *regs) {
 893   verify_oop_args(masm, total_args_passed, sig_bt, regs);
 894 
 895   // Now write the args into the outgoing interpreter space.
 896   bool     has_receiver   = false;
 897   Register receiver_reg   = noreg;
 898   int      member_arg_pos = -1;
 899   Register member_reg     = noreg;
 900   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
 901 
 902   if (ref_kind != 0) {
 903     member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
 904     member_reg = Z_R9;                       // Known to be free at this point.
 905     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 906   } else {
 907     guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
 908     has_receiver = true;
 909   }
 910 
 911   if (member_reg != noreg) {
 912     // Load the member_arg into register, if necessary.
 913     assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
 914     assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
 915 
 916     VMReg r = regs[member_arg_pos].first();
 917     assert(r->is_valid(), "bad member arg");
 918 
 919     if (r->is_stack()) {
 920       __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
 921     } else {
 922       // No data motion is needed.
 923       member_reg = r->as_Register();
 924     }
 925   }
 926 
 927   if (has_receiver) {
 928     // Make sure the receiver is loaded into a register.
 929     assert(total_args_passed > 0, "oob");
 930     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 931 
 932     VMReg r = regs[0].first();
 933     assert(r->is_valid(), "bad receiver arg");
 934 
 935     if (r->is_stack()) {
 936       // Porting note: This assumes that compiled calling conventions always
 937       // pass the receiver oop in a register. If this is not true on some
 938       // platform, pick a temp and load the receiver from stack.
 939       assert(false, "receiver always in a register");
 940       receiver_reg = Z_R13;  // Known to be free at this point.
 941       __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
 942     } else {
 943       // No data motion is needed.
 944       receiver_reg = r->as_Register();
 945     }
 946   }
 947 
 948   // Figure out which address we are really jumping to:
 949   MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
 950                                                  receiver_reg, member_reg,
 951                                                  /*for_compiler_entry:*/ true);
 952 }
 953 
 954 ////////////////////////////////////////////////////////////////////////
 955 //
 956 //  Argument shufflers
 957 //
 958 ////////////////////////////////////////////////////////////////////////
 959 
 960 // Is the size of a vector size (in bytes) bigger than a size saved by default?
 961 // 8 bytes registers are saved by default on z/Architecture.
 962 bool SharedRuntime::is_wide_vector(int size) {
 963   // Note, MaxVectorSize == 8 on this platform.
 964   assert(size <= 8, "%d bytes vectors are not supported", size);
 965   return size > 8;
 966 }
 967 
 968 //----------------------------------------------------------------------
 969 // An oop arg. Must pass a handle not the oop itself
 970 //----------------------------------------------------------------------
 971 static void object_move(MacroAssembler *masm,
 972                         OopMap *map,
 973                         int oop_handle_offset,
 974                         int framesize_in_slots,
 975                         VMRegPair src,
 976                         VMRegPair dst,
 977                         bool is_receiver,
 978                         int *receiver_offset) {
 979   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
 980 
 981   assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
 982 
 983   // Must pass a handle. First figure out the location we use as a handle.
 984 
 985   if (src.first()->is_stack()) {
 986     // Oop is already on the stack, put handle on stack or in register
 987     // If handle will be on the stack, use temp reg to calculate it.
 988     Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
 989     Label    skip;
 990     int      slot_in_older_frame = reg2slot(src.first());
 991 
 992     guarantee(!is_receiver, "expecting receiver in register");
 993     map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));
 994 
 995     __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
 996     __ load_and_test_long(Z_R0, Address(rHandle));
 997     __ z_brne(skip);
 998     // Use a NULL handle if oop is NULL.
 999     __ clear_reg(rHandle, true, false);
1000     __ bind(skip);
1001 
1002     // Copy handle to the right place (register or stack).
1003     if (dst.first()->is_stack()) {
1004       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1005     } // else
1006       // nothing to do. rHandle uses the correct register
1007   } else {
1008     // Oop is passed in an input register. We must flush it to the stack.
1009     const Register rOop = src.first()->as_Register();
1010     const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1011     int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1012     int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
1013     NearLabel skip;
1014 
1015     if (is_receiver) {
1016       *receiver_offset = oop_slot_offset;
1017     }
1018     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1019 
1020     // Flush Oop to stack, calculate handle.
1021     __ z_stg(rOop, oop_slot_offset, Z_SP);
1022     __ add2reg(rHandle, oop_slot_offset, Z_SP);
1023 
1024     // If Oop == NULL, use a NULL handle.
1025     __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
1026     __ clear_reg(rHandle, true, false);
1027     __ bind(skip);
1028 
1029     // Copy handle to the right place (register or stack).
1030     if (dst.first()->is_stack()) {
1031       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1032     } // else
1033       // nothing to do here, since rHandle = dst.first()->as_Register in this case.
1034   }
1035 }
1036 
1037 //----------------------------------------------------------------------
1038 // A float arg. May have to do float reg to int reg conversion
1039 //----------------------------------------------------------------------
1040 static void float_move(MacroAssembler *masm,
1041                        VMRegPair src,
1042                        VMRegPair dst,
1043                        int framesize_in_slots,
1044                        int workspace_slot_offset) {
1045   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1046   int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1047 
1048   // We do not accept an argument in a VMRegPair to be spread over two slots,
1049   // no matter what physical location (reg or stack) the slots may have.
1050   // We just check for the unaccepted slot to be invalid.
1051   assert(!src.second()->is_valid(), "float in arg spread over two slots");
1052   assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1053 
1054   if (src.first()->is_stack()) {
1055     if (dst.first()->is_stack()) {
1056       // stack -> stack. The easiest of the bunch.
1057       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1058                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1059     } else {
1060       // stack to reg
1061       Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1062       if (dst.first()->is_Register()) {
1063         __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1064       } else {
1065         __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1066       }
1067     }
1068   } else if (src.first()->is_Register()) {
1069     if (dst.first()->is_stack()) {
1070       // gpr -> stack
1071       __ reg2mem_opt(src.first()->as_Register(),
1072                      Address(Z_SP, reg2offset(dst.first()), false ));
1073     } else {
1074       if (dst.first()->is_Register()) {
1075         // gpr -> gpr
1076         __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1077                               src.first()->as_Register(), T_INT);
1078       } else {
1079         if (VM_Version::has_FPSupportEnhancements()) {
1080           // gpr -> fpr. Exploit z10 capability of direct transfer.
1081           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1082         } else {
1083           // gpr -> fpr. Use work space on stack to transfer data.
1084           Address   stackaddr(Z_SP, workspace_offset);
1085 
1086           __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1087           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1088         }
1089       }
1090     }
1091   } else {
1092     if (dst.first()->is_stack()) {
1093       // fpr -> stack
1094       __ freg2mem_opt(src.first()->as_FloatRegister(),
1095                       Address(Z_SP, reg2offset(dst.first())), false);
1096     } else {
1097       if (dst.first()->is_Register()) {
1098         if (VM_Version::has_FPSupportEnhancements()) {
1099           // fpr -> gpr.
1100           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1101         } else {
1102           // fpr -> gpr. Use work space on stack to transfer data.
1103           Address   stackaddr(Z_SP, workspace_offset);
1104 
1105           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1106           __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1107         }
1108       } else {
1109         // fpr -> fpr
1110         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1111                                src.first()->as_FloatRegister(), T_FLOAT);
1112       }
1113     }
1114   }
1115 }
1116 
1117 //----------------------------------------------------------------------
1118 // A double arg. May have to do double reg to long reg conversion
1119 //----------------------------------------------------------------------
1120 static void double_move(MacroAssembler *masm,
1121                         VMRegPair src,
1122                         VMRegPair dst,
1123                         int framesize_in_slots,
1124                         int workspace_slot_offset) {
1125   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1126   int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1127 
1128   // Since src is always a java calling convention we know that the
1129   // src pair is always either all registers or all stack (and aligned?)
1130 
1131   if (src.first()->is_stack()) {
1132     if (dst.first()->is_stack()) {
1133       // stack -> stack. The easiest of the bunch.
1134       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1135                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1136     } else {
1137       // stack to reg
1138       Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1139 
1140       if (dst.first()->is_Register()) {
1141         __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1142       } else {
1143         __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1144       }
1145     }
1146   } else if (src.first()->is_Register()) {
1147     if (dst.first()->is_stack()) {
1148       // gpr -> stack
1149       __ reg2mem_opt(src.first()->as_Register(),
1150                      Address(Z_SP, reg2offset(dst.first())));
1151     } else {
1152       if (dst.first()->is_Register()) {
1153         // gpr -> gpr
1154         __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1155                               src.first()->as_Register(), T_LONG);
1156       } else {
1157         if (VM_Version::has_FPSupportEnhancements()) {
1158           // gpr -> fpr. Exploit z10 capability of direct transfer.
1159           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1160         } else {
1161           // gpr -> fpr. Use work space on stack to transfer data.
1162           Address stackaddr(Z_SP, workspace_offset);
1163           __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1164           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1165         }
1166       }
1167     }
1168   } else {
1169     if (dst.first()->is_stack()) {
1170       // fpr -> stack
1171       __ freg2mem_opt(src.first()->as_FloatRegister(),
1172                       Address(Z_SP, reg2offset(dst.first())));
1173     } else {
1174       if (dst.first()->is_Register()) {
1175         if (VM_Version::has_FPSupportEnhancements()) {
1176           // fpr -> gpr. Exploit z10 capability of direct transfer.
1177           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1178         } else {
1179           // fpr -> gpr. Use work space on stack to transfer data.
1180           Address stackaddr(Z_SP, workspace_offset);
1181 
1182           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1183           __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1184         }
1185       } else {
1186         // fpr -> fpr
1187         // In theory these overlap but the ordering is such that this is likely a nop.
1188         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1189                                src.first()->as_FloatRegister(), T_DOUBLE);
1190       }
1191     }
1192   }
1193 }
1194 
1195 //----------------------------------------------------------------------
1196 // A long arg.
1197 //----------------------------------------------------------------------
1198 static void long_move(MacroAssembler *masm,
1199                       VMRegPair src,
1200                       VMRegPair dst,
1201                       int framesize_in_slots) {
1202   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1203 
1204   if (src.first()->is_stack()) {
1205     if (dst.first()->is_stack()) {
1206       // stack -> stack. The easiest of the bunch.
1207       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1208                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1209     } else {
1210       // stack to reg
1211       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1212       __ mem2reg_opt(dst.first()->as_Register(),
1213                       Address(Z_SP, reg2offset(src.first()) + frame_offset));
1214     }
1215   } else {
1216     // reg to reg
1217     assert(src.first()->is_Register(), "long src value must be in GPR");
1218     if (dst.first()->is_stack()) {
1219       // reg -> stack
1220       __ reg2mem_opt(src.first()->as_Register(),
1221                      Address(Z_SP, reg2offset(dst.first())));
1222     } else {
1223       // reg -> reg
1224       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1225       __ move_reg_if_needed(dst.first()->as_Register(),
1226                             T_LONG, src.first()->as_Register(), T_LONG);
1227     }
1228   }
1229 }
1230 
1231 
1232 //----------------------------------------------------------------------
1233 // A int-like arg.
1234 //----------------------------------------------------------------------
1235 // On z/Architecture we will store integer like items to the stack as 64 bit
1236 // items, according to the z/Architecture ABI, even though Java would only store
1237 // 32 bits for a parameter.
1238 // We do sign extension for all base types. That is ok since the only
1239 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1240 // Sign extension 32->64 bit will thus not affect the value.
1241 //----------------------------------------------------------------------
1242 static void move32_64(MacroAssembler *masm,
1243                       VMRegPair src,
1244                       VMRegPair dst,
1245                       int framesize_in_slots) {
1246   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1247 
1248   if (src.first()->is_stack()) {
1249     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1250     if (dst.first()->is_stack()) {
1251       // stack -> stack. MVC not posible due to sign extension.
1252       Address firstaddr(Z_SP, reg2offset(dst.first()));
1253       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1254       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1255     } else {
1256       // stack -> reg, sign extended
1257       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1258     }
1259   } else {
1260     if (dst.first()->is_stack()) {
1261       // reg -> stack, sign extended
1262       Address firstaddr(Z_SP, reg2offset(dst.first()));
1263       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1264       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1265     } else {
1266       // reg -> reg, sign extended
1267       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1268     }
1269   }
1270 }
1271 
1272 static void save_or_restore_arguments(MacroAssembler *masm,
1273                                       const int stack_slots,
1274                                       const int total_in_args,
1275                                       const int arg_save_area,
1276                                       OopMap *map,
1277                                       VMRegPair *in_regs,
1278                                       BasicType *in_sig_bt) {
1279 
1280   // If map is non-NULL then the code should store the values,
1281   // otherwise it should load them.
1282   int slot = arg_save_area;
1283   // Handle double words first.
1284   for (int i = 0; i < total_in_args; i++) {
1285     if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
1286       int offset = slot * VMRegImpl::stack_slot_size;
1287       slot += VMRegImpl::slots_per_word;
1288       assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
1289       const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1290       Address   stackaddr(Z_SP, offset);
1291       if (map != NULL) {
1292         __ freg2mem_opt(freg, stackaddr);
1293       } else {
1294         __ mem2freg_opt(freg, stackaddr);
1295       }
1296     } else if (in_regs[i].first()->is_Register() &&
1297                (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
1298       int offset = slot * VMRegImpl::stack_slot_size;
1299       const Register   reg = in_regs[i].first()->as_Register();
1300       if (map != NULL) {
1301         __ z_stg(reg, offset, Z_SP);
1302         if (in_sig_bt[i] == T_ARRAY) {
1303           map->set_oop(VMRegImpl::stack2reg(slot));
1304         }
1305       } else {
1306         __ z_lg(reg, offset, Z_SP);
1307         slot += VMRegImpl::slots_per_word;
1308         assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
1309       }
1310     }
1311   }
1312 
1313   // Save or restore single word registers.
1314   for (int i = 0; i < total_in_args; i++) {
1315     if (in_regs[i].first()->is_FloatRegister()) {
1316       if (in_sig_bt[i] == T_FLOAT) {
1317         int offset = slot * VMRegImpl::stack_slot_size;
1318         slot++;
1319         assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
1320         const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
1321         Address   stackaddr(Z_SP, offset);
1322         if (map != NULL) {
1323           __ freg2mem_opt(freg, stackaddr, false);
1324         } else {
1325           __ mem2freg_opt(freg, stackaddr, false);
1326         }
1327       }
1328     } else if (in_regs[i].first()->is_stack() &&
1329                in_sig_bt[i] == T_ARRAY && map != NULL) {
1330       int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1331       map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1332     }
1333   }
1334 }
1335 
1336 // Check GCLocker::needs_gc and enter the runtime if it's true. This
1337 // keeps a new JNI critical region from starting until a GC has been
1338 // forced. Save down any oops in registers and describe them in an OopMap.
1339 static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
1340                                                 const int stack_slots,
1341                                                 const int total_in_args,
1342                                                 const int arg_save_area,
1343                                                 OopMapSet *oop_maps,
1344                                                 VMRegPair *in_regs,
1345                                                 BasicType *in_sig_bt) {
1346   __ block_comment("check GCLocker::needs_gc");
1347   Label cont;
1348 
1349   // Check GCLocker::_needs_gc flag.
1350   __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
1351   __ z_cli(0, Z_R1_scratch, 0);
1352   __ z_bre(cont);
1353 
1354   // Save down any values that are live in registers and call into the
1355   // runtime to halt for a GC.
1356   OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1357 
1358   save_or_restore_arguments(masm, stack_slots, total_in_args,
1359                             arg_save_area, map, in_regs, in_sig_bt);
1360   address the_pc = __ pc();
1361   __ set_last_Java_frame(Z_SP, noreg);
1362 
1363   __ block_comment("block_for_jni_critical");
1364   __ z_lgr(Z_ARG1, Z_thread);
1365 
1366   address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1367   __ call_c(entry_point);
1368   oop_maps->add_gc_map(__ offset(), map);
1369 
1370   __ reset_last_Java_frame();
1371 
1372   // Reload all the register arguments.
1373   save_or_restore_arguments(masm, stack_slots, total_in_args,
1374                             arg_save_area, NULL, in_regs, in_sig_bt);
1375 
1376   __ bind(cont);
1377 
1378   if (StressCriticalJNINatives) {
1379     // Stress register saving
1380     OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1381     save_or_restore_arguments(masm, stack_slots, total_in_args,
1382                               arg_save_area, map, in_regs, in_sig_bt);
1383 
1384     // Destroy argument registers.
1385     for (int i = 0; i < total_in_args; i++) {
1386       if (in_regs[i].first()->is_Register()) {
1387         // Don't set CC.
1388         __ clear_reg(in_regs[i].first()->as_Register(), true, false);
1389       } else {
1390         if (in_regs[i].first()->is_FloatRegister()) {
1391           FloatRegister fr = in_regs[i].first()->as_FloatRegister();
1392           __ z_lcdbr(fr, fr);
1393         }
1394       }
1395     }
1396 
1397     save_or_restore_arguments(masm, stack_slots, total_in_args,
1398                               arg_save_area, NULL, in_regs, in_sig_bt);
1399   }
1400 }
1401 
1402 static void move_ptr(MacroAssembler *masm,
1403                      VMRegPair src,
1404                      VMRegPair dst,
1405                      int framesize_in_slots) {
1406   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1407 
1408   if (src.first()->is_stack()) {
1409     if (dst.first()->is_stack()) {
1410       // stack to stack
1411       __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1412       __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1413     } else {
1414       // stack to reg
1415       __ mem2reg_opt(dst.first()->as_Register(),
1416                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
1417     }
1418   } else {
1419     if (dst.first()->is_stack()) {
1420       // reg to stack
1421     __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1422     } else {
1423     __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1424     }
1425   }
1426 }
1427 
1428 // Unpack an array argument into a pointer to the body and the length
1429 // if the array is non-null, otherwise pass 0 for both.
1430 static void unpack_array_argument(MacroAssembler *masm,
1431                                    VMRegPair reg,
1432                                    BasicType in_elem_type,
1433                                    VMRegPair body_arg,
1434                                    VMRegPair length_arg,
1435                                    int framesize_in_slots) {
1436   Register tmp_reg = Z_tmp_2;
1437   Register tmp2_reg = Z_tmp_1;
1438 
1439   assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1440          "possible collision");
1441   assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1442          "possible collision");
1443 
1444   // Pass the length, ptr pair.
1445   NearLabel set_out_args;
1446   VMRegPair tmp, tmp2;
1447 
1448   tmp.set_ptr(tmp_reg->as_VMReg());
1449   tmp2.set_ptr(tmp2_reg->as_VMReg());
1450   if (reg.first()->is_stack()) {
1451     // Load the arg up from the stack.
1452     move_ptr(masm, reg, tmp, framesize_in_slots);
1453     reg = tmp;
1454   }
1455 
1456   const Register first = reg.first()->as_Register();
1457 
1458   // Don't set CC, indicate unused result.
1459   (void) __ clear_reg(tmp2_reg, true, false);
1460   if (tmp_reg != first) {
1461     __ clear_reg(tmp_reg, true, false);  // Don't set CC.
1462   }
1463   __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
1464   __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
1465   __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
1466 
1467   __ bind(set_out_args);
1468   move_ptr(masm, tmp, body_arg, framesize_in_slots);
1469   move32_64(masm, tmp2, length_arg, framesize_in_slots);
1470 }
1471 
1472 //----------------------------------------------------------------------
1473 // Wrap a JNI call.
1474 //----------------------------------------------------------------------
1475 #undef USE_RESIZE_FRAME
1476 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1477                                                 const methodHandle& method,
1478                                                 int compile_id,
1479                                                 BasicType *in_sig_bt,
1480                                                 VMRegPair *in_regs,
1481                                                 BasicType ret_type) {
1482 #ifdef COMPILER2
1483   int total_in_args = method->size_of_parameters();
1484   if (method->is_method_handle_intrinsic()) {
1485     vmIntrinsics::ID iid = method->intrinsic_id();
1486     intptr_t start = (intptr_t) __ pc();
1487     int vep_offset = ((intptr_t) __ pc()) - start;
1488 
1489     gen_special_dispatch(masm, total_in_args,
1490                          method->intrinsic_id(), in_sig_bt, in_regs);
1491 
1492     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1493 
1494     __ flush();
1495 
1496     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1497 
1498     return nmethod::new_native_nmethod(method,
1499                                        compile_id,
1500                                        masm->code(),
1501                                        vep_offset,
1502                                        frame_complete,
1503                                        stack_slots / VMRegImpl::slots_per_word,
1504                                        in_ByteSize(-1),
1505                                        in_ByteSize(-1),
1506                                        (OopMapSet *) NULL);
1507   }
1508 
1509 
1510   ///////////////////////////////////////////////////////////////////////
1511   //
1512   //  Precalculations before generating any code
1513   //
1514   ///////////////////////////////////////////////////////////////////////
1515 
1516   bool is_critical_native = true;
1517   address native_func = method->critical_native_function();
1518   if (native_func == NULL) {
1519     native_func = method->native_function();
1520     is_critical_native = false;
1521   }
1522   assert(native_func != NULL, "must have function");
1523 
1524   //---------------------------------------------------------------------
1525   // We have received a description of where all the java args are located
1526   // on entry to the wrapper. We need to convert these args to where
1527   // the jni function will expect them. To figure out where they go
1528   // we convert the java signature to a C signature by inserting
1529   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1530   //
1531   // The first hidden argument arg[0] is a pointer to the JNI environment.
1532   // It is generated for every call.
1533   // The second argument arg[1] to the JNI call, which is hidden for static
1534   // methods, is the boxed lock object. For static calls, the lock object
1535   // is the static method itself. The oop is constructed here. for instance
1536   // calls, the lock is performed on the object itself, the pointer of
1537   // which is passed as the first visible argument.
1538   //---------------------------------------------------------------------
1539 
1540   // Additionally, on z/Architecture we must convert integers
1541   // to longs in the C signature. We do this in advance in order to have
1542   // no trouble with indexes into the bt-arrays.
1543   // So convert the signature and registers now, and adjust the total number
1544   // of in-arguments accordingly.
1545   bool method_is_static = method->is_static();
1546   int  total_c_args     = total_in_args;
1547 
1548   if (!is_critical_native) {
1549     int n_hidden_args = method_is_static ? 2 : 1;
1550     total_c_args += n_hidden_args;
1551   } else {
1552     // No JNIEnv*, no this*, but unpacked arrays (base+length).
1553     for (int i = 0; i < total_in_args; i++) {
1554       if (in_sig_bt[i] == T_ARRAY) {
1555         total_c_args ++;
1556       }
1557     }
1558   }
1559 
1560   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1561   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1562   BasicType* in_elem_bt = NULL;
1563 
1564   // Create the signature for the C call:
1565   //   1) add the JNIEnv*
1566   //   2) add the class if the method is static
1567   //   3) copy the rest of the incoming signature (shifted by the number of
1568   //      hidden arguments)
1569 
1570   int argc = 0;
1571   if (!is_critical_native) {
1572     out_sig_bt[argc++] = T_ADDRESS;
1573     if (method->is_static()) {
1574       out_sig_bt[argc++] = T_OBJECT;
1575     }
1576 
1577     for (int i = 0; i < total_in_args; i++) {
1578       out_sig_bt[argc++] = in_sig_bt[i];
1579     }
1580   } else {
1581     Thread* THREAD = Thread::current();
1582     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1583     SignatureStream ss(method->signature());
1584     int o = 0;
1585     for (int i = 0; i < total_in_args; i++, o++) {
1586       if (in_sig_bt[i] == T_ARRAY) {
1587         // Arrays are passed as tuples (int, elem*).
1588         Symbol* atype = ss.as_symbol(CHECK_NULL);
1589         const char* at = atype->as_C_string();
1590         if (strlen(at) == 2) {
1591           assert(at[0] == '[', "must be");
1592           switch (at[1]) {
1593             case 'B': in_elem_bt[o]  = T_BYTE; break;
1594             case 'C': in_elem_bt[o]  = T_CHAR; break;
1595             case 'D': in_elem_bt[o]  = T_DOUBLE; break;
1596             case 'F': in_elem_bt[o]  = T_FLOAT; break;
1597             case 'I': in_elem_bt[o]  = T_INT; break;
1598             case 'J': in_elem_bt[o]  = T_LONG; break;
1599             case 'S': in_elem_bt[o]  = T_SHORT; break;
1600             case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
1601             default: ShouldNotReachHere();
1602           }
1603         }
1604       } else {
1605         in_elem_bt[o] = T_VOID;
1606       }
1607       if (in_sig_bt[i] != T_VOID) {
1608         assert(in_sig_bt[i] == ss.type(), "must match");
1609         ss.next();
1610       }
1611     }
1612     assert(total_in_args == o, "must match");
1613 
1614     for (int i = 0; i < total_in_args; i++) {
1615       if (in_sig_bt[i] == T_ARRAY) {
1616         // Arrays are passed as tuples (int, elem*).
1617         out_sig_bt[argc++] = T_INT;
1618         out_sig_bt[argc++] = T_ADDRESS;
1619       } else {
1620         out_sig_bt[argc++] = in_sig_bt[i];
1621       }
1622     }
1623   }
1624 
1625   ///////////////////////////////////////////////////////////////////////
1626   // Now figure out where the args must be stored and how much stack space
1627   // they require (neglecting out_preserve_stack_slots but providing space
1628   // for storing the first five register arguments).
1629   // It's weird, see int_stk_helper.
1630   ///////////////////////////////////////////////////////////////////////
1631 
1632   //---------------------------------------------------------------------
1633   // Compute framesize for the wrapper.
1634   //
1635   // - We need to handlize all oops passed in registers.
1636   // - We must create space for them here that is disjoint from the save area.
1637   // - We always just allocate 5 words for storing down these object.
1638   //   This allows us to simply record the base and use the Ireg number to
1639   //   decide which slot to use.
1640   // - Note that the reg number used to index the stack slot is the inbound
1641   //   number, not the outbound number.
1642   // - We must shuffle args to match the native convention,
1643   //   and to include var-args space.
1644   //---------------------------------------------------------------------
1645 
1646   //---------------------------------------------------------------------
1647   // Calculate the total number of stack slots we will need:
1648   // - 1) abi requirements
1649   // - 2) outgoing args
1650   // - 3) space for inbound oop handle area
1651   // - 4) space for handlizing a klass if static method
1652   // - 5) space for a lock if synchronized method
1653   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1654   // - 7) filler slots for alignment
1655   //---------------------------------------------------------------------
1656   // Here is how the space we have allocated will look like.
1657   // Since we use resize_frame, we do not create a new stack frame,
1658   // but just extend the one we got with our own data area.
1659   //
1660   // If an offset or pointer name points to a separator line, it is
1661   // assumed that addressing with offset 0 selects storage starting
1662   // at the first byte above the separator line.
1663   //
1664   //
1665   //     ...                   ...
1666   //      | caller's frame      |
1667   // FP-> |---------------------|
1668   //      | filler slots, if any|
1669   //     7| #slots == mult of 2 |
1670   //      |---------------------|
1671   //      | work space          |
1672   //     6| 2 slots = 8 bytes   |
1673   //      |---------------------|
1674   //     5| lock box (if sync)  |
1675   //      |---------------------| <- lock_slot_offset
1676   //     4| klass (if static)   |
1677   //      |---------------------| <- klass_slot_offset
1678   //     3| oopHandle area      |
1679   //      | (save area for      |
1680   //      |  critical natives)  |
1681   //      |                     |
1682   //      |                     |
1683   //      |---------------------| <- oop_handle_offset
1684   //     2| outbound memory     |
1685   //     ...                   ...
1686   //      | based arguments     |
1687   //      |---------------------|
1688   //      | vararg              |
1689   //     ...                   ...
1690   //      | area                |
1691   //      |---------------------| <- out_arg_slot_offset
1692   //     1| out_preserved_slots |
1693   //     ...                   ...
1694   //      | (z_abi spec)        |
1695   // SP-> |---------------------| <- FP_slot_offset (back chain)
1696   //     ...                   ...
1697   //
1698   //---------------------------------------------------------------------
1699 
1700   // *_slot_offset indicates offset from SP in #stack slots
1701   // *_offset      indicates offset from SP in #bytes
1702 
1703   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1704                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1705 
1706   // Now the space for the inbound oop handle area.
1707   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1708   if (is_critical_native) {
1709     // Critical natives may have to call out so they need a save area
1710     // for register arguments.
1711     int double_slots = 0;
1712     int single_slots = 0;
1713     for (int i = 0; i < total_in_args; i++) {
1714       if (in_regs[i].first()->is_Register()) {
1715         const Register reg = in_regs[i].first()->as_Register();
1716         switch (in_sig_bt[i]) {
1717           case T_BOOLEAN:
1718           case T_BYTE:
1719           case T_SHORT:
1720           case T_CHAR:
1721           case T_INT:
1722           // Fall through.
1723           case T_ARRAY:
1724           case T_LONG: double_slots++; break;
1725           default:  ShouldNotReachHere();
1726         }
1727       } else {
1728         if (in_regs[i].first()->is_FloatRegister()) {
1729           switch (in_sig_bt[i]) {
1730             case T_FLOAT:  single_slots++; break;
1731             case T_DOUBLE: double_slots++; break;
1732             default:  ShouldNotReachHere();
1733           }
1734         }
1735       }
1736     }  // for
1737     total_save_slots = double_slots * 2 + round_to(single_slots, 2); // Round to even.
1738   }
1739 
1740   int oop_handle_slot_offset = stack_slots;
1741   stack_slots += total_save_slots;                                        // 3)
1742 
1743   int klass_slot_offset = 0;
1744   int klass_offset      = -1;
1745   if (method_is_static && !is_critical_native) {                          // 4)
1746     klass_slot_offset  = stack_slots;
1747     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1748     stack_slots       += VMRegImpl::slots_per_word;
1749   }
1750 
1751   int lock_slot_offset = 0;
1752   int lock_offset      = -1;
1753   if (method->is_synchronized()) {                                        // 5)
1754     lock_slot_offset   = stack_slots;
1755     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1756     stack_slots       += VMRegImpl::slots_per_word;
1757   }
1758 
1759   int workspace_slot_offset= stack_slots;                                 // 6)
1760   stack_slots         += 2;
1761 
1762   // Now compute actual number of stack words we need.
1763   // Round to align stack properly.
1764   stack_slots = round_to(stack_slots,                                     // 7)
1765                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1766   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1767 
1768 
1769   ///////////////////////////////////////////////////////////////////////
1770   // Now we can start generating code
1771   ///////////////////////////////////////////////////////////////////////
1772 
1773   unsigned int wrapper_CodeStart  = __ offset();
1774   unsigned int wrapper_UEPStart;
1775   unsigned int wrapper_VEPStart;
1776   unsigned int wrapper_FrameDone;
1777   unsigned int wrapper_CRegsSet;
1778   Label     handle_pending_exception;
1779   Label     ic_miss;
1780 
1781   //---------------------------------------------------------------------
1782   // Unverified entry point (UEP)
1783   //---------------------------------------------------------------------
1784   wrapper_UEPStart = __ offset();
1785 
1786   // check ic: object class <-> cached class
1787   if (!method_is_static) __ nmethod_UEP(ic_miss);
1788   // Fill with nops (alignment of verified entry point).
1789   __ align(CodeEntryAlignment);
1790 
1791   //---------------------------------------------------------------------
1792   // Verified entry point (VEP)
1793   //---------------------------------------------------------------------
1794   wrapper_VEPStart = __ offset();
1795 
1796   __ save_return_pc();
1797   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1798 #ifndef USE_RESIZE_FRAME
1799   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1800 #else
1801   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1802                                                           // Just resize the existing one.
1803 #endif
1804 
1805   wrapper_FrameDone = __ offset();
1806 
1807   __ verify_thread();
1808 
1809   // Native nmethod wrappers never take possession of the oop arguments.
1810   // So the caller will gc the arguments.
1811   // The only thing we need an oopMap for is if the call is static.
1812   //
1813   // An OopMap for lock (and class if static), and one for the VM call itself
1814   OopMapSet  *oop_maps        = new OopMapSet();
1815   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1816 
1817   if (is_critical_native) {
1818     check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1819                                        oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1820   }
1821 
1822 
1823   //////////////////////////////////////////////////////////////////////
1824   //
1825   // The Grand Shuffle
1826   //
1827   //////////////////////////////////////////////////////////////////////
1828   //
1829   // We immediately shuffle the arguments so that for any vm call we have
1830   // to make from here on out (sync slow path, jvmti, etc.) we will have
1831   // captured the oops from our caller and have a valid oopMap for them.
1832   //
1833   //--------------------------------------------------------------------
1834   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1835   // (derived from JavaThread* which is in Z_thread) and, if static,
1836   // the class mirror instead of a receiver. This pretty much guarantees that
1837   // register layout will not match. We ignore these extra arguments during
1838   // the shuffle. The shuffle is described by the two calling convention
1839   // vectors we have in our possession. We simply walk the java vector to
1840   // get the source locations and the c vector to get the destinations.
1841   //
1842   // This is a trick. We double the stack slots so we can claim
1843   // the oops in the caller's frame. Since we are sure to have
1844   // more args than the caller doubling is enough to make
1845   // sure we can capture all the incoming oop args from the caller.
1846   //--------------------------------------------------------------------
1847 
1848   // Record sp-based slot for receiver on stack for non-static methods.
1849   int receiver_offset = -1;
1850 
1851   //--------------------------------------------------------------------
1852   // We move the arguments backwards because the floating point registers
1853   // destination will always be to a register with a greater or equal
1854   // register number or the stack.
1855   //   jix is the index of the incoming Java arguments.
1856   //   cix is the index of the outgoing C arguments.
1857   //--------------------------------------------------------------------
1858 
1859 #ifdef ASSERT
1860   bool reg_destroyed[RegisterImpl::number_of_registers];
1861   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1862   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1863     reg_destroyed[r] = false;
1864   }
1865   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1866     freg_destroyed[f] = false;
1867   }
1868 #endif // ASSERT
1869 
1870   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1871 #ifdef ASSERT
1872     if (in_regs[jix].first()->is_Register()) {
1873       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1874     } else {
1875       if (in_regs[jix].first()->is_FloatRegister()) {
1876         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1877       }
1878     }
1879     if (out_regs[cix].first()->is_Register()) {
1880       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1881     } else {
1882       if (out_regs[cix].first()->is_FloatRegister()) {
1883         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1884       }
1885     }
1886 #endif // ASSERT
1887 
1888     switch (in_sig_bt[jix]) {
1889       // Due to casting, small integers should only occur in pairs with type T_LONG.
1890       case T_BOOLEAN:
1891       case T_CHAR:
1892       case T_BYTE:
1893       case T_SHORT:
1894       case T_INT:
1895         // Move int and do sign extension.
1896         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1897         break;
1898 
1899       case T_LONG :
1900         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1901         break;
1902 
1903       case T_ARRAY:
1904         if (is_critical_native) {
1905           int body_arg = cix;
1906           cix -= 2; // Point to length arg.
1907           unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1908           break;
1909         }
1910         // else fallthrough
1911       case T_OBJECT:
1912         assert(!is_critical_native, "no oop arguments");
1913         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1914                     ((jix == 0) && (!method_is_static)),
1915                     &receiver_offset);
1916         break;
1917       case T_VOID:
1918         break;
1919 
1920       case T_FLOAT:
1921         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1922         break;
1923 
1924       case T_DOUBLE:
1925         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1926         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1927         break;
1928 
1929       case T_ADDRESS:
1930         assert(false, "found T_ADDRESS in java args");
1931         break;
1932 
1933       default:
1934         ShouldNotReachHere();
1935     }
1936   }
1937 
1938   //--------------------------------------------------------------------
1939   // Pre-load a static method's oop into ARG2.
1940   // Used both by locking code and the normal JNI call code.
1941   //--------------------------------------------------------------------
1942   if (method_is_static && !is_critical_native) {
1943     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1944 
1945     // Now handlize the static class mirror in ARG2. It's known not-null.
1946     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1947     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1948     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1949   }
1950 
1951   // Get JNIEnv* which is first argument to native.
1952   if (!is_critical_native) {
1953     __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1954   }
1955 
1956   //////////////////////////////////////////////////////////////////////
1957   // We have all of the arguments setup at this point.
1958   // We MUST NOT touch any outgoing regs from this point on.
1959   // So if we must call out we must push a new frame.
1960   //////////////////////////////////////////////////////////////////////
1961 
1962 
1963   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1964   // Both values represent the same position.
1965   __ get_PC(Z_R10);                // PC into register
1966   wrapper_CRegsSet = __ offset();  // and into into variable.
1967 
1968   // Z_R10 now has the pc loaded that we will use when we finally call to native.
1969 
1970   // We use the same pc/oopMap repeatedly when we call out.
1971   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1972 
1973   // Lock a synchronized method.
1974 
1975   if (method->is_synchronized()) {
1976     assert(!is_critical_native, "unhandled");
1977 
1978     // ATTENTION: args and Z_R10 must be preserved.
1979     Register r_oop  = Z_R11;
1980     Register r_box  = Z_R12;
1981     Register r_tmp1 = Z_R13;
1982     Register r_tmp2 = Z_R7;
1983     Label done;
1984 
1985     // Load the oop for the object or class. R_carg2_classorobject contains
1986     // either the handlized oop from the incoming arguments or the handlized
1987     // class mirror (if the method is static).
1988     __ z_lg(r_oop, 0, Z_ARG2);
1989 
1990     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1991     // Get the lock box slot's address.
1992     __ add2reg(r_box, lock_offset, Z_SP);
1993 
1994 #ifdef ASSERT
1995     if (UseBiasedLocking)
1996       // Making the box point to itself will make it clear it went unused
1997       // but also be obviously invalid.
1998       __ z_stg(r_box, 0, r_box);
1999 #endif // ASSERT
2000 
2001     // Try fastpath for locking.
2002     // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!)
2003     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2004     __ z_bre(done);
2005 
2006     //-------------------------------------------------------------------------
2007     // None of the above fast optimizations worked so we have to get into the
2008     // slow case of monitor enter. Inline a special case of call_VM that
2009     // disallows any pending_exception.
2010     //-------------------------------------------------------------------------
2011 
2012     Register oldSP = Z_R11;
2013 
2014     __ z_lgr(oldSP, Z_SP);
2015 
2016     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2017 
2018     // Prepare arguments for call.
2019     __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object.
2020     __ add2reg(Z_ARG2, lock_offset, oldSP);
2021     __ z_lgr(Z_ARG3, Z_thread);
2022 
2023     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2024 
2025     // Do the call.
2026     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2027     __ call(Z_R1_scratch);
2028 
2029     __ reset_last_Java_frame();
2030 
2031     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2032 #ifdef ASSERT
2033     { Label L;
2034       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2035       __ z_bre(L);
2036       __ stop("no pending exception allowed on exit from IR::monitorenter");
2037       __ bind(L);
2038     }
2039 #endif
2040     __ bind(done);
2041   } // lock for synchronized methods
2042 
2043 
2044   //////////////////////////////////////////////////////////////////////
2045   // Finally just about ready to make the JNI call.
2046   //////////////////////////////////////////////////////////////////////
2047 
2048   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2049   __ set_last_Java_frame(Z_SP, Z_R10);
2050 
2051   // Transition from _thread_in_Java to _thread_in_native.
2052   __ set_thread_state(_thread_in_native);
2053 
2054 
2055   //////////////////////////////////////////////////////////////////////
2056   // This is the JNI call.
2057   //////////////////////////////////////////////////////////////////////
2058 
2059   __ call_c(native_func);
2060 
2061 
2062   //////////////////////////////////////////////////////////////////////
2063   // We have survived the call once we reach here.
2064   //////////////////////////////////////////////////////////////////////
2065 
2066 
2067   //--------------------------------------------------------------------
2068   // Unpack native results.
2069   //--------------------------------------------------------------------
2070   // For int-types, we do any needed sign-extension required.
2071   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2072   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2073   // blocking or unlocking.
2074   // An OOP result (handle) is done specially in the slow-path code.
2075   //--------------------------------------------------------------------
2076   switch (ret_type) {  //GLGLGL
2077     case T_VOID:    break;         // Nothing to do!
2078     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
2079     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
2080     case T_LONG:    break;         // Got it where we want it (unless slow-path)
2081     case T_OBJECT:  break;         // Really a handle.
2082                                    // Cannot de-handlize until after reclaiming jvm_lock.
2083     case T_ARRAY:   break;
2084 
2085     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
2086       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
2087       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
2088       break;
2089     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
2090     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2091     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
2092     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
2093 
2094     default:
2095       ShouldNotReachHere();
2096       break;
2097   }
2098 
2099 
2100   // Switch thread to "native transition" state before reading the synchronization state.
2101   // This additional state is necessary because reading and testing the synchronization
2102   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2103   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2104   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
2105   //   - Thread A is resumed to finish this native method, but doesn't block here since it
2106   //     didn't see any synchronization in progress, and escapes.
2107 
2108   // Transition from _thread_in_native to _thread_in_native_trans.
2109   __ set_thread_state(_thread_in_native_trans);
2110 
2111   // Safepoint synchronization
2112   //--------------------------------------------------------------------
2113   // Must we block?
2114   //--------------------------------------------------------------------
2115   // Block, if necessary, before resuming in _thread_in_Java state.
2116   // In order for GC to work, don't clear the last_Java_sp until after blocking.
2117   //--------------------------------------------------------------------
2118   Label after_transition;
2119   {
2120     Label no_block, sync;
2121 
2122     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2123 
2124     if (os::is_MP()) {
2125       if (UseMembar) {
2126         // Force this write out before the read below.
2127         __ z_fence();
2128       } else {
2129         // Write serialization page so VM thread can do a pseudo remote membar.
2130         // We use the current thread pointer to calculate a thread specific
2131         // offset to write to within the page. This minimizes bus traffic
2132         // due to cache line collision.
2133         __ serialize_memory(Z_thread, Z_R1, Z_R2);
2134       }
2135     }
2136     __ generate_safepoint_check(sync, Z_R1, true);
2137 
2138     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2139     __ z_bre(no_block);
2140 
2141     // Block. Save any potential method result value before the operation and
2142     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2143     // lets us share the oopMap we used when we went native rather than create
2144     // a distinct one for this pc.
2145     //
2146     __ bind(sync);
2147     __ z_acquire();
2148 
2149     address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2150                                              : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2151 
2152     __ call_VM_leaf(entry_point, Z_thread);
2153 
2154     if (is_critical_native) {
2155       restore_native_result(masm, ret_type, workspace_slot_offset);
2156       __ z_bru(after_transition); // No thread state transition here.
2157     }
2158     __ bind(no_block);
2159     restore_native_result(masm, ret_type, workspace_slot_offset);
2160   }
2161 
2162   //--------------------------------------------------------------------
2163   // Thread state is thread_in_native_trans. Any safepoint blocking has
2164   // already happened so we can now change state to _thread_in_Java.
2165   //--------------------------------------------------------------------
2166   // Transition from _thread_in_native_trans to _thread_in_Java.
2167   __ set_thread_state(_thread_in_Java);
2168   __ bind(after_transition);
2169 
2170 
2171   //--------------------------------------------------------------------
2172   // Reguard any pages if necessary.
2173   // Protect native result from being destroyed.
2174   //--------------------------------------------------------------------
2175 
2176   Label no_reguard;
2177 
2178   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2179            JavaThread::stack_guard_yellow_reserved_disabled);
2180 
2181   __ z_bre(no_reguard);
2182 
2183   save_native_result(masm, ret_type, workspace_slot_offset);
2184   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2185   restore_native_result(masm, ret_type, workspace_slot_offset);
2186 
2187   __ bind(no_reguard);
2188 
2189 
2190   // Synchronized methods (slow path only)
2191   // No pending exceptions for now.
2192   //--------------------------------------------------------------------
2193   // Handle possibly pending exception (will unlock if necessary).
2194   // Native result is, if any is live, in Z_FRES or Z_RES.
2195   //--------------------------------------------------------------------
2196   // Unlock
2197   //--------------------------------------------------------------------
2198   if (method->is_synchronized()) {
2199     const Register r_oop        = Z_R11;
2200     const Register r_box        = Z_R12;
2201     const Register r_tmp1       = Z_R13;
2202     const Register r_tmp2       = Z_R7;
2203     Label done;
2204 
2205     // Get unboxed oop of class mirror or object ...
2206     int   offset = method_is_static ? klass_offset : receiver_offset;
2207 
2208     assert(offset != -1, "");
2209     __ z_lg(r_oop, offset, Z_SP);
2210 
2211     // ... and address of lock object box.
2212     __ add2reg(r_box, lock_offset, Z_SP);
2213 
2214     // Try fastpath for unlocking.
2215     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2216     __ z_bre(done);
2217 
2218     // Slow path for unlocking.
2219     // Save and restore any potential method result value around the unlocking operation.
2220     const Register R_exc = Z_R11;
2221 
2222     save_native_result(masm, ret_type, workspace_slot_offset);
2223 
2224     // Must save pending exception around the slow-path VM call. Since it's a
2225     // leaf call, the pending exception (if any) can be kept in a register.
2226     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2227     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2228 
2229     // Must clear pending-exception before re-entering the VM. Since this is
2230     // a leaf call, pending-exception-oop can be safely kept in a register.
2231     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2232 
2233     // Inline a special case of call_VM that disallows any pending_exception.
2234 
2235     // Get locked oop from the handle we passed to jni.
2236     __ z_lg(Z_ARG1, offset, Z_SP);
2237     __ add2reg(Z_ARG2, lock_offset, Z_SP);
2238     __ z_lgr(Z_ARG3, Z_thread);
2239 
2240     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2241 
2242     __ call(Z_R1_scratch);
2243 
2244 #ifdef ASSERT
2245     {
2246       Label L;
2247       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2248       __ z_bre(L);
2249       __ stop("no pending exception allowed on exit from IR::monitorexit");
2250       __ bind(L);
2251     }
2252 #endif
2253 
2254     // Check_forward_pending_exception jump to forward_exception if any pending
2255     // exception is set. The forward_exception routine expects to see the
2256     // exception in pending_exception and not in a register. Kind of clumsy,
2257     // since all folks who branch to forward_exception must have tested
2258     // pending_exception first and hence have it in a register already.
2259     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2260     restore_native_result(masm, ret_type, workspace_slot_offset);
2261     __ z_bru(done);
2262     __ z_illtrap(0x66);
2263 
2264     __ bind(done);
2265   }
2266 
2267 
2268   //--------------------------------------------------------------------
2269   // Clear "last Java frame" SP and PC.
2270   //--------------------------------------------------------------------
2271   __ verify_thread(); // Z_thread must be correct.
2272 
2273   __ reset_last_Java_frame();
2274 
2275   // Unpack oop result
2276   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2277     NearLabel L;
2278     __ compare64_and_branch(Z_RET, (RegisterOrConstant)0L, Assembler::bcondEqual, L);
2279     __ z_lg(Z_RET, 0, Z_RET);
2280     __ bind(L);
2281     __ verify_oop(Z_RET);
2282   }
2283 
2284   if (CheckJNICalls) {
2285     // clear_pending_jni_exception_check
2286     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2287   }
2288 
2289   // Reset handle block.
2290   if (!is_critical_native) {
2291     __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2292     __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2293 
2294     // Check for pending exceptions.
2295     __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2296     __ z_brne(handle_pending_exception);
2297   }
2298 
2299 
2300   //////////////////////////////////////////////////////////////////////
2301   // Return
2302   //////////////////////////////////////////////////////////////////////
2303 
2304 
2305 #ifndef USE_RESIZE_FRAME
2306   __ pop_frame();                     // Pop wrapper frame.
2307 #else
2308   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2309 #endif
2310   __ restore_return_pc();             // This is the way back to the caller.
2311   __ z_br(Z_R14);
2312 
2313 
2314   //////////////////////////////////////////////////////////////////////
2315   // Out-of-line calls to the runtime.
2316   //////////////////////////////////////////////////////////////////////
2317 
2318 
2319   if (!is_critical_native) {
2320 
2321     //---------------------------------------------------------------------
2322     // Handler for pending exceptions (out-of-line).
2323     //---------------------------------------------------------------------
2324     // Since this is a native call, we know the proper exception handler
2325     // is the empty function. We just pop this frame and then jump to
2326     // forward_exception_entry. Z_R14 will contain the native caller's
2327     // return PC.
2328     __ bind(handle_pending_exception);
2329     __ pop_frame();
2330     __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2331     __ restore_return_pc();
2332     __ z_br(Z_R1_scratch);
2333 
2334     //---------------------------------------------------------------------
2335     // Handler for a cache miss (out-of-line)
2336     //---------------------------------------------------------------------
2337     __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2338   }
2339   __ flush();
2340 
2341 
2342   //////////////////////////////////////////////////////////////////////
2343   // end of code generation
2344   //////////////////////////////////////////////////////////////////////
2345 
2346 
2347   nmethod *nm = nmethod::new_native_nmethod(method,
2348                                             compile_id,
2349                                             masm->code(),
2350                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2351                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2352                                             stack_slots / VMRegImpl::slots_per_word,
2353                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2354                                             in_ByteSize(lock_offset),
2355                                             oop_maps);
2356 
2357   if (is_critical_native) {
2358     nm->set_lazy_critical_native(true);
2359   }
2360 
2361   return nm;
2362 #else
2363   ShouldNotReachHere();
2364   return NULL;
2365 #endif // COMPILER2
2366 }
2367 
2368 static address gen_c2i_adapter(MacroAssembler  *masm,
2369                                int total_args_passed,
2370                                int comp_args_on_stack,
2371                                const BasicType *sig_bt,
2372                                const VMRegPair *regs,
2373                                Label &skip_fixup) {
2374   // Before we get into the guts of the C2I adapter, see if we should be here
2375   // at all. We've come from compiled code and are attempting to jump to the
2376   // interpreter, which means the caller made a static call to get here
2377   // (vcalls always get a compiled target if there is one). Check for a
2378   // compiled target. If there is one, we need to patch the caller's call.
2379 
2380   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2381   const Register ientry = Z_R11;
2382   const Register code   = Z_R11;
2383 
2384   address c2i_entrypoint;
2385   Label   patch_callsite;
2386 
2387   // Regular (verified) c2i entry point.
2388   c2i_entrypoint = __ pc();
2389 
2390   // Call patching needed?
2391   __ load_and_test_long(Z_R0_scratch, method_(code));
2392   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2393   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2394 
2395   __ bind(skip_fixup);  // Return point from patch_callsite.
2396 
2397   // Since all args are passed on the stack, total_args_passed*wordSize is the
2398   // space we need. We need ABI scratch area but we use the caller's since
2399   // it has already been allocated.
2400 
2401   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2402   int       extraspace  = round_to(total_args_passed, 2)*wordSize + abi_scratch;
2403   Register  sender_SP   = Z_R10;
2404   Register  value       = Z_R12;
2405 
2406   // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2407   // In addition, frame manager expects initial_caller_sp in Z_R10.
2408   __ z_lgr(sender_SP, Z_SP);
2409 
2410   // This should always fit in 14 bit immediate.
2411   __ resize_frame(-extraspace, Z_R0_scratch);
2412 
2413   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2414   // args. This essentially moves the callers ABI scratch area from the top to the
2415   // bottom of the arg area.
2416 
2417   int st_off =  extraspace - wordSize;
2418 
2419   // Now write the args into the outgoing interpreter space.
2420   for (int i = 0; i < total_args_passed; i++) {
2421     VMReg r_1 = regs[i].first();
2422     VMReg r_2 = regs[i].second();
2423     if (!r_1->is_valid()) {
2424       assert(!r_2->is_valid(), "");
2425       continue;
2426     }
2427     if (r_1->is_stack()) {
2428       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2429       // We must account for it here.
2430       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2431 
2432       if (!r_2->is_valid()) {
2433         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2434       } else {
2435         // longs are given 2 64-bit slots in the interpreter,
2436         // but the data is passed in only 1 slot.
2437         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2438 #ifdef ASSERT
2439           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2440 #endif
2441           st_off -= wordSize;
2442         }
2443         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2444       }
2445     } else {
2446       if (r_1->is_Register()) {
2447         if (!r_2->is_valid()) {
2448           __ z_st(r_1->as_Register(), st_off, Z_SP);
2449         } else {
2450           // longs are given 2 64-bit slots in the interpreter, but the
2451           // data is passed in only 1 slot.
2452           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2453 #ifdef ASSERT
2454             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2455 #endif
2456             st_off -= wordSize;
2457           }
2458           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2459         }
2460       } else {
2461         assert(r_1->is_FloatRegister(), "");
2462         if (!r_2->is_valid()) {
2463           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2464         } else {
2465           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2466           // data is passed in only 1 slot.
2467           // One of these should get known junk...
2468 #ifdef ASSERT
2469           __ z_lzdr(Z_F1);
2470           __ z_std(Z_F1, st_off, Z_SP);
2471 #endif
2472           st_off-=wordSize;
2473           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2474         }
2475       }
2476     }
2477     st_off -= wordSize;
2478   }
2479 
2480 
2481   // Jump to the interpreter just as if interpreter was doing it.
2482   __ add2reg(Z_esp, st_off, Z_SP);
2483 
2484   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2485   __ z_br(ientry);
2486 
2487 
2488   // Prevent illegal entry to out-of-line code.
2489   __ z_illtrap(0x22);
2490 
2491   // Generate out-of-line runtime call to patch caller,
2492   // then continue as interpreted.
2493 
2494   // IF you lose the race you go interpreted.
2495   // We don't see any possible endless c2i -> i2c -> c2i ...
2496   // transitions no matter how rare.
2497   __ bind(patch_callsite);
2498 
2499   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2500   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2501   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2502   __ z_bru(skip_fixup);
2503 
2504   // end of out-of-line code
2505 
2506   return c2i_entrypoint;
2507 }
2508 
2509 // On entry, the following registers are set
2510 //
2511 //    Z_thread  r8  - JavaThread*
2512 //    Z_method  r9  - callee's method (method to be invoked)
2513 //    Z_esp     r7  - operand (or expression) stack pointer of caller. one slot above last arg.
2514 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2515 //
2516 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2517                                     int total_args_passed,
2518                                     int comp_args_on_stack,
2519                                     const BasicType *sig_bt,
2520                                     const VMRegPair *regs) {
2521   const Register value = Z_R12;
2522   const Register ld_ptr= Z_esp;
2523 
2524   int ld_offset = total_args_passed * wordSize;
2525 
2526   // Cut-out for having no stack args.
2527   if (comp_args_on_stack) {
2528     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2529     // registers are below. By subtracting stack0, we either get a negative
2530     // number (all values in registers) or the maximum stack slot accessed.
2531     // Convert VMRegImpl (4 byte) stack slots to words.
2532     int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2533     // Round up to miminum stack alignment, in wordSize
2534     comp_words_on_stack = round_to(comp_words_on_stack, 2);
2535 
2536     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2537   }
2538 
2539   // Now generate the shuffle code. Pick up all register args and move the
2540   // rest through register value=Z_R12.
2541   for (int i = 0; i < total_args_passed; i++) {
2542     if (sig_bt[i] == T_VOID) {
2543       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2544       continue;
2545     }
2546 
2547     // Pick up 0, 1 or 2 words from ld_ptr.
2548     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2549            "scrambled load targets?");
2550     VMReg r_1 = regs[i].first();
2551     VMReg r_2 = regs[i].second();
2552     if (!r_1->is_valid()) {
2553       assert(!r_2->is_valid(), "");
2554       continue;
2555     }
2556     if (r_1->is_FloatRegister()) {
2557       if (!r_2->is_valid()) {
2558         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2559         ld_offset-=wordSize;
2560       } else {
2561         // Skip the unused interpreter slot.
2562         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2563         ld_offset -= 2 * wordSize;
2564       }
2565     } else {
2566       if (r_1->is_stack()) {
2567         // Must do a memory to memory move.
2568         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2569 
2570         if (!r_2->is_valid()) {
2571           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2572         } else {
2573           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2574           // data is passed in only 1 slot.
2575           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2576             ld_offset -= wordSize;
2577           }
2578           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2579         }
2580       } else {
2581         if (!r_2->is_valid()) {
2582           // Not sure we need to do this but it shouldn't hurt.
2583           if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
2584             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2585           } else {
2586             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2587           }
2588         } else {
2589           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2590           // data is passed in only 1 slot.
2591           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2592             ld_offset -= wordSize;
2593           }
2594           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2595         }
2596       }
2597       ld_offset -= wordSize;
2598     }
2599   }
2600 
2601   // Jump to the compiled code just as if compiled code was doing it.
2602   // load target address from method oop:
2603   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2604 
2605   // Store method oop into thread->callee_target.
2606   // 6243940: We might end up in handle_wrong_method if
2607   // the callee is deoptimized as we race thru here. If that
2608   // happens we don't want to take a safepoint because the
2609   // caller frame will look interpreted and arguments are now
2610   // "compiled" so it is much better to make this transition
2611   // invisible to the stack walking code. Unfortunately, if
2612   // we try and find the callee by normal means a safepoint
2613   // is possible. So we stash the desired callee in the thread
2614   // and the vm will find it there should this case occur.
2615   __ z_stg(Z_method, thread_(callee_target));
2616 
2617   __ z_br(Z_R1_scratch);
2618 }
2619 
2620 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2621                                                             int total_args_passed,
2622                                                             int comp_args_on_stack,
2623                                                             const BasicType *sig_bt,
2624                                                             const VMRegPair *regs,
2625                                                             AdapterFingerPrint* fingerprint) {
2626   __ align(CodeEntryAlignment);
2627   address i2c_entry = __ pc();
2628   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2629 
2630   address c2i_unverified_entry;
2631 
2632   Label skip_fixup;
2633   {
2634     Label ic_miss;
2635     const int klass_offset         = oopDesc::klass_offset_in_bytes();
2636     const int holder_klass_offset  = CompiledICHolder::holder_klass_offset();
2637     const int holder_method_offset = CompiledICHolder::holder_method_offset();
2638 
2639     // Out-of-line call to ic_miss handler.
2640     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2641 
2642     // Unverified Entry Point UEP
2643     __ align(CodeEntryAlignment);
2644     c2i_unverified_entry = __ pc();
2645 
2646     // Check the pointers.
2647     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2648       __ z_ltgr(Z_ARG1, Z_ARG1);
2649       __ z_bre(ic_miss);
2650     }
2651     __ verify_oop(Z_ARG1);
2652 
2653     // Check ic: object class <-> cached class
2654     // Compress cached class for comparison. That's more efficient.
2655     if (UseCompressedClassPointers) {
2656       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2657       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2658     } else {
2659       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2660     }
2661     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2662 
2663     // This def MUST MATCH code in gen_c2i_adapter!
2664     const Register code = Z_R11;
2665 
2666     __ z_lg(Z_method, holder_method_offset, Z_method);
2667     __ load_and_test_long(Z_R0, method_(code));
2668     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2669 
2670     // Fallthru to VEP. Duplicate LTG, but saved taken branch.
2671   }
2672 
2673   address c2i_entry;
2674   c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2675 
2676   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
2677 }
2678 
2679 // This function returns the adjust size (in number of words) to a c2i adapter
2680 // activation for use during deoptimization.
2681 //
2682 // Actually only compiled frames need to be adjusted, but it
2683 // doesn't harm to adjust entry and interpreter frames, too.
2684 //
2685 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2686   assert(callee_locals >= callee_parameters,
2687           "test and remove; got more parms than locals");
2688   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2689   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2690          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2691 }
2692 
2693 uint SharedRuntime::out_preserve_stack_slots() {
2694   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2695 }
2696 
2697 //
2698 // Frame generation for deopt and uncommon trap blobs.
2699 //
2700 static void push_skeleton_frame(MacroAssembler* masm,
2701                           /* Unchanged */
2702                           Register frame_sizes_reg,
2703                           Register pcs_reg,
2704                           /* Invalidate */
2705                           Register frame_size_reg,
2706                           Register pc_reg) {
2707   BLOCK_COMMENT("  push_skeleton_frame {");
2708    __ z_lg(pc_reg, 0, pcs_reg);
2709    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2710    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2711    Register fp = pc_reg;
2712    __ push_frame(frame_size_reg, fp);
2713 #ifdef ASSERT
2714    // The magic is required for successful walking skeletal frames.
2715    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2716    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2717    // Fill other slots that are supposedly not necessary with eye catchers.
2718    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2719    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2720    // The sender_sp of the bottom frame is set before pushing it.
2721    // The sender_sp of non bottom frames is their caller's top_frame_sp, which
2722    // is unknown here. Luckily it is not needed before filling the frame in
2723    // layout_activation(), we assert this by setting an eye catcher (see
2724    // comments on sender_sp in frame_s390.hpp).
2725    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2726 #endif // ASSERT
2727   BLOCK_COMMENT("  } push_skeleton_frame");
2728 }
2729 
2730 // Loop through the UnrollBlock info and create new frames.
2731 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2732                             /* read */
2733                             Register unroll_block_reg,
2734                             /* invalidate */
2735                             Register frame_sizes_reg,
2736                             Register number_of_frames_reg,
2737                             Register pcs_reg,
2738                             Register tmp1,
2739                             Register tmp2) {
2740   BLOCK_COMMENT("push_skeleton_frames {");
2741   // _number_of_frames is of type int (deoptimization.hpp).
2742   __ z_lgf(number_of_frames_reg,
2743            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2744   __ z_lg(pcs_reg,
2745           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2746   __ z_lg(frame_sizes_reg,
2747           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2748 
2749   // stack: (caller_of_deoptee, ...).
2750 
2751   // If caller_of_deoptee is a compiled frame, then we extend it to make
2752   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2753   // See also Deoptimization::last_frame_adjust() above.
2754   // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.
2755 
2756   __ z_lgf(Z_R1_scratch,
2757            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2758   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2759   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2760   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2761   // (it is required to find the original pc of caller_of_deoptee if it is marked
2762   // for deoptimization - see nmethod::orig_pc_addr()).
2763   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2764 
2765   // Now push the new interpreter frames.
2766   Label loop, loop_entry;
2767 
2768   // Make sure that there is at least one entry in the array.
2769   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2770   __ asm_assert_ne("array_size must be > 0", 0x205);
2771 
2772   __ z_bru(loop_entry);
2773 
2774   __ bind(loop);
2775 
2776   __ add2reg(frame_sizes_reg, wordSize);
2777   __ add2reg(pcs_reg, wordSize);
2778 
2779   __ bind(loop_entry);
2780 
2781   // Allocate a new frame, fill in the pc.
2782   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2783 
2784   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2785   __ z_brne(loop);
2786 
2787   // Set the top frame's return pc.
2788   __ add2reg(pcs_reg, wordSize);
2789   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2790   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2791   BLOCK_COMMENT("} push_skeleton_frames");
2792 }
2793 
2794 //------------------------------generate_deopt_blob----------------------------
2795 void SharedRuntime::generate_deopt_blob() {
2796   // Allocate space for the code.
2797   ResourceMark rm;
2798   // Setup code generation tools.
2799   CodeBuffer buffer("deopt_blob", 2048, 1024);
2800   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2801   Label exec_mode_initialized;
2802   OopMap* map = NULL;
2803   OopMapSet *oop_maps = new OopMapSet();
2804 
2805   unsigned int start_off = __ offset();
2806   Label cont;
2807 
2808   // --------------------------------------------------------------------------
2809   // Normal entry (non-exception case)
2810   //
2811   // We have been called from the deopt handler of the deoptee.
2812   // Z_R14 points behind the call in the deopt handler. We adjust
2813   // it such that it points to the start of the deopt handler.
2814   // The return_pc has been stored in the frame of the deoptee and
2815   // will replace the address of the deopt_handler in the call
2816   // to Deoptimization::fetch_unroll_info below.
2817   // The (int) cast is necessary, because -((unsigned int)14)
2818   // is an unsigned int.
2819   __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler());
2820 
2821   const Register   exec_mode_reg = Z_tmp_1;
2822 
2823   // stack: (deoptee, caller of deoptee, ...)
2824 
2825   // pushes an "unpack" frame
2826   // R14 contains the return address pointing into the deoptimized
2827   // nmethod that was valid just before the nmethod was deoptimized.
2828   // save R14 into the deoptee frame.  the `fetch_unroll_info'
2829   // procedure called below will read it from there.
2830   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2831 
2832   // note the entry point.
2833   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2834   __ z_bru(exec_mode_initialized);
2835 
2836 #ifndef COMPILER1
2837   int reexecute_offset = 1; // odd offset will produce odd pc, which triggers an hardware trap
2838 #else
2839   // --------------------------------------------------------------------------
2840   // Reexecute entry
2841   // - Z_R14 = Deopt Handler in nmethod
2842 
2843   int reexecute_offset = __ offset() - start_off;
2844 
2845   // No need to update map as each call to save_live_registers will produce identical oopmap
2846   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2847 
2848   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2849   __ z_bru(exec_mode_initialized);
2850 #endif
2851 
2852 
2853   // --------------------------------------------------------------------------
2854   // Exception entry. We reached here via a branch. Registers on entry:
2855   // - Z_EXC_OOP (Z_ARG1) = exception oop
2856   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2857 
2858   int exception_offset = __ offset() - start_off;
2859 
2860   // all registers are dead at this entry point, except for Z_EXC_OOP, and
2861   // Z_EXC_PC which contain the exception oop and exception pc
2862   // respectively.  Set them in TLS and fall thru to the
2863   // unpack_with_exception_in_tls entry point.
2864 
2865   // Store exception oop and pc in thread (location known to GC).
2866   // Need this since the call to "fetch_unroll_info()" may safepoint.
2867   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2868   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2869 
2870   // fall through
2871 
2872   int exception_in_tls_offset = __ offset() - start_off;
2873 
2874   // new implementation because exception oop is now passed in JavaThread
2875 
2876   // Prolog for exception case
2877   // All registers must be preserved because they might be used by LinearScan
2878   // Exceptiop oop and throwing PC are passed in JavaThread
2879 
2880   // load throwing pc from JavaThread and us it as the return address of the current frame.
2881   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2882 
2883   // Save everything in sight.
2884   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2885 
2886   // Now it is safe to overwrite any register
2887 
2888   // Clear the exception pc field in JavaThread
2889   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2890 
2891   // Deopt during an exception.  Save exec mode for unpack_frames.
2892   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2893 
2894 
2895 #ifdef ASSERT
2896   // verify that there is really an exception oop in JavaThread
2897   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2898   __ verify_oop(Z_ARG1);
2899 
2900   // verify that there is no pending exception
2901   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2902                              "must not have pending exception here", __LINE__);
2903 #endif
2904 
2905   // --------------------------------------------------------------------------
2906   // At this point, the live registers are saved and
2907   // the exec_mode_reg has been set up correctly.
2908   __ bind(exec_mode_initialized);
2909 
2910   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2911 
2912   {
2913   const Register unroll_block_reg  = Z_tmp_2;
2914 
2915   // we need to set `last_Java_frame' because `fetch_unroll_info' will
2916   // call `last_Java_frame()'.  however we can't block and no gc will
2917   // occur so we don't need an oopmap. the value of the pc in the
2918   // frame is not particularly important.  it just needs to identify the blob.
2919 
2920   // Don't set last_Java_pc anymore here (is implicitly NULL then).
2921   // the correct PC is retrieved in pd_last_frame() in that case.
2922   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
2923   // With EscapeAnalysis turned on, this call may safepoint
2924   // despite it's marked as "leaf call"!
2925   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
2926   // Set an oopmap for the call site this describes all our saved volatile registers
2927   int offs = __ offset();
2928   oop_maps->add_gc_map(offs, map);
2929 
2930   __ reset_last_Java_frame();
2931   // save the return value.
2932   __ z_lgr(unroll_block_reg, Z_RET);
2933   // restore the return registers that have been saved
2934   // (among other registers) by save_live_registers(...).
2935   RegisterSaver::restore_result_registers(masm);
2936 
2937   // reload the exec mode from the UnrollBlock (it might have changed)
2938   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2939 
2940   // In excp_deopt_mode, restore and clear exception oop which we
2941   // stored in the thread during exception entry above. The exception
2942   // oop will be the return value of this stub.
2943   NearLabel skip_restore_excp;
2944   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
2945   __ z_lg(Z_RET, thread_(exception_oop));
2946   __ clear_mem(thread_(exception_oop), 8);
2947   __ bind(skip_restore_excp);
2948 
2949   // remove the "unpack" frame
2950   __ pop_frame();
2951 
2952   // stack: (deoptee, caller of deoptee, ...).
2953 
2954   // pop the deoptee's frame
2955   __ pop_frame();
2956 
2957   // stack: (caller_of_deoptee, ...).
2958 
2959   // loop through the `UnrollBlock' info and create interpreter frames.
2960   push_skeleton_frames(masm, true/*deopt*/,
2961                   unroll_block_reg,
2962                   Z_tmp_3,
2963                   Z_tmp_4,
2964                   Z_ARG5,
2965                   Z_ARG4,
2966                   Z_ARG3);
2967 
2968   // stack: (skeletal interpreter frame, ..., optional skeletal
2969   // interpreter frame, caller of deoptee, ...).
2970   }
2971 
2972   // push an "unpack" frame taking care of float / int return values.
2973   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
2974 
2975   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2976   // skeletal interpreter frame, caller of deoptee, ...).
2977 
2978   // spill live volatile registers since we'll do a call.
2979   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2980   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2981 
2982   // let the unpacker layout information in the skeletal frames just allocated.
2983   __ get_PC(Z_RET);
2984   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
2985   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
2986                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
2987 
2988   __ reset_last_Java_frame();
2989 
2990   // restore the volatiles saved above.
2991   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2992   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2993 
2994   // pop the "unpack" frame.
2995   __ pop_frame();
2996   __ restore_return_pc();
2997 
2998   // stack: (top interpreter frame, ..., optional interpreter frame,
2999   // caller of deoptee, ...).
3000 
3001   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3002   __ restore_bcp();
3003   __ restore_locals();
3004   __ restore_esp();
3005 
3006   // return to the interpreter entry point.
3007   __ z_br(Z_R14);
3008 
3009   // Make sure all code is generated
3010   masm->flush();
3011 
3012   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3013   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3014 }
3015 
3016 
3017 #ifdef COMPILER2
3018 //------------------------------generate_uncommon_trap_blob--------------------
3019 void SharedRuntime::generate_uncommon_trap_blob() {
3020   // Allocate space for the code
3021   ResourceMark rm;
3022   // Setup code generation tools
3023   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3024   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3025 
3026   Register unroll_block_reg = Z_tmp_1;
3027   Register klass_index_reg  = Z_ARG2;
3028   Register unc_trap_reg     = Z_ARG2;
3029 
3030   // stack: (deoptee, caller_of_deoptee, ...).
3031 
3032   // push a dummy "unpack" frame and call
3033   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3034   // vframe array and return the `UnrollBlock' information.
3035 
3036   // save R14 to compiled frame.
3037   __ save_return_pc();
3038   // push the "unpack_frame".
3039   __ push_frame_abi160(0);
3040 
3041   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
3042 
3043   // set the "unpack" frame as last_Java_frame.
3044   // `Deoptimization::uncommon_trap' expects it and considers its
3045   // sender frame as the deoptee frame.
3046   __ get_PC(Z_R1_scratch);
3047   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3048 
3049   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
3050   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
3051   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
3052   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
3053 
3054   __ reset_last_Java_frame();
3055 
3056   // pop the "unpack" frame
3057   __ pop_frame();
3058 
3059   // stack: (deoptee, caller_of_deoptee, ...).
3060 
3061   // save the return value.
3062   __ z_lgr(unroll_block_reg, Z_RET);
3063 
3064   // pop the deoptee frame.
3065   __ pop_frame();
3066 
3067   // stack: (caller_of_deoptee, ...).
3068 
3069 #ifdef ASSERT
3070   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
3071   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
3072   const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
3073 #ifndef VM_LITTLE_ENDIAN
3074   + 3
3075 #endif
3076   ;
3077   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
3078     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3079   } else {
3080     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
3081   }
3082   __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
3083 #endif
3084 
3085   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
3086 
3087   // allocate new interpreter frame(s) and possibly resize the caller's frame
3088   // (no more adapters !)
3089   push_skeleton_frames(masm, false/*deopt*/,
3090                   unroll_block_reg,
3091                   Z_tmp_2,
3092                   Z_tmp_3,
3093                   Z_tmp_4,
3094                   Z_ARG5,
3095                   Z_ARG4);
3096 
3097   // stack: (skeletal interpreter frame, ..., optional skeletal
3098   // interpreter frame, (resized) caller of deoptee, ...).
3099 
3100   // push a dummy "unpack" frame taking care of float return values.
3101   // call `Deoptimization::unpack_frames' to layout information in the
3102   // interpreter frames just created
3103 
3104   // push the "unpack" frame
3105    const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
3106 
3107   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3108   // skeletal interpreter frame, (resized) caller of deoptee, ...).
3109 
3110   // set the "unpack" frame as last_Java_frame
3111   __ get_PC(Z_R1_scratch);
3112   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
3113 
3114   // indicate it is the uncommon trap case
3115   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
3116   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3117   // let the unpacker layout information in the skeletal frames just allocated.
3118   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
3119 
3120   __ reset_last_Java_frame();
3121   // pop the "unpack" frame
3122   __ pop_frame();
3123   // restore LR from top interpreter frame
3124   __ restore_return_pc();
3125 
3126   // stack: (top interpreter frame, ..., optional interpreter frame,
3127   // (resized) caller of deoptee, ...).
3128 
3129   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
3130   __ restore_bcp();
3131   __ restore_locals();
3132   __ restore_esp();
3133 
3134   // return to the interpreter entry point
3135   __ z_br(Z_R14);
3136 
3137   masm->flush();
3138   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
3139 }
3140 #endif // COMPILER2
3141 
3142 
3143 //------------------------------generate_handler_blob------
3144 //
3145 // Generate a special Compile2Runtime blob that saves all registers,
3146 // and setup oopmap.
3147 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3148   assert(StubRoutines::forward_exception_entry() != NULL,
3149          "must be generated before");
3150 
3151   ResourceMark rm;
3152   OopMapSet *oop_maps = new OopMapSet();
3153   OopMap* map;
3154 
3155   // Allocate space for the code. Setup code generation tools.
3156   CodeBuffer buffer("handler_blob", 2048, 1024);
3157   MacroAssembler* masm = new MacroAssembler(&buffer);
3158 
3159   unsigned int start_off = __ offset();
3160   address call_pc = NULL;
3161   int frame_size_in_bytes;
3162 
3163   bool cause_return = (poll_type == POLL_AT_RETURN);
3164   // Make room for return address (or push it again)
3165   if (!cause_return)
3166     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
3167 
3168   // Save registers, fpu state, and flags
3169   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3170 
3171   // The following is basically a call_VM. However, we need the precise
3172   // address of the call in order to generate an oopmap. Hence, we do all the
3173   // work outselves.
3174   __ set_last_Java_frame(Z_SP, noreg);
3175 
3176   // call into the runtime to handle the safepoint poll
3177   __ call_VM_leaf(call_ptr, Z_thread);
3178 
3179 
3180   // Set an oopmap for the call site. This oopmap will map all
3181   // oop-registers and debug-info registers as callee-saved. This
3182   // will allow deoptimization at this safepoint to find all possible
3183   // debug-info recordings, as well as let GC find all oops.
3184 
3185   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
3186 
3187   Label noException;
3188 
3189   __ reset_last_Java_frame();
3190 
3191   __ load_and_test_long(Z_R1, thread_(pending_exception));
3192   __ z_bre(noException);
3193 
3194   // Pending exception case, used (sporadically) by
3195   // api/java_lang/Thread.State/index#ThreadState et al.
3196   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3197 
3198   // Jump to forward_exception_entry, with the issuing PC in Z_R14
3199   // so it looks like the original nmethod called forward_exception_entry.
3200   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3201   __ z_br(Z_R1_scratch);
3202 
3203   // No exception case
3204   __ bind(noException);
3205 
3206   // Normal exit, restore registers and exit.
3207   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3208 
3209   __ z_br(Z_R14);
3210 
3211   // Make sure all code is generated
3212   masm->flush();
3213 
3214   // Fill-out other meta info
3215   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
3216 }
3217 
3218 
3219 //
3220 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
3221 //
3222 // Generate a stub that calls into vm to find out the proper destination
3223 // of a Java call. All the argument registers are live at this point
3224 // but since this is generic code we don't know what they are and the caller
3225 // must do any gc of the args.
3226 //
3227 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3228   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3229 
3230   // allocate space for the code
3231   ResourceMark rm;
3232 
3233   CodeBuffer buffer(name, 1000, 512);
3234   MacroAssembler* masm                = new MacroAssembler(&buffer);
3235 
3236   OopMapSet *oop_maps = new OopMapSet();
3237   OopMap* map = NULL;
3238 
3239   unsigned int start_off = __ offset();
3240 
3241   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3242 
3243   // We must save a PC from within the stub as return PC
3244   // C code doesn't store the LR where we expect the PC,
3245   // so we would run into trouble upon stack walking.
3246   __ get_PC(Z_R1_scratch);
3247 
3248   unsigned int frame_complete = __ offset();
3249 
3250   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
3251 
3252   __ call_VM_leaf(destination, Z_thread, Z_method);
3253 
3254 
3255   // Set an oopmap for the call site.
3256   // We need this not only for callee-saved registers, but also for volatile
3257   // registers that the compiler might be keeping live across a safepoint.
3258 
3259   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
3260 
3261   // clear last_Java_sp
3262   __ reset_last_Java_frame();
3263 
3264   // check for pending exceptions
3265   Label pending;
3266   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3267   __ z_brne(pending);
3268 
3269   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3270   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3271 
3272   // get the returned method
3273   __ get_vm_result_2(Z_method);
3274 
3275   // We are back the the original state on entry and ready to go.
3276   __ z_br(Z_R1_scratch);
3277 
3278   // Pending exception after the safepoint
3279 
3280   __ bind(pending);
3281 
3282   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3283 
3284   // exception pending => remove activation and forward to exception handler
3285 
3286   __ z_lgr(Z_R2, Z_R0); // pending_exception
3287   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3288   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3289   __ z_br(Z_R1_scratch);
3290 
3291   // -------------
3292   // make sure all code is generated
3293   masm->flush();
3294 
3295   // return the blob
3296   // frame_size_words or bytes??
3297   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3298                                        oop_maps, true);
3299 
3300 }
3301 
3302 //------------------------------Montgomery multiplication------------------------
3303 //
3304 
3305 // Subtract 0:b from carry:a. Return carry.
3306 static unsigned long
3307 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3308   unsigned long i, c = 8 * (unsigned long)(len - 1);
3309   __asm__ __volatile__ (
3310     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3311     "LGHI   0, 8               \n" // index increment (for BRXLG)
3312     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3313     "0:                        \n"
3314     "LG     %[c], 0(%[i],%[a]) \n"
3315     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3316     "STG    %[c], 0(%[i],%[a]) \n"
3317     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3318     "SLBGR  %[c], %[c]         \n" // save carry - 1
3319     : [i]"=&a"(i), [c]"+r"(c)
3320     : [a]"a"(a), [b]"a"(b)
3321     : "cc", "memory", "r0", "r1"
3322  );
3323   return carry + c;
3324 }
3325 
3326 // Multiply (unsigned) Long A by Long B, accumulating the double-
3327 // length result into the accumulator formed of T0, T1, and T2.
3328 inline void MACC(unsigned long A[], long A_ind,
3329                  unsigned long B[], long B_ind,
3330                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3331   long A_si = 8 * A_ind,
3332        B_si = 8 * B_ind;
3333   __asm__ __volatile__ (
3334     "LG     1, 0(%[A_si],%[A]) \n"
3335     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3336     "ALGR   %[T0], 1           \n"
3337     "LGHI   1, 0               \n" // r1 = 0
3338     "ALCGR  %[T1], 0           \n"
3339     "ALCGR  %[T2], 1           \n"
3340     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3341     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3342     : "cc", "r0", "r1"
3343  );
3344 }
3345 
3346 // As above, but add twice the double-length result into the
3347 // accumulator.
3348 inline void MACC2(unsigned long A[], long A_ind,
3349                   unsigned long B[], long B_ind,
3350                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3351   const unsigned long zero = 0;
3352   long A_si = 8 * A_ind,
3353        B_si = 8 * B_ind;
3354   __asm__ __volatile__ (
3355     "LG     1, 0(%[A_si],%[A]) \n"
3356     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3357     "ALGR   %[T0], 1           \n"
3358     "ALCGR  %[T1], 0           \n"
3359     "ALCGR  %[T2], %[zero]     \n"
3360     "ALGR   %[T0], 1           \n"
3361     "ALCGR  %[T1], 0           \n"
3362     "ALCGR  %[T2], %[zero]     \n"
3363     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3364     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3365     : "cc", "r0", "r1"
3366  );
3367 }
3368 
3369 // Fast Montgomery multiplication. The derivation of the algorithm is
3370 // in "A Cryptographic Library for the Motorola DSP56000,
3371 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3372 static void
3373 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3374                     unsigned long m[], unsigned long inv, int len) {
3375   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3376   int i;
3377 
3378   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3379 
3380   for (i = 0; i < len; i++) {
3381     int j;
3382     for (j = 0; j < i; j++) {
3383       MACC(a, j, b, i-j, t0, t1, t2);
3384       MACC(m, j, n, i-j, t0, t1, t2);
3385     }
3386     MACC(a, i, b, 0, t0, t1, t2);
3387     m[i] = t0 * inv;
3388     MACC(m, i, n, 0, t0, t1, t2);
3389 
3390     assert(t0 == 0, "broken Montgomery multiply");
3391 
3392     t0 = t1; t1 = t2; t2 = 0;
3393   }
3394 
3395   for (i = len; i < 2 * len; i++) {
3396     int j;
3397     for (j = i - len + 1; j < len; j++) {
3398       MACC(a, j, b, i-j, t0, t1, t2);
3399       MACC(m, j, n, i-j, t0, t1, t2);
3400     }
3401     m[i-len] = t0;
3402     t0 = t1; t1 = t2; t2 = 0;
3403   }
3404 
3405   while (t0) {
3406     t0 = sub(m, n, t0, len);
3407   }
3408 }
3409 
3410 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3411 // multiplies so it should be up to 25% faster than Montgomery
3412 // multiplication. However, its loop control is more complex and it
3413 // may actually run slower on some machines.
3414 static void
3415 montgomery_square(unsigned long a[], unsigned long n[],
3416                   unsigned long m[], unsigned long inv, int len) {
3417   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3418   int i;
3419 
3420   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3421 
3422   for (i = 0; i < len; i++) {
3423     int j;
3424     int end = (i+1)/2;
3425     for (j = 0; j < end; j++) {
3426       MACC2(a, j, a, i-j, t0, t1, t2);
3427       MACC(m, j, n, i-j, t0, t1, t2);
3428     }
3429     if ((i & 1) == 0) {
3430       MACC(a, j, a, j, t0, t1, t2);
3431     }
3432     for (; j < i; j++) {
3433       MACC(m, j, n, i-j, t0, t1, t2);
3434     }
3435     m[i] = t0 * inv;
3436     MACC(m, i, n, 0, t0, t1, t2);
3437 
3438     assert(t0 == 0, "broken Montgomery square");
3439 
3440     t0 = t1; t1 = t2; t2 = 0;
3441   }
3442 
3443   for (i = len; i < 2*len; i++) {
3444     int start = i-len+1;
3445     int end = start + (len - start)/2;
3446     int j;
3447     for (j = start; j < end; j++) {
3448       MACC2(a, j, a, i-j, t0, t1, t2);
3449       MACC(m, j, n, i-j, t0, t1, t2);
3450     }
3451     if ((i & 1) == 0) {
3452       MACC(a, j, a, j, t0, t1, t2);
3453     }
3454     for (; j < len; j++) {
3455       MACC(m, j, n, i-j, t0, t1, t2);
3456     }
3457     m[i-len] = t0;
3458     t0 = t1; t1 = t2; t2 = 0;
3459   }
3460 
3461   while (t0) {
3462     t0 = sub(m, n, t0, len);
3463   }
3464 }
3465 
3466 // The threshold at which squaring is advantageous was determined
3467 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3468 // Value seems to be ok for other platforms, too.
3469 #define MONTGOMERY_SQUARING_THRESHOLD 64
3470 
3471 // Copy len longwords from s to d, word-swapping as we go. The
3472 // destination array is reversed.
3473 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3474   d += len;
3475   while(len-- > 0) {
3476     d--;
3477     unsigned long s_val = *s;
3478     // Swap words in a longword on little endian machines.
3479 #ifdef VM_LITTLE_ENDIAN
3480      Unimplemented();
3481 #endif
3482     *d = s_val;
3483     s++;
3484   }
3485 }
3486 
3487 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3488                                         jint len, jlong inv,
3489                                         jint *m_ints) {
3490   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3491   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3492   int longwords = len/2;
3493 
3494   // Make very sure we don't use so much space that the stack might
3495   // overflow. 512 jints corresponds to an 16384-bit integer and
3496   // will use here a total of 8k bytes of stack space.
3497   int total_allocation = longwords * sizeof (unsigned long) * 4;
3498   guarantee(total_allocation <= 8192, "must be");
3499   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3500 
3501   // Local scratch arrays
3502   unsigned long
3503     *a = scratch + 0 * longwords,
3504     *b = scratch + 1 * longwords,
3505     *n = scratch + 2 * longwords,
3506     *m = scratch + 3 * longwords;
3507 
3508   reverse_words((unsigned long *)a_ints, a, longwords);
3509   reverse_words((unsigned long *)b_ints, b, longwords);
3510   reverse_words((unsigned long *)n_ints, n, longwords);
3511 
3512   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3513 
3514   reverse_words(m, (unsigned long *)m_ints, longwords);
3515 }
3516 
3517 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3518                                       jint len, jlong inv,
3519                                       jint *m_ints) {
3520   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3521   assert(len % 2 == 0, "array length in montgomery_square must be even");
3522   int longwords = len/2;
3523 
3524   // Make very sure we don't use so much space that the stack might
3525   // overflow. 512 jints corresponds to an 16384-bit integer and
3526   // will use here a total of 6k bytes of stack space.
3527   int total_allocation = longwords * sizeof (unsigned long) * 3;
3528   guarantee(total_allocation <= 8192, "must be");
3529   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3530 
3531   // Local scratch arrays
3532   unsigned long
3533     *a = scratch + 0 * longwords,
3534     *n = scratch + 1 * longwords,
3535     *m = scratch + 2 * longwords;
3536 
3537   reverse_words((unsigned long *)a_ints, a, longwords);
3538   reverse_words((unsigned long *)n_ints, n, longwords);
3539 
3540   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3541     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3542   } else {
3543     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3544   }
3545 
3546   reverse_words(m, (unsigned long *)m_ints, longwords);
3547 }
3548 
3549 extern "C"
3550 int SpinPause() {
3551   return 0;
3552 }