--- old/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp 2017-06-30 12:45:26.157942000 +0200
+++ new/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp 2017-06-30 12:45:25.948940000 +0200
@@ -1139,14 +1139,7 @@
   __ load_const_optimized(Z_R1_scratch, pp);
 
   // Pop the frame before the safepoint code.
-  int retPC_offset = initial_frame_size_in_bytes() + _z_abi16(return_pc);
-  if (Displacement::is_validDisp(retPC_offset)) {
-    __ z_lg(Z_R14, retPC_offset, Z_SP);
-    __ add2reg(Z_SP, initial_frame_size_in_bytes());
-  } else {
-    __ add2reg(Z_SP, initial_frame_size_in_bytes());
-    __ restore_return_pc();
-  }
+  __ pop_frame_restore_retPC(initial_frame_size_in_bytes());
 
   if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
     __ reserved_stack_check(Z_R14);
--- old/src/cpu/s390/vm/macroAssembler_s390.cpp 2017-06-30 12:45:27.860959000 +0200
+++ new/src/cpu/s390/vm/macroAssembler_s390.cpp 2017-06-30 12:45:27.663969000 +0200
@@ -2022,17 +2022,40 @@
   z_stg(fp, _z_abi(callers_sp), Z_SP);
 }
 
-// Resize_frame with SP(new) = [addr].
-void MacroAssembler::resize_frame_absolute(Register addr, Register fp, bool load_fp) {
-  assert_different_registers(addr, fp, Z_SP);
-  if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }
-
-  if (addr != Z_R0) {
-    // Minimize stalls by not using Z_SP immediately after update.
-    z_stg(fp, _z_abi(callers_sp), addr);
-    z_lgr(Z_SP, addr);
+// Resize_frame with SP(new) = [newSP] + offset.
+//  This emitter is useful if we already have calculated a pointer
+//  into the to-be-allocated stack space, e.g. with special alignment properties,
+//  but need some additional space, e.g. for spilling.
+//  newSP    is the pre-calculated pointer. It must not be modified.
+//  fp       holds, or is filled with, the frame pointer.
+//  offset   is the additional increment which is added to newSP to form the new SP.
+//           Note: specify a negative value to reserve more space!
+//  load_fp == true only indicates that fp is not pre-filled with the frame pointer.
+//                  It does not guarantee that fp contains the frame pointer at the end.
+void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) {
+  assert_different_registers(newSP, fp, Z_SP);
+
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP);
+  }
+  add2reg(Z_SP, offset, newSP);
+  z_stg(fp, _z_abi(callers_sp), Z_SP);
+}
+
+// Resize_frame with SP(new) = [newSP].
+//  load_fp == true only indicates that fp is not pre-filled with the frame pointer.
+//                  It does not guarantee that fp contains the frame pointer at the end.
+void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) {
+  assert_different_registers(newSP, fp, Z_SP);
+
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store.
+  }
+
+  z_lgr(Z_SP, newSP);
+  if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses.
+    z_stg(fp, _z_abi(callers_sp), newSP);
   } else {
-    z_lgr(Z_SP, addr);
     z_stg(fp, _z_abi(callers_sp), Z_SP);
   }
 }
@@ -2040,17 +2063,12 @@
 // Resize_frame with SP(new) = SP(old) + offset.
 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
   assert_different_registers(fp, Z_SP);
-  if (load_fp) z_lg(fp, _z_abi(callers_sp), Z_SP);
-  if (Displacement::is_validDisp((int)_z_abi(callers_sp) + offset.constant_or_zero())) {
-    // Minimize stalls by first using, then updating Z_SP.
-    // Do that only if we have a small positive offset or if ExtImm are available.
-    z_stg(fp, Address(Z_SP, offset, _z_abi(callers_sp)));
-    add64(Z_SP, offset);
-  } else {
-    add64(Z_SP, offset);
-    z_stg(fp, _z_abi(callers_sp), Z_SP);
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP);
   }
+  add64(Z_SP, offset);
+  z_stg(fp, _z_abi(callers_sp), Z_SP);
 }
 
 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
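The resize_frame and push_frame hunks in this file all switch to the same ordering: update Z_SP first, then write the back chain through the new Z_SP. A minimal sketch of the old and new orderings when growing a frame; the register choice and the 96-byte growth are placeholders for illustration, not values taken from this change:

    Register fp = Z_R1;      // back chain of the current frame, assumed already loaded
    const int size = 96;     // placeholder frame growth, assumed to be a valid displacement

    // Old ordering (removed): stores into not-yet-reserved space below Z_SP.
    // A signal handler running between the two instructions may use the area
    // below Z_SP for its own purposes and clobber the freshly stored value.
    z_stg(fp, -size, Z_SP);               // store back chain below the current SP
    add2reg(Z_SP, -size);                 // only now does Z_SP cover that slot

    // New ordering (this change): reserve first, then store through the new SP.
    add2reg(Z_SP, -size);                 // Z_SP now covers the new callers_sp slot
    z_stg(fp, _z_abi(callers_sp), Z_SP);  // safe: the slot is within the reserved range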
@@ -2063,8 +2081,8 @@
 #endif
   if (copy_sp) { z_lgr(old_sp, Z_SP); }
   if (bytes_with_inverted_sign) {
-    z_stg(old_sp, 0, bytes, Z_SP);
-    add2reg_with_index(Z_SP, 0, bytes, Z_SP);
+    z_agr(Z_SP, bytes);
+    z_stg(old_sp, 0, Z_SP);
   } else {
     z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster.
     z_stg(old_sp, 0, Z_SP);
@@ -2074,21 +2092,9 @@
 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
   long offset = Assembler::align(bytes, frame::alignment_in_bytes);
-  if (Displacement::is_validDisp(-offset)) {
-    // Minimize stalls by first using, then updating Z_SP.
-    // Do that only if we have ExtImm available.
-    z_stg(Z_SP, -offset, Z_SP);
-    add2reg(Z_SP, -offset);
-  } else {
-    if (scratch != Z_R0 && scratch != Z_R1) {
-      z_stg(Z_SP, -offset, Z_SP);
-      add2reg(Z_SP, -offset);
-    } else { // scratch == Z_R0 || scratch == Z_R1
-      z_lgr(scratch, Z_SP);
-      add2reg(Z_SP, -offset);
-      z_stg(scratch, 0, Z_SP);
-    }
-  }
+  z_lgr(scratch, Z_SP);
+  add2reg(Z_SP, -offset);
+  z_stg(scratch, 0, Z_SP);
   return offset;
 }
@@ -2106,6 +2112,20 @@
   Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
 }
 
+// Pop current C frame and restore return PC register (Z_R14).
+void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) {
+  BLOCK_COMMENT("pop_frame:");
+  int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes;
+  // If possible, pop frame by add instead of load (a penny saved is a penny got :-).
+  if (Displacement::is_validDisp(retPC_offset)) {
+    z_lg(Z_R14, retPC_offset, Z_SP);
+    add2reg(Z_SP, frame_size_in_bytes);
+  } else {
+    add2reg(Z_SP, frame_size_in_bytes);
+    restore_return_pc();
+  }
+}
+
 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
   if (allow_relocation) {
     call_c(entry_point);
--- old/src/cpu/s390/vm/macroAssembler_s390.hpp 2017-06-30 12:45:29.837981000 +0200
+++ new/src/cpu/s390/vm/macroAssembler_s390.hpp 2017-06-30 12:45:29.654992000 +0200
@@ -440,9 +440,21 @@
   // Get current PC + offset. Offset given in bytes, must be even!
   address get_PC(Register result, int64_t offset);
 
+  // Accessing, and in particular modifying, a stack location is only safe if
+  // the stack pointer (Z_SP) is set such that the accessed stack location is
+  // in the reserved range.
+  //
+  // From a performance point of view, it is desirable not to change the SP
+  // first and then immediately use it to access the freshly reserved space.
+  // That opens a small gap, though. If, just after storing some value (the
+  // frame pointer) into the to-be-reserved space, an interrupt is caught,
+  // the handler might use the space beyond Z_SP for its own purpose.
+  // If that happens, the stored value might get altered.
+
   // Resize current frame either relatively wrt to current SP or absolute.
   void resize_frame_sub(Register offset, Register fp, bool load_fp=true);
-  void resize_frame_absolute(Register addr, Register fp, bool load_fp=true);
+  void resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp);
+  void resize_frame_absolute(Register newSP, Register fp, bool load_fp);
   void resize_frame(RegisterOrConstant offset, Register fp, bool load_fp=true);
 
   // Push a frame of size bytes, if copy_sp is false, old_sp must already
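A possible use of the new resize_frame_abs_with_offset emitter, along the lines of the comment at its definition above. The register choice, the pre-computed pointer, and the extra 32 bytes of spill space are made-up values for illustration only, not taken from this change:

    Register new_top = Z_R2;  // pre-calculated pointer into the to-be-allocated stack space,
                              // e.g. already rounded down to some required alignment
    // Reserve everything from new_top downwards plus 32 extra bytes for spilling.
    // Z_R0 serves as temp for the frame pointer; load_fp == true makes the emitter load it.
    resize_frame_abs_with_offset(new_top, Z_R0, -32, true);
    // Now Z_SP == new_top - 32, and the callers_sp slot of the resized frame
    // holds the back chain carried over from the old frame.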
@@ -461,6 +473,8 @@
 
   // Pop current C frame.
   void pop_frame();
+  // Pop current C frame and restore return PC register (Z_R14).
+  void pop_frame_restore_retPC(int frame_size_in_bytes);
 
   //
   // Calls
--- old/src/cpu/s390/vm/s390.ad 2017-06-30 12:45:31.614003000 +0200
+++ new/src/cpu/s390/vm/s390.ad 2017-06-30 12:45:31.403001000 +0200
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2016 SAP SE. All rights reserved.
+// Copyright (c) 2017, SAP SE. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -910,16 +908,8 @@
   bool need_polling = do_polling() && C->is_method_compilation();
 
   // Pop frame, restore return_pc, and all stuff needed by interpreter.
-  // Pop frame by add instead of load (a penny saved is a penny got :-).
   int frame_size_in_bytes = Assembler::align((C->frame_slots() << LogBytesPerInt), frame::alignment_in_bytes);
-  int retPC_offset = frame_size_in_bytes + _z_abi16(return_pc);
-  if (Displacement::is_validDisp(retPC_offset)) {
-    __ z_lg(Z_R14, retPC_offset, Z_SP);
-    __ add2reg(Z_SP, frame_size_in_bytes);
-  } else {
-    __ add2reg(Z_SP, frame_size_in_bytes);
-    __ restore_return_pc();
-  }
+  __ pop_frame_restore_retPC(frame_size_in_bytes);
 
   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
     __ reserved_stack_check(Z_R14);
--- old/src/cpu/s390/vm/stubGenerator_s390.cpp 2017-06-30 12:45:33.721027000 +0200
+++ new/src/cpu/s390/vm/stubGenerator_s390.cpp 2017-06-30 12:45:33.547025000 +0200
@@ -291,7 +291,7 @@
     // Restore frame pointer.
     __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
     // Pop frame. Done here to minimize stalls.
-    __ z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
+    __ pop_frame();
 
     // Reload some volatile registers which we've spilled before the call
     // to frame manager / native entry.
@@ -1677,7 +1677,7 @@
   // Helper function which generates code to
   //   - load the function code in register fCode (== Z_R0)
-  //   - load the data block length (depends on cipher function) in register srclen if requested.
+  //   - load the data block length (depends on cipher function) into register srclen if requested.
   //   - is_decipher switches between cipher/decipher function codes
   //   - set_len requests (if true) loading the data block length in register srclen
   void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {
@@ -1689,6 +1689,7 @@
                                  && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
     // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
     __ z_cghi(keylen, 52);
+    __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
     if (!identical_dataBlk_len) {
       __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
     }
@@ -1706,6 +1707,7 @@
       __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
     }
     // __ z_brl(fCode_set);   // keyLen <  52: AES128          // fallthru
+    __ bind(fCode_set);
 
     if (identical_dataBlk_len) {
       __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
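For reference, a sketch of how the new pop_frame_restore_retPC is meant to pair with the existing frame setup helpers around a call-out. The 160-byte frame size (the minimum ABI frame on s390), the scratch register, and the use of save_return_pc (the counterpart of the restore_return_pc seen above) are illustrative assumptions, not part of this change:

    save_return_pc();                     // store Z_R14 at the return_pc offset of the current ABI area;
                                          // after push_frame this slot lies in the caller's frame
    unsigned int frame_size = push_frame(160, Z_R1);  // any scratch register works with the new code
    // ... emit the call to some C helper here (hypothetical) ...
    pop_frame_restore_retPC(frame_size);  // pop the frame and reload Z_R14 from that caller-side slot
    z_br(Z_R14);                          // return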