--- old/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	2016-01-29 14:28:15.185925558 +0100
+++ new/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	2016-01-29 14:28:15.117925561 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -114,15 +114,20 @@
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
                                            int* total_frame_words, bool verify_fpu, bool save_vectors) {
-  int vect_words = 0;
   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
+  int ymm_bytes = num_xmm_regs * 16;
+  int zmm_bytes = num_xmm_regs * 32;
 #ifdef COMPILER2
   if (save_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    // Save upper half of ZMM/YMM registers :
-    vect_words = 8 * 16 / wordSize;
-    additional_frame_words += vect_words;
+    // Save upper half of YMM registers
+    int vect_bytes = ymm_bytes;
+    if (UseAVX > 2) {
+      // Save upper half of ZMM registers as well
+      vect_bytes += zmm_bytes;
+    }
+    additional_frame_words += vect_bytes / wordSize;
   }
 #else
   assert(!save_vectors, "vectors are generated only by C2");
@@ -185,13 +190,14 @@
 
   off = xmm0_off;
   delta = xmm1_off - off;
-  if(UseSSE == 1) { // Save the XMM state
+  if(UseSSE == 1) {
+    // Save the XMM state
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movflt(Address(rsp, off*wordSize), as_XMMRegister(n));
       off += delta;
     }
   } else if(UseSSE >= 2) {
-    // Save whole 128bit (16 bytes) XMM regiters
+    // Save whole 128bit (16 bytes) XMM registers
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
       off += delta;
@@ -199,13 +205,14 @@
   }
 
   if (save_vectors) {
-    assert(vect_words*wordSize == 128, "");
-    __ subptr(rsp, 128); // Save upper half of YMM registes
+    __ subptr(rsp, ymm_bytes);
+    // Save upper half of YMM registers
     for (int n = 0; n < num_xmm_regs; n++) {
       __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
     }
     if (UseAVX > 2) {
-      __ subptr(rsp, 256); // Save upper half of ZMM registes
+      __ subptr(rsp, zmm_bytes);
+      // Save upper half of ZMM registers
       for (int n = 0; n < num_xmm_regs; n++) {
         __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
       }
@@ -255,50 +262,59 @@
 
 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
+  int ymm_bytes = num_xmm_regs * 16;
+  int zmm_bytes = num_xmm_regs * 32;
   // Recover XMM & FPU state
   int additional_frame_bytes = 0;
 #ifdef COMPILER2
   if (restore_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    additional_frame_bytes = 128;
+    // Save upper half of YMM registers
+    additional_frame_bytes = ymm_bytes;
+    if (UseAVX > 2) {
+      // Save upper half of ZMM registers as well
+      additional_frame_bytes += zmm_bytes;
+    }
   }
 #else
   assert(!restore_vectors, "vectors are generated only by C2");
 #endif
 
-  if (restore_vectors) {
-    assert(additional_frame_bytes == 128, "");
-    if (UseAVX > 2) {
-      // Restore upper half of ZMM registers.
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
-      }
-      __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes
-    }
-    // Restore upper half of YMM registes.
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
-    }
-    __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes
-  }
-
   int off = xmm0_off;
   int delta = xmm1_off - off;
 
   if (UseSSE == 1) {
+    // Restore XMM registers
+    assert(additional_frame_bytes == 0, "");
     for (int n = 0; n < num_xmm_regs; n++) {
      __ movflt(as_XMMRegister(n), Address(rsp, off*wordSize));
       off += delta;
     }
   } else if (UseSSE >= 2) {
-    // additional_frame_bytes only populated for the restore_vector case, else it is 0
+    // Restore whole 128bit (16 bytes) XMM registers. Do this before restoring YMM and
+    // ZMM because the movdqu instruction zeros the upper part of the XMM register.
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
       off += delta;
     }
   }
 
+  if (restore_vectors) {
+    if (UseAVX > 2) {
+      // Restore upper half of ZMM registers.
+      for (int n = 0; n < num_xmm_regs; n++) {
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
+      }
+      __ addptr(rsp, zmm_bytes);
+    }
+    // Restore upper half of YMM registers.
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
+    }
+    __ addptr(rsp, ymm_bytes);
+  }
+
   __ pop_FPU_state();
   __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
 
@@ -306,7 +322,6 @@
   __ popa();
   // Get the rbp, described implicitly by the frame sender code (no oopMap)
   __ pop(rbp);
-
 }
 
 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {