--- old/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2016-01-29 14:28:15.185925558 +0100
+++ new/src/cpu/x86/vm/sharedRuntime_x86_32.cpp 2016-01-29 14:28:15.117925561 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -114,15 +114,20 @@
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
                                            int* total_frame_words, bool verify_fpu, bool save_vectors) {
-  int vect_words = 0;
   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
+  int ymm_bytes = num_xmm_regs * 16;
+  int zmm_bytes = num_xmm_regs * 32;
 #ifdef COMPILER2
   if (save_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    // Save upper half of ZMM/YMM registers :
-    vect_words = 8 * 16 / wordSize;
-    additional_frame_words += vect_words;
+    // Save upper half of YMM registers
+    int vect_bytes = ymm_bytes;
+    if (UseAVX > 2) {
+      // Save upper half of ZMM registers as well
+      vect_bytes += zmm_bytes;
+    }
+    additional_frame_words += vect_bytes / wordSize;
   }
 #else
   assert(!save_vectors, "vectors are generated only by C2");
@@ -185,13 +190,14 @@
   off = xmm0_off;
   delta = xmm1_off - off;
-  if(UseSSE == 1) { // Save the XMM state
+  if(UseSSE == 1) {
+    // Save the XMM state
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movflt(Address(rsp, off*wordSize), as_XMMRegister(n));
       off += delta;
     }
   } else if(UseSSE >= 2) {
-    // Save whole 128bit (16 bytes) XMM regiters
+    // Save whole 128bit (16 bytes) XMM registers
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
       off += delta;
@@ -199,13 +205,14 @@
   }
   if (save_vectors) {
-    assert(vect_words*wordSize == 128, "");
-    __ subptr(rsp, 128); // Save upper half of YMM registes
+    __ subptr(rsp, ymm_bytes);
+    // Save upper half of YMM registers
     for (int n = 0; n < num_xmm_regs; n++) {
       __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
     }
     if (UseAVX > 2) {
-      __ subptr(rsp, 256); // Save upper half of ZMM registes
+      __ subptr(rsp, zmm_bytes);
+      // Save upper half of ZMM registers
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
      }
@@ -255,50 +262,59 @@
 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
+  int ymm_bytes = num_xmm_regs * 16;
+  int zmm_bytes = num_xmm_regs * 32;
   // Recover XMM & FPU state
   int additional_frame_bytes = 0;
 #ifdef COMPILER2
   if (restore_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    additional_frame_bytes = 128;
+    // Account for the saved upper half of the YMM registers
+    additional_frame_bytes = ymm_bytes;
+    if (UseAVX > 2) {
+      // Account for the saved upper half of the ZMM registers as well
+      additional_frame_bytes += zmm_bytes;
+    }
   }
 #else
   assert(!restore_vectors, "vectors are generated only by C2");
 #endif
-  if (restore_vectors) {
-    assert(additional_frame_bytes == 128, "");
-    if (UseAVX > 2) {
-      // Restore upper half of ZMM registers.
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
-      }
-      __ addptr(rsp, additional_frame_bytes*2); // Save upper half of ZMM registes
-    }
-    // Restore upper half of YMM registes.
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
-    }
-    __ addptr(rsp, additional_frame_bytes); // Save upper half of YMM registes
-  }
-
   int off = xmm0_off;
   int delta = xmm1_off - off;
   if (UseSSE == 1) {
+    // Restore XMM registers
+    assert(additional_frame_bytes == 0, "");
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movflt(as_XMMRegister(n), Address(rsp, off*wordSize));
       off += delta;
     }
   } else if (UseSSE >= 2) {
-    // additional_frame_bytes only populated for the restore_vector case, else it is 0
+    // Restore whole 128bit (16 bytes) XMM registers. Do this before restoring YMM and
+    // ZMM because the movdqu instruction zeros the upper part of the XMM register.
     for (int n = 0; n < num_xmm_regs; n++) {
       __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
       off += delta;
     }
   }
+  if (restore_vectors) {
+    if (UseAVX > 2) {
+      // Restore upper half of ZMM registers.
+      for (int n = 0; n < num_xmm_regs; n++) {
+        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
+      }
+      __ addptr(rsp, zmm_bytes);
+    }
+    // Restore upper half of YMM registers.
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
+    }
+    __ addptr(rsp, ymm_bytes);
+  }
+
   __ pop_FPU_state();
   __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
@@ -306,7 +322,6 @@
   __ popa();
   // Get the rbp, described implicitly by the frame sender code (no oopMap)
   __ pop(rbp);
-
 }
 
 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
--- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-01-29 14:28:15.569925540 +0100
+++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-01-29 14:28:15.489925544 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -176,18 +176,18 @@
   // push cpu state handles this on EVEX enabled targets
   if (save_vectors) {
-    // Save upper half of YMM registes(0..15)
+    // Save upper half of YMM registers (0..15)
     int base_addr = XSAVE_AREA_YMM_BEGIN;
     for (int n = 0; n < 16; n++) {
       __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
     }
     if (VM_Version::supports_evex()) {
-      // Save upper half of ZMM registes(0..15)
+      // Save upper half of ZMM registers (0..15)
       base_addr = XSAVE_AREA_ZMM_BEGIN;
       for (int n = 0; n < 16; n++) {
         __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
       }
-      // Save full ZMM registes(16..num_xmm_regs)
+      // Save full ZMM registers (16..num_xmm_regs)
       base_addr = XSAVE_AREA_UPPERBANK;
       off = 0;
       int vector_len = Assembler::AVX_512bit;
@@ -330,18 +330,18 @@
   // On EVEX enabled targets everything is handled in pop fpu state
   if (restore_vectors) {
-    // Restore upper half of YMM registes (0..15)
+    // Restore upper half of YMM registers (0..15)
     int base_addr = XSAVE_AREA_YMM_BEGIN;
     for (int n = 0; n < 16; n++) {
       __ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16));
     }
     if (VM_Version::supports_evex()) {
-      // Restore upper half of ZMM registes (0..15)
+      // Restore upper half of ZMM registers (0..15)
       base_addr = XSAVE_AREA_ZMM_BEGIN;
       for (int n = 0; n < 16; n++) {
         __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
       }
-      // Restore full ZMM registes(16..num_xmm_regs)
+      // Restore full ZMM registers (16..num_xmm_regs)
       base_addr = XSAVE_AREA_UPPERBANK;
       int vector_len = Assembler::AVX_512bit;
       int off = 0;
@@ -351,7 +351,7 @@
     }
   } else {
     if (VM_Version::supports_evex()) {
-      // Restore upper bank of ZMM registes(16..31) for double/float usage
+      // Restore upper bank of ZMM registers (16..31) for double/float usage
       int base_addr = XSAVE_AREA_UPPERBANK;
       int off = 0;
       for (int n = 16; n < num_xmm_regs; n++) {
--- /dev/null 2016-01-29 08:34:07.103350968 +0100
+++ new/test/compiler/runtime/safepoints/TestRegisterRestoring.java 2016-01-29 14:28:15.821925528 +0100
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+/**
+ * @test
+ * @bug 8148490
+ * @summary Test correct saving and restoring of vector registers at safepoints.
+ * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileCommand=exclude,TestRegisterRestoring::main -XX:+SafepointALot TestRegisterRestoring
+ */
+public class TestRegisterRestoring {
+  public static void main(String args[]) throws Exception {
+    // Initialize
+    float[] array = new float[100];
+    for (int i = 0; i < array.length; ++i) {
+      array[i] = 0;
+    }
+    // Test
+    for (int j = 0; j < 20_000; ++j) {
+      increment(array);
+      // Check result
+      for (int i = 0; i < array.length; i++) {
+        if (array[i] != 10_000) {
+          throw new RuntimeException("Test failed: array[" + i + "] = " + array[i] + " but should be 10,000");
+        }
+        array[i] = 0;
+      }
+    }
+  }
+
+  static void increment(float[] array) {
+    // Loop with safepoint
+    for (long l = 0; l < 10_000; l++) {
+      // Vectorized loop
+      for (int i = 0; i < array.length; ++i) {
+        array[i] += 1;
+      }
+    }
+  }
+}
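
For reference, the vector save-area sizing that the 32-bit changes introduce can be checked in isolation. The following is a minimal standalone sketch, not HotSpot code: num_xmm_regs = 8 and wordSize = 4 are assumptions matching a 32-bit x86 VM, and use_avx512 stands in for the UseAVX > 2 check.

#include <cstdio>

int main() {
  const int num_xmm_regs = 8;                 // assumed: XMM0..XMM7 on x86_32
  const int wordSize     = 4;                 // assumed: 4-byte word in a 32-bit VM
  const int ymm_bytes    = num_xmm_regs * 16; // upper 128 bits of each YMM register
  const int zmm_bytes    = num_xmm_regs * 32; // upper 256 bits of each ZMM register

  bool use_avx512 = true;                     // assumed stand-in for UseAVX > 2
  int vect_bytes = ymm_bytes;                 // YMM upper halves are always saved
  if (use_avx512) {
    vect_bytes += zmm_bytes;                  // ZMM upper halves are saved as well
  }
  // 128 + 256 = 384 bytes, i.e. 96 additional 32-bit frame words
  printf("additional frame words: %d\n", vect_bytes / wordSize);
  return 0;
}

The restore path walks this layout in reverse: the 128-bit XMM state is reloaded first (at offset additional_frame_bytes), then the ZMM and YMM upper halves are popped, because a movdqu issued with AVX enabled zeros the upper bits of the destination register. The new jtreg test exercises this path end-to-end by running a vectorized loop under -XX:+SafepointALot.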