--- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2017-04-20 13:47:18.276591000 -0700 +++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2017-04-20 13:47:17.816545000 -0700 @@ -47,6 +47,7 @@ #if INCLUDE_JVMCI #include "jvmci/jvmciJavaClasses.hpp" #endif +#include "vm_version_x86.hpp" #define __ masm-> @@ -151,8 +152,8 @@ } #if defined(COMPILER2) || INCLUDE_JVMCI if (save_vectors) { - assert(UseAVX > 0, "up to 512bit vectors are supported with EVEX"); - assert(MaxVectorSize <= 64, "up to 512bit vectors are supported now"); + assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); + assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); } #else assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); @@ -206,6 +207,7 @@ } } } + __ vzeroupper(); if (frame::arg_reg_save_area_bytes != 0) { // Allocate argument register save area __ subptr(rsp, frame::arg_reg_save_area_bytes); @@ -322,13 +324,15 @@ #if defined(COMPILER2) || INCLUDE_JVMCI if (restore_vectors) { - assert(UseAVX > 0, "up to 512bit vectors are supported with EVEX"); - assert(MaxVectorSize <= 64, "up to 512bit vectors are supported now"); + assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); + assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); } #else assert(!restore_vectors, "vectors are generated only by C2"); #endif + __ vzeroupper(); + // On EVEX enabled targets everything is handled in pop fpu state if (restore_vectors) { // Restore upper half of YMM registers (0..15) @@ -528,7 +532,7 @@ // align stack so push_CPU_state doesn't fault __ andptr(rsp, -(StackAlignmentInBytes)); __ push_CPU_state(); - + __ vzeroupper(); // VM needs caller's callsite // VM needs target method // This needs to be a long call since we will relocate this adapter to @@ -547,6 +551,7 @@ __ addptr(rsp, frame::arg_reg_save_area_bytes); } + __ vzeroupper(); __ pop_CPU_state(); // restore sp __ mov(rsp, r13); @@ -1465,7 +1470,6 @@ save_or_restore_arguments(masm, stack_slots, total_in_args, arg_save_area, NULL, in_regs, in_sig_bt); - __ bind(cont); #ifdef ASSERT if (StressCriticalJNINatives) { @@ -2485,6 +2489,7 @@ // preserved and correspond to the bcp/locals pointers. So we do a runtime call // by hand. // + __ vzeroupper(); save_native_result(masm, ret_type, stack_slots); __ mov(c_rarg0, r15_thread); __ mov(r12, rsp); // remember sp @@ -2661,7 +2666,7 @@ // If we haven't already saved the native result we must save it now as xmm registers // are still exposed. - + __ vzeroupper(); if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { save_native_result(masm, ret_type, stack_slots); } @@ -2707,6 +2712,7 @@ // SLOW PATH Reguard the stack if needed __ bind(reguard); + __ vzeroupper(); save_native_result(masm, ret_type, stack_slots); __ mov(r12, rsp); // remember sp __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows