
src/cpu/x86/vm/x86.ad

*** 57,130 ****
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
! // XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
! // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
! // Windows ABI:   XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
#ifdef _WIN64
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
--- 57,182 ----
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
! // XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
! // For pre EVEX enabled architectures:
! //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
! // For EVEX enabled architectures:
! //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
! //
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
! // Windows ABI:   XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
+ reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
+ reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
+ reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
+ reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
+ reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
+ reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
+ reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
+ reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
+ reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
+ reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
+ reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
+ reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
+ reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
+ reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
+ reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
+ reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
+ reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
+ reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
+ reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
+ reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
+ reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
+ reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
+ reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
+ reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
+ reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
+ reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
+ reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
+ reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
+ reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
+ reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
+ reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
+ reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
+ reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
+ reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
+ reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
+ reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
+ reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
+ reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
+ reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
+ reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
+ reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
+ reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
+ reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
+ reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
+ reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
+ reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
+ reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
+ reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
#ifdef _WIN64
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
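As the hunk above shows, the patch widens each XMM register from eight to sixteen 32-bit VMReg slots, named by the suffixes (a)-(p) and chained with ->next(k). A minimal sketch of the slot arithmetic this implies, assuming slots are allocated contiguously per register as the chains suggest (the helper name is hypothetical, not HotSpot API):

    #include <cassert>

    // Sketch only: word k of register n lives at flat slot n*16 + k,
    // where the bare name XMMn is word 0 and suffixes b..p are words 1..15.
    int xmm_slot(int reg, int word) {
      assert(0 <= reg && reg < 32);
      assert(0 <= word && word < 16);
      return reg * 16 + word;  // e.g. XMM2d -> xmm_slot(2, 3) == 35
    }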
*** 132,222 **** --- 184,625 ---- reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3)); reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4)); reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5)); reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6)); reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7)); + reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8)); + reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9)); + reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10)); + reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11)); + reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12)); + reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13)); + reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14)); + reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15)); reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1)); reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2)); reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3)); reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4)); reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5)); reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6)); reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7)); + reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8)); + reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9)); + reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10)); + reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11)); + reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12)); + reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13)); + reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14)); + reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15)); reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1)); reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2)); reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3)); reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4)); reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5)); reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6)); reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7)); + reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8)); + reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9)); + reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10)); + reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11)); + reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12)); + reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13)); + reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14)); + reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15)); reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1)); reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2)); reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3)); reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4)); reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5)); reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6)); reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7)); + reg_def XMM9i( SOC, SOE, Op_RegF, 
9, xmm9->as_VMReg()->next(8)); + reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9)); + reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10)); + reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11)); + reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12)); + reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13)); + reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14)); + reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15)); reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1)); reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2)); reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3)); reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4)); reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5)); reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6)); reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7)); + reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8)); + reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9)); + reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10)); + reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11)); + reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12)); + reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13)); + reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14)); + reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15)); reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1)); reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2)); reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3)); reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4)); reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5)); reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6)); reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7)); + reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8)); + reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9)); + reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10)); + reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11)); + reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12)); + reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13)); + reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14)); + reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15)); reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1)); reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2)); reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3)); reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4)); reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5)); reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6)); reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7)); + reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8)); + reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9)); + reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10)); + reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11)); + reg_def XMM12m( SOC, SOE, 
Op_RegF, 12, xmm12->as_VMReg()->next(12)); + reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13)); + reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14)); + reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15)); reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1)); reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2)); reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3)); reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4)); reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5)); reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6)); reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7)); + reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8)); + reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9)); + reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10)); + reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11)); + reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12)); + reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13)); + reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14)); + reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15)); reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1)); reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2)); reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3)); reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4)); reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5)); reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6)); reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7)); + reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8)); + reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9)); + reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10)); + reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11)); + reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12)); + reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13)); + reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14)); + reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15)); reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1)); reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2)); reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3)); reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4)); reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5)); reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6)); reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7)); + reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8)); + reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9)); + reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10)); + reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11)); + reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12)); + reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13)); + reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14)); + reg_def XMM15p( SOC, SOE, Op_RegF, 15, 
xmm15->as_VMReg()->next(15));
+
+ reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
+ reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
+ reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
+ reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
+ reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
+ reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
+ reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
+ reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
+ reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
+ reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
+ reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
+ reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
+ reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
+ reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
+ reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
+ reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
+
+ reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
+ reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
+ reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
+ reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
+ reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
+ reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
+ reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
+ reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
+ reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
+ reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
+ reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
+ reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
+ reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
+ reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
+ reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
+ reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
+
+ reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
+ reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
+ reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
+ reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
+ reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
+ reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
+ reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
+ reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
+ reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
+ reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
+ reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
+ reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
+ reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
+ reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
+ reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
+ reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
+
+ reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
+ reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
+ reg_def XMM19c( SOC, SOE,
Op_RegF, 19, xmm19->as_VMReg()->next(2)); + reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3)); + reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4)); + reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5)); + reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6)); + reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7)); + reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8)); + reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9)); + reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10)); + reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11)); + reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12)); + reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13)); + reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14)); + reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15)); + + reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()); + reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1)); + reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2)); + reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3)); + reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4)); + reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5)); + reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6)); + reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7)); + reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8)); + reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9)); + reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10)); + reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11)); + reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12)); + reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13)); + reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14)); + reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15)); + + reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()); + reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1)); + reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2)); + reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3)); + reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4)); + reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5)); + reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6)); + reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7)); + reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8)); + reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9)); + reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10)); + reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11)); + reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12)); + reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13)); + reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14)); + reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15)); + + reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()); + reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1)); + reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2)); + reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3)); + reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4)); + reg_def 
XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5)); + reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6)); + reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7)); + reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8)); + reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9)); + reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10)); + reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11)); + reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12)); + reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13)); + reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14)); + reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15)); + + reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()); + reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1)); + reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2)); + reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3)); + reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4)); + reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5)); + reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6)); + reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7)); + reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8)); + reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9)); + reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10)); + reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11)); + reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12)); + reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13)); + reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14)); + reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15)); + + reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()); + reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1)); + reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2)); + reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3)); + reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4)); + reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5)); + reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6)); + reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7)); + reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8)); + reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9)); + reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10)); + reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11)); + reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12)); + reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13)); + reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14)); + reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15)); + + reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()); + reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1)); + reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2)); + reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3)); + reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4)); + reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5)); + reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6)); + reg_def XMM25h( SOC, SOE, Op_RegF, 25, 
xmm25->as_VMReg()->next(7));
+ reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
+ reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
+ reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
+ reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
+ reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
+ reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
+ reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
+ reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
+
+ reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
+ reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
+ reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
+ reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
+ reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
+ reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
+ reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
+ reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
+ reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
+ reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
+ reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
+ reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
+ reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
+ reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
+ reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
+ reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
+
+ reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
+ reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
+ reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
+ reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
+ reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
+ reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
+ reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
+ reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
+ reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
+ reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
+ reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
+ reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
+ reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
+ reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
+ reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
+ reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
+
+ reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
+ reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
+ reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
+ reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
+ reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
+ reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
+ reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
+ reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
+ reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
+ reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
+ reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
+ reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
+ reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
+ reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
+ reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
+ reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
+
+ reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
+ reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
+ reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
+ reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
+ reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
+ reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
+ reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
+ reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
+ reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
+ reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
+ reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
+ reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
+ reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
+ reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
+ reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
+ reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
+
+ reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
+ reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
+ reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
+ reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
+ reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
+ reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
+ reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
+ reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
+ reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
+ reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
+ reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
+ reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
+ reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
+ reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
+ reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
+ reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
+
+ reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
+ reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
+ reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
+ reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
+ reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
+ reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
+ reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
+ reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
+ reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
+ reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
+ reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
+ reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
+ reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
+ reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
+ reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
+ reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
#else // _WIN64
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
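Note the save-rule split around the #ifdef _WIN64 above: the Windows branch defines XMM6-XMM31 as SOE (save-on-entry, i.e. callee-saved) while the #else branch defines the same registers SOC (save-on-call, caller-saved), matching the Windows versus Linux ABI notes in the header comment. A hedged illustration of that split; the enum and helper names here are hypothetical, not part of the ad file:

    // Illustration only: which save rule the definitions assign per platform.
    enum SaveRule { SOC, SOE };  // save-on-call (caller) vs save-on-entry (callee)

    SaveRule xmm_save_rule(int reg, bool win64) {
      // The _WIN64 branch marks XMM6 and up SOE; elsewhere no XMM register
      // is preserved across calls, so everything stays SOC.
      return (win64 && reg >= 6) ? SOE : SOC;
    }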
*** 224,242 **** --- 627,661 ---- reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); + reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); + reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); + reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); + reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); + reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); + reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); + reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); + reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); + reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); + reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); + reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); + reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); + reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); + reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); + reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); + reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); #ifdef _LP64 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
*** 244,316 **** --- 663,1071 ---- reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); + reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); + reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); + reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); + reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); + reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); + reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); + reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); + reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); + reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); + reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); + reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); + reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); + reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); + reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); + reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); + reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); + reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); + reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); + reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); + reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); + reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); + reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); + reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); + reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); reg_def XMM11h( SOC, SOC, 
Op_RegF, 11, xmm11->as_VMReg()->next(7)); + reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); + reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); + reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); + reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); + reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); + reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); + reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); + reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); + reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); + reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); + reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); + reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); + reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); + reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); + reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); + reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); + reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); + reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); + reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); + reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); + reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); + reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); + reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); + reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); + reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); + reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); + reg_def XMM14k( SOC, SOC, Op_RegF, 14, 
xmm14->as_VMReg()->next(10)); + reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); + reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); + reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); + reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); + reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); + reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); + reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); + reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); + reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); + reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); + reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); + reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); + reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); + + reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); + reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); + reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); + reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); + reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); + reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); + reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); + reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); + reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); + reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); + reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); + reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); + reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); + reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); + reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); + reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); + + reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); + reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); + reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); + reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); + reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); + reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); + reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); + reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); + reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); + reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); + reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); + reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); + reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); + reg_def XMM17n( SOC, SOC, Op_RegF, 17, 
xmm17->as_VMReg()->next(13)); + reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); + reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); + + reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); + reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); + reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); + reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); + reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); + reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); + reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); + reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); + reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); + reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); + reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); + reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); + reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); + reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); + reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); + reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); + + reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); + reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); + reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); + reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); + reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); + reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); + reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); + reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); + reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); + reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); + reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); + reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); + reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); + reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); + reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); + reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); + + reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); + reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); + reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); + reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); + reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); + reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); + reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); + reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); + reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); + reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); + reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); + reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); + reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); + reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); + reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); + reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); + + reg_def XMM21 ( SOC, 
SOC, Op_RegF, 21, xmm21->as_VMReg()); + reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); + reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); + reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); + reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); + reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); + reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); + reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); + reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); + reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); + reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); + reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); + reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); + reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); + reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); + reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); + + reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); + reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); + reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); + reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); + reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); + reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); + reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); + reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); + reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); + reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); + reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); + reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); + reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); + reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); + reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); + reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); + + reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); + reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); + reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); + reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); + reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); + reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); + reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); + reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); + reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); + reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); + reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); + reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); + reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); + reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); + reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); + reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); + + reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); + reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); + reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); + reg_def XMM24d( 
SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); + reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); + reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); + reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); + reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); + reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); + reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); + reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); + reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); + reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); + reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); + reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); + reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); + + reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); + reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); + reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); + reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); + reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); + reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); + reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); + reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); + reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); + reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); + reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); + reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); + reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); + reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); + reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); + reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); + + reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); + reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); + reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); + reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); + reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); + reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); + reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); + reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); + reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); + reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); + reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); + reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); + reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); + reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); + reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); + reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); + + reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); + reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); + reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); + reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); + reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); + reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); + 
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); + reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); + reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); + reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); + reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); + reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); + reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); + reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); + reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); + reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); + + reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); + reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); + reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); + reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); + reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); + reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); + reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); + reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); + reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); + reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); + reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); + reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); + reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); + reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); + reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); + reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); + + reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); + reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); + reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); + reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); + reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); + reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); + reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); + reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); + reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); + reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); + reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); + reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); + reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); + reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); + reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); + reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); + + reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); + reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); + reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); + reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); + reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); + reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); + reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); + reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); + reg_def XMM30i( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(8)); + reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); + reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); + reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); + reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); + reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); + reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); + reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); + + reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); + reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); + reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); + reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); + reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); + reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); + reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); + reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); + reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); + reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); + reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); + reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); + reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); + reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); + reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); + reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); #endif // _LP64 #endif // _WIN64
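Each reg_def above names one 32-bit VMReg slot, so the a..p suffixes enumerate the sixteen slots of a 512-bit zmm register; the first four slots cover the xmm view and the first eight the ymm view. A minimal standalone sketch of that slot arithmetic in plain C++ (the helper name is invented for illustration; this is not HotSpot code):

#include <cstdio>

// One reg_def covers one 32-bit slot, so a register that is W bits wide
// contributes W/32 consecutive slots: base, next(1), ..., next(W/32 - 1).
static int slots_for_width(int bits) { return bits / 32; }

int main() {
  printf("xmm (128-bit): %d slots (a..d)\n", slots_for_width(128));
  printf("ymm (256-bit): %d slots (a..h)\n", slots_for_width(256));
  printf("zmm (512-bit): %d slots (a..p)\n", slots_for_width(512));
  return 0;
}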
*** 318,355 **** reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); #else reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); #endif // _LP64 ! alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, ! XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, ! XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, ! XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, ! XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, ! XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, ! XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, ! XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h #ifdef _LP64 ! ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, ! XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, ! XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, ! XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, ! XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, ! XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, ! XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, ! XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h #endif ); // flags allocation class should be last. ! alloc_class chunk2(RFLAGS); // Singleton class for condition codes reg_class int_flags(RFLAGS); ! // Class for all float registers ! reg_class float_reg(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, --- 1073,1155 ---- reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); #else reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); #endif // _LP64 ! reg_def K0( SOC, SOC, Op_RegK, 0, k0->as_VMReg()); ! reg_def K1( SOC, SOC, Op_RegK, 1, k1->as_VMReg()); ! reg_def K2( SOC, SOC, Op_RegK, 2, k2->as_VMReg()); ! reg_def K3( SOC, SOC, Op_RegK, 3, k3->as_VMReg()); ! reg_def K4( SOC, SOC, Op_RegK, 4, k4->as_VMReg()); ! reg_def K5( SOC, SOC, Op_RegK, 5, k5->as_VMReg()); ! reg_def K6( SOC, SOC, Op_RegK, 6, k6->as_VMReg()); ! reg_def K7( SOC, SOC, Op_RegK, 7, k7->as_VMReg()); ! ! alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, ! XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, ! XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, ! XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, ! XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, ! XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, ! XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, ! XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p #ifdef _LP64 ! ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, ! XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, ! XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, ! XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, ! XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, ! 
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, ! XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, ! XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p ! ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, ! XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, ! XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, ! XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, ! XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, ! XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, ! XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, ! XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, ! XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, ! XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, ! XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, ! XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, ! XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, ! XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, ! XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, ! XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p #endif ); + // kreg allocation class comes after the vector registers. + alloc_class chunk2(K0, + K1, + K2, + K3, + K4, + K5, + K6, + K7); + + // Class for all mask registers + reg_class mask_reg(K0, + K1, + K2, + K3, + K4, + K5, + K6, + K7); + // flags allocation class should be last. ! alloc_class chunk3(RFLAGS); // Singleton class for condition codes reg_class int_flags(RFLAGS); ! // Class for pre evex float registers ! reg_class float_reg_legacy(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5,
*** 365,376 **** XMM14, XMM15 #endif ); ! // Class for all double registers ! reg_class double_reg(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b, --- 1165,1215 ---- XMM14, XMM15 #endif ); ! // Class for evex float registers ! reg_class float_reg_evex(XMM0, ! XMM1, ! XMM2, ! XMM3, ! XMM4, ! XMM5, ! XMM6, ! XMM7 ! #ifdef _LP64 ! ,XMM8, ! XMM9, ! XMM10, ! XMM11, ! XMM12, ! XMM13, ! XMM14, ! XMM15, ! XMM16, ! XMM17, ! XMM18, ! XMM19, ! XMM20, ! XMM21, ! XMM22, ! XMM23, ! XMM24, ! XMM25, ! XMM26, ! XMM27, ! XMM28, ! XMM29, ! XMM30, ! XMM31 ! #endif ! ); ! ! reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); ! ! // Class for pre evex double registers ! reg_class double_reg_legacy(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b,
*** 386,397 **** XMM14, XMM14b, XMM15, XMM15b #endif ); ! // Class for all 32bit vector registers ! reg_class vectors_reg(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, --- 1225,1275 ---- XMM14, XMM14b, XMM15, XMM15b #endif ); ! // Class for evex double registers ! reg_class double_reg_evex(XMM0, XMM0b, ! XMM1, XMM1b, ! XMM2, XMM2b, ! XMM3, XMM3b, ! XMM4, XMM4b, ! XMM5, XMM5b, ! XMM6, XMM6b, ! XMM7, XMM7b ! #ifdef _LP64 ! ,XMM8, XMM8b, ! XMM9, XMM9b, ! XMM10, XMM10b, ! XMM11, XMM11b, ! XMM12, XMM12b, ! XMM13, XMM13b, ! XMM14, XMM14b, ! XMM15, XMM15b, ! XMM16, XMM16b, ! XMM17, XMM17b, ! XMM18, XMM18b, ! XMM19, XMM19b, ! XMM20, XMM20b, ! XMM21, XMM21b, ! XMM22, XMM22b, ! XMM23, XMM23b, ! XMM24, XMM24b, ! XMM25, XMM25b, ! XMM26, XMM26b, ! XMM27, XMM27b, ! XMM28, XMM28b, ! XMM29, XMM29b, ! XMM30, XMM30b, ! XMM31, XMM31b ! #endif ! ); ! ! reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); ! ! // Class for pre evex 32bit vector registers ! reg_class vectors_reg_legacy(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5,
*** 407,418 **** XMM14, XMM15 #endif ); // Class for all 64bit vector registers ! reg_class vectord_reg(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b, --- 1285,1335 ---- XMM14, XMM15 #endif ); + // Class for evex 32bit vector registers + reg_class vectors_reg_evex(XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7 + #ifdef _LP64 + ,XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15, + XMM16, + XMM17, + XMM18, + XMM19, + XMM20, + XMM21, + XMM22, + XMM23, + XMM24, + XMM25, + XMM26, + XMM27, + XMM28, + XMM29, + XMM30, + XMM31 + #endif + ); + + reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); + // Class for all 64bit vector registers ! reg_class vectord_reg_legacy(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b,
*** 428,439 **** XMM14, XMM14b, XMM15, XMM15b #endif ); // Class for all 128bit vector registers ! reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM1, XMM1b, XMM1c, XMM1d, XMM2, XMM2b, XMM2c, XMM2d, XMM3, XMM3b, XMM3c, XMM3d, XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, XMM5c, XMM5d, --- 1345,1395 ---- XMM14, XMM14b, XMM15, XMM15b #endif ); + // Class for all 64bit vector registers + reg_class vectord_reg_evex(XMM0, XMM0b, + XMM1, XMM1b, + XMM2, XMM2b, + XMM3, XMM3b, + XMM4, XMM4b, + XMM5, XMM5b, + XMM6, XMM6b, + XMM7, XMM7b + #ifdef _LP64 + ,XMM8, XMM8b, + XMM9, XMM9b, + XMM10, XMM10b, + XMM11, XMM11b, + XMM12, XMM12b, + XMM13, XMM13b, + XMM14, XMM14b, + XMM15, XMM15b, + XMM16, XMM16b, + XMM17, XMM17b, + XMM18, XMM18b, + XMM19, XMM19b, + XMM20, XMM20b, + XMM21, XMM21b, + XMM22, XMM22b, + XMM23, XMM23b, + XMM24, XMM24b, + XMM25, XMM25b, + XMM26, XMM26b, + XMM27, XMM27b, + XMM28, XMM28b, + XMM29, XMM29b, + XMM30, XMM30b, + XMM31, XMM31b + #endif + ); + + reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); + // Class for all 128bit vector registers ! reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM1, XMM1b, XMM1c, XMM1d, XMM2, XMM2b, XMM2c, XMM2d, XMM3, XMM3b, XMM3c, XMM3d, XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, XMM5c, XMM5d,
*** 449,460 **** XMM14, XMM14b, XMM14c, XMM14d, XMM15, XMM15b, XMM15c, XMM15d #endif ); // Class for all 256bit vector registers ! reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, --- 1405,1455 ---- XMM14, XMM14b, XMM14c, XMM14d, XMM15, XMM15b, XMM15c, XMM15d #endif ); + // Class for all 128bit vector registers + reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, + XMM1, XMM1b, XMM1c, XMM1d, + XMM2, XMM2b, XMM2c, XMM2d, + XMM3, XMM3b, XMM3c, XMM3d, + XMM4, XMM4b, XMM4c, XMM4d, + XMM5, XMM5b, XMM5c, XMM5d, + XMM6, XMM6b, XMM6c, XMM6d, + XMM7, XMM7b, XMM7c, XMM7d + #ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, + XMM9, XMM9b, XMM9c, XMM9d, + XMM10, XMM10b, XMM10c, XMM10d, + XMM11, XMM11b, XMM11c, XMM11d, + XMM12, XMM12b, XMM12c, XMM12d, + XMM13, XMM13b, XMM13c, XMM13d, + XMM14, XMM14b, XMM14c, XMM14d, + XMM15, XMM15b, XMM15c, XMM15d, + XMM16, XMM16b, XMM16c, XMM16d, + XMM17, XMM17b, XMM17c, XMM17d, + XMM18, XMM18b, XMM18c, XMM18d, + XMM19, XMM19b, XMM19c, XMM19d, + XMM20, XMM20b, XMM20c, XMM20d, + XMM21, XMM21b, XMM21c, XMM21d, + XMM22, XMM22b, XMM22c, XMM22d, + XMM23, XMM23b, XMM23c, XMM23d, + XMM24, XMM24b, XMM24c, XMM24d, + XMM25, XMM25b, XMM25c, XMM25d, + XMM26, XMM26b, XMM26c, XMM26d, + XMM27, XMM27b, XMM27c, XMM27d, + XMM28, XMM28b, XMM28c, XMM28d, + XMM29, XMM29b, XMM29c, XMM29d, + XMM30, XMM30b, XMM30c, XMM30d, + XMM31, XMM31b, XMM31c, XMM31d + #endif + ); + + reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); + // Class for all 256bit vector registers ! reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
*** 470,479 **** --- 1465,1550 ---- XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h #endif ); + // Class for all 256bit vector registers + reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, + XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, + XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, + XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, + XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, + XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, + XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h + #ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, + XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, + XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, + XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, + XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, + XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, + XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, + XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, + XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, + XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, + XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, + XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, + XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, + XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, + XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, + XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, + XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, + XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, + XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, + XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, + XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, + XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, + XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, + XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h + #endif + ); + + reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); + + // Class for all 512bit vector registers + reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, + XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, + XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, + XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, + XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, + XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, + XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p + #ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, + XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, 
XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, + XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, + XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, + XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, + XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, + XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, + XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p + ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, + XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, + XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, + XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, + XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, + XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, + XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, + XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, + XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, + XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, + XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, + XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, + XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, + XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, + XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, + XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p + #endif + ); + %} //----------SOURCE BLOCK------------------------------------------------------- // This is a block of C++ code which provides values, functions, and
*** 621,630 **** --- 1692,1703 ---- break; case Op_MulVI: if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX return false; break; + case Op_MulVL: + case Op_MulReductionVL: case Op_AddReductionVL: if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here return false; case Op_AddReductionVI: if (UseSSE < 3) // requires at least SSE3
*** 637,646 **** --- 1710,1723 ---- case Op_MulReductionVF: case Op_MulReductionVD: if (UseSSE < 1) // requires at least SSE return false; break; + case Op_MoveK: + if (!VM_Version::supports_evex()) + return false; + break; case Op_CompareAndSwapL: #ifdef _LP64 case Op_CompareAndSwapP: #endif if (!VM_Version::supports_cx8())
*** 655,668 **** const int Matcher::vector_width_in_bytes(BasicType bt) { assert(is_java_primitive(bt), "only primitive type vectors"); if (UseSSE < 2) return 0; // SSE2 supports 128bit vectors for all types. // AVX2 supports 256bit vectors for all types. ! int size = (UseAVX > 1) ? 32 : 16; // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) ! size = 32; // Use flag to limit vector size. size = MIN2(size,(int)MaxVectorSize); // Minimum 2 values in vector (or 4 for bytes). switch (bt) { case T_DOUBLE: --- 1732,1746 ---- const int Matcher::vector_width_in_bytes(BasicType bt) { assert(is_java_primitive(bt), "only primitive type vectors"); if (UseSSE < 2) return 0; // SSE2 supports 128bit vectors for all types. // AVX2 supports 256bit vectors for all types. ! // EVEX supports 512bit vectors for all types. ! int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) ! size = (UseAVX > 2) ? 64 : 32; // Use flag to limit vector size. size = MIN2(size,(int)MaxVectorSize); // Minimum 2 values in vector (or 4 for bytes). switch (bt) { case T_DOUBLE:
*** 700,709 **** --- 1778,1788 ---- switch(size) { case 4: return Op_VecS; case 8: return Op_VecD; case 16: return Op_VecX; case 32: return Op_VecY; + case 64: return Op_VecZ; } ShouldNotReachHere(); return 0; }
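The two hunks above decide vector width from the CPU flags and map that width to an ideal register type: with UseAVX > 1 the width becomes (1 << UseAVX) * 8 bytes, capped by MaxVectorSize, and a 64-byte width now selects the new Op_VecZ. A small self-contained check of that arithmetic in plain C++ (a sketch mirroring the expressions above, not HotSpot code):

#include <algorithm>
#include <cassert>

// Width selection as in the patched vector_width_in_bytes():
// UseAVX==2 -> 32 bytes (AVX2), UseAVX==3 -> 64 bytes (EVEX),
// always capped by MaxVectorSize.
static int width_in_bytes(int use_avx, int max_vector_size) {
  int size = (use_avx > 1) ? (1 << use_avx) * 8 : 16;
  return std::min(size, max_vector_size);
}

int main() {
  assert(width_in_bytes(2, 64) == 32);  // AVX2 stays at 256-bit vectors
  assert(width_in_bytes(3, 64) == 64);  // EVEX reaches 512-bit vectors
  assert(width_in_bytes(3, 32) == 32);  // MaxVectorSize still limits it
  return 0;
}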
*** 743,752 **** --- 1822,1834 ---- __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); break; case Op_VecY: __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); break; + case Op_VecZ: + __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); + break; default: ShouldNotReachHere(); } int size = __ offset() - offset; #ifdef ASSERT
*** 761,779 **** case Op_VecD: case Op_VecX: st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); break; case Op_VecY: st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); break; default: ShouldNotReachHere(); } #endif } // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. ! return 4; } static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, int stack_offset, int reg, uint ireg, outputStream* st) { // In 64-bit VM size calculation is very complex. Emitting instructions --- 1843,1862 ---- case Op_VecD: case Op_VecX: st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); break; case Op_VecY: + case Op_VecZ: st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); break; default: ShouldNotReachHere(); } #endif } // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. ! return (UseAVX > 2) ? 6 : 4; } static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, int stack_offset, int reg, uint ireg, outputStream* st) { // In 64-bit VM size calculation is very complex. Emitting instructions
*** 794,803 **** --- 1877,1889 ---- __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); break; case Op_VecY: __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); break; + case Op_VecZ: + __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); + break; default: ShouldNotReachHere(); } } else { // store switch (ireg) {
*** 811,827 **** __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); break; case Op_VecY: __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); break; default: ShouldNotReachHere(); } } int size = __ offset() - offset; #ifdef ASSERT ! int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. assert(!do_size || size == (5+offset_size), "incorrect size calculation"); #endif return size; #ifndef PRODUCT --- 1897,1916 ---- __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); break; case Op_VecY: __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); break; + case Op_VecZ: + __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); + break; default: ShouldNotReachHere(); } } int size = __ offset() - offset; #ifdef ASSERT ! int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. assert(!do_size || size == (5+offset_size), "incorrect size calculation"); #endif return size; #ifndef PRODUCT
*** 836,845 **** --- 1925,1935 ---- break; case Op_VecX: st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); break; case Op_VecY: + case Op_VecZ: st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); break; default: ShouldNotReachHere(); }
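The spill helpers above return an estimated encoding size of 5 + offset_size bytes, and this patch widens the displacement estimate on EVEX targets. A self-contained model of the offset_size ternary used in both helpers (a sketch of the expression above, not HotSpot code):

#include <cassert>

// Displacement-size estimate from vec_spill_helper: no displacement byte
// when the offset is 0, one byte when it fits disp8, otherwise the wider
// case, which the patch bumps from 4 to 6 when UseAVX > 2 (EVEX prefix).
static int offset_size(int stack_offset, int use_avx) {
  return (stack_offset == 0) ? 0
       : (stack_offset < 0x80) ? 1
       : (use_avx > 2) ? 6 : 4;
}

int main() {
  assert(offset_size(0, 2) == 0);
  assert(offset_size(16, 2) == 1);    // fits in disp8
  assert(offset_size(256, 2) == 4);   // disp32 on VEX targets
  assert(offset_size(256, 3) == 6);   // wider estimate on EVEX targets
  return 0;
}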
*** 853,871 **** break; case Op_VecX: st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; case Op_VecY: st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; default: ShouldNotReachHere(); } } #endif } ! int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. return 5+offset_size; } static inline jfloat replicate4_imm(int con, int width) { --- 1943,1962 ---- break; case Op_VecX: st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; case Op_VecY: + case Op_VecZ: st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; default: ShouldNotReachHere(); } } #endif } ! int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. return 5+offset_size; } static inline jfloat replicate4_imm(int con, int width) {
*** 965,1008 **** //----------OPERANDS----------------------------------------------------------- // Operand definitions must precede instruction definitions for correct parsing // in the ADLC because operands constitute user defined types which are used in // instruction definitions. ! // Vectors ! operand vecS() %{ ! constraint(ALLOC_IN_RC(vectors_reg)); ! match(VecS); ! ! format %{ %} ! interface(REG_INTER); ! %} ! ! operand vecD() %{ ! constraint(ALLOC_IN_RC(vectord_reg)); ! match(VecD); ! ! format %{ %} ! interface(REG_INTER); ! %} ! ! operand vecX() %{ ! constraint(ALLOC_IN_RC(vectorx_reg)); ! match(VecX); format %{ %} interface(REG_INTER); %} ! operand vecY() %{ ! constraint(ALLOC_IN_RC(vectory_reg)); ! match(VecY); format %{ %} interface(REG_INTER); %} - // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) // ============================================================================ instruct ShouldNotReachHere() %{ --- 2056,2083 ---- //----------OPERANDS----------------------------------------------------------- // Operand definitions must precede instruction definitions for correct parsing // in the ADLC because operands constitute user defined types which are used in // instruction definitions. ! // This operand applies only to EVEX targets, so a single version suffices ! operand vecZ() %{ ! constraint(ALLOC_IN_RC(vectorz_reg)); ! match(VecZ); format %{ %} interface(REG_INTER); %} ! operand regK() %{ ! constraint(ALLOC_IN_RC(mask_reg)); ! predicate(UseAVX > 2); ! match(RegK); format %{ %} interface(REG_INTER); %} // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) // ============================================================================ instruct ShouldNotReachHere() %{
*** 1599,1611 **** predicate(UseAVX > 0); match(Set dst (AbsF src)); ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ins_encode %{ ! bool vector256 = false; __ vandps($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(float_signmask()), vector256); %} ins_pipe(pipe_slow); %} instruct absD_reg(regD dst) %{ --- 2674,2686 ---- predicate(UseAVX > 0); match(Set dst (AbsF src)); ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ins_encode %{ ! int vector_len = 0; __ vandps($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(float_signmask()), vector_len); %} ins_pipe(pipe_slow); %} instruct absD_reg(regD dst) %{
*** 1625,1637 **** match(Set dst (AbsD src)); ins_cost(150); format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" "# abs double by sign masking" %} ins_encode %{ ! bool vector256 = false; __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(double_signmask()), vector256); %} ins_pipe(pipe_slow); %} instruct negF_reg(regF dst) %{ --- 2700,2712 ---- match(Set dst (AbsD src)); ins_cost(150); format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" "# abs double by sign masking" %} ins_encode %{ ! int vector_len = 0; __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(double_signmask()), vector_len); %} ins_pipe(pipe_slow); %} instruct negF_reg(regF dst) %{
*** 1649,1661 **** predicate(UseAVX > 0); match(Set dst (NegF src)); ins_cost(150); format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} ins_encode %{ ! bool vector256 = false; __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(float_signflip()), vector256); %} ins_pipe(pipe_slow); %} instruct negD_reg(regD dst) %{ --- 2724,2736 ---- predicate(UseAVX > 0); match(Set dst (NegF src)); ins_cost(150); format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} ins_encode %{ ! int vector_len = 0; __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(float_signflip()), vector_len); %} ins_pipe(pipe_slow); %} instruct negD_reg(regD dst) %{
*** 1675,1687 **** match(Set dst (NegD src)); ins_cost(150); format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" "# neg double by sign flipping" %} ins_encode %{ ! bool vector256 = false; __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(double_signflip()), vector256); %} ins_pipe(pipe_slow); %} instruct sqrtF_reg(regF dst, regF src) %{ --- 2750,2762 ---- match(Set dst (NegD src)); ins_cost(150); format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" "# neg double by sign flipping" %} ins_encode %{ ! int vector_len = 0; __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ! ExternalAddress(double_signflip()), vector_len); %} ins_pipe(pipe_slow); %} instruct sqrtF_reg(regF dst, regF src) %{
*** 1752,1761 **** --- 2827,2870 ---- __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} + // ====================MASK INSTRUCTIONS===================================== + + instruct loadK(regK dst, memory mem) %{ + predicate(UseAVX>2); + match(Set dst (MoveK (LoadI mem))); + ins_cost(125); + format %{ "kmovq $dst,$mem\t! load mask" %} + ins_encode %{ + __ kmovq($dst$$KRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); + %} + + instruct storeK(memory mem, regK src) %{ + predicate(UseAVX>2); + match(Set mem (StoreI mem src)); + ins_cost(125); + format %{ "kmovq $mem,$src\t! store mask" %} + ins_encode %{ + __ kmovq($mem$$Address, $src$$KRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct moveK_kreg_kreg(regK dst, regK src) %{ + predicate(UseAVX>2); + match(Set dst (MoveK src)); + ins_cost(85); + format %{ "kmovq $dst,$src\t# copy kreg src to kreg dst" %} + ins_encode %{ + __ kmovq($dst$$KRegister, $src$$KRegister); + %} + ins_pipe( pipe_slow ); + %} // ====================VECTOR INSTRUCTIONS===================================== // Load vectors (4 bytes long) instruct loadV4(vecS dst, memory mem) %{
*** 1803,1812 **** --- 2912,2934 ---- __ vmovdqu($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow ); %} + // Load vectors (64 bytes long) + instruct loadV64(vecZ dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 64); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %} + ins_encode %{ + int vector_len = 2; + __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); + %} + // Store vectors instruct storeV4(memory mem, vecS src) %{ predicate(n->as_StoreVector()->memory_size() == 4); match(Set mem (StoreVector mem src)); ins_cost(145);
*** 1848,1857 **** --- 2970,2991 ---- __ vmovdqu($mem$$Address, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct storeV64(memory mem, vecZ src) %{ + predicate(n->as_StoreVector()->memory_size() == 64); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %} + ins_encode %{ + int vector_len = 2; + __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); + %} + // Replicate byte scalar to be vector instruct Repl4B(vecS dst, rRegI src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t"
*** 1911,1920 **** --- 3045,3074 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl64B(vecZ dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 64); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate64B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Replicate byte scalar immediate to be vector by loading from const table. instruct Repl4B_imm(vecS dst, immI con) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateB con)); format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
*** 1958,1967 **** --- 3112,3137 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl64B_imm(vecZ dst, immI con) %{ + predicate(n->as_Vector()->length() == 64); + match(Set dst (ReplicateB con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Replicate byte scalar zero to be vector instruct Repl4B_zero(vecS dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateB zero)); format %{ "pxor $dst,$dst\t! replicate4B zero" %}
*** 1995,2006 **** predicate(n->as_Vector()->length() == 32); match(Set dst (ReplicateB zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! bool vector256 = true; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); %} ins_pipe( fpu_reg_reg ); %} // Replicate char/short (2 byte) scalar to be vector --- 3165,3188 ---- predicate(n->as_Vector()->length() == 32); match(Set dst (ReplicateB zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! int vector_len = 1; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl64B_zero(vecZ dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 64); ! match(Set dst (ReplicateB zero)); ! format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). ! int vector_len = 2; ! __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate char/short (2 byte) scalar to be vector
*** 2056,2065 **** --- 3238,3265 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl32S(vecZ dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. instruct Repl2S_imm(vecS dst, immI con) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateS con)); format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
*** 2103,2112 **** --- 3303,3328 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl32S_imm(vecZ dst, immI con) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateS con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Replicate char/short (2 byte) scalar zero to be vector instruct Repl2S_zero(vecS dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateS zero)); format %{ "pxor $dst,$dst\t! replicate2S zero" %}
*** 2140,2151 **** predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateS zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! bool vector256 = true; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); %} ins_pipe( fpu_reg_reg ); %} // Replicate integer (4 byte) scalar to be vector --- 3356,3379 ---- predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateS zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! int vector_len = 1; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl32S_zero(vecZ dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 32); ! match(Set dst (ReplicateS zero)); ! format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). ! int vector_len = 2; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate integer (4 byte) scalar to be vector
*** 2185,2200 **** __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ! instruct Repl2I_imm(vecD dst, immI con) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateI con)); ! format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} ! ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); %} ins_pipe( fpu_reg_reg ); %} --- 3413,3444 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct Repl16I(vecZ dst, rRegI src) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (ReplicateI src)); ! format %{ "movd $dst,$src\n\t" ! "pshufd $dst,$dst,0x00\n\t" ! "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t" ! "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %} ! ins_encode %{ ! __ movdl($dst$$XMMRegister, $src$$Register); ! __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); ! __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ! instruct Repl2I_imm(vecD dst, immI con) %{ ! predicate(n->as_Vector()->length() == 2); ! match(Set dst (ReplicateI con)); ! format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} ! ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); %} ins_pipe( fpu_reg_reg ); %}
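The Repl*_imm rules rely on the replicate*_imm helpers shown earlier to widen a small immediate until it fills a quadword, which is then loaded from the constant table and broadcast by punpcklqdq/vinserti*. A minimal standalone model of that widening step in plain C++ (an illustration of the idea, not the HotSpot helper itself):

#include <cassert>
#include <cstdint>

// Widen a constant of width_in_bytes until it fills 64 bits; the matched
// instruction sequence then broadcasts that quadword across the vector.
static uint64_t replicate8(uint64_t con, int width_in_bytes) {
  int bits = width_in_bytes * 8;
  uint64_t val = con & ((bits == 64) ? ~0ULL : ((1ULL << bits) - 1));
  while (bits < 64) {
    val |= val << bits;   // double the replicated span each pass
    bits <<= 1;
  }
  return val;
}

int main() {
  assert(replicate8(0xAB, 1)   == 0xABABABABABABABABULL);  // byte case
  assert(replicate8(0x1234, 2) == 0x1234123412341234ULL);  // short case
  assert(replicate8(0xDEADBEEF, 4) == 0xDEADBEEFDEADBEEFULL);  // int case
  return 0;
}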
*** 2222,2231 **** --- 3466,3491 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl16I_imm(vecZ dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateI con)); + format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\n\t" + "vinserti64x4h $dst k0,$dst,$dst" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Integer could be loaded into xmm register directly from memory. instruct Repl2I_mem(vecD dst, memory mem) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI (LoadI mem))); format %{ "movd $dst,$mem\n\t"
*** 2261,2270 **** --- 3521,3546 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl16I_mem(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateI (LoadI mem))); + format %{ "movd $dst,$mem\n\t" + "pshufd $dst,$dst,0x00\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Replicate integer (4 byte) scalar zero to be vector instruct Repl2I_zero(vecD dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI zero)); format %{ "pxor $dst,$dst\t! replicate2I" %}
*** 2288,2299 **** predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateI zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! bool vector256 = true; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); %} ins_pipe( fpu_reg_reg ); %} // Replicate long (8 byte) scalar to be vector --- 3564,3587 ---- predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateI zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! int vector_len = 1; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl16I_zero(vecZ dst, immI0 zero) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (ReplicateI zero)); ! format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). ! int vector_len = 2; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate long (8 byte) scalar to be vector
*** 2321,2330 **** --- 3609,3634 ---- __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + + instruct Repl8L(vecZ dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateL src)); + format %{ "movdq $dst,$src\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} + ins_encode %{ + __ movdq($dst$$XMMRegister, $src$$Register); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} #else // _LP64 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp);
*** 2357,2366 **** --- 3661,3690 ---- __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + + instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} #endif // _LP64 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. instruct Repl2L_imm(vecX dst, immL con) %{ predicate(n->as_Vector()->length() == 2);
*** 2386,2395 **** --- 3710,3735 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl8L_imm(vecZ dst, immL con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateL con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress($con)); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Long could be loaded into xmm register directly from memory. instruct Repl2L_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL (LoadL mem))); format %{ "movq $dst,$mem\n\t"
*** 2413,2422 **** --- 3753,3778 ---- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl8L_mem(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateL (LoadL mem))); + format %{ "movq $dst,$mem\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" + "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Replicate long (8 byte) scalar zero to be vector instruct Repl2L_zero(vecX dst, immL0 zero) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL zero)); format %{ "pxor $dst,$dst\t! replicate2L zero" %}
*** 2430,2441 **** predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! bool vector256 = true; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); %} ins_pipe( fpu_reg_reg ); %} // Replicate float (4 byte) scalar to be vector --- 3786,3809 ---- predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). ! int vector_len = 1; ! __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl8L_zero(vecZ dst, immL0 zero) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateL zero)); ! format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} ! ins_encode %{ ! // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). ! int vector_len = 2; ! __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate float (4 byte) scalar to be vector
*** 2469,2478 ****
--- 3837,3860 ----
      __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    %}
    ins_pipe( pipe_slow );
  %}
+ 
+ instruct Repl16F(vecZ dst, regF src) %{
+   predicate(n->as_Vector()->length() == 16);
+   match(Set dst (ReplicateF src));
+   format %{ "pshufd  $dst,$src,0x00\n\t"
+             "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
+             "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate8F" %}
+   ins_encode %{
+     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+     __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
  // Replicate float (4 byte) scalar zero to be vector
  instruct Repl2F_zero(vecD dst, immF0 zero) %{
    predicate(n->as_Vector()->length() == 2);
    match(Set dst (ReplicateF zero));
    format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
*** 2495,2506 **** instruct Repl8F_zero(vecY dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateF zero)); format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} ins_encode %{ ! bool vector256 = true; ! __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); %} ins_pipe( fpu_reg_reg ); %} // Replicate double (8 bytes) scalar to be vector --- 3877,3899 ---- instruct Repl8F_zero(vecY dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateF zero)); format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} ins_encode %{ ! int vector_len = 1; ! __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl16F_zero(vecZ dst, immF0 zero) %{ ! predicate(n->as_Vector()->length() == 16); ! match(Set dst (ReplicateF zero)); ! format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} ! ins_encode %{ ! int vector_len = 2; ! __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate double (8 bytes) scalar to be vector
*** 2524,2533 **** --- 3917,3940 ---- __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct Repl8D(vecZ dst, regD src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateD src)); + format %{ "pshufd $dst,$src,0x44\n\t" + "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t" + "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + // Replicate double (8 byte) scalar zero to be vector instruct Repl2D_zero(vecX dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateD zero)); format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
*** 2540,2551 **** instruct Repl4D_zero(vecY dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateD zero)); format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} ins_encode %{ ! bool vector256 = true; ! __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); %} ins_pipe( fpu_reg_reg ); %} // ====================REDUCTION ARITHMETIC======================================= --- 3947,3969 ---- instruct Repl4D_zero(vecY dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateD zero)); format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} ins_encode %{ ! int vector_len = 1; ! __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); ! %} ! ins_pipe( fpu_reg_reg ); ! %} ! ! instruct Repl8D_zero(vecZ dst, immD0 zero) %{ ! predicate(n->as_Vector()->length() == 8); ! match(Set dst (ReplicateD zero)); ! format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} ! ins_encode %{ ! int vector_len = 2; ! __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // ====================REDUCTION ARITHMETIC=======================================
*** 2568,2588 **** %} ins_pipe( pipe_slow ); %} instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction2I" %} ins_encode %{ ! __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} --- 3986,4027 ---- %} ins_pipe( pipe_slow ); %} instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0 && UseAVX < 3); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction2I" %} ins_encode %{ ! int vector_len = 0; ! __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); ! __ movdl($dst$$Register, $tmp2$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 2); ! match(Set dst (AddReductionVI src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "pshufd $tmp2,$src2,0x1\n\t" ! "vpaddd $tmp,$src2,$tmp2\n\t" ! "movd $tmp2,$src1\n\t" ! "vpaddd $tmp2,$tmp,$tmp2\n\t" ! "movd $dst,$tmp2\t! add reduction2I" %} ! ins_encode %{ ! int vector_len = 0; ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); ! __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
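The _evex variant avoids vphaddd, which has VEX but no EVEX encoding, and instead shuffles lane 1 down with pshufd 0x1, folds with one vpaddd, and adds the scalar input last. The same steps as a C model (an SSE2-level sketch; the JIT performs the final add in vector registers before the movd):

    #include <immintrin.h>
    #include <stdint.h>

    /* Sketch of the rvadd2I_reduction_reg_evex fold. */
    static int32_t radd2I_model(int32_t acc, __m128i v) {
      __m128i hi  = _mm_shuffle_epi32(v, 0x1);   /* pshufd $tmp2,$src2,0x1   */
      __m128i sum = _mm_add_epi32(v, hi);        /* vpaddd                   */
      return acc + _mm_cvtsi128_si32(sum);       /* movd + combine with src1 */
    }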
*** 2606,2656 **** %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction4I" %} ins_encode %{ ! __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false); ! __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" ! "vextractf128 $tmp2,$tmp\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction8I" %} ins_encode %{ ! __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true); ! __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true); ! __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (AddReductionVF src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "movdqu $tmp,$src1\n\t" --- 4045,4249 ---- %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 0 && UseAVX < 3); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction4I" %} ins_encode %{ ! int vector_len = 0; ! __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); ! __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); ! __ movdl($dst$$Register, $tmp2$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 2); ! match(Set dst (AddReductionVI src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "pshufd $tmp2,$src2,0xE\n\t" ! "vpaddd $tmp,$src2,$tmp2\n\t" ! "pshufd $tmp2,$tmp,0x1\n\t" ! "vpaddd $tmp,$tmp,$tmp2\n\t" ! "movd $tmp2,$src1\n\t" ! "vpaddd $tmp2,$tmp,$tmp2\n\t" ! "movd $dst,$tmp2\t! add reduction4I" %} ! ins_encode %{ ! int vector_len = 0; ! __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); ! __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); ! __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); ! 
__ movdl($tmp2$$XMMRegister, $src1$$Register);
!     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
      __ movdl($dst$$Register, $tmp2$$XMMRegister);
    %}
    ins_pipe( pipe_slow );
  %}
  
  instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
!   predicate(UseAVX > 0 && UseAVX < 3);
    match(Set dst (AddReductionVI src1 src2));
    effect(TEMP tmp, TEMP tmp2);
    format %{ "vphaddd  $tmp,$src2,$src2\n\t"
              "vphaddd  $tmp,$tmp,$tmp2\n\t"
!             "vextracti128  $tmp2,$tmp\n\t"
              "vpaddd   $tmp,$tmp,$tmp2\n\t"
              "movd     $tmp2,$src1\n\t"
              "vpaddd   $tmp2,$tmp2,$tmp\n\t"
              "movd     $dst,$tmp2\t! add reduction8I" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
!     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
!     __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
!     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
!     __ movdl($tmp2$$XMMRegister, $src1$$Register);
!     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
!     __ movdl($dst$$Register, $tmp2$$XMMRegister);
!   %}
!   ins_pipe( pipe_slow );
! %}
! 
! instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
!   predicate(UseAVX > 2);
!   match(Set dst (AddReductionVI src1 src2));
!   effect(TEMP tmp, TEMP tmp2);
!   format %{ "vextracti128  $tmp,$src2\n\t"
!             "vpaddd   $tmp,$tmp,$src2\n\t"
!             "pshufd   $tmp2,$tmp,0xE\n\t"
!             "vpaddd   $tmp,$tmp,$tmp2\n\t"
!             "pshufd   $tmp2,$tmp,0x1\n\t"
!             "vpaddd   $tmp,$tmp,$tmp2\n\t"
!             "movd     $tmp2,$src1\n\t"
!             "vpaddd   $tmp2,$tmp,$tmp2\n\t"
!             "movd     $dst,$tmp2\t! add reduction8I" %}
!   ins_encode %{
!     int vector_len = 0;
!     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
!     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
!     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
!     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
!     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
!     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
!     __ movdl($tmp2$$XMMRegister, $src1$$Register);
!     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
!     __ movdl($dst$$Register, $tmp2$$XMMRegister);
!   %}
!   ins_pipe( pipe_slow );
! %}
! 
! instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
!   predicate(UseAVX > 2);
!   match(Set dst (AddReductionVI src1 src2));
!   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
!   format %{ "vextracti64x4  $tmp3,$src2\n\t"
!             "vpaddd   $tmp3,$tmp3,$src2\n\t"
!             "vextracti128   $tmp,$tmp3\n\t"
!             "vpaddd   $tmp,$tmp,$tmp3\n\t"
!             "pshufd   $tmp2,$tmp,0xE\n\t"
!             "vpaddd   $tmp,$tmp,$tmp2\n\t"
!             "pshufd   $tmp2,$tmp,0x1\n\t"
!             "vpaddd   $tmp,$tmp,$tmp2\n\t"
!             "movd     $tmp2,$src1\n\t"
!             "vpaddd   $tmp2,$tmp,$tmp2\n\t"
!             "movd     $dst,$tmp2\t! add reduction16I" %}
!   ins_encode %{
!     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
!     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
!     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
!     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
!     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
!     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
!     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
!     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
      __ movdl($tmp2$$XMMRegister, $src1$$Register);
! 
__ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ + predicate(UseAVX > 2); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP tmp, TEMP tmp2); + format %{ "pshufd $tmp2,$src2,0xE\n\t" + "vpaddq $tmp,$src2,$tmp2\n\t" + "movdq $tmp2,$src1\n\t" + "vpaddq $tmp2,$tmp,$tmp2\n\t" + "movdq $dst,$tmp2\t! add reduction2L" %} + ins_encode %{ + __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); + __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); + __ movdq($tmp2$$XMMRegister, $src1$$Register); + __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); + __ movdq($dst$$Register, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ + predicate(UseAVX > 2); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP tmp, TEMP tmp2); + format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" + "vpaddq $tmp2,$tmp,$src2\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vpaddq $tmp2,$tmp2,$tmp\n\t" + "movdq $tmp,$src1\n\t" + "vpaddq $tmp2,$tmp2,$tmp\n\t" + "movdq $dst,$tmp2\t! add reduction4L" %} + ins_encode %{ + __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); + __ movdq($tmp$$XMMRegister, $src1$$Register); + __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); + __ movdq($dst$$Register, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + + instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ + predicate(UseAVX > 2); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP tmp, TEMP tmp2); + format %{ "vextracti64x4 $tmp2,$src2\n\t" + "vpaddq $tmp2,$tmp2,$src2\n\t" + "vextracti128 $tmp,$tmp2\n\t" + "vpaddq $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$tmp2,0xE\n\t" + "vpaddq $tmp2,$tmp2,$tmp\n\t" + "movdq $tmp,$src1\n\t" + "vpaddq $tmp2,$tmp2,$tmp\n\t" + "movdq $dst,$tmp2\t! add reduction8L" %} + ins_encode %{ + __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); + __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); + __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); + __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); + __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); + __ movdq($tmp$$XMMRegister, $src1$$Register); + __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); + __ movdq($dst$$Register, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (AddReductionVF src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "movdqu $tmp,$src1\n\t"
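The long add reductions are straight halving trees: 512 to 256 bits via vextracti64x4 + vpaddq, 256 to 128 via vextracti128 + vpaddq, then the pshufd-0xE fold of the last two lanes, with the scalar input combined at the end. A C model of the 8L case (a sketch assuming AVX2 and AVX-512F, and x86-64 for _mm_cvtsi128_si64):

    #include <immintrin.h>
    #include <stdint.h>

    /* Sketch of rvadd8L_reduction_reg's fold sequence. */
    static int64_t radd8L_model(int64_t acc, __m512i v) {
      __m256i s256 = _mm256_add_epi64(_mm512_castsi512_si256(v),
                                      _mm512_extracti64x4_epi64(v, 1)); /* vextracti64x4 + vpaddq */
      __m128i s128 = _mm_add_epi64(_mm256_castsi256_si128(s256),
                                   _mm256_extracti128_si256(s256, 1)); /* vextracti128 + vpaddq  */
      s128 = _mm_add_epi64(s128, _mm_shuffle_epi32(s128, 0x0E));       /* pshufd 0xE fold        */
      return acc + _mm_cvtsi128_si64(s128);                            /* movdq + combine        */
    }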
*** 2770,2779 ****
--- 4363,4443 ----
      __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    %}
    ins_pipe( pipe_slow );
  %}
+ 
+ instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+   predicate(UseAVX > 2);
+   match(Set dst (AddReductionVF src1 src2));
+   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
+             "pshufd  $tmp,$src2,0x01\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$src2,0x02\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$src2,0x03\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd  $tmp,$tmp3,0x01\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$tmp3,0x02\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$tmp3,0x03\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd  $tmp,$tmp3,0x01\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$tmp3,0x02\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$tmp3,0x03\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd  $tmp,$tmp3,0x01\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$tmp3,0x02\n\t"
+             "vaddss  $tmp2,$tmp2,$tmp\n\t"
+             "pshufd  $tmp,$tmp3,0x03\n\t"
+             "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
+   ins_encode %{
+     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
  instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
    predicate(UseSSE >= 1 && UseAVX == 0);
    match(Set dst (AddReductionVD src1 src2));
    effect(TEMP tmp, TEMP dst);
    format %{ "movdqu  $tmp,$src1\n\t"
*** 2817,2827 **** "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} ins_encode %{ __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); --- 4481,4530 ---- "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} ins_encode %{ __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ ! predicate(UseAVX > 2); ! match(Set dst (AddReductionVD src1 src2)); ! effect(TEMP tmp, TEMP tmp2, TEMP tmp3); ! format %{ "vaddsd $tmp2,$src1,$src2\n\t" ! "pshufd $tmp,$src2,0xE\n\t" ! "vaddsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x1\n\t" ! "vaddsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vaddsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x2\n\t" ! "vaddsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vaddsd $tmp2,$tmp2,$tmp\n\t" ! "vextractf64x2 $tmp3,$src2, 0x3\n\t" ! "vaddsd $tmp2,$tmp2,$tmp3\n\t" ! "pshufd $tmp,$tmp3,0xE\n\t" ! "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %} ! ins_encode %{ ! __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); ! __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); ! __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); ! __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow );
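In contrast to the integer trees above, every float and double reduction is a strictly serial vaddss/vaddsd chain: src1 first, then lanes 0 through N-1 in order. FP addition is not associative, so a pairwise tree could round differently from the scalar loop being replaced; that is presumably why the 8D form pays for three separate vextractf64x2 extracts instead of folding halves. The preserved evaluation order, modeled in C (AVX-512F assumed for the store):

    #include <immintrin.h>

    /* Sketch: rvadd8D_reduction_reg's left-to-right association. */
    static double radd8D_model(double acc, __m512d v) {
      double lane[8];
      _mm512_storeu_pd(lane, v);
      for (int i = 0; i < 8; i++)
        acc += lane[i];        /* same order as the vaddsd chain */
      return acc;
    }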
*** 2854,2867 **** "vpmulld $tmp,$src2,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction2I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} --- 4557,4571 ---- "vpmulld $tmp,$src2,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction2I" %} ins_encode %{ + int vector_len = 0; __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); ! __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
*** 2898,2954 **** "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction4I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); ! __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextractf128 $tmp,$src2\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction8I" %} ins_encode %{ ! __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister); ! __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); ! __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); ! __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} ! instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (MulReductionVF src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "movdqu $tmp,$src1\n\t" "mulss $tmp,$src2\n\t" "pshufd $tmp2,$src2,0x01\n\t" "mulss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! add reduction2F" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); --- 4602,4760 ---- "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction4I" %} ins_encode %{ + int vector_len = 0; __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); ! __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); ! __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); ! __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti128 $tmp,$src2\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction8I" %} ins_encode %{ ! int vector_len = 0; ! __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); ! 
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
!     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
!     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
!     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
!     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
!     __ movdl($tmp2$$XMMRegister, $src1$$Register);
!     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
!     __ movdl($dst$$Register, $tmp2$$XMMRegister);
!   %}
!   ins_pipe( pipe_slow );
! %}
! 
! instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
!   predicate(UseAVX > 2);
!   match(Set dst (MulReductionVI src1 src2));
!   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
!   format %{ "vextracti64x4  $tmp3,$src2\n\t"
!             "vpmulld  $tmp3,$tmp3,$src2\n\t"
!             "vextracti128   $tmp,$tmp3\n\t"
!             "vpmulld  $tmp,$tmp,$tmp3\n\t"
!             "pshufd   $tmp2,$tmp,0xE\n\t"
!             "vpmulld  $tmp,$tmp,$tmp2\n\t"
!             "pshufd   $tmp2,$tmp,0x1\n\t"
!             "vpmulld  $tmp,$tmp,$tmp2\n\t"
!             "movd     $tmp2,$src1\n\t"
!             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
!             "movd     $dst,$tmp2\t! mul reduction16I" %}
!   ins_encode %{
!     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
!     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
!     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
!     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
      __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
!     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
      __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
!     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
      __ movdl($tmp2$$XMMRegister, $src1$$Register);
!     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
      __ movdl($dst$$Register, $tmp2$$XMMRegister);
    %}
    ins_pipe( pipe_slow );
  %}
  
! instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
!   predicate(UseAVX > 2);
!   match(Set dst (MulReductionVL src1 src2));
!   effect(TEMP tmp, TEMP tmp2);
!   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
!             "vpmullq  $tmp,$src2,$tmp2\n\t"
!             "movdq    $tmp2,$src1\n\t"
!             "vpmullq  $tmp2,$tmp,$tmp2\n\t"
!             "movdq    $dst,$tmp2\t! mul reduction2L" %}
!   ins_encode %{
!     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
!     __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
!     __ movdq($tmp2$$XMMRegister, $src1$$Register);
!     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
!     __ movdq($dst$$Register, $tmp2$$XMMRegister);
!   %}
!   ins_pipe( pipe_slow );
! %}
! 
! instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
!   predicate(UseAVX > 2);
!   match(Set dst (MulReductionVL src1 src2));
!   effect(TEMP tmp, TEMP tmp2);
!   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
!             "vpmullq  $tmp2,$tmp,$src2\n\t"
!             "pshufd   $tmp,$tmp2,0xE\n\t"
!             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
!             "movdq    $tmp,$src1\n\t"
!             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
!             "movdq    $dst,$tmp2\t! mul reduction4L" %}
!   ins_encode %{
!     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
!     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
!     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
!     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
!     __ movdq($tmp$$XMMRegister, $src1$$Register);
!     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
!     __ movdq($dst$$Register, $tmp2$$XMMRegister);
!   %}
! 
ins_pipe( pipe_slow ); ! %} ! ! instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ ! predicate(UseAVX > 2); ! match(Set dst (MulReductionVL src1 src2)); ! effect(TEMP tmp, TEMP tmp2); ! format %{ "vextracti64x4 $tmp2,$src2\n\t" ! "vpmullq $tmp2,$tmp2,$src2\n\t" ! "vextracti128 $tmp,$tmp2\n\t" ! "vpmullq $tmp2,$tmp2,$tmp\n\t" ! "pshufd $tmp,$tmp2,0xE\n\t" ! "vpmullq $tmp2,$tmp2,$tmp\n\t" ! "movdq $tmp,$src1\n\t" ! "vpmullq $tmp2,$tmp2,$tmp\n\t" ! "movdq $dst,$tmp2\t! mul reduction8L" %} ! ins_encode %{ ! __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); ! __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); ! __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); ! __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); ! __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); ! __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); ! __ movdq($tmp$$XMMRegister, $src1$$Register); ! __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); ! __ movdq($dst$$Register, $tmp2$$XMMRegister); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (MulReductionVF src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "movdqu $tmp,$src1\n\t" "mulss $tmp,$src2\n\t" "pshufd $tmp2,$src2,0x01\n\t" "mulss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! mul reduction2F" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
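One gating note on the MulReductionVL forms: vpmullq is an AVX-512DQ instruction, and its 128/256-bit versions also need AVX-512VL, so UseAVX > 2 alone may not be sufficient on every AVX-512 part; presumably the flag setup elsewhere in the change accounts for this. A C model of the 2L case (a sketch assuming AVX-512DQ+VL for _mm_mullo_epi64):

    #include <immintrin.h>
    #include <stdint.h>

    /* Sketch of rvmul2L_reduction_reg: multiply the two lanes, then src1. */
    static int64_t rmul2L_model(int64_t acc, __m128i v) {
      __m128i hi = _mm_shuffle_epi32(v, 0x0E);   /* pshufd $tmp2,$src2,0xE */
      __m128i p  = _mm_mullo_epi64(v, hi);       /* vpmullq                */
      return acc * _mm_cvtsi128_si64(p);         /* movdq + final multiply */
    }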
*** 2961,2971 **** predicate(UseAVX > 0); match(Set dst (MulReductionVF src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vmulss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $dst,$tmp2,$tmp\t! add reduction2F" %} ins_encode %{ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} --- 4767,4777 ---- predicate(UseAVX > 0); match(Set dst (MulReductionVF src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vmulss $tmp2,$src1,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" ! "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} ins_encode %{ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %}
*** 2982,2992 **** "mulss $tmp,$tmp2\n\t" "pshufd $tmp2,$src2,0x02\n\t" "mulss $tmp,$tmp2\n\t" "pshufd $tmp2,$src2,0x03\n\t" "mulss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! add reduction4F" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); --- 4788,4798 ---- "mulss $tmp,$tmp2\n\t" "pshufd $tmp2,$src2,0x02\n\t" "mulss $tmp,$tmp2\n\t" "pshufd $tmp2,$src2,0x03\n\t" "mulss $tmp,$tmp2\n\t" ! "movdqu $dst,$tmp\t! mul reduction4F" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
*** 3007,3017 **** "pshufd $tmp,$src2,0x01\n\t" "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $dst,$tmp2,$tmp\t! add reduction4F" %} ins_encode %{ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); --- 4813,4823 ---- "pshufd $tmp,$src2,0x01\n\t" "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vmulss $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" ! "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} ins_encode %{ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
*** 3059,3076 **** __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (MulReductionVD src1 src2)); effect(TEMP tmp, TEMP dst); format %{ "movdqu $tmp,$src1\n\t" "mulsd $tmp,$src2\n\t" "pshufd $dst,$src2,0xE\n\t" ! "mulsd $dst,$tmp\t! add reduction2D" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); --- 4865,4953 ---- __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} + instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ + predicate(UseAVX > 2); + match(Set dst (MulReductionVF src1 src2)); + effect(TEMP tmp, TEMP tmp2, TEMP tmp3); + format %{ "vmulss $tmp2,$src1,$src2\n\t" + "pshufd $tmp,$src2,0x01\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$src2,0x02\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$src2,0x03\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "vextractf32x4 $tmp3,$src2, 0x1\n\t" + "vmulss $tmp2,$tmp2,$tmp3\n\t" + "pshufd $tmp,$tmp3,0x01\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$tmp3,0x02\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$tmp3,0x03\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "vextractf32x4 $tmp3,$src2, 0x2\n\t" + "vmulss $tmp2,$tmp2,$tmp3\n\t" + "pshufd $tmp,$tmp3,0x01\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$tmp3,0x02\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$tmp3,0x03\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "vextractf32x4 $tmp3,$src2, 0x3\n\t" + "vmulss $tmp2,$tmp2,$tmp3\n\t" + "pshufd $tmp,$tmp3,0x01\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$tmp3,0x02\n\t" + "vmulss $tmp2,$tmp2,$tmp\n\t" + "pshufd $tmp,$tmp3,0x03\n\t" + "vmulss $dst,$tmp2,$tmp\t! 
mul reduction16F" %} + ins_encode %{ + __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); + __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); + __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); + %} + instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (MulReductionVD src1 src2)); effect(TEMP tmp, TEMP dst); format %{ "movdqu $tmp,$src1\n\t" "mulsd $tmp,$src2\n\t" "pshufd $dst,$src2,0xE\n\t" ! "mulsd $dst,$tmp\t! mul reduction2D" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
*** 3114,3123 ****
--- 4991,5039 ----
      __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    %}
    ins_pipe( pipe_slow );
  %}
+ 
+ instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+   predicate(UseAVX > 2);
+   match(Set dst (MulReductionVD src1 src2));
+   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
+             "pshufd  $tmp,$src2,0xE\n\t"
+             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
+             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
+             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd  $tmp,$tmp3,0xE\n\t"
+             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
+             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
+             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd  $tmp,$tmp3,0xE\n\t"
+             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
+             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
+             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
+             "pshufd  $tmp,$tmp3,0xE\n\t"
+             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
+   ins_encode %{
+     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+   %}
+   ins_pipe( pipe_slow );
+ %}
+ 
  // ====================VECTOR ARITHMETIC=======================================
  
  // --------------------------------- ADD --------------------------------------
  
  // Bytes vector add
*** 3134,3145 **** instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd8B(vecD dst, vecD src) %{ --- 5050,5061 ---- instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8B(vecD dst, vecD src) %{
*** 3155,3166 **** instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd16B(vecX dst, vecX src) %{ --- 5071,5082 ---- instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16B(vecX dst, vecX src) %{
*** 3176,3220 **** instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector add --- 5092,5158 ---- instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (AddVB src1 src2)); ! format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (AddVB src (LoadVector mem))); ! format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector add
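The element-wise adds now come in three widths of identical shape, differing only in operand class, predicate, and vector_len. Note that at 512 bits the byte and short forms (vpaddb/vpaddw) are AVX-512BW instructions, so UseAVX > 2 is the minimum gate rather than the whole story. Side by side in C intrinsics (the 512-bit line assumes AVX-512BW):

    #include <immintrin.h>

    static __m128i add16B(__m128i a, __m128i b) { return _mm_add_epi8(a, b); }    /* vector_len = 0 */
    static __m256i add32B(__m256i a, __m256i b) { return _mm256_add_epi8(a, b); } /* vector_len = 1 */
    static __m512i add64B(__m512i a, __m512i b) { return _mm512_add_epi8(a, b); } /* vector_len = 2 */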
*** 3231,3242 **** instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4S(vecD dst, vecD src) %{ --- 5169,5180 ---- instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4S(vecD dst, vecD src) %{
*** 3252,3263 **** instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd8S(vecX dst, vecX src) %{ --- 5190,5201 ---- instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8S(vecX dst, vecX src) %{
*** 3273,3317 **** instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Integers vector add --- 5211,5277 ---- instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (AddVS src1 src2)); ! format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (AddVS src (LoadVector mem))); ! format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector add
*** 3328,3339 **** instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4I(vecX dst, vecX src) %{ --- 5288,5299 ---- instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4I(vecX dst, vecX src) %{
*** 3349,3393 **** instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Longs vector add --- 5309,5375 ---- instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVI src1 src2)); ! format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVI src (LoadVector mem))); ! format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Longs vector add
*** 3404,3448 **** instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVL src1 src2)); format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (AddVL src1 src2)); format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Floats vector add --- 5386,5452 ---- instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVL src1 src2)); format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (AddVL src1 src2)); format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVL src1 src2)); ! format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVL src (LoadVector mem))); ! format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Floats vector add
*** 3459,3470 **** instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} ins_encode %{ ! bool vector256 = false; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4F(vecX dst, vecX src) %{ --- 5463,5474 ---- instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4F(vecX dst, vecX src) %{
*** 3480,3524 **** instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Doubles vector add --- 5484,5550 ---- instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVF src1 src2)); ! format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (AddVF src (LoadVector mem))); ! format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Doubles vector add
*** 3535,3579 **** instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVD src1 src2)); format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVD src1 src2)); format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // --------------------------------- SUB -------------------------------------- --- 5561,5627 ---- instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVD src1 src2)); format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVD src1 src2)); format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVD src1 src2)); ! format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (AddVD src (LoadVector mem))); ! format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} ! ins_encode %{ ! 
int vector_len = 2; ! __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- SUB --------------------------------------
*** 3592,3603 **** instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub8B(vecD dst, vecD src) %{ --- 5640,5651 ---- instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8B(vecD dst, vecD src) %{
*** 3613,3624 **** instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub16B(vecX dst, vecX src) %{ --- 5661,5672 ---- instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16B(vecX dst, vecX src) %{
*** 3634,3678 **** instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector sub --- 5682,5748 ---- instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (SubVB src1 src2)); ! format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 64); ! match(Set dst (SubVB src (LoadVector mem))); ! format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector sub
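One caveat for the byte and short lanes in this area: on hardware, the 512-bit forms of vpsubb/vpsubw (and vpmullw further down) belong to AVX-512BW, while the new vecZ patterns gate only on UseAVX > 2. A hedged sketch of the fuller guard, assuming a VM_Version-style feature probe (the probe name is an assumption modeled on HotSpot's CPUID plumbing, not taken from this webrev):

    // Hypothetical guard: 64-byte byte/word vectors need AVX-512BW
    // (CPUID leaf 7, EBX bit 30), not just the AVX-512F implied by
    // UseAVX > 2 on an AVX-512 capable part.
    static bool zmm_byte_word_ops_ok() {
      return UseAVX > 2 && VM_Version::supports_avx512bw();
    }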
*** 3689,3700 **** instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4S(vecD dst, vecD src) %{ --- 5759,5770 ---- instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4S(vecD dst, vecD src) %{
*** 3710,3721 **** instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub8S(vecX dst, vecX src) %{ --- 5780,5791 ---- instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8S(vecX dst, vecX src) %{
*** 3731,3775 **** instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Integers vector sub --- 5801,5867 ---- instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (SubVS src1 src2)); ! format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (SubVS src (LoadVector mem))); ! format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector sub
*** 3786,3797 **** instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4I(vecX dst, vecX src) %{ --- 5878,5889 ---- instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4I(vecX dst, vecX src) %{
*** 3807,3851 **** instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Longs vector sub --- 5899,5965 ---- instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (SubVI src1 src2)); ! format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (SubVI src (LoadVector mem))); ! format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Longs vector sub
*** 3862,3906 **** instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVL src1 src2)); format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVL src (LoadVector mem))); format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (SubVL src1 src2)); format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (SubVL src (LoadVector mem))); ! format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Floats vector sub --- 5976,6042 ---- instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVL src1 src2)); format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVL src (LoadVector mem))); format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (SubVL src1 src2)); format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (SubVL src (LoadVector mem))); ! format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVL src1 src2)); ! format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVL src (LoadVector mem))); ! format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} ins_encode %{ ! int vector_len = 2; ! 
__ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Floats vector sub
*** 3917,3928 **** instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} ins_encode %{ ! bool vector256 = false; ! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4F(vecX dst, vecX src) %{ --- 6053,6064 ---- instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} ins_encode %{ ! int vector_len = 0; ! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4F(vecX dst, vecX src) %{
*** 3938,3982 **** instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Doubles vector sub --- 6074,6140 ---- instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (SubVF src1 src2)); ! format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (SubVF src (LoadVector mem))); ! format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Doubles vector sub
*** 3993,4037 **** instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVD src1 src2)); format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVD src1 src2)); format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // --------------------------------- MUL -------------------------------------- --- 6151,6217 ---- instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVD src1 src2)); format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVD src1 src2)); format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVD src1 src2)); ! format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (SubVD src (LoadVector mem))); ! format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} ! ins_encode %{ ! 
int vector_len = 2; ! __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- MUL --------------------------------------
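Packed integer multiply support arrives piecemeal across ISA generations, which is why the MUL predicates below are stricter than the matching ADD/SUB ones. An illustrative summary that mirrors the predicates rather than defining them:

    // Illustrative only (not in the webrev): minimum UseAVX level the
    // patterns below require for a packed integer multiply of a given shape.
    static int min_use_avx_for_int_mul(int elem_bytes, int vector_bytes) {
      if (elem_bytes == 8)    return 3;  // vpmullq is AVX-512 only (UseAVX > 2)
      if (vector_bytes == 64) return 3;  // any ZMM form needs EVEX (UseAVX > 2)
      if (vector_bytes == 32) return 2;  // YMM integer mul needs AVX2 (UseAVX > 1)
      return 1;                          // VEX-encoded XMM forms (UseAVX > 0)
    }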
*** 4050,4061 **** instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul4S(vecD dst, vecD src) %{ --- 6230,6241 ---- instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4S(vecD dst, vecD src) %{
*** 4071,4082 **** instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul8S(vecX dst, vecX src) %{ --- 6251,6262 ---- instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8S(vecX dst, vecX src) %{
*** 4092,4136 **** instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Integers vector mul (sse4_1) --- 6272,6338 ---- instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (MulVS src1 src2)); ! format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (MulVS src (LoadVector mem))); ! format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector mul (sse4_1)
*** 4147,4158 **** instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul4I(vecX dst, vecX src) %{ --- 6349,6371 ---- instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 2); ! match(Set dst (MulVL src1 src2)); ! format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} ! ins_encode %{ ! int vector_len = 0; ! __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4I(vecX dst, vecX src) %{
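The vmul2L/4L/8L patterns added here are new capability, not just a re-encoding: before vpmullq there was no packed 64-bit multiply for C2 to target. A hedged illustration of the loop shape that can now stay vectorized (C++ for concreteness; in practice it is the equivalent Java loop that SuperWord turns into MulVL nodes):

    // Illustrative only: element-wise long multiply. With UseAVX > 2 the
    // MulVL patterns let this compile to vpmullq instead of one scalar
    // multiply per element.
    static void mul_longs(long long* a, const long long* b, int n) {
      for (int i = 0; i < n; i++) {
        a[i] *= b[i];
      }
    }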
*** 4168,4212 **** instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Floats vector mul --- 6381,6491 ---- instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 4); ! match(Set dst (MulVL src1 src2)); ! format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 4); ! match(Set dst (MulVL src (LoadVector mem))); ! format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} ! ins_encode %{ ! int vector_len = 1; ! __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (MulVL src1 src2)); ! format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (MulVI src1 src2)); ! format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (MulVL src (LoadVector mem))); ! format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (MulVI src (LoadVector mem))); ! format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Floats vector mul
*** 4223,4234 **** instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} ins_encode %{ ! bool vector256 = false; ! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul4F(vecX dst, vecX src) %{ --- 6502,6513 ---- instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} ins_encode %{ ! int vector_len = 0; ! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4F(vecX dst, vecX src) %{
*** 4244,4288 **** instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Doubles vector mul --- 6523,6589 ---- instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (MulVF src1 src2)); ! format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (MulVF src (LoadVector mem))); ! format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! 
__ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Doubles vector mul
*** 4299,4343 **** instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVD src1 src2)); format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVD src1 src2)); format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // --------------------------------- DIV -------------------------------------- --- 6600,6666 ---- instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVD src1 src2)); format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVD src1 src2)); format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (MulVD src1 src2)); ! format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (MulVD src (LoadVector mem))); ! format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %} ! ins_encode %{ ! 
    int vector_len = 2;
!   __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------
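// Note: the "$dst k0" in the new packed8D multiply formats above refers to
// the EVEX opmask register k0, which conventionally means "no masking"; these
// rules never use a real mask, and only some of the new formats print it
// (the packed8D divides below do not). Illustrative EVEX syntax, not taken
// from this change:
//
//   vmulpd zmm0 {k1}, zmm1, zmm2   // masked: only lanes selected by k1 are written
//   vmulpd zmm0, zmm1, zmm2        // unmasked, i.e. implicitly {k0}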
*** 4356,4367 **** instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} ins_encode %{ ! bool vector256 = false; ! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vdiv4F(vecX dst, vecX src) %{ --- 6679,6690 ---- instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} ins_encode %{ ! int vector_len = 0; ! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4F(vecX dst, vecX src) %{
*** 4377,4421 **** instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} ins_encode %{ ! bool vector256 = false; ! __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} ins_encode %{ ! bool vector256 = true; ! __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // Doubles vector div --- 6700,6766 ---- instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} ins_encode %{ ! int vector_len = 0; ! __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} ins_encode %{ ! int vector_len = 1; ! __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 16); ! match(Set dst (DivVF src1 src2)); ! format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 0 && n->as_Vector()->length() == 16); ! match(Set dst (DivVF src (LoadVector mem))); ! format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} ! ins_encode %{ ! int vector_len = 2; ! 
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
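// Note: unlike the other new vecZ rules in this change, which gate on
// UseAVX > 2, the vdiv16F_reg/vdiv16F_mem rules above test only UseAVX > 0.
// This is presumably harmless, since a 16-float vector cannot be formed
// unless the 512-bit register file is available, but UseAVX > 2 would match
// the sibling rules.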
*** 4432,4476 **** instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} ins_encode %{ ! bool vector256 = false; ! __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} ins_encode %{ ! bool vector256 = true; ! __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); %} ins_pipe( pipe_slow ); %} // ------------------------------ Shift --------------------------------------- --- 6777,6843 ---- instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} ins_encode %{ ! int vector_len = 0; ! __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} ins_encode %{ ! int vector_len = 1; ! __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (DivVD src1 src2)); ! format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} ! ins_encode %{ ! int vector_len = 2; ! __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (DivVD src (LoadVector mem))); ! format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} ! ins_encode %{ ! 
    int vector_len = 2;
!   __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------
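// Note: the UseAVX tiers in these predicates track hardware generations. A
// sketch of the gating (the helper is hypothetical; only the UseAVX flag and
// the 0/1/2 lengths appear in the rules themselves):
//
//   bool rule_available(int vector_len, bool integer_op) {
//     switch (vector_len) {
//     case 0:  return UseAVX > 0;              // 128-bit VEX forms
//     case 1:  return integer_op ? UseAVX > 1  // 256-bit integer ops need AVX2
//                                : UseAVX > 0; // 256-bit FP works on AVX1
//     case 2:  return UseAVX > 2;              // 512-bit EVEX forms need AVX-512
//     default: return false;
//     }
//   }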
*** 4513,4535 **** instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll4S(vecD dst, vecS shift) %{ --- 6880,6902 ---- instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4S(vecD dst, vecS shift) %{
*** 4555,4577 **** instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll8S(vecX dst, vecS shift) %{ --- 6922,6944 ---- instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8S(vecX dst, vecS shift) %{
*** 4597,4641 **** instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // Integers vector left shift --- 6964,7030 ---- instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (LShiftVS src shift)); ! format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (LShiftVS src shift)); ! 
format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector left shift
*** 4662,4684 **** instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll4I(vecX dst, vecS shift) %{ --- 7051,7073 ---- instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4I(vecX dst, vecS shift) %{
*** 4704,4748 **** instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // Longs vector left shift --- 7093,7159 ---- instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVI src shift)); ! format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (LShiftVI src shift)); ! 
format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // Longs vector left shift
*** 4769,4813 **** instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // ----------------------- LogicalRightShift ----------------------------------- --- 7180,7246 ---- instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (LShiftVL src shift)); ! format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! 
  match(Set dst (LShiftVL src shift));
! format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
! ins_encode %{
!   int vector_len = 2;
!   __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------
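// Note: in all the `_reg` shift rules the count operand is a vecS (a single
// 32-bit slot) regardless of the width of the data being shifted; the
// hardware reads the count from the low quadword of that register and applies
// the same count to every lane.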
*** 4840,4862 **** instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl4S(vecD dst, vecS shift) %{ --- 7273,7295 ---- instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4S(vecD dst, vecS shift) %{
*** 4882,4904 **** instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl8S(vecX dst, vecS shift) %{ --- 7315,7337 ---- instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl8S(vecX dst, vecS shift) %{
*** 4924,4968 **** instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // Integers vector logical right shift --- 7357,7423 ---- instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (URShiftVS src shift)); ! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! 
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
! match(Set dst (URShiftVS src shift));
! format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
! ins_encode %{
!   int vector_len = 2;
!   __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
*** 4989,5011 **** instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl4I(vecX dst, vecS shift) %{ --- 7444,7466 ---- instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4I(vecX dst, vecS shift) %{
*** 5031,5075 **** instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // Longs vector logical right shift --- 7486,7552 ---- instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (URShiftVI src shift)); ! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ ! 
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
! match(Set dst (URShiftVI src shift));
! format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
! ins_encode %{
!   int vector_len = 2;
!   __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
*** 5096,5140 **** instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // ------------------- ArithmeticRightShift ----------------------------------- --- 7573,7639 ---- instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 8); ! match(Set dst (URShiftVL src shift)); ! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! 
instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
! predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
! match(Set dst (URShiftVL src shift));
! format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
! ins_encode %{
!   int vector_len = 2;
!   __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------
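// Note: the URShiftV* rules above (vpsrl*) zero-fill, while the RShiftV*
// rules below (vpsra*) sign-fill. The same distinction in scalar C++,
// relying on the usual arithmetic behaviour of signed right shift:
//
//   uint16_t u = 0x8000;
//   uint16_t logical = u >> 1;     // 0x4000: zero-extended (vpsrlw)
//   int16_t  s = (int16_t)-32768;  // bit pattern 0x8000
//   int16_t  arith = s >> 1;       // 0xC000 == -16384: sign-extended (vpsraw)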
*** 5163,5185 **** instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra4S(vecD dst, vecS shift) %{ --- 7662,7684 ---- instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4S(vecD dst, vecS shift) %{
*** 5205,5227 **** instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra8S(vecX dst, vecS shift) %{ --- 7704,7726 ---- instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra8S(vecX dst, vecS shift) %{
*** 5247,5291 **** instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ ! bool vector256 = false; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ ! bool vector256 = true; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // Integers vector arithmetic right shift --- 7746,7812 ---- instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ ! int vector_len = 0; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ ! int vector_len = 1; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 32); ! match(Set dst (RShiftVS src shift)); ! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! 
instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
! predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
! match(Set dst (RShiftVS src shift));
! format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
! ins_encode %{
!   int vector_len = 2;
!   __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
*** 5312,5334 **** instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra4I(vecX dst, vecS shift) %{ --- 7833,7855 ---- instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4I(vecX dst, vecS shift) %{
*** 5354,5398 **** instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ins_encode %{ ! bool vector256 = false; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ins_encode %{ ! bool vector256 = true; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); %} ins_pipe( pipe_slow ); %} // There are no longs vector arithmetic right shift instructions. --- 7875,7941 ---- instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ins_encode %{ ! int vector_len = 0; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ins_encode %{ ! int vector_len = 1; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ ! predicate(UseAVX > 2 && n->as_Vector()->length() == 16); ! match(Set dst (RShiftVI src shift)); ! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} ! ins_encode %{ ! int vector_len = 2; ! __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); ! %} ! ins_pipe( pipe_slow ); ! %} ! ! 
instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
! predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
! match(Set dst (RShiftVI src shift));
! format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
! ins_encode %{
!   int vector_len = 2;
!   __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
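// Note: the comment above reflects the ISA: SSE/AVX provide psraw/psrad but
// no packed 64-bit arithmetic right shift, and although AVX-512 adds vpsraq,
// these rules do not use it. For reference, the usual scalar emulation
// (hypothetical helper, not code from this change):
//
//   int64_t sra64(int64_t x, int n) {                       // n in [1, 63]
//     uint64_t logical = (uint64_t)x >> n;                  // vpsrlq-style zero fill
//     uint64_t smear   = (x < 0) ? (~0ULL << (64 - n)) : 0; // replicate the sign bit
//     return (int64_t)(logical | smear);
//   }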
*** 5413,5424 **** instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} ins_encode %{ ! bool vector256 = false; ! __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vand8B(vecD dst, vecD src) %{ --- 7956,7967 ---- instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} ins_encode %{ ! int vector_len = 0; ! __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand8B(vecD dst, vecD src) %{
*** 5434,5445 **** instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} ins_encode %{ ! bool vector256 = false; ! __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); %} ins_pipe( pipe_slow ); %} instruct vand16B(vecX dst, vecX src) %{ --- 7977,7988 ---- instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} ins_encode %{ ! int vector_len = 0; ! __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand16B(vecX dst, vecX src) %{
*** 5455,5499 ****
  instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (AndV src1 src2));
    format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (AndV src (LoadVector mem)));
    format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (AndV src1 src2));
    format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
    ins_encode %{
!     bool vector256 = true;
!     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (AndV src (LoadVector mem)));
    format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
    ins_encode %{
!     bool vector256 = true;
!     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  // --------------------------------- OR ---------------------------------------
--- 7998,8064 ----
  instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (AndV src1 src2));
    format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (AndV src (LoadVector mem)));
    format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (AndV src1 src2));
    format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (AndV src (LoadVector mem)));
    format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
!   %}
!   ins_pipe( pipe_slow );
! %}
!
! instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
!   match(Set dst (AndV src1 src2));
!   format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
!   ins_encode %{
!     int vector_len = 2;
!     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
!   %}
!   ins_pipe( pipe_slow );
! %}
!
! instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
!   match(Set dst (AndV src (LoadVector mem)));
!   format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
!   ins_encode %{
!     int vector_len = 2;
!     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  // --------------------------------- OR ---------------------------------------
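The change running through these AND rules swaps the two-valued bool vector256 flag for a three-valued int vector_len, so one encoding parameter can select a 128-bit, 256-bit, or (newly) 512-bit operation. A minimal sketch of that mapping, assuming only what the ins_encode blocks above show (0 for 16-byte XMM ops, 1 for 32-byte YMM ops, 2 for 64-byte ZMM ops); the helper name is hypothetical and not part of the patch:

    // Hypothetical helper restating the vector_len convention used above.
    static int vector_len_in_bytes(int vector_len) {
      switch (vector_len) {
        case 0:  return 16;  // 128-bit XMM op (was vector256 == false)
        case 1:  return 32;  // 256-bit YMM op (was vector256 == true)
        case 2:  return 64;  // 512-bit ZMM op (new; no bool equivalent)
        default: return 0;   // values other than 0..2 are not used here
      }
    }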
*** 5511,5522 ****
  instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor8B(vecD dst, vecD src) %{
--- 8076,8087 ----
  instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor8B(vecD dst, vecD src) %{
*** 5532,5543 ****
  instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor16B(vecX dst, vecX src) %{
--- 8097,8108 ----
  instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor16B(vecX dst, vecX src) %{
*** 5553,5597 ****
  instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (OrV src (LoadVector mem)));
    format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
    ins_encode %{
!     bool vector256 = true;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (OrV src (LoadVector mem)));
    format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
    ins_encode %{
!     bool vector256 = true;
!     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  // --------------------------------- XOR --------------------------------------
--- 8118,8184 ----
  instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (OrV src (LoadVector mem)));
    format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (OrV src1 src2));
    format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (OrV src (LoadVector mem)));
    format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
!   %}
!   ins_pipe( pipe_slow );
! %}
!
! instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
!   match(Set dst (OrV src1 src2));
!   format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
!   ins_encode %{
!     int vector_len = 2;
!     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
!   %}
!   ins_pipe( pipe_slow );
! %}
!
! instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
!   match(Set dst (OrV src (LoadVector mem)));
!   format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
!   ins_encode %{
!     int vector_len = 2;
!     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  // --------------------------------- XOR --------------------------------------
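Each widened rule is gated by its predicate rather than by the encoding alone: UseAVX > 0 admits the 4-, 8-, and 16-byte forms, UseAVX > 1 the 32-byte forms, and UseAVX > 2 the new 64-byte forms. A sketch of that gating, assuming only the predicates shown above (the function name is hypothetical):

    // Hypothetical restatement of the instruct predicates for these rules.
    static bool vector_logic_supported(int use_avx, int length_in_bytes) {
      switch (length_in_bytes) {
        case 4:                        // vecS
        case 8:                        // vecD
        case 16: return use_avx > 0;   // vecX: any AVX level
        case 32: return use_avx > 1;   // vecY: AVX2 and above
        case 64: return use_avx > 2;   // vecZ: AVX-512 only
        default: return false;
      }
    }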
*** 5609,5620 ****
  instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor8B(vecD dst, vecD src) %{
--- 8196,8207 ----
  instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor8B(vecD dst, vecD src) %{
*** 5630,5641 ****
  instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor16B(vecX dst, vecX src) %{
--- 8217,8228 ----
  instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor16B(vecX dst, vecX src) %{
*** 5651,5694 ****
  instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (XorV src (LoadVector mem)));
    format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
    ins_encode %{
!     bool vector256 = false;
!     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
    ins_encode %{
!     bool vector256 = true;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (XorV src (LoadVector mem)));
    format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
    ins_encode %{
!     bool vector256 = true;
!     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
    %}
    ins_pipe( pipe_slow );
  %}

--- 8238,8303 ----
  instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
    predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
    match(Set dst (XorV src (LoadVector mem)));
    format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
    ins_encode %{
!     int vector_len = 0;
!     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (XorV src1 src2));
    format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}

  instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
    predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
    match(Set dst (XorV src (LoadVector mem)));
    format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
    ins_encode %{
!     int vector_len = 1;
!     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
!   %}
!   ins_pipe( pipe_slow );
! %}
!
! instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
!   match(Set dst (XorV src1 src2));
!   format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
!   ins_encode %{
!     int vector_len = 2;
!     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
!   %}
!   ins_pipe( pipe_slow );
! %}
!
! instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
!   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
!   match(Set dst (XorV src (LoadVector mem)));
!   format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
!   ins_encode %{
!     int vector_len = 2;
!     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
    %}
    ins_pipe( pipe_slow );
  %}
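The XOR rules complete the same pattern as AND and OR. The parameter type changes at every call site because a bool can only distinguish the two legacy widths; the 512-bit case has no bool spelling. A sketch of the translation the diff applies, under that assumption (the conversion helper is hypothetical, not part of the patch):

    // Hypothetical mapping from the old flag to the new length code.
    static int vector256_to_vector_len(bool vector256) {
      return vector256 ? 1 : 0;  // true was a 256-bit op, false 128-bit
    }
    // vector_len == 2 (512-bit) is the value the old bool could not
    // express, which is why vpand/vpor/vpxor now take an int vector_len.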