//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// The Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
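// As an illustrative reading of the reg_def format above (not an additional
// definition; the entry quoted here appears verbatim below), the line
//
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
//
// declares word (b), i.e. bits 32..63, of xmm0. Both save types are SOC, so
// the allocator may use the word without saving it on entry to a method but
// must preserve it across call sites; the ideal type Op_RegF means it is
// spilled/reloaded as a 32-bit float; the encoding 0 is xmm0's bit-pattern
// placed into opcodes; and next(1) selects the second 32-bit VMReg slot of
// the physical register.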
// Linux ABI:   No registers are preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              (XMM16-XMM31 and the upper YMM/ZMM halves are volatile);
//              XMM0-XMM3 might hold parameters.

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
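// The reg_class_dynamic entries below select between an EVEX and a legacy
// register class based on CPU features probed at VM startup. As a sketch of
// the pattern (quoting a definition that appears below), e.g.
//
//   reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy,
//                               %{ VM_Version::supports_evex() %} );
//
// behaves like "supports_evex() ? float_reg_evex : float_reg_legacy": on
// AVX-512 hardware the allocator may also hand out XMM16-XMM31, otherwise
// it is restricted to the REX/VEX-encodable XMM0-XMM15.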
alloc_class chunk2(RFLAGS); // Singleton class for condition codes reg_class int_flags(RFLAGS); // Class for pre evex float registers reg_class float_reg_legacy(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 #ifdef _LP64 ,XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15 #endif ); // Class for evex float registers reg_class float_reg_evex(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 #ifdef _LP64 ,XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23, XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31 #endif ); reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); // Class for pre evex double registers reg_class double_reg_legacy(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b #ifdef _LP64 ,XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b #endif ); // Class for evex double registers reg_class double_reg_evex(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b #ifdef _LP64 ,XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b, XMM16, XMM16b, XMM17, XMM17b, XMM18, XMM18b, XMM19, XMM19b, XMM20, XMM20b, XMM21, XMM21b, XMM22, XMM22b, XMM23, XMM23b, XMM24, XMM24b, XMM25, XMM25b, XMM26, XMM26b, XMM27, XMM27b, XMM28, XMM28b, XMM29, XMM29b, XMM30, XMM30b, XMM31, XMM31b #endif ); reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); // Class for pre evex 32bit vector registers reg_class vectors_reg_legacy(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 #ifdef _LP64 ,XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15 #endif ); // Class for evex 32bit vector registers reg_class vectors_reg_evex(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 #ifdef _LP64 ,XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23, XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31 #endif ); reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); // Class for all 64bit vector registers reg_class vectord_reg_legacy(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b #ifdef _LP64 ,XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b #endif ); // Class for all 64bit vector registers reg_class vectord_reg_evex(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b #ifdef _LP64 ,XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b, XMM16, XMM16b, XMM17, XMM17b, XMM18, XMM18b, XMM19, XMM19b, XMM20, XMM20b, XMM21, XMM21b, XMM22, XMM22b, XMM23, XMM23b, XMM24, XMM24b, XMM25, XMM25b, XMM26, XMM26b, XMM27, XMM27b, XMM28, XMM28b, XMM29, XMM29b, XMM30, XMM30b, XMM31, XMM31b #endif ); reg_class_dynamic vectord_reg(vectord_reg_evex, 
vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); // Class for all 128bit vector registers reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM1, XMM1b, XMM1c, XMM1d, XMM2, XMM2b, XMM2c, XMM2d, XMM3, XMM3b, XMM3c, XMM3d, XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, XMM5c, XMM5d, XMM6, XMM6b, XMM6c, XMM6d, XMM7, XMM7b, XMM7c, XMM7d #ifdef _LP64 ,XMM8, XMM8b, XMM8c, XMM8d, XMM9, XMM9b, XMM9c, XMM9d, XMM10, XMM10b, XMM10c, XMM10d, XMM11, XMM11b, XMM11c, XMM11d, XMM12, XMM12b, XMM12c, XMM12d, XMM13, XMM13b, XMM13c, XMM13d, XMM14, XMM14b, XMM14c, XMM14d, XMM15, XMM15b, XMM15c, XMM15d #endif ); // Class for all 128bit vector registers reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM1, XMM1b, XMM1c, XMM1d, XMM2, XMM2b, XMM2c, XMM2d, XMM3, XMM3b, XMM3c, XMM3d, XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, XMM5c, XMM5d, XMM6, XMM6b, XMM6c, XMM6d, XMM7, XMM7b, XMM7c, XMM7d #ifdef _LP64 ,XMM8, XMM8b, XMM8c, XMM8d, XMM9, XMM9b, XMM9c, XMM9d, XMM10, XMM10b, XMM10c, XMM10d, XMM11, XMM11b, XMM11c, XMM11d, XMM12, XMM12b, XMM12c, XMM12d, XMM13, XMM13b, XMM13c, XMM13d, XMM14, XMM14b, XMM14c, XMM14d, XMM15, XMM15b, XMM15c, XMM15d, XMM16, XMM16b, XMM16c, XMM16d, XMM17, XMM17b, XMM17c, XMM17d, XMM18, XMM18b, XMM18c, XMM18d, XMM19, XMM19b, XMM19c, XMM19d, XMM20, XMM20b, XMM20c, XMM20d, XMM21, XMM21b, XMM21c, XMM21d, XMM22, XMM22b, XMM22c, XMM22d, XMM23, XMM23b, XMM23c, XMM23d, XMM24, XMM24b, XMM24c, XMM24d, XMM25, XMM25b, XMM25c, XMM25d, XMM26, XMM26b, XMM26c, XMM26d, XMM27, XMM27b, XMM27c, XMM27d, XMM28, XMM28b, XMM28c, XMM28d, XMM29, XMM29b, XMM29c, XMM29d, XMM30, XMM30b, XMM30c, XMM30d, XMM31, XMM31b, XMM31c, XMM31d #endif ); reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); // Class for all 256bit vector registers reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h #ifdef _LP64 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h #endif ); // Class for all 256bit vector registers reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h #ifdef _LP64 ,XMM8, XMM8b, XMM8c, 
XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h #endif ); reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); // Class for all 512bit vector registers reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p #ifdef _LP64 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p #endif ); // Class for restricted 512bit vector registers reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p #ifdef _LP64 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, 
XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p #endif ); reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); #ifdef _LP64 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, 
XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); reg_class xmm23_reg(XMM23, XMM23b, XMM23c, 
XMM23d); reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);
reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);
reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);
reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);
reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h); reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);
reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d); reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h); reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);
reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d); reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h); reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);
reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d); reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h); reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);
reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d); reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
#endif %}
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
source_hpp %{ // Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
class NativeJump;
class CallStubImpl {
//--------------------------------------------------------------
//---<  Used for optimization in Compile::shorten_branches  >---
//--------------------------------------------------------------
public:
// Size of call trampoline stub.
static uint size_call_trampoline() { return 0; // no call trampolines on this platform }
// number of relocations needed by a call trampoline stub
static uint reloc_call_trampoline() { return 0; // no call trampolines on this platform } };
class HandlerImpl { public:
static int emit_exception_handler(CodeBuffer &cbuf);
static int emit_deopt_handler(CodeBuffer& cbuf);
static uint size_exception_handler() { // NativeCall instruction size is the same as NativeJump. // exception handler starts out as jump and can be patched to // a call by deoptimization. (4932387) // Note that this value is also credited (in output.cpp) to // the size of the code section. return NativeJump::instruction_size; }
#ifdef _LP64
static uint size_deopt_handler() { // three 5 byte instructions plus one move for unreachable address. return 15+3; }
#else
static uint size_deopt_handler() { // NativeCall instruction size is the same as NativeJump. // exception handler starts out as jump and can be patched to // a call by deoptimization. (4932387) // Note that this value is also credited (in output.cpp) to // the size of the code section. return 5 + NativeJump::instruction_size; // pushl(); jmp; }
#endif };
%} // end source_hpp
source %{ #include "opto/addnode.hpp"
// Emit exception handler code. // Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { // Note that the code buffer's insts_mark is always relative to insts. // That's why we must use the macroassembler to generate a handler. MacroAssembler _masm(&cbuf); address base = __ start_a_stub(size_exception_handler()); if (base == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return 0; // CodeBuffer::expand failed } int offset = __ offset(); __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); __ end_a_stub(); return offset; }
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { // Note that the code buffer's insts_mark is always relative to insts. // That's why we must use the macroassembler to generate a handler. MacroAssembler _masm(&cbuf); address base = __ start_a_stub(size_deopt_handler()); if (base == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return 0; // CodeBuffer::expand failed } int offset = __ offset();
#ifdef _LP64
address the_pc = (address) __ pc(); Label next; // push a "the_pc" on the stack without destroying any registers // as they all may be live. // push address of "next" __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 __ bind(next); // adjust it so it matches "the_pc" __ subptr(Address(rsp, 0), __ offset() - offset);
#else
InternalAddress here(__ pc()); __ pushptr(here.addr());
#endif
__ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); __ end_a_stub(); return offset; }
//=============================================================================
// Float masks come from different places depending on platform.
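// As a quick illustration of how these mask addresses are consumed (see the
// absF_reg / negF_reg instructs further below): abs() clears the sign bit and
// neg() flips it. A minimal sketch, with "dst" standing in for any XMM operand:
//   __ andps(dst, ExternalAddress(float_signmask()));   // dst &= 0x7fffffff  -> abs
//   __ xorps(dst, ExternalAddress(float_signflip()));   // dst ^= 0x80000000  -> neg
// On LP64 the masks are read from StubRoutines; the 32-bit VM uses the
// statically allocated *_pool constants returned below.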
#ifdef _LP64 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); } static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); } static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); } static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); } static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } static address vector_iota_indices() { return StubRoutines::x86::vector_iota_indices(); } static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); } static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); } static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } static address vector_all_ones_mask() { return StubRoutines::x86::vector_all_ones_mask(); } static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } static address vector_int_sizemask() { return StubRoutines::x86::vector_int_size_mask(); } static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } static address vector_short_sizemask() { return StubRoutines::x86::vector_short_size_mask(); } static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } static address vector_long_sizemask() { return StubRoutines::x86::vector_long_size_mask(); } #else static address float_signmask() { return (address)float_signmask_pool; } static address float_signflip() { return (address)float_signflip_pool; } static address double_signmask() { return (address)double_signmask_pool; } static address double_signflip() { return (address)double_signflip_pool; } #endif const bool Matcher::match_rule_supported(int opcode) { if (!has_match_rule(opcode)) return false; bool ret_value = true; switch (opcode) { case Op_PopCountI: case Op_PopCountL: if (!UsePopCountInstruction) ret_value = false; break; case Op_PopCountVI: if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) ret_value = false; break; case Op_MulVI: case Op_MulVL: if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX ret_value = false; break; case Op_MulReductionVL: if (VM_Version::supports_avx512dq() == false) ret_value = false; break; case Op_AddReductionVL: if (UseSSE < 2) // requires at least SSE2 ret_value = false; break; case Op_MulReductionVI: if (UseSSE < 4) // requires at least SSE4 ret_value = false; break; case Op_AddReductionVF: case Op_AddReductionVD: case Op_MulReductionVF: case Op_MulReductionVD: if (UseSSE < 1) // requires at least SSE ret_value = false; break; case Op_SqrtVD: 
case Op_SqrtVF: if (UseAVX < 1) // enabled for AVX only ret_value = false; break; case Op_CompareAndSwapL: #ifdef _LP64 case Op_CompareAndSwapP: #endif if (!VM_Version::supports_cx8()) ret_value = false; break; case Op_CMoveVF: case Op_CMoveVD: if (UseAVX < 1 || UseAVX > 2) ret_value = false; break; case Op_StrIndexOf: if (!UseSSE42Intrinsics) ret_value = false; break; case Op_StrIndexOfChar: if (!UseSSE42Intrinsics) ret_value = false; break; case Op_OnSpinWait: if (VM_Version::supports_on_spin_wait() == false) ret_value = false; break; case Op_MulAddVS2VI: if (UseSSE < 2) ret_value = false; break; #ifdef _LP64 case Op_MaxD: case Op_MaxF: case Op_MinD: case Op_MinF: if (UseAVX < 1) // enabled for AVX only ret_value = false; break; #endif } return ret_value; // Per default match rules are supported. } const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt, int op_arity) { // identify extra cases that we might want to provide match rules for // e.g. Op_ vector nodes and other intrinsics while guarding with vlen bool ret_value = match_rule_supported(opcode); if (ret_value) { int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; if (!vector_size_supported(bt, vlen)) { ret_value = false; } else if (size_in_bits > 256 && UseAVX <= 2) { // Only AVX512 supports 512-bit vectors ret_value = false; } else if (UseAVX == 0 && size_in_bits > 128) { // Only AVX supports 256-bit vectors ret_value = false; } else if (is_subword_type(bt) && size_in_bits == 512 && VM_Version::supports_avx512bw() == false) { // Byte and Short types are not supported in AVX512 if AVX512BW is not true. ret_value = false; } else { switch (opcode) { case Op_AbsV: if (is_integral_type(bt) && UseSSE < 3) { ret_value = false; } else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } else if (bt == T_LONG && UseAVX <= 2) { ret_value = false; } // Implementation limitation break; case Op_AddVB: case Op_SubVB: if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; case Op_MaxV: case Op_MinV: if (UseSSE < 4 && (bt == T_BYTE || bt == T_INT || bt == T_LONG)) ret_value = false; if ((bt == T_FLOAT || bt == T_DOUBLE)) { // Float/Double intrinsics are enabled for AVX family currently. 
if (UseAVX == 0) ret_value = false; // 512 bit Float/Double intrinsics need AVX512DQ if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) ret_value = false; } break; case Op_MulVB: case Op_LShiftVB: case Op_RShiftVB: case Op_URShiftVB: case Op_LShiftVS: case Op_RShiftVS: case Op_URShiftVS: if (size_in_bits <= 128 && UseSSE < 4) { ret_value = false; } else if (size_in_bits > 256 && UseAVX < 2) { ret_value = false; } break; case Op_LShiftVI: case Op_RShiftVI: case Op_URShiftVI: if (op_arity == 2 && UseAVX <= 1) ret_value = false; break; case Op_LShiftVL: case Op_RShiftVL: case Op_URShiftVL: if (op_arity == 2 && UseAVX <= 1) ret_value = false; break; case Op_MulVS: case Op_AddVS: case Op_SubVS: if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; case Op_CallLeafVector: if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) ret_value = false; break; case Op_CMoveVF: if (vlen != 8) ret_value = false; break; case Op_CMoveVD: if (vlen != 4) ret_value = false; break; case Op_AddReductionVI: if (bt == T_INT && UseSSE < 3) { ret_value = false; } else if (is_subword_type(bt) && UseSSE <= 3) { ret_value = false; } break; case Op_AndReductionV: case Op_OrReductionV: case Op_XorReductionV: if (bt == T_BYTE && UseSSE <= 3) { ret_value = false; } break; case Op_VectorMaskCmp: if (UseAVX <= 0) { ret_value = false; } else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } break; case Op_MinReductionV: case Op_MaxReductionV: if ((bt == T_INT || bt == T_LONG || bt == T_BYTE) && UseSSE <= 3) { ret_value = false; } else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } // Float/Double intrinsics enabled for AVX family. if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) ret_value = false; if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) ret_value = false; break; case Op_VectorBlend: if (UseSSE <= 3 && UseAVX == 0) { ret_value = false; } else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } break; case Op_VectorTest: if (UseAVX <= 0) { ret_value = false; } else if (size_in_bits != 128 && size_in_bits != 256) { ret_value = false; } // Implementation limitation break; case Op_VectorLoadMask: if (UseSSE <= 3) { ret_value = false; } else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } // Implementation limitation break; case Op_VectorLoadShuffle: case Op_VectorRearrange: if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation due to how shuffle is loaded else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } // Implementation limitation else if (bt == T_BYTE && size_in_bits >= 256 && !VM_Version::supports_avx512vbmi()) { ret_value = false; } // Implementation limitation else if (bt == T_SHORT && size_in_bits >= 256 && !VM_Version::supports_avx512vlbw()) { ret_value = false; } // Implementation limitation break; case Op_VectorStoreMask: if (UseAVX < 0) { ret_value = false; } // Implementation limitation else if ((size_in_bits >= 256 || bt == T_LONG || bt == T_DOUBLE) && UseAVX < 2) { ret_value = false; } // Implementation limitation else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation else if (size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } // Implementation limitation break; case Op_VectorCastB2X: if (UseAVX <= 0) 
{ ret_value = false; } else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } break;
case Op_VectorCastS2X: if (UseAVX <= 0) { ret_value = false; } else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } else if (is_integral_type(bt) && vlen * type2aelembytes(T_SHORT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; } break;
case Op_VectorCastI2X: if (UseAVX <= 0) { ret_value = false; } else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } else if (is_integral_type(bt) && vlen * type2aelembytes(T_INT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; } break;
case Op_VectorCastL2X: if (UseAVX <= 0) { ret_value = false; } else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } else if (is_integral_type(bt) && vlen * type2aelembytes(T_LONG) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { ret_value = false; } break;
case Op_VectorCastF2X: // Casts from FP to integral types require special fixup logic not easily // implementable with vectors. if (UseAVX <= 0) { ret_value = false; } else if (bt != T_DOUBLE) { ret_value = false; } // Implementation limitation break;
case Op_VectorCastD2X: // Casts from FP to integral types require special fixup logic not easily // implementable with vectors. if (UseAVX <= 0) { ret_value = false; } else if (bt != T_FLOAT) { ret_value = false; } // Implementation limitation break;
case Op_VectorReinterpret: if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } break;
case Op_MulReductionVI: if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } break;
case Op_FmaVD: case Op_FmaVF: if (!UseFMA) { ret_value = false; } break;
case Op_LoadVectorGather: if (UseAVX < 2) { ret_value = false; } else if (size_in_bits == 64) { ret_value = false; } break;
case Op_StoreVectorScatter: if (UseAVX < 3) { ret_value = false; } else if (size_in_bits == 64) { ret_value = false; } break;
default: break; } } }
if (ret_value) { assert(is_java_primitive(bt) && (vlen > 0) && is_power_of_2(vlen) && vector_size_supported(bt, vlen), "must be supported"); }
return ret_value; // Per default match rules are supported. }
const bool Matcher::has_predicated_vectors(void) { bool ret_value = false; if (UseAVX > 2) { ret_value = VM_Version::supports_avx512vl(); } return ret_value; }
const int Matcher::float_pressure(int default_pressure_threshold) { int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
if (UseAVX > 2) { // Increase pressure threshold on machines with AVX3 which have // 2x more XMM registers. float_pressure_threshold = default_pressure_threshold * 2; }
#endif
return float_pressure_threshold; }
// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) { assert(is_java_primitive(bt), "only primitive type vectors"); if (UseSSE < 2) return 0;
// SSE2 supports 128bit vectors for all types. // AVX2 supports 256bit vectors for all types. // EVEX (AVX512) supports 512bit vectors for all types.
int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
// AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) size = (UseAVX > 2) ? 64 : 32;
if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) size = (VM_Version::supports_avx512bw()) ? 64 : 32;
// Use flag to limit vector size.
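// Worked example: UseAVX == 2 gives a base size of (1 << 2) * 8 = 32 bytes
// (256-bit vectors); UseAVX == 3 raises it to 64 bytes for float/double, and
// for byte/short/char only when AVX512BW is present. The MaxVectorSize flag
// then caps whatever the CPU features would otherwise allow: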
size = MIN2(size,(int)MaxVectorSize); // Minimum 2 values in vector (or 4 for bytes). switch (bt) { case T_DOUBLE: case T_LONG: if (size < 16) return 0; break; case T_FLOAT: case T_INT: if (size < 8) return 0; break; case T_BOOLEAN: if (size < 4) return 0; break; case T_CHAR: if (size < 4) return 0; break; case T_BYTE: if (size < 4) return 0; break; case T_SHORT: if (size < 4) return 0; break; default: ShouldNotReachHere(); } return size; } // Limits on vector size (number of elements) loaded into vector. const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { int max_size = max_vector_size(bt); // Min size which can be loaded into vector is 4 bytes. int size = (type2aelembytes(bt) == 1) ? 4 : 2; return MIN2(size,max_size); } // Vector ideal reg corresponding to specified size in bytes const uint Matcher::vector_ideal_reg(int size) { assert(MaxVectorSize >= size, ""); switch(size) { case 4: return Op_VecS; case 8: return Op_VecD; case 16: return Op_VecX; case 32: return Op_VecY; case 64: return Op_VecZ; } ShouldNotReachHere(); return 0; } // Only lowest bits of xmm reg are used for vector shift count. const uint Matcher::vector_shift_count_ideal_reg(int size) { return Op_VecS; } // x86 supports misaligned vectors store/load. const bool Matcher::misaligned_vectors_ok() { return true; } // x86 AES instructions are compatible with SunJCE expanded // keys, hence we do not need to pass the original key to stubs const bool Matcher::pass_original_key_for_aes() { return false; } const bool Matcher::convi2l_type_required = true; // Check for shift by small constant as well static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && shift->in(2)->get_int() <= 3 && // Are there other uses besides address expressions? !matcher->is_visited(shift)) { address_visited.set(shift->_idx); // Flag as address_visited mstack.push(shift->in(2), Matcher::Visit); Node *conv = shift->in(1); #ifdef _LP64 // Allow Matcher to match the rule which bypass // ConvI2L operation for an array index on LP64 // if the index value is positive. if (conv->Opcode() == Op_ConvI2L && conv->as_Type()->type()->is_long()->_lo >= 0 && // Are there other uses besides address expressions? !matcher->is_visited(conv)) { address_visited.set(conv->_idx); // Flag as address_visited mstack.push(conv->in(1), Matcher::Pre_Visit); } else #endif mstack.push(conv, Matcher::Pre_Visit); return true; } return false; } // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { Node *off = m->in(AddPNode::Offset); if (off->is_Con()) { address_visited.test_set(m->_idx); // Flag as address_visited Node *adr = m->in(AddPNode::Address); // Intel can handle 2 adds in addressing mode // AtomicAdd is not an addressing expression. // Cheap to find it by looking for screwy base. if (adr->is_AddP() && !adr->in(AddPNode::Base)->is_top() && LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 // Are there other uses besides address expressions? 
!is_visited(adr)) { address_visited.set(adr->_idx); // Flag as address_visited Node *shift = adr->in(AddPNode::Offset); if (!clone_shift(shift, this, mstack, address_visited)) { mstack.push(shift, Pre_Visit); } mstack.push(adr->in(AddPNode::Address), Pre_Visit); mstack.push(adr->in(AddPNode::Base), Pre_Visit); } else { mstack.push(adr, Pre_Visit); }
// Clone X+offset as it also folds into most addressing expressions
mstack.push(off, Visit); mstack.push(m->in(AddPNode::Base), Pre_Visit); return true; } else if (clone_shift(off, this, mstack, address_visited)) { address_visited.test_set(m->_idx); // Flag as address_visited mstack.push(m->in(AddPNode::Address), Pre_Visit); mstack.push(m->in(AddPNode::Base), Pre_Visit); return true; } return false; }
void Compile::reshape_address(AddPNode* addp) { }
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st) { // In 64-bit VM size calculation is very complex. Emitting instructions // into scratch buffer is used to get size in 64-bit VM. LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
assert(ireg == Op_VecS || // 32bit vector (src_lo & 1) == 0 && (src_lo + 1) == src_hi && (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, "no non-adjacent vector moves" );
if (cbuf) { MacroAssembler _masm(cbuf); int offset = __ offset(); switch (ireg) { case Op_VecS: // copy whole register case Op_VecD: case Op_VecX:
#ifndef _LP64
__ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); } else { __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); }
#endif
break; case Op_VecY:
#ifndef _LP64
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); } else { __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); }
#endif
break; case Op_VecZ: __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); break; default: ShouldNotReachHere(); }
int size = __ offset() - offset;
#ifdef ASSERT
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
assert(!do_size || size == 4, "incorrect size calculation");
#endif
return size;
#ifndef PRODUCT
} else if (!do_size) { switch (ireg) { case Op_VecS: case Op_VecD: case Op_VecX: st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); break; case Op_VecY: case Op_VecZ: st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); break; default: ShouldNotReachHere(); }
#endif
}
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
return (UseAVX > 2) ? 6 : 4; }
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, int stack_offset, int reg, uint ireg, outputStream* st) { // In 64-bit VM size calculation is very complex. Emitting instructions // into scratch buffer is used to get size in 64-bit VM.
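// Like vec_mov_helper above, this helper runs in one of three modes: with a
// live CodeBuffer it emits the spill load/store and returns the emitted size;
// without one it either prints the instruction for debug listings (!do_size)
// or, in the 32-bit VM only, falls through to the size estimate at the end.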
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) if (cbuf) { MacroAssembler _masm(cbuf); int offset = __ offset(); if (is_load) { switch (ireg) { case Op_VecS: __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); break; case Op_VecD: __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); break; case Op_VecX: #ifndef _LP64 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); #else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); } else { __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); } #endif break; case Op_VecY: #ifndef _LP64 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); #else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); } else { __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); } #endif break; case Op_VecZ: __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); break; default: ShouldNotReachHere(); } } else { // store switch (ireg) { case Op_VecS: __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); break; case Op_VecD: __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); break; case Op_VecX: #ifndef _LP64 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); #else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); } else { __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); } #endif break; case Op_VecY: #ifndef _LP64 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); #else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); } else { __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); } #endif break; case Op_VecZ: __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); break; default: ShouldNotReachHere(); } } int size = __ offset() - offset; #ifdef ASSERT int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
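// Worked example for the check below: a spill at stack_offset 0x10 needs a
// 1 byte disp8 on top of the 5 byte base instruction, so size == 5 + 1,
// while a zero offset drops the displacement byte entirely (size == 5).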
assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
return size;
#ifndef PRODUCT
} else if (!do_size) { if (is_load) { switch (ireg) { case Op_VecS: st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); break; case Op_VecD: st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); break; case Op_VecX: st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); break; case Op_VecY: case Op_VecZ: st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); break; default: ShouldNotReachHere(); } } else { // store switch (ireg) { case Op_VecS: st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; case Op_VecD: st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; case Op_VecX: st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; case Op_VecY: case Op_VecZ: st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); break; default: ShouldNotReachHere(); } }
#endif
}
bool is_single_byte = false; int vec_len = 0;
if ((UseAVX > 2) && (stack_offset != 0)) { int tuple_type = Assembler::EVEX_FVM; int input_size = Assembler::EVEX_32bit; switch (ireg) { case Op_VecS: tuple_type = Assembler::EVEX_T1S; break; case Op_VecD: tuple_type = Assembler::EVEX_T1S; input_size = Assembler::EVEX_64bit; break; case Op_VecX: break; case Op_VecY: vec_len = 1; break; case Op_VecZ: vec_len = 2; break; }
is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); }
int offset_size = 0; int size = 5;
if (UseAVX > 2) { if (VM_Version::supports_avx512novl() && (vec_len == 2)) { offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); size += 2; // Need an additional two bytes for EVEX encoding } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); } else { offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); size += 2; // Need an additional two bytes for EVEX encoding } } else { offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); }
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
return size+offset_size; }
static inline jint replicate4_imm(int con, int width) { // Load a constant of "width" (in bytes) and replicate it to fill 32bit. assert(width == 1 || width == 2, "only byte or short types here"); int bit_width = width * 8; jint val = con; val &= (1 << bit_width) - 1; // mask off sign bits while(bit_width < 32) { val |= (val << bit_width); bit_width <<= 1; } return val; }
static inline jlong replicate8_imm(int con, int width) { // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
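// For example, doubling the replicated pattern at each step:
//   replicate8_imm(0x1234, 2) == 0x1234123412341234
//   replicate8_imm(0xAB, 1)   == 0xABABABABABABABAB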
assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); int bit_width = width * 8; jlong val = con; val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits while(bit_width < 64) { val |= (val << bit_width); bit_width <<= 1; } return val; } #ifndef PRODUCT void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { st->print("nop \t# %d bytes pad for loops and calls", _count); } #endif void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { MacroAssembler _masm(&cbuf); __ nop(_count); } uint MachNopNode::size(PhaseRegAlloc*) const { return _count; } #ifndef PRODUCT void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { st->print("# breakpoint"); } #endif void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { MacroAssembler _masm(&cbuf); __ int3(); } uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { return MachNode::size(ra_); } %} encode %{ enc_class call_epilog %{ if (VerifyStackAtCalls) { // Check that stack depth is unchanged: find majik cookie on stack int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); MacroAssembler _masm(&cbuf); Label L; __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); __ jccb(Assembler::equal, L); // Die if stack mismatch __ int3(); __ bind(L); } %} %} //----------OPERANDS----------------------------------------------------------- // Operand definitions must precede instruction definitions for correct parsing // in the ADLC because operands constitute user defined types which are used in // instruction definitions. operand immU1() %{ predicate(n->get_int() >= 0 && n->get_int() < nth_bit(1)); match(ConI); op_cost(0); format %{ %} interface(CONST_INTER); %} operand immU2() %{ predicate(n->get_int() >= 0 && n->get_int() < nth_bit(2)); match(ConI); op_cost(0); format %{ %} interface(CONST_INTER); %} operand immU3() %{ predicate(n->get_int() >= 0 && n->get_int() < nth_bit(3)); match(ConI); op_cost(0); format %{ %} interface(CONST_INTER); %} operand immU4() %{ predicate(n->get_int() >= 0 && n->get_int() < nth_bit(4)); match(ConI); op_cost(0); format %{ %} interface(CONST_INTER); %} operand immU5() %{ predicate(n->get_int() >= 0 && n->get_int() < nth_bit(5)); match(ConI); op_cost(0); format %{ %} interface(CONST_INTER); %} operand immU6() %{ predicate(n->get_int() >= 0 && n->get_int() < nth_bit(6)); match(ConI); op_cost(0); format %{ %} interface(CONST_INTER); %} // Comparison Code for FP conditional move operand cmpOp_vcmppd() %{ match(Bool); predicate(n->as_Bool()->_test._test != BoolTest::overflow && n->as_Bool()->_test._test != BoolTest::no_overflow); format %{ "" %} interface(COND_INTER) %{ equal (0x0, "eq"); less (0x1, "lt"); less_equal (0x2, "le"); not_equal (0xC, "ne"); greater_equal(0xD, "ge"); greater (0xE, "gt"); //TODO cannot compile (adlc breaks) without two next lines with error: // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ // equal' for overflow. 
overflow (0x20, "o"); // not really supported by the instruction no_overflow (0x21, "no"); // not really supported by the instruction %} %} // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) // ============================================================================ instruct ShouldNotReachHere() %{ match(Halt); format %{ "ud2\t# ShouldNotReachHere" %} ins_encode %{ __ ud2(); %} ins_pipe(pipe_slow); %} // =================================EVEX special=============================== instruct setMask(rRegI dst, rRegI src) %{ predicate(Matcher::has_predicated_vectors()); match(Set dst (SetVectMaskI src)); effect(TEMP dst); format %{ "setvectmask $dst, $src" %} ins_encode %{ __ setvectmask($dst$$Register, $src$$Register); %} ins_pipe(pipe_slow); %} // ============================================================================ instruct addF_reg(regF dst, regF src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (AddF dst src)); format %{ "addss $dst, $src" %} ins_cost(150); ins_encode %{ __ addss($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct addF_mem(regF dst, memory src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (AddF dst (LoadF src))); format %{ "addss $dst, $src" %} ins_cost(150); ins_encode %{ __ addss($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct addF_imm(regF dst, immF con) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (AddF dst con)); format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ addss($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ predicate(UseAVX > 0); match(Set dst (AddF src1 src2)); format %{ "vaddss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (AddF src1 (LoadF src2))); format %{ "vaddss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} instruct addF_reg_imm(regF dst, regF src, immF con) %{ predicate(UseAVX > 0); match(Set dst (AddF src con)); format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct addD_reg(regD dst, regD src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (AddD dst src)); format %{ "addsd $dst, $src" %} ins_cost(150); ins_encode %{ __ addsd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct addD_mem(regD dst, memory src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (AddD dst (LoadD src))); format %{ "addsd $dst, $src" %} ins_cost(150); ins_encode %{ __ addsd($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct addD_imm(regD dst, immD con) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (AddD dst con)); format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ addsd($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ predicate(UseAVX > 0); match(Set dst (AddD src1 src2)); format %{ "vaddsd $dst, $src1, $src2" %} ins_cost(150); 
ins_encode %{ __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (AddD src1 (LoadD src2))); format %{ "vaddsd $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} instruct addD_reg_imm(regD dst, regD src, immD con) %{ predicate(UseAVX > 0); match(Set dst (AddD src con)); format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct subF_reg(regF dst, regF src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (SubF dst src)); format %{ "subss $dst, $src" %} ins_cost(150); ins_encode %{ __ subss($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct subF_mem(regF dst, memory src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (SubF dst (LoadF src))); format %{ "subss $dst, $src" %} ins_cost(150); ins_encode %{ __ subss($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct subF_imm(regF dst, immF con) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (SubF dst con)); format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ subss($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ predicate(UseAVX > 0); match(Set dst (SubF src1 src2)); format %{ "vsubss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (SubF src1 (LoadF src2))); format %{ "vsubss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} instruct subF_reg_imm(regF dst, regF src, immF con) %{ predicate(UseAVX > 0); match(Set dst (SubF src con)); format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct subD_reg(regD dst, regD src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (SubD dst src)); format %{ "subsd $dst, $src" %} ins_cost(150); ins_encode %{ __ subsd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct subD_mem(regD dst, memory src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (SubD dst (LoadD src))); format %{ "subsd $dst, $src" %} ins_cost(150); ins_encode %{ __ subsd($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct subD_imm(regD dst, immD con) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (SubD dst con)); format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ subsd($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ predicate(UseAVX > 0); match(Set dst (SubD src1 src2)); format %{ "vsubsd $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct 
subD_reg_mem(regD dst, regD src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (SubD src1 (LoadD src2))); format %{ "vsubsd $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} instruct subD_reg_imm(regD dst, regD src, immD con) %{ predicate(UseAVX > 0); match(Set dst (SubD src con)); format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct mulF_reg(regF dst, regF src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (MulF dst src)); format %{ "mulss $dst, $src" %} ins_cost(150); ins_encode %{ __ mulss($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct mulF_mem(regF dst, memory src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (MulF dst (LoadF src))); format %{ "mulss $dst, $src" %} ins_cost(150); ins_encode %{ __ mulss($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct mulF_imm(regF dst, immF con) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (MulF dst con)); format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ mulss($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ predicate(UseAVX > 0); match(Set dst (MulF src1 src2)); format %{ "vmulss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (MulF src1 (LoadF src2))); format %{ "vmulss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} instruct mulF_reg_imm(regF dst, regF src, immF con) %{ predicate(UseAVX > 0); match(Set dst (MulF src con)); format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct mulD_reg(regD dst, regD src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (MulD dst src)); format %{ "mulsd $dst, $src" %} ins_cost(150); ins_encode %{ __ mulsd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct mulD_mem(regD dst, memory src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (MulD dst (LoadD src))); format %{ "mulsd $dst, $src" %} ins_cost(150); ins_encode %{ __ mulsd($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct mulD_imm(regD dst, immD con) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (MulD dst con)); format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ mulsd($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ predicate(UseAVX > 0); match(Set dst (MulD src1 src2)); format %{ "vmulsd $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (MulD src1 (LoadD src2))); format %{ "vmulsd 
$dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} instruct mulD_reg_imm(regD dst, regD src, immD con) %{ predicate(UseAVX > 0); match(Set dst (MulD src con)); format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct divF_reg(regF dst, regF src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (DivF dst src)); format %{ "divss $dst, $src" %} ins_cost(150); ins_encode %{ __ divss($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct divF_mem(regF dst, memory src) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (DivF dst (LoadF src))); format %{ "divss $dst, $src" %} ins_cost(150); ins_encode %{ __ divss($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct divF_imm(regF dst, immF con) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (DivF dst con)); format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ divss($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ predicate(UseAVX > 0); match(Set dst (DivF src1 src2)); format %{ "vdivss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (DivF src1 (LoadF src2))); format %{ "vdivss $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} instruct divF_reg_imm(regF dst, regF src, immF con) %{ predicate(UseAVX > 0); match(Set dst (DivF src con)); format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct divD_reg(regD dst, regD src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (DivD dst src)); format %{ "divsd $dst, $src" %} ins_cost(150); ins_encode %{ __ divsd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct divD_mem(regD dst, memory src) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (DivD dst (LoadD src))); format %{ "divsd $dst, $src" %} ins_cost(150); ins_encode %{ __ divsd($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct divD_imm(regD dst, immD con) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (DivD dst con)); format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ divsd($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ predicate(UseAVX > 0); match(Set dst (DivD src1 src2)); format %{ "vdivsd $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ predicate(UseAVX > 0); match(Set dst (DivD src1 (LoadD src2))); format %{ "vdivsd $dst, $src1, $src2" %} ins_cost(150); ins_encode %{ __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); %} 
ins_pipe(pipe_slow); %}
instruct divD_reg_imm(regD dst, regD src, immD con) %{ predicate(UseAVX > 0); match(Set dst (DivD src con)); format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %}
instruct absF_reg(regF dst) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (AbsF dst)); ins_cost(150); format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} ins_encode %{ __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); %} ins_pipe(pipe_slow); %}
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ predicate(UseAVX > 0); match(Set dst (AbsF src)); ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ins_encode %{ int vector_len = 0; __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(float_signmask()), vector_len); %} ins_pipe(pipe_slow); %}
instruct absD_reg(regD dst) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (AbsD dst)); ins_cost(150); format %{ "andpd $dst, [0x7fffffffffffffff]\t" "# abs double by sign masking" %} ins_encode %{ __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); %} ins_pipe(pipe_slow); %}
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ predicate(UseAVX > 0); match(Set dst (AbsD src)); ins_cost(150); format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" "# abs double by sign masking" %} ins_encode %{ int vector_len = 0; __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(double_signmask()), vector_len); %} ins_pipe(pipe_slow); %}
instruct negI_rReg_2(rRegI dst, rFlagsReg cr) %{ match(Set dst (NegI dst)); effect(KILL cr); format %{ "negl $dst\t# int" %} ins_encode %{ __ negl($dst$$Register); %} ins_pipe(ialu_reg); %}
instruct negL_rReg_2(rRegL dst, rFlagsReg cr) %{ match(Set dst (NegL dst)); effect(KILL cr); format %{ "negq $dst\t# long" %} ins_encode %{ __ negq($dst$$Register); %} ins_pipe(ialu_reg); %}
instruct negF_reg(regF dst) %{ predicate((UseSSE>=1) && (UseAVX == 0)); match(Set dst (NegF dst)); ins_cost(150); format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} ins_encode %{ __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); %} ins_pipe(pipe_slow); %}
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ predicate(UseAVX > 0); match(Set dst (NegF src)); ins_cost(150); format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} ins_encode %{ __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(float_signflip())); %} ins_pipe(pipe_slow); %}
instruct negD_reg(regD dst) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (NegD dst)); ins_cost(150); format %{ "xorpd $dst, [0x8000000000000000]\t" "# neg double by sign flipping" %} ins_encode %{ __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); %} ins_pipe(pipe_slow); %}
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ predicate(UseAVX > 0); match(Set dst (NegD src)); ins_cost(150); format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" "# neg double by sign flipping" %} ins_encode %{ __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(double_signflip())); %} ins_pipe(pipe_slow); %}
instruct sqrtF_reg(regF dst, regF src) %{ predicate(UseSSE>=1); match(Set dst (SqrtF src)); format %{ "sqrtss $dst, $src" %} ins_cost(150); ins_encode %{ __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %}
instruct sqrtF_mem(regF dst, memory src) %{
predicate(UseSSE>=1); match(Set dst (SqrtF (LoadF src))); format %{ "sqrtss $dst, $src" %} ins_cost(150); ins_encode %{ __ sqrtss($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct sqrtF_imm(regF dst, immF con) %{ predicate(UseSSE>=1); match(Set dst (SqrtF con)); format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} ins_cost(150); ins_encode %{ __ sqrtss($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct sqrtD_reg(regD dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (SqrtD src)); format %{ "sqrtsd $dst, $src" %} ins_cost(150); ins_encode %{ __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} instruct sqrtD_mem(regD dst, memory src) %{ predicate(UseSSE>=2); match(Set dst (SqrtD (LoadD src))); format %{ "sqrtsd $dst, $src" %} ins_cost(150); ins_encode %{ __ sqrtsd($dst$$XMMRegister, $src$$Address); %} ins_pipe(pipe_slow); %} instruct sqrtD_imm(regD dst, immD con) %{ predicate(UseSSE>=2); match(Set dst (SqrtD con)); format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} ins_cost(150); ins_encode %{ __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} instruct onspinwait() %{ match(OnSpinWait); ins_cost(200); format %{ $$template $$emit$$"pause\t! membar_onspinwait" %} ins_encode %{ __ pause(); %} ins_pipe(pipe_slow); %} // a * b + c instruct fmaD_reg(regD a, regD b, regD c) %{ predicate(UseFMA); match(Set c (FmaD c (Binary a b))); format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} ins_cost(150); ins_encode %{ __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); %} ins_pipe( pipe_slow ); %} // a * b + c instruct fmaF_reg(regF a, regF b, regF c) %{ predicate(UseFMA); match(Set c (FmaF c (Binary a b))); format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} ins_cost(150); ins_encode %{ __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); %} ins_pipe( pipe_slow ); %} // ====================VECTOR INSTRUCTIONS===================================== instruct reinterpretS(vecS dst) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); match(Set dst (VectorReinterpret dst)); ins_cost(125); format %{ " # reinterpret $dst" %} ins_encode %{ // empty %} ins_pipe( pipe_slow ); %} instruct reinterpretS2D(vecD dst, vecS src, rRegL scratch) %{ predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst, TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); __ pand($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reinterpretS2D_avx(vecD dst, vecS src, rRegL scratch) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst, TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct reinterpretS2X(vecX dst, vecS src, rRegL scratch) %{ predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 
&& n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst, TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); __ pand($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reinterpretS2X_avx(vecX dst, vecS src, rRegL scratch) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct reinterpretS2Y(vecY dst, vecS src, rRegL scratch) %{ predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 1; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct reinterpretS2Z(vecZ dst, vecS src, rRegL scratch) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 2; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct reinterpretD2S(vecS dst, vecD src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. 
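// A narrowing reinterpret only has to preserve the low bits of the source, so a plain
// 128-bit copy suffices; the register allocator may even assign $dst and $src the same
// XMM register, in which case the copy is skipped.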
if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretD(vecD dst) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); match(Set dst (VectorReinterpret dst)); ins_cost(125); format %{ " # reinterpret $dst" %} ins_encode %{ // empty %} ins_pipe( pipe_slow ); %} instruct reinterpretD2X(vecX dst, vecD src, rRegL scratch) %{ predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst, TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); __ pand($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reinterpretD2X_avx(vecX dst, vecD src, rRegL scratch) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst, TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct reinterpretD2Y(vecY dst, vecD src, rRegL scratch) %{ predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 1; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct reinterpretD2Z(vecZ dst, vecD src, rRegL scratch) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP scratch); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 2; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct reinterpretX2S(vecS dst, vecX src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretX2D(vecD dst, vecX src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. 
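// Unlike the widening S2D/S2X/D2X forms above, which must mask off the newly exposed
// lanes with vector_32_bit_mask/vector_64_bit_mask from the constant table, narrowing
// needs no masking at all, only this optional register move.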
if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretX(vecX dst) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); match(Set dst (VectorReinterpret dst)); ins_cost(125); format %{ " # reinterpret $dst" %} ins_encode %{ // empty %} ins_pipe( pipe_slow ); %} instruct reinterpretX2Y(vecY dst, vecX src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // only the low 128 bits need to be moved %} ins_pipe( pipe_slow ); %} instruct reinterpretX2Z(vecZ dst, vecX src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // only the low 128 bits need to be moved %} ins_pipe( pipe_slow ); %} instruct reinterpretY2S(vecS dst, vecY src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretY2D(vecD dst, vecY src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretY2X(vecX dst, vecY src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed.
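// Only the low 128 bits of the 256-bit source are observable through the vecX result,
// so a 128-bit movdqu is sufficient here as well.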
if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretY(vecY dst) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); match(Set dst (VectorReinterpret dst)); ins_cost(125); format %{ " # reinterpret $dst" %} ins_encode %{ // empty %} ins_pipe( pipe_slow ); %} instruct reinterpretY2Z(vecZ dst, vecY src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst); format %{ " # reinterpret $dst,$src" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reinterpretZ2S(vecS dst, vecZ src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretZ2D(vecD dst, vecZ src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretZ2X(vecX dst, vecZ src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretZ2Y(vecY dst, vecZ src) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ " # reinterpret $dst,$src" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct reinterpretZ(vecZ dst) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); match(Set dst (VectorReinterpret dst)); ins_cost(125); format %{ " # reinterpret $dst" %} ins_encode %{ // empty %} ins_pipe( pipe_slow ); %} // ========== // Load vectors (1 byte long) instruct loadV1(vecS dst, memory mem, rRegI tmp) %{ predicate(n->as_LoadVector()->memory_size() == 1); match(Set dst (LoadVector mem)); ins_cost(125); effect(TEMP tmp); format %{ "movzbl $tmp,$mem\n\t" "movd $dst,$tmp\t! 
load vector (1 byte)" %} ins_encode %{ __ movzbl($tmp$$Register, $mem$$Address); __ movdl($dst$$XMMRegister, $tmp$$Register); %} ins_pipe( pipe_slow ); %} // Load vectors (2 bytes long) instruct loadV2(vecS dst, memory mem, rRegI tmp) %{ predicate(n->as_LoadVector()->memory_size() == 2); match(Set dst (LoadVector mem)); ins_cost(125); effect(TEMP tmp); format %{ "movzwl $tmp,$mem\n\t" "movd $dst,$tmp\t! load vector (2 bytes)" %} ins_encode %{ __ movzwl($tmp$$Register, $mem$$Address); __ movdl($dst$$XMMRegister, $tmp$$Register); %} ins_pipe( pipe_slow ); %} // Load vectors (4 bytes long) instruct loadV4(vecS dst, memory mem) %{ predicate(n->as_LoadVector()->memory_size() == 4); match(Set dst (LoadVector mem)); ins_cost(125); format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} ins_encode %{ __ movdl($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow ); %} // Move vectors (4 bytes long) instruct MoveVecS2Leg(legVecS dst, vecS src) %{ match(Set dst src); format %{ "movss $dst,$src\t! move vector (4 bytes)" %} ins_encode %{ __ movflt($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} // Move vectors (4 bytes long) instruct MoveLeg2VecS(vecS dst, legVecS src) %{ match(Set dst src); format %{ "movss $dst,$src\t! move vector (4 bytes)" %} ins_encode %{ __ movflt($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} // Load vectors (8 bytes long) instruct loadV8(vecD dst, memory mem) %{ predicate(n->as_LoadVector()->memory_size() == 8); match(Set dst (LoadVector mem)); ins_cost(125); format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} ins_encode %{ __ movq($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow ); %} // Move vectors (8 bytes long) instruct MoveVecD2Leg(legVecD dst, vecD src) %{ match(Set dst src); format %{ "movsd $dst,$src\t! move vector (8 bytes)" %} ins_encode %{ __ movdbl($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} // Move vectors (8 bytes long) instruct MoveLeg2VecD(vecD dst, legVecD src) %{ match(Set dst src); format %{ "movsd $dst,$src\t! move vector (8 bytes)" %} ins_encode %{ __ movdbl($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} // Load vectors (16 bytes long) instruct loadV16(vecX dst, memory mem) %{ predicate(n->as_LoadVector()->memory_size() == 16); match(Set dst (LoadVector mem)); ins_cost(125); format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow ); %} // Move vectors (16 bytes long) instruct MoveVecX2Leg(legVecX dst, vecX src) %{ match(Set dst src); format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %} ins_encode %{ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { int vector_len = 2; __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); } else { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( fpu_reg_reg ); %} // Move vectors (16 bytes long) instruct MoveLeg2VecX(vecX dst, legVecX src) %{ match(Set dst src); format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %} ins_encode %{ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { int vector_len = 2; __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); } else { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( fpu_reg_reg ); %} // Load vectors (32 bytes long) instruct loadV32(vecY dst, memory mem) %{ predicate(n->as_LoadVector()->memory_size() == 32); match(Set dst (LoadVector mem)); ins_cost(125); format %{ "vmovdqu $dst,$mem\t!
load vector (32 bytes)" %} ins_encode %{ __ vmovdqu($dst$$XMMRegister, $mem$$Address); %} ins_pipe( pipe_slow ); %} // Move vectors (32 bytes long) instruct MoveVecY2Leg(legVecY dst, vecY src) %{ match(Set dst src); format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %} ins_encode %{ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { int vector_len = 2; __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); } else { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( fpu_reg_reg ); %} // Move vectors (32 bytes long) instruct MoveLeg2VecY(vecY dst, legVecY src) %{ match(Set dst src); format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %} ins_encode %{ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { int vector_len = 2; __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); } else { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( fpu_reg_reg ); %} // Load vectors (64 bytes long) instruct loadV64_dword(vecZ dst, memory mem) %{ predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); match(Set dst (LoadVector mem)); ins_cost(125); format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} ins_encode %{ int vector_len = 2; __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Load vectors (64 bytes long) instruct loadV64_qword(vecZ dst, memory mem) %{ predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); match(Set dst (LoadVector mem)); ins_cost(125); format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} ins_encode %{ int vector_len = 2; __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{ match(Set dst src); format %{ "vmovdquq $dst k0,$src\t! move vector (64 bytes)" %} ins_encode %{ int vector_len = 2; __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{ match(Set dst src); format %{ "vmovdquq $dst k0,$src\t! move vector (64 bytes)" %} ins_encode %{ int vector_len = 2; __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Store vectors instruct storeV1(memory mem, vecS src, rRegI tmp) %{ predicate(n->as_StoreVector()->memory_size() == 1); match(Set mem (StoreVector mem src)); ins_cost(145); effect(TEMP tmp); format %{ "movd $tmp,$src\n\t" "movb $mem,$tmp\t! store vector (1 byte)" %} ins_encode %{ __ movdl($tmp$$Register, $src$$XMMRegister); __ movb($mem$$Address, $tmp$$Register); %} ins_pipe( pipe_slow ); %} instruct storeV2(memory mem, vecS src, rRegI tmp) %{ predicate(n->as_StoreVector()->memory_size() == 2); match(Set mem (StoreVector mem src)); ins_cost(145); effect(TEMP tmp); format %{ "movd $tmp,$src\n\t" "movw $mem,$tmp\t! store vector (2 bytes)" %} ins_encode %{ __ movdl($tmp$$Register, $src$$XMMRegister); __ movw($mem$$Address, $tmp$$Register); %} ins_pipe( pipe_slow ); %} instruct storeV4(memory mem, vecS src) %{ predicate(n->as_StoreVector()->memory_size() == 4); match(Set mem (StoreVector mem src)); ins_cost(145); format %{ "movd $mem,$src\t! store vector (4 bytes)" %} ins_encode %{ __ movdl($mem$$Address, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storeV8(memory mem, vecD src) %{ predicate(n->as_StoreVector()->memory_size() == 8); match(Set mem (StoreVector mem src)); ins_cost(145); format %{ "movq $mem,$src\t!
store vector (8 bytes)" %} ins_encode %{ __ movq($mem$$Address, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storeV16(memory mem, vecX src) %{ predicate(n->as_StoreVector()->memory_size() == 16); match(Set mem (StoreVector mem src)); ins_cost(145); format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} ins_encode %{ __ movdqu($mem$$Address, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storeV32(memory mem, vecY src) %{ predicate(n->as_StoreVector()->memory_size() == 32); match(Set mem (StoreVector mem src)); ins_cost(145); format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} ins_encode %{ __ vmovdqu($mem$$Address, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storeV64_dword(memory mem, vecZ src) %{ predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); match(Set mem (StoreVector mem src)); ins_cost(145); format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} ins_encode %{ int vector_len = 2; __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct storeV64_qword(memory mem, vecZ src) %{ predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); match(Set mem (StoreVector mem src)); ins_cost(145); format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} ins_encode %{ int vector_len = 2; __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // ====================LEGACY REPLICATE======================================= instruct Repl4B_mem(vecS dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\t! replicate4B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl8B_mem(vecD dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\t! replicate8B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl16B(vecX dst, rRegI src) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\t! replicate16B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16B_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\t! 
replicate16B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl32B(vecY dst, rRegI src) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate32B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl32B_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate32B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl64B(legVecZ dst, rRegI src) %{ predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl64B_mem(legVecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "punpcklbw $dst,$mem\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} ins_encode %{ __ punpcklbw($dst$$XMMRegister, $mem$$Address); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl16B_imm(vecX dst, immI con) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\t! replicate16B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl32B_imm(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! 
replicate32B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl64B_imm(legVecZ dst, immI con) %{ predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4S(vecD dst, rRegI src) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\t! replicate4S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl4S_mem(vecD dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS (LoadS mem))); format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} ins_encode %{ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl8S(vecX dst, rRegI src) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\t! replicate8S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8S_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS (LoadS mem))); format %{ "pshuflw $dst,$mem,0x00\n\t" "punpcklqdq $dst,$dst\t! replicate8S" %} ins_encode %{ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8S_imm(vecX dst, immI con) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\t! replicate8S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16S(vecY dst, rRegI src) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t!
replicate16S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16S_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS (LoadS mem))); format %{ "pshuflw $dst,$mem,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate16S" %} ins_encode %{ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16S_imm(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate16S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl32S(legVecZ dst, rRegI src) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl32S_mem(legVecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS (LoadS mem))); format %{ "pshuflw $dst,$mem,0x00\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} ins_encode %{ __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl32S_imm(legVecZ dst, immI con) %{ predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4I(vecX dst, rRegI src) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\t! 
replicate4I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl4I_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl8I(vecY dst, rRegI src) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\n\t" "vinserti128_high $dst,$dst\t! replicate8I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8I_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "pshufd $dst,$mem,0x00\n\t" "vinserti128_high $dst,$dst\t! replicate8I" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16I(legVecZ dst, rRegI src) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl16I_mem(legVecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "pshufd $dst,$mem,0x00\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4I_imm(vecX dst, immI con) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" "punpcklqdq $dst,$dst" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8I_imm(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! 
replicate8I($con)\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16I_imm(legVecZ dst, immI con) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} // Long could be loaded into xmm register directly from memory. instruct Repl2L_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateL (LoadL mem))); format %{ "movq $dst,$mem\n\t" "punpcklqdq $dst,$dst\t! replicate2L" %} ins_encode %{ __ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} // Replicate long (8 byte) scalar to be vector #ifdef _LP64 instruct Repl4L(vecY dst, rRegL src) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); format %{ "movdq $dst,$src\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate4L" %} ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8L(legVecZ dst, rRegL src) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); format %{ "movdq $dst,$src\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} #else // _LP64 instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate4L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate8L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} #endif // _LP64 instruct Repl4L_imm(vecY dst, immL con) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate4L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8L_imm(legVecZ dst, immL con) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl4L_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL (LoadL mem))); format %{ "movq $dst,$mem\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t! replicate4L" %} ins_encode %{ __ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8L_mem(legVecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateL (LoadL mem))); format %{ "movq $dst,$mem\n\t" "punpcklqdq $dst,$dst\n\t" "vinserti128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} ins_encode %{ __ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl2F_mem(vecD dst, memory mem) %{ predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl4F_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl8F(vecY dst, vlRegF src) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF src)); format %{ "pshufd $dst,$src,0x00\n\t" "vinsertf128_high $dst,$dst\t! 
replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8F_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "pshufd $dst,$mem,0x00\n\t" "vinsertf128_high $dst,$dst\t! replicate8F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl16F(legVecZ dst, vlRegF src) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF src)); format %{ "pshufd $dst,$src,0x00\n\t" "vinsertf128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl16F_mem(legVecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "pshufd $dst,$mem,0x00\n\t" "vinsertf128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl2F_zero(vecD dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 2 && UseAVX < 3); match(Set dst (ReplicateF zero)); format %{ "xorps $dst,$dst\t! replicate2F zero" %} ins_encode %{ __ xorps($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4F_zero(vecX dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 4 && UseAVX < 3); match(Set dst (ReplicateF zero)); format %{ "xorps $dst,$dst\t! replicate4F zero" %} ins_encode %{ __ xorps($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8F_zero(vecY dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 8 && UseAVX < 3); match(Set dst (ReplicateF zero)); format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} ins_encode %{ int vector_len = 1; __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl2D_mem(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD (LoadD mem))); format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); %} ins_pipe( pipe_slow ); %} instruct Repl4D(vecY dst, vlRegD src) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD src)); format %{ "pshufd $dst,$src,0x44\n\t" "vinsertf128_high $dst,$dst\t! replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl4D_mem(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD (LoadD mem))); format %{ "pshufd $dst,$mem,0x44\n\t" "vinsertf128_high $dst,$dst\t! 
replicate4D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct Repl8D(legVecZ dst, vlRegD src) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD src)); format %{ "pshufd $dst,$src,0x44\n\t" "vinsertf128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} instruct Repl8D_mem(legVecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); match(Set dst (ReplicateD (LoadD mem))); format %{ "pshufd $dst,$mem,0x44\n\t" "vinsertf128_high $dst,$dst\t" "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); %} ins_pipe( pipe_slow ); %} // Replicate double (8 byte) scalar zero to be vector instruct Repl2D_zero(vecX dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 2 && UseAVX < 3); match(Set dst (ReplicateD zero)); format %{ "xorpd $dst,$dst\t! replicate2D zero" %} ins_encode %{ __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4D_zero(vecY dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 4 && UseAVX < 3); match(Set dst (ReplicateD zero)); format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} ins_encode %{ int vector_len = 1; __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // ====================GENERIC REPLICATE========================================== // Replicate byte scalar to be vector instruct Repl4B(vecS dst, rRegI src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\t! replicate4B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} instruct Repl8B(vecD dst, rRegI src) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateB src)); format %{ "movd $dst,$src\n\t" "punpcklbw $dst,$dst\n\t" "pshuflw $dst,$dst,0x00\t! replicate8B" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} // Replicate byte scalar immediate to be vector by loading from const table. instruct Repl4B_imm(vecS dst, immI con) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateB con)); format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} ins_encode %{ __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); %} ins_pipe( pipe_slow ); %} instruct Repl8B_imm(vecD dst, immI con) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\t! 
replicate8B($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); %} ins_pipe( pipe_slow ); %} // Replicate byte scalar zero to be vector instruct Repl4B_zero(vecS dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateB zero)); format %{ "pxor $dst,$dst\t! replicate4B zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8B_zero(vecD dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateB zero)); format %{ "pxor $dst,$dst\t! replicate8B zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl16B_zero(vecX dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateB zero)); format %{ "pxor $dst,$dst\t! replicate16B zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl32B_zero(vecY dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 32); match(Set dst (ReplicateB zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate char/short (2 byte) scalar to be vector instruct Repl2S(vecS dst, rRegI src) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateS src)); format %{ "movd $dst,$src\n\t" "pshuflw $dst,$dst,0x00\t! replicate2S" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( fpu_reg_reg ); %} // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. instruct Repl2S_imm(vecS dst, immI con) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateS con)); format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} ins_encode %{ __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4S_imm(vecD dst, immI con) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); %} ins_pipe( fpu_reg_reg ); %} // Replicate char/short (2 byte) scalar zero to be vector instruct Repl2S_zero(vecS dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateS zero)); format %{ "pxor $dst,$dst\t! replicate2S zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4S_zero(vecD dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateS zero)); format %{ "pxor $dst,$dst\t! replicate4S zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8S_zero(vecX dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS zero)); format %{ "pxor $dst,$dst\t! replicate8S zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl16S_zero(vecY dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateS zero)); format %{ "vpxor $dst,$dst,$dst\t! 
replicate16S zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate integer (4 byte) scalar to be vector instruct Repl2I(vecD dst, rRegI src) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI src)); format %{ "movd $dst,$src\n\t" "pshufd $dst,$dst,0x00\t! replicate2I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( fpu_reg_reg ); %} // Integer could be loaded into xmm register directly from memory. instruct Repl2I_mem(vecD dst, memory mem) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI (LoadI mem))); format %{ "movd $dst,$mem\n\t" "pshufd $dst,$dst,0x00\t! replicate2I" %} ins_encode %{ __ movdl($dst$$XMMRegister, $mem$$Address); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); %} ins_pipe( fpu_reg_reg ); %} // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. instruct Repl2I_imm(vecD dst, immI con) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); %} ins_pipe( fpu_reg_reg ); %} // Replicate integer (4 byte) scalar zero to be vector instruct Repl2I_zero(vecD dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI zero)); format %{ "pxor $dst,$dst\t! replicate2I zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4I_zero(vecX dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateI zero)); format %{ "pxor $dst,$dst\t! replicate4I zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8I_zero(vecY dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateI zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate long (8 byte) scalar to be vector #ifdef _LP64 instruct Repl2L(vecX dst, rRegL src) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL src)); format %{ "movdq $dst,$src\n\t" "punpcklqdq $dst,$dst\t! replicate2L" %} ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} #else // _LP64 instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "punpcklqdq $dst,$dst\t! replicate2L" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} #endif // _LP64 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
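// The expansion loads the 8-byte immediate from the constant table with movq and then
// duplicates the low quadword with punpcklqdq; e.g. ReplicateL of the constant 5 loads
// 0x0000000000000005 and yields the two-element vector {5, 5}. For narrower element
// types the immediate is first pre-replicated into 64 bits by replicate8_imm (see the
// *_imm variants above) before the same movq + punpcklqdq expansion.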
instruct Repl2L_imm(vecX dst, immL con) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "punpcklqdq $dst,$dst\t! replicate2L($con)" %} ins_encode %{ __ movq($dst$$XMMRegister, $constantaddress($con)); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} // Replicate long (8 byte) scalar zero to be vector instruct Repl2L_zero(vecX dst, immL0 zero) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL zero)); format %{ "pxor $dst,$dst\t! replicate2L zero" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4L_zero(vecY dst, immL0 zero) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateL zero)); format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate float (4 byte) scalar to be vector instruct Repl2F(vecD dst, vlRegF src) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateF src)); format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4F(vecX dst, vlRegF src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateF src)); format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); %} ins_pipe( pipe_slow ); %} // Replicate double (8 bytes) scalar to be vector instruct Repl2D(vecX dst, vlRegD src) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateD src)); format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); %} ins_pipe( pipe_slow ); %} // ====================EVEX REPLICATE============================================= instruct Repl4B_mem_evex(vecS dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} ins_encode %{ int vector_len = 0; __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8B_mem_evex(vecD dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} ins_encode %{ int vector_len = 0; __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16B_evex(vecX dst, rRegI src) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB src)); format %{ "evpbroadcastb $dst,$src\t! replicate16B" %} ins_encode %{ int vector_len = 0; __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16B_mem_evex(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "vpbroadcastb $dst,$mem\t! 
replicate16B" %} ins_encode %{ int vector_len = 0; __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl32B_evex(vecY dst, rRegI src) %{ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB src)); format %{ "evpbroadcastb $dst,$src\t! replicate32B" %} ins_encode %{ int vector_len = 1; __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl32B_mem_evex(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} ins_encode %{ int vector_len = 1; __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl64B_evex(vecZ dst, rRegI src) %{ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); match(Set dst (ReplicateB src)); format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} ins_encode %{ int vector_len = 2; __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); match(Set dst (ReplicateB (LoadB mem))); format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} ins_encode %{ int vector_len = 2; __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16B_imm_evex(vecX dst, immI con) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "vpbroadcastb $dst,$dst\t! replicate16B" %} ins_encode %{ int vector_len = 0; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl32B_imm_evex(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "vpbroadcastb $dst,$dst\t! replicate32B" %} ins_encode %{ int vector_len = 1; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl64B_imm_evex(vecZ dst, immI con) %{ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); match(Set dst (ReplicateB con)); format %{ "movq $dst,[$constantaddress]\n\t" "vpbroadcastb $dst,$dst\t! upper replicate64B" %} ins_encode %{ int vector_len = 2; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 64 && UseAVX > 2); match(Set dst (ReplicateB zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
replicate8S" %} ins_encode %{ int vector_len = 0; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16S_imm_evex(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "vpbroadcastw $dst,$dst\t! replicate16S" %} ins_encode %{ int vector_len = 1; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl32S_imm_evex(vecZ dst, immI con) %{ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); match(Set dst (ReplicateS con)); format %{ "movq $dst,[$constantaddress]\n\t" "vpbroadcastw $dst,$dst\t! replicate32S" %} ins_encode %{ int vector_len = 2; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 32 && UseAVX > 2); match(Set dst (ReplicateS zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4I_evex(vecX dst, rRegI src) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "evpbroadcastd $dst,$src\t! replicate4I" %} ins_encode %{ int vector_len = 0; __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl4I_mem_evex(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} ins_encode %{ int vector_len = 0; __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8I_evex(vecY dst, rRegI src) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateI src)); format %{ "evpbroadcastd $dst,$src\t! replicate8I" %} ins_encode %{ int vector_len = 1; __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8I_mem_evex(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateI (LoadI mem))); format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} ins_encode %{ int vector_len = 1; __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16I_evex(vecZ dst, rRegI src) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateI src)); format %{ "evpbroadcastd $dst,$src\t! replicate16I" %} ins_encode %{ int vector_len = 2; __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateI (LoadI mem))); format %{ "vpbroadcastd $dst,$mem\t! 
replicate16I" %} ins_encode %{ int vector_len = 2; __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl4I_imm_evex(vecX dst, immI con) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" "vpbroadcastd $dst,$dst\t! replicate4I" %} ins_encode %{ int vector_len = 0; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8I_imm_evex(vecY dst, immI con) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" "vpbroadcastd $dst,$dst\t! replicate8I" %} ins_encode %{ int vector_len = 1; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16I_imm_evex(vecZ dst, immI con) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateI con)); format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" "vpbroadcastd $dst,$dst\t! replicate16I" %} ins_encode %{ int vector_len = 2; __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateI zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // Replicate long (8 byte) scalar to be vector #ifdef _LP64 instruct Repl4L_evex(vecY dst, rRegL src) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); format %{ "evpbroadcastq $dst,$src\t! replicate4L" %} ins_encode %{ int vector_len = 1; __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8L_evex(vecZ dst, rRegL src) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL src)); format %{ "evpbroadcastq $dst,$src\t! replicate8L" %} ins_encode %{ int vector_len = 2; __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); %} ins_pipe( pipe_slow ); %} #else // _LP64 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "vpbroadcastq $dst,$dst\t! 
replicate4L" %} ins_encode %{ int vector_len = 1; __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "movdl $dst,$src.lo\n\t" "movdl $tmp,$src.hi\n\t" "punpckldq $dst,$tmp\n\t" "vpbroadcastq $dst,$dst\t! replicate8L" %} ins_encode %{ int vector_len = 2; __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} #endif // _LP64 instruct Repl4L_imm_evex(vecY dst, immL con) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "vpbroadcastq $dst,$dst\t! replicate4L" %} ins_encode %{ int vector_len = 1; __ movq($dst$$XMMRegister, $constantaddress($con)); __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8L_imm_evex(vecZ dst, immL con) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL con)); format %{ "movq $dst,[$constantaddress]\n\t" "vpbroadcastq $dst,$dst\t! replicate8L" %} ins_encode %{ int vector_len = 2; __ movq($dst$$XMMRegister, $constantaddress($con)); __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl2L_mem_evex(vecX dst, memory mem) %{ predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL (LoadL mem))); format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} ins_encode %{ int vector_len = 0; __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl4L_mem_evex(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateL (LoadL mem))); format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} ins_encode %{ int vector_len = 1; __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL (LoadL mem))); format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} ins_encode %{ int vector_len = 2; __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateL zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} ins_encode %{ // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8F_evex(vecY dst, regF src) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateF src)); format %{ "vpbroadcastss $dst,$src\t! 
replicate8F" %} ins_encode %{ int vector_len = 1; __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8F_mem_evex(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateF (LoadF mem))); format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} ins_encode %{ int vector_len = 1; __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16F_evex(vecZ dst, regF src) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateF src)); format %{ "vpbroadcastss $dst,$src\t! replicate16F" %} ins_encode %{ int vector_len = 2; __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateF (LoadF mem))); format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} ins_encode %{ int vector_len = 2; __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 2 && UseAVX > 2); match(Set dst (ReplicateF zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} ins_encode %{ // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2); match(Set dst (ReplicateF zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} ins_encode %{ // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateF zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} ins_encode %{ // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateF zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} ins_encode %{ // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4D_evex(vecY dst, regD src) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateD src)); format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %} ins_encode %{ int vector_len = 1; __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl4D_mem_evex(vecY dst, memory mem) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); match(Set dst (ReplicateD (LoadD mem))); format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} ins_encode %{ int vector_len = 1; __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8D_evex(vecZ dst, regD src) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateD src)); format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %} ins_encode %{ int vector_len = 2; __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateD (LoadD mem))); format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} ins_encode %{ int vector_len = 2; __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 2 && UseAVX > 2); match(Set dst (ReplicateD zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} ins_encode %{ // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 4 && UseAVX > 2); match(Set dst (ReplicateD zero)); format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} ins_encode %{ // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateD zero)); format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} ins_encode %{ // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation int vector_len = 2; __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( fpu_reg_reg ); %} // ====================VECTOR INSERT======================================= instruct rvinsert8B(vecD dst, vecD src, rRegI val, immU3 idx) %{ predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrb $dst,$val\t! Insert 8B" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrb($dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert16B(vecX dst, vecX src, rRegI val, immU4 idx) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrb $dst,$val\t! Insert 16B" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrb($dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert16B_avx(vecX dst, vecX src, rRegI val, immU4 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "vmovdqu $dst,$src\n\t" "vpinsrb $dst,$dst,$val\t! 
Insert 16B" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vpinsrb($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert32B(vecY dst, vecY src, vecY tmp, rRegI val, immU5 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{"vmovdqu $dst,$src\n\t" "vextracti128 $tmp,$src\n\t" "vpinsrb $tmp,$tmp,$val\n\t" "vinserti128 $dst,$tmp\t! Insert 32B" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(4); uint y_idx = ($idx$$constant >> 4) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); __ vpinsrb($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx); __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert64B(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU6 idx) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp, TEMP tmp1); format %{ "evmovdquq $dst,$src\n\t" "vextracti64x4 $tmp,$src\n\t" "vextracti128 $tmp1,$tmp\n\t" "vpinsrb $tmp1,$tmp1,$val\n\t" "vinserti128 $tmp,$tmp,$tmp1\n\t" "vinserti64x4 $dst,$dst,$tmp\t! Insert 64B" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(4); uint y_idx = ($idx$$constant >> 4) & 1; uint z_idx = ($idx$$constant >> 5) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); } __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); __ vpinsrb($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx); __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert4S(vecD dst, vecD src, rRegI val, immU2 idx) %{ predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrw $dst,$val\t! Insert 4S" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrw($dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert8S(vecX dst, vecX src, rRegI val, immU3 idx) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrw $dst,$val\t! Insert 8S" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrw($dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert8S_avx(vecX dst, vecX src, rRegI val, immU3 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "vmovdqu $dst,$src\n\t" "vpinsrw $dst,$dst,$val\t! 
Insert 8S" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vpinsrw($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert16S(vecY dst, vecY src, vecX tmp, rRegI val, immU4 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{ "vmovdqu $dst,$src\n\t" "vextracti128 $tmp,$src\n\t" "vpinsrw $tmp,$tmp,$val\n\t" "vinserti128 $dst,$dst,$tmp\t! Insert 16S" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(3); uint y_idx = ($idx$$constant >> 3) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); __ vpinsrw($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx); __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert32S(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU5 idx) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp, TEMP tmp1); format %{ "evmovdquq $dst,$src\n\t" "vextracti64x4 $tmp,$src\n\t" "vextracti128 $tmp1,$tmp\n\t" "vpinsrw $tmp1,$tmp1,$val\n\t" "vinserti128 $tmp,$tmp,$tmp1\n\t" "vinserti64x4 $dst,$dst,$tmp\t! Insert 32S" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(3); uint y_idx = ($idx$$constant >> 3) & 1; uint z_idx = ($idx$$constant >> 4) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); } __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); __ vpinsrw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx); __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert2I(vecD dst, vecD src, rRegI val, immU1 idx) %{ predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrd $dst,$val\t! Insert 2I" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrd($dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert4I(vecX dst, vecX src, rRegI val, immU2 idx) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrd $dst,$val\t! Insert 4I" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrd($dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert4I_avx(vecX dst, vecX src, rRegI val, immU2 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "vmovdqu $dst,$src\n\t" "vpinsrd $dst,$val\t! 
Insert 4I" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vpinsrd($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert8I(vecY dst, vecY src, vecY tmp, rRegI val, immU3 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{ "vmovdqu $dst,$src\n\t" "vextracti128 $tmp,$src\n\t" "vpinsrd $tmp,$tmp,$val\n\t" "vinserti128 $dst,$dst,$tmp\t! Insert 8I" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(2); uint y_idx = ($idx$$constant >> 2) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); __ vpinsrd($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx); __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert16I(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU4 idx) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp, TEMP tmp1); format %{ "evmovdquq $dst,$src\n\t" "vextracti64x4 $tmp,$src\n\t" "vextracti128 $tmp,$tmp\n\t" "vpinsrd $tmp,$tmp,$val\n\t" "vinserti128 $tmp,$tmp,$tmp\n\t" "vinserti64x4 $dst,$dst,$tmp\t! Insert 16I" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(2); uint y_idx = ($idx$$constant >> 2) & 1; uint z_idx = ($idx$$constant >> 3) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); } __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); __ vpinsrd($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx); __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert1L(vecD dst, vecD src, rRegL val, immI0 idx) %{ predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrq $dst,$val\t! Insert 1L" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrq($dst$$XMMRegister, $val$$Register, 0); %} ins_pipe( pipe_slow ); %} instruct rvinsert2L(vecX dst, vecX src, rRegL val, immU1 idx) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "pinsrq $dst,$dst\t! Insert 2L" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert2L_avx(vecX dst, vecX src, rRegL val, immU1 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "vmovdqu $dst,$src\n\t" "vpinsrq $dst,$dst,$val\t! 
Insert 2L" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vpinsrq($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert4L(vecY dst, vecY src, vecY tmp, rRegL val, immU2 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{ "vmovdqu $dst,$src\n\t" "vextracti128 $tmp,$src\n\t" "vpinsrq $tmp,$tmp,$val\n\t" "vinserti128 $dst,$dst,$tmp\t! Insert 4L" %} ins_encode %{ uint x_idx = $idx$$constant & 1; uint y_idx = ($idx$$constant >> 1) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx); __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert8L(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegL val, immU3 idx) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp, TEMP tmp1); format %{ "evmovdquq $dst,$src\n\t" "vextracti64x4 $tmp,$src\n\t" "vextracti128 $tmp,$tmp\n\t" "vpinsrq $tmp,$tmp,$val\n\t" "vinserti128 $tmp,$tmp,$tmp\n\t" "vinserti64x4 $dst,$dst,$tmp\t! Insert 8L" %} ins_encode %{ uint x_idx = $idx$$constant & 1; uint y_idx = ($idx$$constant >> 1) & 1; uint z_idx = ($idx$$constant >> 2) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); } __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); __ vpinsrq($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx); __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert2F(vecD dst, vecD src, regF val, immU1 idx) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "insertps $dst,$dst,$val\t! Insert 2F" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert2F_avx(vecD dst, vecD src, regF val, immU1 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "insertps $dst,$dst,$val\t! Insert 2F" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vinsertps($dst$$XMMRegister, $dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert4F(vecX dst, vecX src, regF val, immU2 idx) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "movdqu $dst,$src\n\t" "insertps $dst,$dst,$val\t! 
Insert 4F" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert4F_avx(vecX dst, vecX src, regF val, immU2 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst); format %{ "vmovdqu $dst,$src\n\t" "vinsertps $dst,$dst,$val\t! Insert 4F" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vinsertps($dst$$XMMRegister, $dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert8F(vecY dst, vecY src, vecY tmp, regF val, immU3 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{ "vmovdqu $dst,$src\n\t" "vextractf128 $tmp,$src\n\t" "vinsertps $tmp,$tmp,$val\n\t" "vinsertf128 $dst,$dst,$tmp\t! Insert 8F" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(2); uint y_idx = ($idx$$constant >> 2) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); __ vinsertps($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$XMMRegister, x_idx); __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert16F(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, regF val, immU4 idx) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp, TEMP tmp1); format %{ "evmovdquq $dst,$src\n\t" "vextractf128 $tmp,$src\n\t" "vinsertps $tmp,$tmp,$val\n\t" "movsbl $dst,$dst\t! Insert 4I" %} ins_encode %{ uint x_idx = $idx$$constant & right_n_bits(2); uint y_idx = ($idx$$constant >> 2) & 1; uint z_idx = ($idx$$constant >> 3) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); } __ vextractf64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); __ vextractf128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); __ vinsertps($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$XMMRegister, x_idx); __ vinsertf128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); __ vinsertf64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert1D(vecD dst, vecD src, regD val, rRegL tmp, immI0 idx) %{ predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{ "movdqu $dst,$src\n\t" "movq $tmp,$val\n\t" "pinsrq $dst,$tmp\t! Insert 1D" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ movq($tmp$$Register, $val$$XMMRegister); __ pinsrq($dst$$XMMRegister, $tmp$$Register, 0); %} ins_pipe( pipe_slow ); %} instruct rvinsert2D(vecX dst, vecX src, regD val, rRegL tmp, immU1 idx) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{ "movdqu $dst,$src\n\t" "movq $dst,$src\n\t" "pinsrq $dst,$dst\t! 
Insert 2D" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } __ movq($tmp$$Register, $val$$XMMRegister); __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert2D_avx(vecX dst, vecX src, regD val, rRegL tmp, immU1 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp); format %{ "vmovdqu $dst,$src\n\t" "movq $tmp,$val\n\t" "vpinsrq $dst,$dst,$tmp\t! Insert 2D" %} ins_encode %{ if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ movq($tmp$$Register, $val$$XMMRegister); __ vpinsrq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$Register, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct rvinsert4D(vecY dst, vecY src, vecY tmp, regD val, rRegL tmp1, immU2 idx) %{ predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp, TEMP tmp1); format %{ "vmovdqu $dst,$src\n\t" "vextracti128 $tmp,$src\n\t" "movq $tmp1,$val\n\t" "vpinsrq $tmp,$tmp,$tmp1\n\t" "vinserti128 $dst,$dst,$tmp\t! Insert 4D" %} ins_encode %{ uint x_idx = $idx$$constant & 1; uint y_idx = ($idx$$constant >> 1) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); __ movq($tmp1$$Register, $val$$XMMRegister); __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$Register, x_idx); __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); %} ins_pipe( pipe_slow ); %} instruct rvinsert8D(vecZ dst, vecZ src, vecZ tmp, vecY tmp2, regD val, rRegL tmp1, immU3 idx) %{ predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2); format %{ "evmovdquq $dst,$src\n\t" "vextractf64x4 $tmp,$src\n\t" "vextractf128 $tmp,$tmp\n\t" "movq $tmp1,$val\n\t" "vpinsrq $tmp,$tmp,$val\n\t" "vinsertf128 $tmp,$tmp,$tmp\n\t" "vinsertf64x4 $dst,$dst,$tmp\t! 
Insert 8D" %} ins_encode %{ uint x_idx = $idx$$constant & 1; uint y_idx = ($idx$$constant >> 1) & 1; uint z_idx = ($idx$$constant >> 2) & 1; if ($dst$$XMMRegister != $src$$XMMRegister) { __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); } __ vextractf64x4($tmp2$$XMMRegister, $src$$XMMRegister, z_idx); __ vextractf128($tmp$$XMMRegister, $tmp2$$XMMRegister, y_idx); __ movq($tmp1$$Register, $val$$XMMRegister); __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$Register, x_idx); __ vinsertf128($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, y_idx); __ vinsertf64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, z_idx); %} ins_pipe( pipe_slow ); %} // ====================REDUCTION ARITHMETIC======================================= instruct rsadd8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "pshufd $tmp,$src2,0x1\n\t" "paddb $tmp,$src2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp2,$tmp, 0x0\n\t" "addl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "addl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x2\n\t" "addl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x3\n\t" "addl $dst,$tmp2\n\t" "movsbl $dst,$dst\t! add reduction8B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ paddb($tmp$$XMMRegister, $src2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ addl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ addl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); __ addl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); __ addl($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsadd16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "pshufd $tmp,$src2,0xE\n\t" "paddb $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "paddb $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "addl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "addl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "addl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "addl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! 
add reduction16B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ paddb($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ paddb($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ addl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ addl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ addl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ addl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvadd32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti128_high $tmp,$src2\n\t" "vpaddb $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddb $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddb $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "addl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "addl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "addl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "addl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! add reduction32B" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ addl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ addl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ addl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ addl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvadd64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpaddb $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpaddb $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddb $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddb $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "movdl $tmp3,$tmp\n\t" "addl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "addl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "addl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "addl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! 
add reduction64B" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpaddb($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ addl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ addl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ addl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ addl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "movdqu $tmp,$src2\n\t" "phaddw $tmp,$tmp\n\t" "phaddw $tmp,$tmp\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "addw $dst,$tmp2\n\t" "movswl $dst,$dst\t! add reduction4S" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ addw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "vphaddw $tmp,$src2,$src2\n\t" "vphaddw $tmp,$tmp,$tmp\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "addw $dst,$tmp2\n\t" "movswl $dst,$dst\t! add reduction4S" %} ins_encode %{ int vector_len = 0; __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ addw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "movdqu $tmp,$src2\n\t" "phaddw $tmp,$tmp\n\t" "phaddw $tmp,$tmp\n\t" "phaddw $tmp,$tmp\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "addw $dst,$tmp2\n\t" "movswl $dst,$dst\t! 
add reduction8S" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ addw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "vphaddw $tmp,$src2,$src2\n\t" "vphaddw $tmp,$tmp,$tmp\n\t" "vphaddw $tmp,$tmp,$tmp\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "addw $dst,$tmp2\n\t" "movswl $dst,$dst\t! add reduction8S" %} ins_encode %{ int vector_len = 0; __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ addw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvadd16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "vphaddw $tmp,$src2,$src2\n\t" "vphaddw $tmp,$tmp,$tmp\n\t" "vphaddw $tmp,$tmp,$tmp\n\t" "vphaddw $tmp,$tmp,$tmp\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "addw $dst,$tmp2\n\t" "movswl $dst,$dst\t! add reduction16S" %} ins_encode %{ int vector_len = 1; __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpermq($tmp$$XMMRegister, $tmp$$XMMRegister, 0xD8, vector_len); __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ addw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvadd32S_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpaddw $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpaddw $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddw $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddw $tmp,$tmp,$tmp2\n\t" "movdl $tmp3,$tmp\n\t" "addw $dst,$tmp3\n\t" "shrl $tmp3,0x16\n\t" "addw $dst,$tmp3\n\t" "movswl $dst,$dst\t! 
add reduction32S" %} ins_encode %{ __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpaddw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ addw($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 16); __ addw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp2, TEMP tmp); format %{ "movdqu $tmp2,$src2\n\t" "phaddd $tmp2,$tmp2\n\t" "movd $tmp,$src1\n\t" "paddd $tmp,$tmp2\n\t" "movd $dst,$tmp\t! add reduction2I" %} ins_encode %{ __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdl($dst$$Register, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction2I" %} ins_encode %{ int vector_len = 0; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0x1\n\t" "vpaddd $tmp,$src2,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! add reduction2I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "movdqu $tmp,$src2\n\t" "phaddd $tmp,$tmp\n\t" "phaddd $tmp,$tmp\n\t" "movd $tmp2,$src1\n\t" "paddd $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! 
add reduction4I" %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! add reduction4I" %} ins_encode %{ int vector_len = 0; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "vpaddd $tmp,$src2,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! add reduction4I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vphaddd $tmp,$src2,$src2\n\t" "vphaddd $tmp,$tmp,$tmp2\n\t" "vextracti128_high $tmp2,$tmp\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp2,$tmp\n\t" "movd $dst,$tmp2\t! 
add reduction8I" %} ins_encode %{ int vector_len = 1; __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpaddd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! add reduction8I" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpaddd $tmp3,$tmp3,$src2\n\t" "vextracti128_high $tmp,$tmp3\n\t" "vpaddd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpaddd $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpaddd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! add reduction16I" %} ins_encode %{ __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} #ifdef _LP64 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "paddq $tmp2,$src2\n\t" "movdq $tmp,$src1\n\t" "paddq $tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! 
add reduction2L" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ paddq($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdq($tmp$$XMMRegister, $src1$$Register); __ paddq($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ predicate(UseAVX > 1); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpaddq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction4L" %} ins_encode %{ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpaddq $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpaddq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! add reduction8L" %} ins_encode %{ __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} #endif instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (AddReductionVF dst src2)); effect(TEMP dst, TEMP tmp); format %{ "addss $dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "addss $dst,$tmp\t! add reduction2F" %} ins_encode %{ __ addss($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ addss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ predicate(UseAVX > 0); match(Set dst (AddReductionVF dst src2)); effect(TEMP dst, TEMP tmp); format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vaddss $dst,$dst,$tmp\t! 
add reduction2F" %} ins_encode %{ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (AddReductionVF dst src2)); effect(TEMP dst, TEMP tmp); format %{ "addss $dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "addss $dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "addss $dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "addss $dst,$tmp\t! add reduction4F" %} ins_encode %{ __ addss($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ addss($dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ addss($dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ addss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ predicate(UseAVX > 0); match(Set dst (AddReductionVF dst src2)); effect(TEMP tmp, TEMP dst); format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\t! add reduction4F" %} ins_encode %{ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{ predicate(UseAVX > 0); match(Set dst (AddReductionVF dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" "vextractf128_high $tmp2,$src2\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} ins_encode %{ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVF dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vaddss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x2\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x3\n\t" "vaddss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vaddss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} ins_encode %{ __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst); format %{ "addsd $dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "addsd $dst,$tmp\t! add reduction2D" %} ins_encode %{ __ addsd($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ predicate(UseAVX > 0); match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst); format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! add reduction2D" %} ins_encode %{ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{ predicate(UseAVX > 0); match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" "vextractf128 $tmp2,$src2,0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! 
add reduction4D" %} ins_encode %{ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ predicate(UseAVX > 2); match(Set dst (AddReductionVD dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vaddsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x1\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x2\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x3\n\t" "vaddsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} ins_encode %{ __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rssub2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (SubReductionVFP dst src2)); effect(TEMP dst, TEMP tmp); format %{ "subss $dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "subss $dst,$tmp\t! sub reduction2F" %} ins_encode %{ __ subss($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ subss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
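// Note: the floating-point sub reductions below (like the FP add reductions
// above) deliberately form a strictly ordered scalar chain: each lane is
// brought into element 0 by pshufd or a lane extract and subtracted with
// {v}subss/{v}subsd, lanes visited in index order. FP arithmetic is not
// associative, so a pairwise tree could round differently; the ordered chain
// matches the evaluation order of the scalar loop being vectorized.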
instruct rvsub2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (SubReductionVFP dst src2)); effect(TEMP dst, TEMP tmp); format %{ "vsubss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vsubss $dst,$dst,$tmp\t! sub reduction2F" %} ins_encode %{ __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rssub4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (SubReductionVFP dst src2)); effect(TEMP dst, TEMP tmp); format %{ "subss $dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "subss $dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "subss $dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "subss $dst,$tmp\t! sub reduction4F" %} ins_encode %{ __ subss($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ subss($dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ subss($dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ subss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvsub4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (SubReductionVFP dst src2)); effect(TEMP tmp, TEMP dst); format %{ "vsubss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vsubss $dst,$dst,$tmp\t! sub reduction4F" %} ins_encode %{ __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsub8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (SubReductionVFP dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vsubss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vsubss $dst,$dst,$tmp\n\t" "vextractf128_high $tmp2,$src2\n\t" "vsubss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vsubss $dst,$dst,$tmp\t! 
sub reduction8F" %} ins_encode %{ __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsub16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (SubReductionVFP dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vsubss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vsubss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x1\n\t" "vsubss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vsubss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x2\n\t" "vsubss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vsubss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x3\n\t" "vsubss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vsubss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vsubss $dst,$dst,$tmp\t! 
sub reduction16F" %} ins_encode %{ __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rssub2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (SubReductionVFP dst src2)); effect(TEMP tmp, TEMP dst); format %{ "subsd $dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "subsd $dst,$tmp\t! sub reduction2D" %} ins_encode %{ __ subsd($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ subsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvsub2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (SubReductionVFP dst src2)); effect(TEMP tmp, TEMP dst); format %{ "vsubsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vsubsd $dst,$dst,$tmp\t! 
sub reduction2D" %} ins_encode %{ __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvsub4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (SubReductionVFP dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vsubsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vsubsd $dst,$dst,$tmp\n\t" "vextractf128 $tmp2,$src2,0x1\n\t" "vsubsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vsubsd $dst,$dst,$tmp\t! sub reduction4D" %} ins_encode %{ __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvsub8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (SubReductionVFP dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vsubsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vsubsd $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x1\n\t" "vsubsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vsubsd $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x2\n\t" "vsubsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vsubsd $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x3\n\t" "vsubsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vsubsd $dst,$dst,$tmp\t! 
sub reduction8D" %} ins_encode %{ __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmul8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "pmovsxbw $tmp,$src2\n\t" "pshufd $tmp1,$tmp,0xE\n\t" "pmullw $tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0x1\n\t" "pmullw $tmp,$tmp1\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imul $tmp2,$tmp3 \n\t" "movsbl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movsbl $dst,$dst\t! mul reduction8B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmul16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "pmovsxbw $tmp,$src2\n\t" "pshufd $tmp1,$src2,0xEE\n\t" "pmovsxbw $tmp1,$tmp1\n\t" "pmullw $tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0xE\n\t" "pmullw $tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0x1\n\t" "pmullw $tmp,$tmp1\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imull $tmp2,$tmp3 \n\t" "movsbl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movsbl $dst,$dst\t! 
mul reduction16B" %} ins_encode %{ int vector_len = 0; __ pmovsxbw($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp1$$XMMRegister, $src2$$XMMRegister, 0xEE); __ pmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister); __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmul32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "vextracti128_high $tmp,$src2\n\t" "pmovsxbw $tmp,$tmp\n\t" "pmovsxbw $tmp1,$src2\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "vextracti128_high $tmp1,$tmp\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0xE\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0x1\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imull $tmp2,$tmp3 \n\t" "movsbl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movsbl $dst,$dst\t! mul reduction32B" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovsxbw($tmp1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmul64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp,$src2\n\t" "vpmovsxbw $tmp,$tmp\n\t" "vpmovsxbw $tmp1,$src2\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "vextracti64x4_high $tmp1,$tmp\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "vextracti128_high $tmp1,$tmp\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0xE\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0x1\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imull $tmp2,$tmp3 \n\t" "movsbl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movsbl $dst,$dst\t! 
mul reduction64B" %} ins_encode %{ int vector_len = 2; __ vextracti64x4_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovsxbw($tmp1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ vextracti64x4_high($tmp1$$XMMRegister, $tmp$$XMMRegister); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 1); __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmul4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "pshufd $tmp,$src2,0x1\n\t" "pmullw $tmp,$src2\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imull $tmp2,$tmp3 \n\t" "movswl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movswl $dst,$dst\t! mul reduction4S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ pmullw($tmp$$XMMRegister, $src2$$XMMRegister); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movswl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmul8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "pshufd $tmp,$src2,0xE\n\t" "pmullw $tmp,$src2\n\t" "pshufd $tmp1,$tmp,0x1\n\t" "pmullw $tmp,$tmp1\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imul $tmp2,$tmp3 \n\t" "movswl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movswl $dst,$dst\t! 
mul reduction8S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pmullw($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movswl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmul16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "vextracti128_high $tmp,$src2\n\t" "vpmullw $tmp,$tmp,$src2\n\t" "pshufd $tmp1,$tmp,0xE\n\t" "pmullw $tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0x1\n\t" "pmullw $tmp,$tmp1\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imul $tmp2,$tmp3 \n\t" "movswl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movswl $dst,$dst\t! mul reduction16S" %} ins_encode %{ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 1); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movswl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmul32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp1,$src2\n\t" "vpmullw $tmp1,$tmp1,$src2\n\t" "vextracti128_high $tmp,$tmp1\n\t" "vpmullw $tmp,$tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0xE\n\t" "pmullw $tmp,$tmp1\n\t" "pshufd $tmp1,$tmp,0x1\n\t" "pmullw $tmp,$tmp1\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "imul $tmp2,$tmp3 \n\t" "movswl $dst,$src1\n\t" "imull $dst,$tmp2\n\t" "movswl $dst,$dst\t! 
mul reduction32S" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp1$$XMMRegister, $src2$$XMMRegister); __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp1$$XMMRegister); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ imull($tmp2$$Register, $tmp3$$Register); __ movswl($dst$$Register, $src1$$Register); __ imull($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0x1\n\t" "pmulld $tmp2,$src2\n\t" "movd $tmp,$src1\n\t" "pmulld $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! mul reduction2I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0x1\n\t" "vpmulld $tmp,$src2,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction2I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
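// Note: the integer multiply reductions below halve the live vector each
// step: pshufd or a 128/256-bit lane extract lines the upper half up with
// the lower half, and pmulld/vpmulld multiplies the halves elementwise, so
// an N-lane reduction needs log2(N) packed multiplies; the surviving
// element 0 is then multiplied with the scalar input src1.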
instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "pmulld $tmp2,$src2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "pmulld $tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "pmulld $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! mul reduction4I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "vpmulld $tmp,$src2,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction4I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpmulld $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! mul reduction8I" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpmulld $tmp3,$tmp3,$src2\n\t" "vextracti128_high $tmp,$tmp3\n\t" "vpmulld $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmulld $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpmulld $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! 
mul reduction16I" %} ins_encode %{ __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} #ifdef _LP64 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "vpmullq $tmp,$src2,$tmp2\n\t" "movdq $tmp2,$src1\n\t" "vpmullq $tmp2,$tmp,$tmp2\n\t" "movdq $dst,$tmp2\t! mul reduction2L" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdq($tmp2$$XMMRegister, $src1$$Register); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpmullq $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! mul reduction4L" %} ins_encode %{ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); match(Set dst (MulReductionVL src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpmullq $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpmullq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! 
mul reduction8L" %} ins_encode %{ __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} #endif instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (MulReductionVF dst src2)); effect(TEMP dst, TEMP tmp); format %{ "mulss $dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "mulss $dst,$tmp\t! mul reduction2F" %} ins_encode %{ __ mulss($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVF dst src2)); effect(TEMP tmp, TEMP dst); format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} ins_encode %{ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (MulReductionVF dst src2)); effect(TEMP dst, TEMP tmp); format %{ "mulss $dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "mulss $dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "mulss $dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "mulss $dst,$tmp\t! mul reduction4F" %} ins_encode %{ __ mulss($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVF dst src2)); effect(TEMP tmp, TEMP dst); format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} ins_encode %{ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVF dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" "vextractf128_high $tmp2,$src2\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} ins_encode %{ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ predicate(UseAVX > 2); match(Set dst (MulReductionVF dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vmulss $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$src2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x1\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x2\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\n\t" "vextractf32x4 $tmp2,$src2,0x3\n\t" "vmulss $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0x01\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x02\n\t" "vmulss $dst,$dst,$tmp\n\t" "pshufd $tmp,$tmp2,0x03\n\t" "vmulss $dst,$dst,$tmp\t! 
mul reduction16F" %} ins_encode %{ __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ predicate(UseSSE >= 1 && UseAVX == 0); match(Set dst (MulReductionVD dst src2)); effect(TEMP dst, TEMP tmp); format %{ "mulsd $dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "mulsd $dst,$tmp\t! mul reduction2D" %} ins_encode %{ __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVD dst src2)); effect(TEMP tmp, TEMP dst); format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %} ins_encode %{ __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{ predicate(UseAVX > 0); match(Set dst (MulReductionVD dst src2)); effect(TEMP tmp, TEMP dst, TEMP tmp2); format %{ "vmulsd $dst,$dst,$src2\n\t" "pshufd $tmp,$src2,0xE\n\t" "vmulsd $dst,$dst,$tmp\n\t" "vextractf128_high $tmp2,$src2\n\t" "vmulsd $dst,$dst,$tmp2\n\t" "pshufd $tmp,$tmp2,0xE\n\t" "vmulsd $dst,$dst,$tmp\t! 
mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min Reduction --------------------
// Integral min reductions fold the upper half of the vector onto the lower
// half (pshufd/vextract* + pmin*) until only a few lanes survive, then
// combine the surviving lanes and the scalar input with cmpl/cmovl.

instruct rsmin8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsb $tmp,$src2\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction8B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pminsb($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmin16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp4,$src2,0xE\n\t"
            "pminsb $tmp4,$src2\n\t"
            "pshufd $tmp,$tmp4,0x1\n\t"
            "pminsb $tmp,$tmp4\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction16B" %}
  ins_encode %{
    __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pminsb($tmp4$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ pminsb($tmp$$XMMRegister, $tmp4$$XMMRegister);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp4,$src2,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$src2\n\t"
            "pshufd $tmp,$tmp4,0x1\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction16B" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, vector_len);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpminsb $tmp,$tmp,$src2\n\t"
            "pshufd $tmp4,$tmp,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$tmp\n\t"
            "pshufd $tmp,$tmp4,0x1\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "vextracti64x4_high $tmp4,$src2\n\t"
            "vpminsb $tmp4,$tmp4,$src2\n\t"
            "vextracti128_high $tmp,$tmp4\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pshufd $tmp4,$tmp,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$tmp\n\t"
            "pshufd $tmp,$tmp4,0x1\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction64B" %}
  ins_encode %{
    __ vextracti64x4_high($tmp4$$XMMRegister, $src2$$XMMRegister);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 2);
    __ vextracti128_high($tmp$$XMMRegister, $tmp4$$XMMRegister);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 1);
    __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmin4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsw $tmp,$src2\n\t"
            "pextrw $tmp2,$tmp,0x1\n\t"
            "movswl $tmp2,$tmp2\n\t"
            "pextrw $tmp3,$tmp,0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction4S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pminsw($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movswl($tmp2$$Register, $tmp2$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmin8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pminsw $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pminsw $tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp,0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp,0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction8S" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pminsw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pminsw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister, 0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpminsw $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp,0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp,0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction8S" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister, 0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpminsw $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp,0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp,0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister, 0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpminsw $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp,0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp,0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpminsw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister, 0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmin2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsd $tmp,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "pminsd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! min reduction2I" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pminsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ pminsd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "vpminsd $tmp,$tmp,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! min reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmin4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pminsd $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pminsd $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pminsd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! min reduction4I" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pminsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pminsd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pminsd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpminsd $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpminsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! min reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ vpminsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin4I_reduction_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpminsd $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpminsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! 
min reduction4I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmin8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpminsd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpminsd $tmp2,$tmp,$tmp2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpminsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpminsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! min reduction8I" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmin8I_reduction_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpminsd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpminsd $tmp2,$tmp,$tmp2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpminsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpminsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! min reduction8I" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmin16I_reduction_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpminsd $tmp3,$tmp3,$src2\n\t" "vextracti128_high $tmp,$tmp3\n\t" "vpminsd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpminsd $tmp2,$tmp,$tmp2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpminsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpminsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! 
min reduction16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpminsd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ vpminsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long Min Reduction
// Longs have no pmin instruction, so the minimum is selected with a
// compare (pcmpgtq/vpcmpgtq) followed by a mask-driven blend (blendvpd).
instruct rsmin1L_reduction_reg(rRegL dst, rRegL src1, legVecD src2, rxmm0 tmp, legVecD tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdq $tmp,$src1\n\t"
            "movdq $tmp2,$src1\n\t"
            "pcmpgtq $tmp,$src2\n\t"
            "blendvpd $tmp2,$src2\n\t"
            "movdq $dst,$tmp2\t! min reduction1L" %}
  ins_encode %{
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister);
    __ blendvpd($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmin2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, rxmm0 xmm_0, vecX tmp2, vecX tmp3) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp3,$src2,0xE\n\t"
            "movdqu $xmm_0,$src2\n\t"
            "movdqu $tmp2,$src2\n\t"
            "pcmpgtq $xmm_0,$tmp3\n\t"
            "blendvpd $tmp2,$tmp3\n\t"
            "movdqu $xmm_0,$tmp2\n\t"
            "movdq $tmp3,$src1\n\t"
            "pcmpgtq $xmm_0,$tmp3\n\t"
            "blendvpd $tmp2,$tmp3\n\t"
            "movq $dst,$tmp2\t! min reduction2L" %}
  ins_encode %{
    __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ movdqu($xmm_0$$XMMRegister, $src2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister);
    __ blendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ movdqu($xmm_0$$XMMRegister, $tmp2$$XMMRegister);
    __ movdq($tmp3$$XMMRegister, $src1$$Register);
    __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister);
    __ blendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin2L_reduction_reg(rRegL dst, rRegL src1, legVecX src2, legVecX tmp, legVecX tmp2, legVecX tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp2,$src2\n\t"
            "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpcmpgtq $tmp3,$tmp2,$tmp\n\t"
            "vblendvpd $tmp2,$tmp2,$tmp,$tmp3\n\t"
            "movq $dst,$tmp2\t! min reduction2L" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpcmpgtq($tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin4L_reduction_reg(rRegL dst, rRegL src1, legVecY src2, legVecY tmp, legVecY tmp2, legVecY tmp3) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti128_high $tmp2,$src2\n\t"
            "vpcmpgtq $tmp,$tmp2,$src2\n\t"
            "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t"
            "pshufd $tmp3,$tmp2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp3,$tmp2\n\t"
            "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpcmpgtq $tmp2,$tmp3,$tmp\n\t"
            "vblendvpd $tmp2,$tmp3,$tmp,$tmp2\n\t"
            "movq $dst,$tmp2\t! min reduction4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vblendvpd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpcmpgtq($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpcmpgtq $tmp,$tmp3,$src2\n\t"
            "vblendvpd $tmp3,$tmp3,$src2,$tmp\n\t"
            "vextracti128_high $tmp2,$tmp3\n\t"
            "vpcmpgtq $tmp,$tmp2,$tmp3\n\t"
            "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t"
            "pshufd $tmp3,$tmp2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp3,$tmp2\n\t"
            "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t"
            "movq $tmp2,$src1\n\t"
            "vpcmpgtq $tmp,$tmp2,$tmp3\n\t"
            "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t"
            "movq $dst,$tmp2\t! min reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vblendvpd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 1);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 1);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, 1);
    __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, 1);
    __ vblendvpd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 1);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 1);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, 1);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float Min Reduction
instruct rvmin2F_reduction_reg_av(legRegF dst, legVecD src, legVecD tmp, legVecD dtmp, legVecD atmp, legVecD btmp, legVecX xmm_1) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP xmm_1);
  format %{ "vpermilps $xmm_1,$src,1\n\t"
            "vminps_macro $dtmp,$xmm_1,$src\t! minps\n\t"
            "vminps_macro $dst,$dtmp,$dst\t! minps" %}
  ins_encode %{
    int vector_len = 0;
    __ vpermilps($xmm_1$$XMMRegister, $src$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dtmp$$XMMRegister, $xmm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin2F_reduction_reg(legRegF dst, immF src1, legVecD src2, legVecD tmp, legVecD atmp, legVecD btmp, legVecX xmm_1) %{
  predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeF::POS_INF &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1);
  format %{ "vpermilps $xmm_1,$src2,1\n\t"
            "vminps_macro $dst,$xmm_1,$src2\t! minps" %}
  ins_encode %{
    int vector_len = 0;
    __ vpermilps($xmm_1$$XMMRegister, $src2$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $xmm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin4F_reduction_reg_av(legRegF dst, legVecX src, legVecX tmp, legVecX dtmp, legVecX atmp, legVecX btmp, legVecX xmm_0, legVecX xmm_1) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1);
  format %{ "vpermilps $xmm_1,$src,14\n\t"
            "vminps_macro $xmm_0,$xmm_1,$src\t! minps\n\t"
            "vpermilps $xmm_1,$xmm_0,1\n\t"
            "vminps_macro $dtmp,$xmm_0,$xmm_1\t! minps\n\t"
            "vminps_macro $dst,$dtmp,$dst\t! minps" %}
  ins_encode %{
    int vector_len = 0;
    __ vpermilps($xmm_1$$XMMRegister, $src$$XMMRegister, 14, vector_len);
    __ vmin_max_macro($xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($xmm_1$$XMMRegister, $xmm_0$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dtmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin4F_reduction_reg(legRegF dst, immF src1, legVecX src2, legVecX tmp, legVecX atmp, legVecX btmp, legVecX xmm_0, legVecX xmm_1) %{
  predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeF::POS_INF &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1);
  format %{ "vpermilps $xmm_1,$src2,14\n\t"
            "vminps_macro $xmm_0,$xmm_1,$src2\t! minps\n\t"
            "vpermilps $xmm_1,$xmm_0,1\n\t"
            "vminps_macro $dst,$xmm_0,$xmm_1\t! minps" %}
  ins_encode %{
    int vector_len = 0;
    __ vpermilps($xmm_1$$XMMRegister, $src2$$XMMRegister, 14, vector_len);
    __ vmin_max_macro($xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($xmm_1$$XMMRegister, $xmm_0$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin8F_reduction_reg_av(legRegF dst, legVecY src, legVecY tmp, legVecY dtmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1);
  format %{ "vperm2f128 $ymm_1,$src,$src,1\n\t"
            "vminps_macro $ymm_0,$ymm_1,$src\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,14\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,1\n\t"
            "vminps_macro $dtmp,$ymm_0,$ymm_1\t! minps\n\t"
            "vminps_macro $dst,$dtmp,$dst\t! minps" %}
  ins_encode %{
    int vector_len = 1;
    __ vperm2f128($ymm_1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len);
    __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dtmp$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin8F_reduction_reg(legRegF dst, immF src1, legVecY src2, legVecY tmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{
  predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeF::POS_INF &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1);
  format %{ "vperm2f128 $ymm_1,$src2,$src2,1\n\t"
            "vminps_macro $ymm_0,$ymm_1,$src2\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,14\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,1\n\t"
            "vminps_macro $dst,$ymm_0,$ymm_1\t! minps" %}
  ins_encode %{
    int vector_len = 1;
    __ vperm2f128($ymm_1$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len);
    __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin16F_reduction_reg_av(regF dst, vecZ src, vecZ tmp, vecZ dtmp, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1);
  format %{ "vextractf64x4 $ymm_0,$src,0\n\t"
            "vextractf64x4 $ymm_1,$src,1\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermpd $ymm_1,$ymm_0,78\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,14\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,1\n\t"
            "vminps_macro $dtmp,$ymm_0,$ymm_1\t! minps\n\t"
            "vminps_macro $dst,$dtmp,$dst\t! minps" %}
  ins_encode %{
    int vector_len = 1;
    KRegister ktmp = k1;
    __ vextractf64x4($ymm_0$$XMMRegister, $src$$XMMRegister, 0);
    __ vextractf64x4($ymm_1$$XMMRegister, $src$$XMMRegister, 1);
    __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 78, vector_len);
    __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len);
    __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len);
    __ vmin_max_macro_evex($dtmp$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vmin_max_macro_evex($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin16F_reduction_reg(regF dst, immF src1, vecZ src2, vecZ tmp, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{
  predicate(UseAVX > 2 && n->in(1)->as_Type()->type() == (Type*)TypeF::POS_INF &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1);
  format %{ "vextractf64x4 $ymm_0,$src2,0\n\t"
            "vextractf64x4 $ymm_1,$src2,1\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermpd $ymm_1,$ymm_0,78\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,14\n\t"
            "vminps_macro $ymm_0,$ymm_1,$ymm_0\t! minps\n\t"
            "vpermilps $ymm_1,$ymm_0,1\n\t"
            "vminps_macro $dst,$ymm_0,$ymm_1\t! minps" %}
  ins_encode %{
    int vector_len = 1;
    KRegister ktmp = k1;
    __ vextractf64x4($ymm_0$$XMMRegister, $src2$$XMMRegister, 0);
    __ vextractf64x4($ymm_1$$XMMRegister, $src2$$XMMRegister, 1);
    __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 78, vector_len);
    __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len);
    __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
    __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len);
    __ vmin_max_macro_evex($dst$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister, true, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin2D_reduction_reg_av(legRegD dst, legVecX src, legVecX tmp, legVecX dtmp, legVecX atmp, legVecX btmp, legVecX xmm_1) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP xmm_1);
  format %{ "vpermilpd $xmm_1,$src,1\n\t"
            "vminpd_macro $dtmp,$xmm_1,$src\t! minpd\n\t"
            "vminpd_macro $dst,$dtmp,$dst\t! 
minpd" %}
  ins_encode %{
    int vector_len = 0;
    __ vpermilpd($xmm_1$$XMMRegister, $src$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dtmp$$XMMRegister, $xmm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, true, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin2D_reduction_reg(legRegD dst, immD src1, legVecX src2, legVecX tmp, legVecX atmp, legVecX btmp, legVecX xmm_1) %{
  predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeD::POS_INF &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1);
  format %{ "vpermilpd $xmm_1,$src2,1\n\t"
            "vminpd_macro $dst,$xmm_1,$src2\t! minpd" %}
  ins_encode %{
    int vector_len = 0;
    __ vpermilpd($xmm_1$$XMMRegister, $src2$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $xmm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin4D_reduction_reg_av(legRegD dst, legVecY src, legVecY tmp, legVecY dtmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1);
  format %{ "vperm2f128 $ymm_1,$src,$src,1\n\t"
            "vminpd_macro $ymm_0,$ymm_1,$src\t! minpd\n\t"
            "vpermilpd $ymm_1,$ymm_0,1\n\t"
            "vminpd_macro $dtmp,$ymm_0,$ymm_1\t! minpd\n\t"
            "vminpd_macro $dst,$dtmp,$dst\t! minpd" %}
  ins_encode %{
    int vector_len = 1;
    __ vperm2f128($ymm_1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, true, vector_len);
    __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len);
    __ vmin_max_macro($dtmp$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, true, vector_len);
    __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin4D_reduction_reg(legRegD dst, immD src1, legVecY src2, legVecY tmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{
  predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeD::POS_INF &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1);
  format %{ "vperm2f128 $ymm_1,$src2,$src2,1\n\t"
            "vminpd_macro $ymm_0,$ymm_1,$src2\t! minpd\n\t"
            "vpermilpd $ymm_1,$ymm_0,1\n\t"
            "vminpd_macro $dst,$ymm_0,$ymm_1\t! 
minps" %} ins_encode %{ int vector_len = 1; __ vperm2f128($ymm_1$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, 1); __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro($dst$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmin8D_reduction_reg_av(regD dst, vecZ src, vecZ tmp, vecZ dtmp, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MinReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vextractf64x4 $ymm_0, $src, 0\n\t" "vextractf64x4 $ymm_1, $src, 1\n\t" "vminpd_macro $ymm_0,$ymm_1,$ymm_0\t! minpd\n\t" "vpermpd $ymm_1,$ymm_0,14\n\t" "vminpd_macro $ymm_0,$ymm_1,$src\t! minpd\n\t" "vpermilpd $ymm_1,$ymm_0,1\n\t" "vminpd_macro $dtmp,$ymm_1,$ymm_0\t! minpd\n\t" "vminpd_macro $dst,$dtmp,$dst\t! minpd\t" %} ins_encode %{ int vector_len = 1; KRegister ktmp = k1; __ vextractf64x4($ymm_0$$XMMRegister, $src$$XMMRegister, 0); __ vextractf64x4($ymm_1$$XMMRegister, $src$$XMMRegister, 1); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro_evex($dst$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); __ vmin_max_macro_evex($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmin8D_reduction_reg(regD dst, immD src1, vecZ src2, vecZ tmp, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{ predicate(UseAVX > 2 && n->in(1)->as_Type()->type() == (Type*)TypeD::POS_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MinReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vextractf64x4 $ymm_0, $src2, 0\n\t" "vextractf64x4 $ymm_1, $src2, 1\n\t" "vminpd_macro $ymm_0,$ymm_1,$ymm_0\t! minpd\n\t" "vpermpd $ymm_1,$ymm_0,14\n\t" "vminpd_macro $ymm_0,$ymm_1,$ymm_0\t! minpd\n\t" "vpermilpd $ymm_1,$ymm_0,1\n\t" "vminpd_macro $dst,$ymm_1,$ymm_0\t! 
minpd\n\t" %} ins_encode %{ int vector_len = 1; KRegister ktmp = k1; __ vextractf64x4($ymm_0$$XMMRegister, $src2$$XMMRegister, 0); __ vextractf64x4($ymm_1$$XMMRegister, $src2$$XMMRegister, 1); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro_evex($dst$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, true, vector_len); %} ins_pipe( pipe_slow ); %} // ------- Max Reduction ------------ instruct rsmax8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "pshufd $tmp,$src2,0x1\n\t" "pminsb $tmp,$src2\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp,0x0\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1, 0x0\n\t" "movl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $tmp3,$dst\n\t" "cmovl $dst,$tmp3\t! min reduction4S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); __ pmaxsb($tmp$$XMMRegister, $src2$$XMMRegister); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($tmp3$$Register, $dst$$Register); __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmax16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "pshufd $tmp4,$src2,0xE\n\t" "pmaxsb $tmp4,$src2\n\t" "pshufd $tmp,$tmp4,0x1\n\t" "pmaxsb $tmp,$tmp4\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp,0x0\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1, 0x0\n\t" "movl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $tmp3,$dst\n\t" "cmovl 
$dst,$tmp3\t! max reduction4S" %} ins_encode %{ __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE); __ pmaxsb($tmp4$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); __ pmaxsb($tmp$$XMMRegister, $tmp4$$XMMRegister); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($tmp3$$Register, $dst$$Register); __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmax16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "pshufd $tmp4,$src2,0xE\n\t" "vpmaxsb $tmp,$tmp4,$src2\n\t" "pshufd $tmp,$tmp4,0x1\n\t" "vpmaxsb $tmp,$tmp,$tmp4\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp,0x0\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1, 0x0\n\t" "movl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $tmp3,$dst\n\t" "cmovl $dst,$tmp3\t! 
max reduction4S" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($tmp3$$Register, $dst$$Register); __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmax32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "vextracti128_high $tmp,$src2\n\t" "vpmaxsb $tmp,$tmp,$src2\n\t" "pshufd $tmp4,$tmp,0xE\n\t" "vpmaxsb $tmp4,$tmp4,$tmp\n\t" "pshufd $tmp,$tmp4,0x1\n\t" "vpmaxsb $tmp,$tmp,$tmp4\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp,0x0\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1, 0x0\n\t" "movl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $tmp3,$dst\n\t" "cmovl $dst,$tmp3\t! 
min reduction4S" %} ins_encode %{ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($tmp3$$Register, $dst$$Register); __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmax64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "vextracti64x4_high $tmp4,$src2\n\t" "vpmaxsb $tmp4,$tmp4,$src2\n\t" "vextracti128_high $tmp,$tmp4\n\t" "vpmaxsb $tmp,$tmp,$tmp4\n\t" "pshufd $tmp4,$tmp,0xE\n\t" "vpmaxsb $tmp,$tmp4,$tmp\n\t" "pshufd $tmp4,$src2,0xE\n\t" "vpmaxsb $tmp,$tmp4,$src2\n\t" "pshufd $tmp,$tmp4,0x1\n\t" "vpmaxsb $tmp,$tmp,$tmp4\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp,0x0\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1, 0x0\n\t" "movl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp\n\t" "movsbl $tmp2,$tmp2\n\t" "pextrb $tmp3,$tmp\n\t" "movsbl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $tmp3,$dst\n\t" "cmovl $dst,$tmp3\t! 
max reduction32B" %} ins_encode %{ __ vextracti64x4_high($tmp4$$XMMRegister, $src2$$XMMRegister); __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp4$$XMMRegister); __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0); __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); __ movsbl($tmp2$$Register, $tmp2$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); __ movsbl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($tmp3$$Register, $dst$$Register); __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmax4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "pshufd $tmp,$src2,0x1\n\t" "pminsw $tmp,$src2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "pminsw $dst,$tmp2\n\t" "pminsw $dst,$tmp2\n\t" "movswl $dst,$dst\t! min reduction4S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); __ pmaxsw($tmp$$XMMRegister, $src2$$XMMRegister); __ pextrw($tmp2$$Register, $tmp$$XMMRegister,0x1); __ movswl($tmp2$$Register, $tmp2$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movswl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmax4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "pshufd $tmp,$src2,0x1\n\t" "pminsw $tmp,$src2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "pminsw $dst,$tmp2\n\t" "pminsw $dst,$tmp2\n\t" "movswl $dst,$dst\t! 
min reduction4S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ pextrw($tmp2$$Register, $tmp$$XMMRegister,0x1); __ movswl($tmp2$$Register, $tmp2$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movswl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp2$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmax8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "pshufd $tmp2,$src2,0xE\n\t" "pmaxsw $tmp2,$src2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "pmaxsw $tmp,$tmp2\n\t" "pextrw $tmp2,$tmp\n\t" "movswl $tmp2,$tmp2\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "movswl $tmp3,$tmp3\n\t" "cmpl $tmp2,$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1\n\t" "movl $dst,$tmp3\t! max reduction8S" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister,0xE); __ pmaxsw($tmp2$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ pmaxsw($tmp$$XMMRegister, $tmp2$$XMMRegister); __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); __ movswl($tmp4$$Register, $tmp4$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movswl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp4$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmax8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "pshufd $tmp,$src2,0xE\n\t" "vpmaxsw $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmaxsw $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "vpmaxsw $dst,$dst,$tmp3\n\t" "pextrw $tmp3,$tmp, 0x1\n\t" "vpmaxsw $dst,$dst,$tmp3\n\t" "movswl $dst,$dst\t! 
max reduction8S" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); __ movswl($tmp4$$Register, $tmp4$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movswl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp4$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmax16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "vextracti128_high $tmp,$src2\n\t" "vpmaxsw $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmaxsw $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmaxsw $tmp,$tmp,$tmp2\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "movswl $tmp2,$tmp2\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "movswl $tmp3,$tmp3\n\t" "cmpl $tmp2$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1\n\t" "movl $dst,$tmp3\t! max reduction16S" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmaxsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); __ movswl($tmp4$$Register, $tmp4$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movswl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp4$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); %} ins_pipe( pipe_slow ); %} instruct rvmax32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpmaxsw $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpmaxsw $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmaxsw $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpmaxsw $tmp,$tmp,$tmp2\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "movswl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "movswl $dst,$src1\n\t" "cmpl $tmp2$tmp3\n\t" "cmovl $tmp3,$tmp2\n\t" "cmpl $src1,$tmp3\n\t" "cmovl $tmp3,$src1\n\t" "movl $dst,$dst\t! 
max reduction32S" %} ins_encode %{ int vector_len = 2; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpmaxsw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmaxsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); __ movswl($tmp4$$Register, $tmp4$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); __ movswl($tmp3$$Register, $tmp3$$Register); __ cmpl($tmp4$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); __ cmpl($src1$$Register, $tmp3$$Register); __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); __ movl($dst$$Register, $tmp3$$Register); %} ins_pipe( pipe_slow ); %} instruct rsmax2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp,$src2,0x1\n\t" "pmaxsd $tmp,$src2\n\t" "movd $tmp2,$src1\n\t" "pmaxsd $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! max reduction2I" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ pmaxsd($tmp$$XMMRegister, $src2$$XMMRegister); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ pmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp,$src2,0x1\n\t" "vpmaxsd $tmp2,$tmp,$src2\n\t" "movd $tmp,$src1\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! max reduction2I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmax4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp,$src2,0xE\n\t" "pmaxsd $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "pmaxsd $tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "pmaxsd $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! 
max reduction4I" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pmaxsd($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ pmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp,$src2,0xE\n\t" "vpmaxsd $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpmaxsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! max reduction4I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax4I_reduction_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp,$src2,0xE\n\t" "vpmaxsd $tmp2,$tmp,$src2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpmaxsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! max reduction4I" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpmaxsd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpmaxsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! 
max reduction8I" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax8I_reduction_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpmaxsd $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpmaxsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! max reduction8I" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax16I_reduction_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpmaxsd $tmp3,$tmp3,$src2\n\t" "vextracti128_high $tmp,$tmp3\n\t" "vpmaxsd $tmp,$tmp,$tmp3\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "vpmaxsd $tmp2,$tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "vpmaxsd $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! 
max reduction16I" %} ins_encode %{ int vector_len = 2; __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpmaxsd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdl($tmp$$XMMRegister, $src1$$Register); __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} // Long Max Reduction instruct rsmax1L_reduction_reg(rRegL dst, rRegL src1, legVecD src2, rxmm0 xmm_0, legVecD tmp2, legVecD tmp3) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxReductionV src1 src2)); effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3); format %{ "movdq $xmm_0,$src1\n\t" "movdq $tmp2,$src1\n\t" "pcmpgtq $xmm_0,$src2\n\t" "blendvpd $tmp2,$src2\n\t" "movdq $dst,$tmp2\t! max reduction1L" %} ins_encode %{ __ movdq($xmm_0$$XMMRegister,$src1$$Register); __ movdq($tmp2$$XMMRegister,$src1$$Register); __ movdq($tmp3$$XMMRegister,$src2$$Register); __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister); __ blendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister); __ movdq($dst$$Register, $tmp3$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsmax2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, rxmm0 xmm_0, vecX tmp2, vecX tmp3) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxReductionV src1 src2)); effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3); format %{ "pshufd $tmp3,$src2,0xE\n\t" "movdqu $xmm_0,$src2\n\t" "pcmpgtq $xmm_0,$tmp3\n\t" "blendvpd $tmp3,$src2\n\t" "movdqu $xmm_0,$tmp3\n\t" "movdq $tmp2,$src1\n\t" "pcmpgtq $xmm_0,$tmp2\n\t" "blendvpd $tmp2,$tmp3\n\t" "movq $dst,$tmp2\t! max reduction2L" %} ins_encode %{ __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 0xE); __ movdqu($xmm_0$$XMMRegister, $src2$$XMMRegister); __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister); __ blendvpd($tmp3$$XMMRegister, $src2$$XMMRegister); __ movdqu($xmm_0$$XMMRegister, $tmp3$$XMMRegister); __ movdq($tmp2$$XMMRegister, $src1$$Register); __ pcmpgtq($xmm_0$$XMMRegister, $tmp2$$XMMRegister); __ blendvpd($tmp2$$XMMRegister,$tmp3$$XMMRegister); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax2L_reduction_reg(rRegL dst, rRegL src1, legVecX src2, legVecX tmp, legVecX tmp2, legVecX tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "pshufd $tmp2,$src2,0xE\n\t" "vpcmpgtq $tmp,$tmp2,$src2\n\t" "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t" "movq $tmp,$src1\n\t" "vpcmpgtq $tmp3,$tmp2,$tmp\n\t" "blendvpd $tmp2,$tmp2,$src1,$tmp3\n\t" "movq $dst,$tmp2\t! 
max reduction2L" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($tmp2$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($tmp$$XMMRegister,$src1$$Register); __ vpcmpgtq($tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vblendvpd($tmp2$$XMMRegister, $tmp$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, vector_len); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax4L_reduction_reg(rRegL dst, rRegL src1, legVecY src2, legVecY tmp, legVecY tmp2, legVecY tmp3) %{ predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti128_high $tmp2,$src2\n\t" "vpcmpgtq $tmp,$tmp2,$src2\n\t" "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t" "vpshufd $tmp3, $tmp2,0x1\n\t" "vpcmpgtq $tmp, $tmp3,$tmp\n\t2" "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t" "movq $tmp2,$src1\n\t" "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" "blendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" "movq $dst,$tmp2\t! max reduction2L" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($tmp2$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vblendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($tmp$$XMMRegister,$src1$$Register); __ vpcmpgtq($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vblendvpd($tmp2$$XMMRegister, $tmp$$XMMRegister,$tmp3$$XMMRegister,$tmp2$$XMMRegister, vector_len); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvmax8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpcmpgtq $tmp,$tmp3,$src2\n\t" "vblendvpd $tmp3,$tmp3,$src2,$tmp\n\t" "vextracti128_high $tmp2,$tmp3\n\t" "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" "vpshufd $tmp3,$tmp2,0x1\n\t" "vpcmpgtq $tmp,$tmp3,$tmp2\n\t" "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t" "movq $tmp2,$src1\n\t" "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" "movq $dst,$tmp2\t! 
max reduction4I" %} ins_encode %{ int vector_len = 1; __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($tmp3$$XMMRegister, $src2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vextracti128_high($tmp2$$XMMRegister, $tmp3$$XMMRegister); __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vblendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE); __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vblendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($tmp2$$XMMRegister, $src1$$Register); __ vpcmpgtq($tmp$$XMMRegister,$tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vblendvpd($tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} // Float max Reduction instruct rvmax2F_reduction_reg_av(legRegF dst, legVecD src, legVecD tmp, legVecD dtmp, legVecD atmp, legVecD btmp, legVecX xmm_1) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP xmm_1); format %{ "vpermilps $tmp,$src,1\n\t" "vminps_macro $dtmp,$tmp,$src\t! minps\n\t" "vminps_macro $dst,$dtmp,$dst\t! minps" %} ins_encode %{ int vector_len = 0; __ vpermilps($xmm_1$$XMMRegister, $src$$XMMRegister, 1, vector_len); __ vmin_max_macro($dtmp$$XMMRegister, $xmm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, false, vector_len); __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax2F_reduction_reg(legRegF dst, immF src1, legVecD src2, legVecD tmp, legVecD atmp, legVecD btmp, legVecX xmm_1) %{ predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeF::NEG_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1); format %{ "vpermilps $tmp,$src2,1\n\t" "vminps_macro $dst,$tmp,$src2\t! minps" %} ins_encode %{ int vector_len = 0; __ vpermilps($xmm_1$$XMMRegister, $src2$$XMMRegister, 1, vector_len); __ vmin_max_macro($dst$$XMMRegister, $xmm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax4F_reduction_reg_av(legRegF dst, legVecX src, legVecX tmp, legVecX dtmp, legVecX atmp, legVecX btmp, legVecX xmm_0, legVecX xmm_1) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1); format %{ "vpermilps $xmm_1,$src,14\n\t" "vmaxps_macro $xmm_0,$xmm_1,$src\t! maxps\n\t" "vpermilps $xmm_1,$xmm_0,1\n\t" "vmaxps_macro $dtmp,$xmm_1,$xmm_0\t! maxps\n\t" "vmaxps_macro $dst,$dtmp,$dst\t! 
maxps" %} ins_encode %{ int vector_len = 0; __ vpermilps($xmm_1$$XMMRegister, $src$$XMMRegister, 14, vector_len); __ vmin_max_macro($xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($xmm_1$$XMMRegister, $xmm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro($dtmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax4F_reduction_reg(legRegF dst, immF src1, legVecX src2, legVecX tmp, legVecX atmp, legVecX btmp, legVecX xmm_0, legVecX xmm_1) %{ predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeF::NEG_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1); format %{ "vpermilps $xmm_1,$src2,14\n\t" "vmaxps_macro $xmm_0,$xmm_1,$src2\t! maxps\n\t" "vpermilps $xmm_1,$xmm_0,1\n\t" "vmaxps_macro $xmm_0,$xmm_1,$xmm_0\t! maxps" %} ins_encode %{ int vector_len = 0; __ vpermilps($xmm_1$$XMMRegister, $src2$$XMMRegister, 14, vector_len); __ vmin_max_macro($xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($xmm_1$$XMMRegister, $xmm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro($dst$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax8F_reduction_reg_av(legRegF dst, legVecY src, legVecY tmp, legVecY dtmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vperm2f128 $ymm_1,$src,$src,1\n\t" "vmaxps_macro $ymm_0,$ymm_1,$src\t! maxps\n\t" "vpermilps $ymm_1,$ymm_0,14\n\t" "vmaxps_macro $ymm_0,$ymm_1,$ymm_0\t! mips\n\t" "vpermilps $ymm_1,$ymm_0,1\n\t" "vmaxps_macro $dtmp,$ymm_1,$ymm_0\t! maxps\n\t" "vmaxps_macro $dst,$dtmp,$dst\t! 
maxps" %} ins_encode %{ int vector_len = 1; __ vperm2f128($ymm_1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro($dtmp$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax8F_reduction_reg(legRegF dst, immF src1, legVecY src2, legVecY tmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{ predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeF::NEG_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vperm2f128 $ymm_1,$src2,$src2,1\n\t" "vmaxps_macro $ymm_0,$ymm_1,$src2\t! maxps\n\t" "vpermilps $ymm_1,$ymm_0,14\n\t" "vmaxps_macro $ymm_0,$ymm_1,$ymm_0\t! mips\n\t" "vpermilps $ymm_1,$ymm_0,1\n\t" "vmaxps_macro $dst,$ymm_1,$ymm_0\t! maxps" %} ins_encode %{ int vector_len = 1; __ vperm2f128($ymm_1$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, 1); __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro($dst$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax16F_reduction_reg_av(regF dst, vecZ src, vecZ dtmp, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vextractf64x4 $ymm_0, $src, 0\n\t" "vextractf64x4 $ymm_1, $src, 1\n\t" "vmaxps_macro $ymm_0,$ymm_1,$ymm_0\t! maxps\n\t" "vpermpd $ymm_1,$ymm_0, 78\n\t" "vmaxps_macro $ymm_0,$ymm_1,$src\t! maxps\n\t" "vpermilps $ymm_1,$ymm_0,14\n\t" "vmaxps_macro $ymm_0,$ymm_1,$ymm_0\t! mips\n\t" "vpermilps $ymm_1,$ymm_0,1\n\t" "vmaxps_macro $dtmp,$ymm_1,$ymm_0\t! maxps\n\t" "vmaxps_macro $dst,$dtmp,$dst\t! 
maxps" %} ins_encode %{ int vector_len = 1; KRegister ktmp = k1; __ vextractf64x4($ymm_0$$XMMRegister, $src$$XMMRegister, 0); __ vextractf64x4($ymm_1$$XMMRegister, $src$$XMMRegister, 1); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 78, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro_evex($dtmp$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vmin_max_macro_evex($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax16F_reduction_reg(regF dst, immF src1, vecZ src2, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{ predicate(UseAVX > 2 && n->in(1)->as_Type()->type() == (Type*)TypeF::NEG_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vextractf64x4 $ymm_0, $src2, 0\n\t" "vextractf64x4 $ymm_1, $src2, 1\n\t" "vmaxps_macro $ymm_0,$ymm_1,$ymm_0\t! maxps\n\t" "vpermpd $ymm_1,$ymm_0, 78\n\t" "vmaxps_macro $ymm_0,$ymm_1,$ymm_0\t! maxps\n\t" "vpermilps $ymm_1,$ymm_0,14\n\t" "vmaxps_macro $ymm_0,$ymm_1,$ymm_0\t! mips\n\t" "vpermilps $ymm_1,$ymm_0,1\n\t" "vmaxps_macro $dst,$ymm_1,$ymm_0\t! maxps" %} ins_encode %{ int vector_len = 1; KRegister ktmp = k1; __ vextractf64x4($ymm_0$$XMMRegister, $src2$$XMMRegister, 0); __ vextractf64x4($ymm_1$$XMMRegister, $src2$$XMMRegister, 1); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 78, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); __ vpermilps($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro_evex($dst$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , true, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax2D_reduction_reg_av(legRegD dst, legVecX src, legVecX tmp, legVecX dtmp, legVecX atmp, legVecX btmp, legVecX xmm_1) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP xmm_1); format %{ "vpermilpd $xmm_1,$src,1\n\t" "vmaxpd_macro $dtmp,$xmm_1,$src\t! maxps\n\t" "vmaxpd_macro $dst,$dtmp,$dst\t! 
maxps" %} ins_encode %{ int vector_len = 0; __ vpermilpd($xmm_1$$XMMRegister, $src$$XMMRegister, 1, vector_len); __ vmin_max_macro($dtmp$$XMMRegister, $xmm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, false, vector_len); __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax2D_reduction_reg(legRegD dst, immD src1 , legVecX src2, legVecX tmp, legVecX atmp, legVecX btmp, legVecX xmm_1) %{ predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeD::NEG_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1); format %{ "vpermilpd $xmm_1,$src2,1\n\t" "vmaxpd_macro $dst,$xmm_1,$src2\t! maxps" %} ins_encode %{ int vector_len = 0; __ vpermilpd($xmm_1$$XMMRegister, $src2$$XMMRegister, 1, vector_len); __ vmin_max_macro($dst$$XMMRegister, $xmm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, false, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax4D_reduction_reg_av(legRegD dst, legVecY src, legVecY tmp, legVecY dtmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vperm2f128 $ymm_1,$src,$src,1\n\t" "vmaxpd_macro $ymm_0,$ymm_1,$src\t! maxps\n\t" "vpermilpd $ymm_1,$ymm_0,1\n\t" "vmaxpd_macro $dtmp,$ymm_1,$ymm_0\t! maxps\n\t" "vmaxpd_macro $dst,$dtmp,$dst\t! maxps" %} ins_encode %{ int vector_len = 1; __ vperm2f128($ymm_1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro($dtmp$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vmin_max_macro($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax4D_reduction_reg(legRegD dst, immD src1, legVecY src2, legVecY tmp, legVecY atmp, legVecY btmp, legVecY ymm_0, legVecY ymm_1) %{ predicate(UseAVX > 0 && n->in(1)->as_Type()->type() == (Type*)TypeD::NEG_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vperm2f128 $ymm_1,$src2,$src2,1\n\t" "vmaxpd_macro $ymm_0,$ymm_1,$src2\t! maxps\n\t" "vpermilpd $ymm_1,$ymm_0,1\n\t" "vmaxpd_macro $ymm_0,$ymm_1,$ymm_0\t! 
maxps" %} ins_encode %{ int vector_len = 1; __ vperm2f128($ymm_1$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, 1); __ vmin_max_macro($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro($dst$$XMMRegister, $ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax8D_reduction_reg_av(regD dst, vecZ src, vecZ dtmp, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP dtmp, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vextractf64x4 $ymm_0, $src, 0\n\t" "vextractf64x4 $ymm_1, $src, 1\n\t" "vmaxpd_macro $ymm_0,$ymm_1,$ymm_0\t! maxpd\n\t" "vpermpd $ymm_1,$ymm_0, 14\n\t" "vmaxpd_macro $ymm_0,$ymm_1,$ymm_0\t! maxpd\n\t" "vpermilpd $ymm_1,$ymm_0,1\n\t" "vmaxpd_macro $dtmp,$ymm_1,$ymm_0\t! maxpd\n\t" "vmaxpd_macro $dst,$dtmp,$dst\t! maxpd\n\t" %} ins_encode %{ int vector_len = 1; KRegister ktmp = k1; __ vextractf64x4($ymm_0$$XMMRegister, $src$$XMMRegister, 0); __ vextractf64x4($ymm_1$$XMMRegister, $src$$XMMRegister, 1); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro_evex($dtmp$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vmin_max_macro_evex($dst$$XMMRegister, $dtmp$$XMMRegister, $dst$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rvmax8D_reduction_reg(regD dst, immD src1, vecZ src2, vecZ atmp, vecZ btmp, vecY ymm_0, vecY ymm_1) %{ predicate(UseAVX > 2 && n->in(1)->as_Type()->type() == (Type*)TypeD::NEG_INF && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP atmp, TEMP btmp, TEMP ymm_0, TEMP ymm_1); format %{ "vextractf64x4 $ymm_0, $src2, 0\n\t" "vextractf64x4 $ymm_1, $src2, 1\n\t" "vmaxpd_macro $ymm_0,$ymm_1,$ymm_0\t! maxpd\n\t" "vpermpd $ymm_1,$ymm_0, 14\n\t" "vmaxpd_macro $ymm_0,$ymm_1,$ymm_0\t! maxpd\n\t" "vpermilpd $ymm_1,$ymm_0,1\n\t" "vmaxpd_macro $dst,$ymm_1,$ymm_0\t! 
maxpd\n\t" %} ins_encode %{ int vector_len = 1; KRegister ktmp = k1; __ vextractf64x4($ymm_0$$XMMRegister, $src2$$XMMRegister, 0); __ vextractf64x4($ymm_1$$XMMRegister, $src2$$XMMRegister, 1); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vpermpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 14, vector_len); __ vmin_max_macro_evex($ymm_0$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); __ vpermilpd($ymm_1$$XMMRegister, $ymm_0$$XMMRegister, 1, vector_len); __ vmin_max_macro_evex($dst$$XMMRegister, $ymm_1$$XMMRegister, $ymm_0$$XMMRegister, ktmp, $atmp$$XMMRegister, $btmp$$XMMRegister , false, false, vector_len); %} ins_pipe( pipe_slow ); %} instruct rsand8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "pshufd $tmp,$src2,0x1\n\t" "pand $tmp,$src2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp2,$tmp, 0x0\n\t" "andl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "andl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x2\n\t" "andl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x3\n\t" "andl $dst,$tmp2\n\t" "movsbl $dst,$dst\t! and reduction8B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ pand($tmp$$XMMRegister, $src2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ andl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ andl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); __ andl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); __ andl($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsand16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "pshufd $tmp,$src2,0xE\n\t" "pand $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "pand $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "andl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "andl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "andl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "andl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! 
and reduction16B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pand($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ andl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ andl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ andl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ andl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvand32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti128_high $tmp,$src2\n\t" "vpand $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "andl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "andl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "andl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "andl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! and reduction32B" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ andl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ andl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ andl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ andl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvand64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpand $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "movdl $tmp3,$tmp\n\t" "andl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "andl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "andl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "andl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! 
and reduction64B" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ andl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ andl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ andl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ andl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsand4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "pshufd $tmp,$src2,0x1\n\t" "pand $tmp,$src2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "andw $dst,$tmp2\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "andw $dst,$tmp2\n\t" "movswl $dst,$dst\t! and reduction4S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ pand($tmp$$XMMRegister, $src2$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ andw($dst$$Register, $tmp2$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ andw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsand8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "pshufd $tmp,$src2,0xE\n\t" "pand $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "pand $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "andw $dst,$tmp3\n\t" "pextrw $tmp3,$tmp, 0x1\n\t" "andw $dst,$tmp3\n\t" "movswl $dst,$dst\t! 
and reduction8S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pand($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ andw($dst$$Register, $tmp3$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ andw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvand16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti128_high $tmp,$src2\n\t" "vpand $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "andw $dst,$tmp3\n\t" "pextrw $tmp3,$tmp, 0x1\n\t" "andw $dst,$tmp3\n\t" "movswl $dst,$dst\t! and reduction16S" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ andw($dst$$Register, $tmp3$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ andw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvand32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpand $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "movdl $tmp3,$tmp\n\t" "andw $dst,$tmp3\n\t" "shrl $tmp3,0x16\n\t" "andw $dst,$tmp3\n\t" "movswl $dst,$dst\t! 
and reduction32S" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ andw($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 16); __ andw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsand2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0x1\n\t" "pand $tmp2,$src2\n\t" "movd $tmp,$src1\n\t" "pand $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! and reduction2I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rsand4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "pand $tmp2,$src2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "pand $tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "pand $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! and reduction4I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct rvand8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpand $tmp,$tmp,$src2\n\t" "vpshufd $tmp2,$tmp,0xE\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "vpshufd $tmp2,$tmp,0x1\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpand $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! 
and reduction8I" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvand16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpand $tmp3,$tmp3,$src2\n\t" "vextracti128_high $tmp,$tmp3\n\t" "vpand $tmp,$tmp,$tmp3\n\t" "vpshufd $tmp2,$tmp,0xE\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "vpshufd $tmp2,$tmp,0x1\n\t" "vpand $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpand $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! and reduction16I" %} ins_encode %{ __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpand($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, 0); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, 0); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
#ifdef _LP64
instruct rsand2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "pand $tmp2,$src2\n\t" "movdq $tmp,$src1\n\t" "pand $tmp2,$tmp\n\t" "movq $dst,$tmp2\t! and reduction2L" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdq($tmp$$XMMRegister, $src1$$Register); __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
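// The T_LONG reductions never need the GPR extraction ladder used by the
// subword and int forms: all lanes are folded inside the XMM registers
// and the final 64-bit value leaves through a single movdq/movq. movdq
// between an XMM register and a 64-bit GPR only exists on x86_64, hence
// the _LP64 guard around these patterns. Rough scalar equivalent of
// rsand2L above (illustrative sketch only, not generated code):
//
//   jlong and_reduce2L(jlong src1, const jlong v[2]) {
//     return src1 & v[0] & v[1];  // one pshufd fold plus two pands
//   }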
instruct rvand4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpand $tmp2,$tmp,$src2\n\t" "vpshufd $tmp,$tmp2,0xE\n\t" "vpand $tmp2,$tmp2,$tmp\n\t" "movq $tmp,$src1\n\t" "vpand $tmp2,$tmp2,$tmp\n\t" "movq $dst,$tmp2\t! and reduction4L" %} ins_encode %{ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movq($tmp$$XMMRegister, $src1$$Register); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvand8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AndReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpandq $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpandq $tmp2,$tmp2,$tmp\n\t" "vpshufd $tmp,$tmp2,0xE\n\t" "vpandq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpandq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! and reduction8L" %} ins_encode %{ __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
#endif
instruct rsor8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "pshufd $tmp,$src2,0x1\n\t" "por $tmp,$src2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp2,$tmp, 0x0\n\t" "orl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "orl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x2\n\t" "orl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x3\n\t" "orl $dst,$tmp2\n\t" "movsbl $dst,$dst\t! or reduction8B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ por($tmp$$XMMRegister, $src2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ orl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ orl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); __ orl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); __ orl($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %}
instruct rsor16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "pshufd $tmp,$src2,0xE\n\t" "por $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "por $tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "orl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "orl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "orl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "orl $dst,$tmp3\n\t" "movsbl $dst,$dst\t!
or reduction16B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ por($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ por($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ orl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ orl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ orl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ orl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvor32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti128_high $tmp,$src2\n\t" "vpor $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "orl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "orl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "orl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "orl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! or reduction32B" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ orl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ orl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ orl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ orl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvor64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpor $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "movdl $tmp3,$tmp\n\t" "orl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "orl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "orl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "orl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! 
or reduction64B" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ orl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ orl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ orl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ orl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsor4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "pshufd $tmp,$src2,0x1\n\t" "por $tmp,$src2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "orw $dst,$tmp2\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "orw $dst,$tmp2\n\t" "movswl $dst,$dst\t! or reduction4S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ por($tmp$$XMMRegister, $src2$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ orw($dst$$Register, $tmp2$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ orw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsor8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "pshufd $tmp,$src2,0xE\n\t" "por $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "por $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "orw $dst,$tmp3\n\t" "pextrw $tmp3,$tmp, 0x1\n\t" "orw $dst,$tmp3\n\t" "movswl $dst,$dst\t! 
or reduction8S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ por($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ por($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ orw($dst$$Register, $tmp3$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ orw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %}
instruct rvor16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti128_high $tmp,$src2\n\t" "vpor $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "orw $dst,$tmp3\n\t" "pextrw $tmp3,$tmp, 0x1\n\t" "orw $dst,$tmp3\n\t" "movswl $dst,$dst\t! or reduction16S" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ orw($dst$$Register, $tmp3$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ orw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %}
instruct rvor32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpor $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "movdl $tmp3,$tmp\n\t" "orw $dst,$tmp3\n\t" "shrl $tmp3,0x10\n\t" "orw $dst,$tmp3\n\t" "movswl $dst,$dst\t! or reduction32S" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ orw($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 16); __ orw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %}
instruct rsor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0x1\n\t" "por $tmp2,$src2\n\t" "movd $tmp,$src1\n\t" "por $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! or reduction2I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ por($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rsor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "por $tmp2,$src2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "por $tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "por $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! or reduction4I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ por($tmp2$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpor $tmp,$tmp,$src2\n\t" "vpshufd $tmp2,$tmp,0xE\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "vpshufd $tmp2,$tmp,0x1\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpor $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! or reduction8I" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpor $tmp3,$tmp3,$src2\n\t" "vextracti128_high $tmp,$tmp3\n\t" "vpor $tmp,$tmp,$tmp3\n\t" "vpshufd $tmp2,$tmp,0xE\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "vpshufd $tmp2,$tmp,0x1\n\t" "vpor $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpor $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! or reduction16I" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rsor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "por $tmp2,$src2\n\t" "movdq $tmp,$src1\n\t" "por $tmp2,$tmp\n\t" "movq $dst,$tmp2\t! or reduction2L" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ por($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdq($tmp$$XMMRegister, $src1$$Register); __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpor $tmp2,$tmp,$src2\n\t" "vpshufd $tmp,$tmp2,0xE\n\t" "vpor $tmp2,$tmp2,$tmp\n\t" "movq $tmp,$src1\n\t" "vpor $tmp2,$tmp2,$tmp\n\t" "movq $dst,$tmp2\t!
or reduction4L" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movq($tmp$$XMMRegister, $src1$$Register); __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} #ifdef _LP64 instruct rvor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (OrReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vporq $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vporq $tmp2,$tmp2,$tmp\n\t" "vpshufd $tmp,$tmp2,0xE\t" "vporq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vporq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! or reduction8L" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} #endif instruct rsxor8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "pshufd $tmp,$src2,0x1\n\t" "pxor $tmp,$src2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp2,$tmp, 0x0\n\t" "xorl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x1\n\t" "xorl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x2\n\t" "xorl $dst,$tmp2\n\t" "pextrb $tmp2,$tmp, 0x3\n\t" "xorl $dst,$tmp2\n\t" "movsbl $dst,$dst\t! 
xor reduction8B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ pxor($tmp$$XMMRegister, $src2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ xorl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ xorl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); __ xorl($dst$$Register, $tmp2$$Register); __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); __ xorl($dst$$Register, $tmp2$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsxor16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "pshufd $tmp,$src2,0xE\n\t" "pxor $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "pxor $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "xorl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "xorl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "xorl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "xorl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! xor reduction16B" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pxor($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pxor($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ xorl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ xorl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ xorl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ xorl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvxor32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti128_high $tmp,$src2\n\t" "vpxor $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "pextrb $tmp3,$tmp, 0x0\n\t" "xorl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x1\n\t" "xorl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x2\n\t" "xorl $dst,$tmp3\n\t" "pextrb $tmp3,$tmp, 0x3\n\t" "xorl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! 
xor reduction32B" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ xorl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ xorl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); __ xorl($dst$$Register, $tmp3$$Register); __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); __ xorl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvxor64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpxor $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "movzbl $dst,$src1\n\t" "movdl $tmp3,$tmp\n\t" "xorl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "xorl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "xorl $dst,$tmp3\n\t" "shrl $tmp3,0x8\n\t" "xorl $dst,$tmp3\n\t" "movsbl $dst,$dst\t! xor reduction64B" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzbl($dst$$Register, $src1$$Register); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ xorl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ xorl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ xorl($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 8); __ xorl($dst$$Register, $tmp3$$Register); __ movsbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsxor4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP dst); format %{ "pshufd $tmp,$src2,0x1\n\t" "pxor $tmp,$src2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp2,$tmp, 0x0\n\t" "xorw $dst,$tmp2\n\t" "pextrw $tmp2,$tmp, 0x1\n\t" "xorw $dst,$tmp2\n\t" "movswl $dst,$dst\t! 
xor reduction4S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); __ pxor($tmp$$XMMRegister, $src2$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); __ xorw($dst$$Register, $tmp2$$Register); __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); __ xorw($dst$$Register, $tmp2$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rsxor8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "pshufd $tmp,$src2,0xE\n\t" "pxor $tmp,$src2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "pxor $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "xorw $dst,$tmp3\n\t" "pextrw $tmp3,$tmp, 0x1\n\t" "xorw $dst,$tmp3\n\t" "movswl $dst,$dst\t! xor reduction8S" %} ins_encode %{ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); __ pxor($tmp$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ pxor($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ xorw($dst$$Register, $tmp3$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ xorw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct rvxor16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti128_high $tmp,$src2\n\t" "vpxor $tmp,$tmp,$src2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "pextrw $tmp3,$tmp, 0x0\n\t" "xorw $dst,$tmp3\n\t" "pextrw $tmp3,$tmp, 0x1\n\t" "xorw $dst,$tmp3\n\t" "movswl $dst,$dst\t! 
xor reduction16S" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); __ xorw($dst$$Register, $tmp3$$Register); __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); __ xorw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %}
instruct rvxor32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpxor $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0xE\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "pshufd $tmp2,$tmp,0x1\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" "movdl $tmp3,$tmp\n\t" "xorw $dst,$tmp3\n\t" "shrl $tmp3,0x10\n\t" "xorw $dst,$tmp3\n\t" "movswl $dst,$dst\t! xor reduction32S" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movzwl($dst$$Register, $src1$$Register); __ movdl($tmp3$$Register, $tmp$$XMMRegister); __ xorw($dst$$Register, $tmp3$$Register); __ shrl($tmp3$$Register, 16); __ xorw($dst$$Register, $tmp3$$Register); __ movswl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %}
instruct rsxor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0x1\n\t" "pxor $tmp2,$src2\n\t" "movd $tmp,$src1\n\t" "pxor $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! xor reduction2I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rsxor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "pxor $tmp2,$src2\n\t" "pshufd $tmp,$tmp2,0x1\n\t" "pxor $tmp2,$tmp\n\t" "movd $tmp,$src1\n\t" "pxor $tmp2,$tmp\n\t" "movd $dst,$tmp2\t! xor reduction4I" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($tmp$$XMMRegister, $src1$$Register); __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvxor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpxor $tmp,$tmp,$src2\n\t" "vpshufd $tmp2,$tmp,0xE\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "vpshufd $tmp2,$tmp,0x1\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpxor $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! xor reduction8I" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvxor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2, TEMP tmp3); format %{ "vextracti64x4_high $tmp3,$src2\n\t" "vpxor $tmp3,$tmp3,$src2\n\t" "vextracti128_high $tmp,$tmp3\n\t" "vpxor $tmp,$tmp,$tmp3\n\t" "vpshufd $tmp2,$tmp,0xE\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "vpshufd $tmp2,$tmp,0x1\n\t" "vpxor $tmp,$tmp,$tmp2\n\t" "movd $tmp2,$src1\n\t" "vpxor $tmp2,$tmp,$tmp2\n\t" "movd $dst,$tmp2\t! xor reduction16I" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpxor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($tmp2$$XMMRegister, $src1$$Register); __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ movdl($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
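// Note on the vector_len argument threaded through these AVX encodings:
// judging by its usage in this file, 0 selects 128-bit, 1 selects 256-bit
// and 2 selects 512-bit operation (compare the vadd* instructs below,
// where vecX/vecY/vecZ operands pair with vector_len 0/1/2). The
// reductions deliberately mix widths: the first fold of a 512-bit source
// runs at width 1 (high 256 bits combined with the low half) and every
// later fold at width 0, once the live data fits in one XMM register.
//
//   // Illustrative mapping only (an assumption based on usage here):
//   //   vector_len = 0  ->  xmm, 128-bit op
//   //   vector_len = 1  ->  ymm, 256-bit op
//   //   vector_len = 2  ->  zmm, 512-bit op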
instruct rsxor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "pshufd $tmp2,$src2,0xE\n\t" "pxor $tmp2,$src2\n\t" "movdq $tmp,$src1\n\t" "pxor $tmp2,$tmp\n\t" "movq $dst,$tmp2\t! xor reduction2L" %} ins_encode %{ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); __ movdq($tmp$$XMMRegister, $src1$$Register); __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct rvxor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti128_high $tmp,$src2\n\t" "vpxor $tmp2,$tmp,$src2\n\t" "vpshufd $tmp,$tmp2,0xE\n\t" "vpxor $tmp2,$tmp2,$tmp\n\t" "movq $tmp,$src1\n\t" "vpxor $tmp2,$tmp2,$tmp\n\t" "movq $dst,$tmp2\t! xor reduction4L" %} ins_encode %{ int vector_len = 0; __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movq($tmp$$XMMRegister, $src1$$Register); __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
#ifdef _LP64
instruct rvxor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (XorReductionV src1 src2)); effect(TEMP tmp, TEMP tmp2); format %{ "vextracti64x4_high $tmp2,$src2\n\t" "vpxorq $tmp2,$tmp2,$src2\n\t" "vextracti128_high $tmp,$tmp2\n\t" "vpxorq $tmp2,$tmp2,$tmp\n\t" "vpshufd $tmp,$tmp2,0xE\n\t" "vpxorq $tmp2,$tmp2,$tmp\n\t" "movdq $tmp,$src1\n\t" "vpxorq $tmp2,$tmp2,$tmp\n\t" "movdq $dst,$tmp2\t! xor reduction8L" %} ins_encode %{ int vector_len = 0; __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($tmp$$XMMRegister, $src1$$Register); __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ movdq($dst$$Register, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
#endif
// ====================VECTOR ARITHMETIC=======================================
// --------------------------------- ADD --------------------------------------
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed4B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t!
add packed4B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8B(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed8B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16B(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB dst src)); format %{ "paddb $dst,$src\t! add packed16B" %} ins_encode %{ __ paddb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} ins_encode %{ int vector_len = 0; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} ins_encode %{ int vector_len = 1; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} ins_encode %{ int vector_len = 1; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (AddVB src1 src2)); format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} ins_encode %{ int vector_len = 2; __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! 
add packed64B" %} ins_encode %{ int vector_len = 2; __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector add instruct vadd2S(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (AddVS dst src)); format %{ "paddw $dst,$src\t! add packed2S" %} ins_encode %{ __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} ins_encode %{ int vector_len = 0; __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} ins_encode %{ int vector_len = 0; __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4S(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVS dst src)); format %{ "paddw $dst,$src\t! add packed4S" %} ins_encode %{ __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} ins_encode %{ int vector_len = 0; __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} ins_encode %{ int vector_len = 0; __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8S(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS dst src)); format %{ "paddw $dst,$src\t! add packed8S" %} ins_encode %{ __ paddw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} ins_encode %{ int vector_len = 0; __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} ins_encode %{ int vector_len = 0; __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! 
add packed16S" %} ins_encode %{ int vector_len = 1; __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} ins_encode %{ int vector_len = 1; __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (AddVS src1 src2)); format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} ins_encode %{ int vector_len = 2; __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} ins_encode %{ int vector_len = 2; __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector add instruct vadd2I(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (AddVI dst src)); format %{ "paddd $dst,$src\t! add packed2I" %} ins_encode %{ __ paddd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} ins_encode %{ int vector_len = 0; __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} ins_encode %{ int vector_len = 0; __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4I(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVI dst src)); format %{ "paddd $dst,$src\t! add packed4I" %} ins_encode %{ __ paddd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} ins_encode %{ int vector_len = 0; __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} ins_encode %{ int vector_len = 0; __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! 
add packed8I" %} ins_encode %{ int vector_len = 1; __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} ins_encode %{ int vector_len = 1; __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (AddVI src1 src2)); format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} ins_encode %{ int vector_len = 2; __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} ins_encode %{ int vector_len = 2; __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Longs vector add instruct vadd2L(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (AddVL dst src)); format %{ "paddq $dst,$src\t! add packed2L" %} ins_encode %{ __ paddq($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVL src1 src2)); format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} ins_encode %{ int vector_len = 0; __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} ins_encode %{ int vector_len = 0; __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (AddVL src1 src2)); format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} ins_encode %{ int vector_len = 1; __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} ins_encode %{ int vector_len = 1; __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (AddVL src1 src2)); format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} ins_encode %{ int vector_len = 2; __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! 
add packed8L" %} ins_encode %{ int vector_len = 2; __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Floats vector add instruct vadd2F(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (AddVF dst src)); format %{ "addps $dst,$src\t! add packed2F" %} ins_encode %{ __ addps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} ins_encode %{ int vector_len = 0; __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} ins_encode %{ int vector_len = 0; __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4F(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (AddVF dst src)); format %{ "addps $dst,$src\t! add packed4F" %} ins_encode %{ __ addps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} ins_encode %{ int vector_len = 0; __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} ins_encode %{ int vector_len = 0; __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} ins_encode %{ int vector_len = 1; __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} ins_encode %{ int vector_len = 1; __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (AddVF src1 src2)); format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} ins_encode %{ int vector_len = 2; __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! 
add packed16F" %} ins_encode %{ int vector_len = 2; __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Doubles vector add instruct vadd2D(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (AddVD dst src)); format %{ "addpd $dst,$src\t! add packed2D" %} ins_encode %{ __ addpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVD src1 src2)); format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} ins_encode %{ int vector_len = 0; __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} ins_encode %{ int vector_len = 0; __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVD src1 src2)); format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} ins_encode %{ int vector_len = 1; __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} ins_encode %{ int vector_len = 1; __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (AddVD src1 src2)); format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} ins_encode %{ int vector_len = 2; __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} ins_encode %{ int vector_len = 2; __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- SUB -------------------------------------- // Bytes vector sub instruct vsub4B(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (SubVB dst src)); format %{ "psubb $dst,$src\t! sub packed4B" %} ins_encode %{ __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} ins_encode %{ int vector_len = 0; __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! 
sub packed4B" %} ins_encode %{ int vector_len = 0; __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8B(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (SubVB dst src)); format %{ "psubb $dst,$src\t! sub packed8B" %} ins_encode %{ __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} ins_encode %{ int vector_len = 0; __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} ins_encode %{ int vector_len = 0; __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16B(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB dst src)); format %{ "psubb $dst,$src\t! sub packed16B" %} ins_encode %{ __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} ins_encode %{ int vector_len = 0; __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} ins_encode %{ int vector_len = 0; __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} ins_encode %{ int vector_len = 1; __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} ins_encode %{ int vector_len = 1; __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (SubVB src1 src2)); format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} ins_encode %{ int vector_len = 2; __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! 
sub packed64B" %} ins_encode %{ int vector_len = 2; __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector sub instruct vsub2S(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (SubVS dst src)); format %{ "psubw $dst,$src\t! sub packed2S" %} ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4S(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (SubVS dst src)); format %{ "psubw $dst,$src\t! sub packed4S" %} ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8S(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS dst src)); format %{ "psubw $dst,$src\t! sub packed8S" %} ins_encode %{ __ psubw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} ins_encode %{ int vector_len = 0; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed16S" %} ins_encode %{ int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} ins_encode %{ int vector_len = 1; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src1 src2)); format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} ins_encode %{ int vector_len = 2; __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} ins_encode %{ int vector_len = 2; __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector sub instruct vsub2I(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (SubVI dst src)); format %{ "psubd $dst,$src\t! sub packed2I" %} ins_encode %{ __ psubd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} ins_encode %{ int vector_len = 0; __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} ins_encode %{ int vector_len = 0; __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4I(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (SubVI dst src)); format %{ "psubd $dst,$src\t! sub packed4I" %} ins_encode %{ __ psubd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} ins_encode %{ int vector_len = 0; __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} ins_encode %{ int vector_len = 0; __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed8I" %} ins_encode %{ int vector_len = 1; __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} ins_encode %{ int vector_len = 1; __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (SubVI src1 src2)); format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} ins_encode %{ int vector_len = 2; __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} ins_encode %{ int vector_len = 2; __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Longs vector sub instruct vsub2L(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (SubVL dst src)); format %{ "psubq $dst,$src\t! sub packed2L" %} ins_encode %{ __ psubq($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVL src1 src2)); format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} ins_encode %{ int vector_len = 0; __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVL src (LoadVector mem))); format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} ins_encode %{ int vector_len = 0; __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (SubVL src1 src2)); format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} ins_encode %{ int vector_len = 1; __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (SubVL src (LoadVector mem))); format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} ins_encode %{ int vector_len = 1; __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (SubVL src1 src2)); format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} ins_encode %{ int vector_len = 2; __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (SubVL src (LoadVector mem))); format %{ "vpsubq $dst,$src,$mem\t! 
sub packed8L" %} ins_encode %{ int vector_len = 2; __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Floats vector sub instruct vsub2F(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (SubVF dst src)); format %{ "subps $dst,$src\t! sub packed2F" %} ins_encode %{ __ subps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} ins_encode %{ int vector_len = 0; __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} ins_encode %{ int vector_len = 0; __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4F(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (SubVF dst src)); format %{ "subps $dst,$src\t! sub packed4F" %} ins_encode %{ __ subps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} ins_encode %{ int vector_len = 0; __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} ins_encode %{ int vector_len = 0; __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} ins_encode %{ int vector_len = 1; __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} ins_encode %{ int vector_len = 1; __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (SubVF src1 src2)); format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} ins_encode %{ int vector_len = 2; __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! 
sub packed16F" %} ins_encode %{ int vector_len = 2; __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Doubles vector sub instruct vsub2D(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (SubVD dst src)); format %{ "subpd $dst,$src\t! sub packed2D" %} ins_encode %{ __ subpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVD src1 src2)); format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} ins_encode %{ int vector_len = 0; __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} ins_encode %{ int vector_len = 0; __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVD src1 src2)); format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} ins_encode %{ int vector_len = 1; __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} ins_encode %{ int vector_len = 1; __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (SubVD src1 src2)); format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} ins_encode %{ int vector_len = 2; __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} ins_encode %{ int vector_len = 2; __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- MUL -------------------------------------- // Byte vector mul instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp2, vecS tmp) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 4); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp2, TEMP tmp); format %{"pmovsxbw $tmp,$src1\n\t" "pmovsxbw $tmp2,$src2\n\t" "pmullw $tmp,$tmp2\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movss $dst,$tmp\t! 
mul packed4B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp2, vecD tmp) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 8); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp2, TEMP tmp); format %{"pmovsxbw $tmp,$src1\n\t" "pmovsxbw $tmp2,$src2\n\t" "pmullw $tmp,$tmp2\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movsd $dst,$tmp\t! mul packed8B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp3, vecX tmp2, vecX tmp) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 16); match(Set dst (MulVB src1 src2)); effect(TEMP tmp3, TEMP tmp2, TEMP tmp); format %{"pmovsxbw $tmp,$src1\n\t" "pmovsxbw $tmp2,$src2\n\t" "pmullw $tmp,$tmp2\n\t" "pshufd $tmp2,$src1\n\t" "pshufd $tmp3,$src2\n\t" "pmovsxbw $tmp2,$tmp2\n\t" "pmovsxbw $tmp3,$tmp3\n\t" "pmullw $tmp2,$tmp3\n\t" "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp3\n\t" "pand $tmp2,$tmp3\n\t" "packuswb $tmp,$tmp2\n\t" "movdqu $dst,$tmp \n\t! mul packed16B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 238); __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 238); __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); __ pmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister); __ pmullw($tmp2$$XMMRegister, $tmp3$$XMMRegister); __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister); __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecY tmp2, vecY tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp2, TEMP tmp); format %{"vpmovsxbw $tmp,$src1\n\t" "vpmovsxbw $tmp2,$src2\n\t" "vpmullw $tmp,$tmp2\n\t" "vmovdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "vpand $tmp,$tmp2\n\t" "vextracti128_high $tmp2,$tmp\n\t" "vpackuswb $dst,$tmp, $tmp2\n\t! 
mul packed16B" %} ins_encode %{ int vector_len = 1; __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vmovdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, vecY tmp3) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3); format %{"vextracti128_high $tmp1,$src1\n\t" "vextracti128_high $tmp3,$src2\n\t" "vpmovsxbw $tmp1,$tmp1\n\t" "vpmovsxbw $tmp3,$tmp3\n\t" "vpmullw $tmp1,$tmp1,$tmp3\n\t" "vpmovsxbw $tmp2,$src1\n\t" "vpmovsxbw $tmp3,$src2\n\t" "vpmullw $tmp2,$tmp2,$tmp3\n\t" "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t" "vpbroadcastd $tmp3, $tmp3\n\t" "vpand $tmp2,$tmp2,$tmp3\n\t" "vpand $tmp1,$tmp1,$tmp3\n\t" "vpackuswb $dst,$tmp2,$tmp1\n\t" "vpermq $dst, $dst, 0xD8\t! mul packed32B" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); __ vextracti128_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, vecZ tmp3, vecZ tmp4, vecZ tmp5, vecZ tmp6) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 64); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4); format %{"vextracti64x4_high $tmp1,$src1\n\t" "vextracti64x4_high $tmp3,$src2\n\t" "vpmovsxbw $tmp1,$tmp1\n\t" "vpmovsxbw $tmp3,$tmp3\n\t" "vpmullw $tmp1,$tmp1,$tmp3\n\t" "vpmovsxbw $tmp2,$src1\n\t" "vpmovsxbw $tmp3,$src2\n\t" "vpmullw $tmp2,$tmp2,$tmp3\n\t" "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t" "vpbroadcastd $tmp3, $tmp3\n\t" "vpand $tmp1,$tmp1,$tmp3\n\t" "vpand $tmp2,$tmp2,$tmp3\n\t" "vpackuswb $tmp1,$tmp2,$tmp1\n\t" "vextracti64x4_high $tmp3,$tmp1\n\t" "vpermq $tmp3, $tmp3, 0x8D\n\t" "vpermq $tmp1, $tmp1, 0xD8\n\t" "vmovdqu $tmp4,$tmp3\n\t" "vmovdqu $tmp2,$tmp1\n\t" "vpblendd $tmp3,$tmp3,$tmp1\n\t" "vpblendd $tmp2,$tmp2,$tmp4\n\t" "vpermq $tmp2,$tmp2,0x4E\n\t" "vinserti64x4 $dst,$dst,$tmp3,0x00\n\t" "vinserti64x4 $dst,$dst,$tmp2,0x01\t! 
mul packed64B" %} ins_encode %{ int vector_len = 2; __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpackuswb($tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ vextracti64x4_high($tmp3$$XMMRegister, $tmp1$$XMMRegister); __ vpermq($tmp3$$XMMRegister, $tmp3$$XMMRegister, 0x8D, 1); __ vpermq($tmp1$$XMMRegister, $tmp1$$XMMRegister, 0xD8, 1); __ vmovdqu($tmp4$$XMMRegister, $tmp3$$XMMRegister); __ vmovdqu($tmp2$$XMMRegister, $tmp1$$XMMRegister); __ vpblendd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $tmp1$$XMMRegister, 0x0F, 1); __ vpblendd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp4$$XMMRegister, 0x0F, 1); __ vpermq($tmp2$$XMMRegister, $tmp2$$XMMRegister, 0x4E, 1); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp3$$XMMRegister, 0x00); __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, 0x01); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector mul instruct vmul2S(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS dst src)); format %{ "pmullw $dst,$src\t! mul packed2S" %} ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4S(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (MulVS dst src)); format %{ "pmullw $dst,$src\t! mul packed4S" %} ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8S(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS dst src)); format %{ "pmullw $dst,$src\t! mul packed8S" %} ins_encode %{ __ pmullw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} ins_encode %{ int vector_len = 0; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} ins_encode %{ int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} ins_encode %{ int vector_len = 1; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (MulVS src1 src2)); format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} ins_encode %{ int vector_len = 2; __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} ins_encode %{ int vector_len = 2; __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector mul (sse4_1) instruct vmul2I(vecD dst, vecD src) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 2); match(Set dst (MulVI dst src)); format %{ "pmulld $dst,$src\t! mul packed2I" %} ins_encode %{ __ pmulld($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} ins_encode %{ int vector_len = 0; __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} ins_encode %{ int vector_len = 0; __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4I(vecX dst, vecX src) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 4); match(Set dst (MulVI dst src)); format %{ "pmulld $dst,$src\t! mul packed4I" %} ins_encode %{ __ pmulld($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} ins_encode %{ int vector_len = 0; __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} ins_encode %{ int vector_len = 0; __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Long vector mul instruct mul2L_reg(vecX dst, vecX src2, vecX tmp) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && VM_Version::supports_sse4_1()); match(Set dst (MulVL dst src2)); effect(TEMP dst, TEMP tmp); format %{ "pshufd $tmp,$src2, 177\n\t" "pmulld $tmp,$dst\n\t" "phaddd $tmp,$tmp\n\t" "pmovzxdq $tmp,$tmp\n\t" "psllq $tmp, 32\n\t" "pmuludq $dst,$src2\n\t" "paddq $dst,$tmp\n\t! mul packed2L" %} ins_encode %{ int vector_len = 0; __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); __ psllq($tmp$$XMMRegister, 32); __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul2L_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && VM_Version::supports_avx()); match(Set dst (MulVL src1 src2)); effect(TEMP tmp1, TEMP tmp); format %{ "vpshufd $tmp,$src2\n\t" "vpmulld $tmp,$src1,$tmp\n\t" "vphaddd $tmp,$tmp,$tmp\n\t" "vpmovzxdq $tmp,$tmp\n\t" "vpsllq $tmp,$tmp\n\t" "vpmuludq $tmp1,$src1,$src2\n\t" "vpaddq $dst,$tmp,$tmp1\t! mul packed2L" %} ins_encode %{ int vector_len = 0; __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len); __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len); __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); match(Set dst (MulVL src1 src2)); format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} ins_encode %{ int vector_len = 0; __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); match(Set dst (MulVL src (LoadVector mem))); format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} ins_encode %{ int vector_len = 0; __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4L_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp, vecY tmp1,) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && VM_Version::supports_avx2()); match(Set dst (MulVL src1 src2)); effect(TEMP tmp1, TEMP tmp); format %{ "vpshufd $tmp,$src2\n\t" "vpmulld $tmp,$src1,$tmp\n\t" "vphaddd $tmp,$tmp,$tmp\n\t" "vpmovzxdq $tmp,$tmp\n\t" "vpsllq $tmp,$tmp\n\t" "vpmuludq $tmp1,$src1,$src2\n\t" "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %} ins_encode %{ int vector_len = 1; __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len); __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len); __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); match(Set dst (MulVL src1 src2)); format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} ins_encode %{ int vector_len = 1; __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); match(Set dst (MulVL src (LoadVector mem))); format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} ins_encode %{ int vector_len = 1; __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); match(Set dst (MulVL src1 src2)); format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} ins_encode %{ int vector_len = 2; __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); match(Set dst (MulVL src (LoadVector mem))); format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} ins_encode %{ int vector_len = 2; __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed8I" %} ins_encode %{ int vector_len = 1; __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} ins_encode %{ int vector_len = 1; __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (MulVI src1 src2)); format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} ins_encode %{ int vector_len = 2; __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} ins_encode %{ int vector_len = 2; __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Floats vector mul instruct vmul2F(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (MulVF dst src)); format %{ "mulps $dst,$src\t! mul packed2F" %} ins_encode %{ __ mulps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} ins_encode %{ int vector_len = 0; __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} ins_encode %{ int vector_len = 0; __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4F(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (MulVF dst src)); format %{ "mulps $dst,$src\t! mul packed4F" %} ins_encode %{ __ mulps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} ins_encode %{ int vector_len = 0; __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} ins_encode %{ int vector_len = 0; __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! 
mul packed8F" %} ins_encode %{ int vector_len = 1; __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} ins_encode %{ int vector_len = 1; __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (MulVF src1 src2)); format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} ins_encode %{ int vector_len = 2; __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} ins_encode %{ int vector_len = 2; __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Doubles vector mul instruct vmul2D(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (MulVD dst src)); format %{ "mulpd $dst,$src\t! mul packed2D" %} ins_encode %{ __ mulpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVD src1 src2)); format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} ins_encode %{ int vector_len = 0; __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} ins_encode %{ int vector_len = 0; __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVD src1 src2)); format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} ins_encode %{ int vector_len = 1; __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} ins_encode %{ int vector_len = 1; __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (MulVD src1 src2)); format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} ins_encode %{ int vector_len = 2; __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst k0,$src,$mem\t! 
mul packed8D" %} ins_encode %{ int vector_len = 2; __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); effect(TEMP dst, USE src1, USE src2); format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" %} ins_encode %{ int vector_len = 1; int cond = (Assembler::Condition)($copnd$$cmpcode); __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); effect(TEMP dst, USE src1, USE src2); format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" %} ins_encode %{ int vector_len = 1; int cond = (Assembler::Condition)($copnd$$cmpcode); __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- DIV -------------------------------------- // Floats vector div instruct vdiv2F(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (DivVF dst src)); format %{ "divps $dst,$src\t! div packed2F" %} ins_encode %{ __ divps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} ins_encode %{ int vector_len = 0; __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} ins_encode %{ int vector_len = 0; __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4F(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (DivVF dst src)); format %{ "divps $dst,$src\t! div packed4F" %} ins_encode %{ __ divps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} ins_encode %{ int vector_len = 0; __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! 
div packed4F" %} ins_encode %{ int vector_len = 0; __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} ins_encode %{ int vector_len = 1; __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} ins_encode %{ int vector_len = 1; __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (DivVF src1 src2)); format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} ins_encode %{ int vector_len = 2; __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} ins_encode %{ int vector_len = 2; __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // Doubles vector div instruct vdiv2D(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD dst src)); format %{ "divpd $dst,$src\t! div packed2D" %} ins_encode %{ __ divpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} ins_encode %{ int vector_len = 0; __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} ins_encode %{ int vector_len = 0; __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} ins_encode %{ int vector_len = 1; __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} ins_encode %{ int vector_len = 1; __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! 
// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD dst src)); format %{ "divpd $dst,$src\t! div packed2D" %} ins_encode %{ __ divpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} ins_encode %{ int vector_len = 0; __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} ins_encode %{ int vector_len = 0; __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} ins_encode %{ int vector_len = 1; __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} ins_encode %{ int vector_len = 1; __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (DivVD src1 src2)); format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} ins_encode %{ int vector_len = 2; __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} ins_encode %{ int vector_len = 2; __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
// ------------------------------ Min ---------------------------------------
// Byte vector Min
instruct min8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pminsb $dst,$src2\t! " %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct min8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MinV src1 src2)); format %{ "vpminsb $dst,$src1,$src2\t! " %} ins_encode %{ __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %}
instruct min16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pminsb $dst,$src2\t! " %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct min16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MinV src1 src2)); format %{ "vpminsb $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct min32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MinV src1 src2)); format %{ "vpminsb $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 1; __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct min64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MinV src1 src2)); format %{ "vpminsb $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 2; __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// Short vector Min
instruct min4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "movsd $dst,$src1\n\t" "pminsw $dst,$src2\t!
" %} ins_encode %{ __ movsd($dst$$XMMRegister, $src1$$XMMRegister); __ pminsw($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct min4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "vpminsw $dst,$src1,$src2\t! " %} ins_encode %{ __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct min8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pminsw $dst,$src2\t! " %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pminsw($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct min8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MinV src1 src2)); format %{ "vpminsw $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MinV src1 src2)); format %{ "vpminsw $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 1; __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MinV src1 src2)); format %{ "vpminsw $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 2; __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // Int vector Min instruct min2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "movsd $dst,$src1\n\t" "pminsd $dst,$src2\t! " %} ins_encode %{ __ movsd($dst$$XMMRegister, $src1$$XMMRegister); __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct min2I_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); format %{ "vpminsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pminsd $dst,$src2\t! 
" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct min4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); format %{ "vpminsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); format %{ "vpminsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); format %{ "vpminsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 1; __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); format %{ "vpminsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 1; __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MinV src1 src2)); format %{ "vpminsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 2; __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // Long vector Min instruct minL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); effect(TEMP dst, TEMP tmp); format %{ "movsd $tmp,$src1\n\t" "movsd $dst,$src1\n\t" "pcmpgtq $tmp,$src2\n\t" "blendvpd $dst,$src2\t! " %} ins_encode %{ __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); __ movsd($dst$$XMMRegister, $src1$$XMMRegister); __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct min1L_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vblendvpd $dst,$src1,$src2,$dst\t! 
" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); effect(TEMP dst, TEMP tmp); format %{ "movdqu $tmp,$src1\n\t" "movdqu $dst,$src1\n\t" "pcmpgtq $tmp,$src2\n\t" "blendvpd $dst,$src2\t! " %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct min2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vblendvpd $dst,$src1,$src2,$dst\t! " %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); effect(TEMP dst); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vblendvpd $dst,$src1,$src2,$dst\t! " %} ins_encode %{ int vector_len = 1; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); format %{ "vpminsq $dst,$src1,src2\t! " %} ins_encode %{ __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct min4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); format %{ "vpminsq $dst,$src1,src2\t! " %} ins_encode %{ __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); %} ins_pipe( pipe_slow ); %} instruct min8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinV src1 src2)); format %{ "vpminsq $dst,$src1,src2\t! 
" %} ins_encode %{ __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2); %} ins_pipe( pipe_slow ); %} // Float vector Min instruct min2F_reg_avx(legVecD dst, legVecD a, legVecD b, legVecD tmp, legVecD atmp, legVecD btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvps $atmp,$a,$b,$a \n\t" "blendvps $btmp,$b,$a,$a \n\t" "vminps $tmp,$atmp,$btmp \n\t" "cmpps.unordered $btmp, $atmp, $atmp \n\t" "blendvps $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); __ vminps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min4F_reg_avx(legVecX dst, legVecX a, legVecX b, legVecX tmp, legVecX atmp, legVecX btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvps $atmp,$a,$b,$a \n\t" "blendvps $btmp,$b,$a,$a \n\t" "vminps $tmp,$atmp,$btmp \n\t" "cmpps.unordered $btmp, $atmp, $atmp \n\t" "blendvps $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); __ vminps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min8F_reg_avx(legVecY dst, legVecY a, legVecY b, legVecY tmp, legVecY atmp, legVecY btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvps $atmp,$a,$b,$a \n\t" "blendvps $btmp,$b,$a,$a \n\t" "vminps $tmp,$atmp,$btmp \n\t" "cmpps.unordered $btmp, $atmp, $atmp \n\t" "blendvps $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 1; __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); __ vminps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min16F_reg_evex(vecZ dst, vecZ a, vecZ b, vecZ atmp, vecZ btmp) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP atmp, TEMP 
instruct min16F_reg_evex(vecZ dst, vecZ a, vecZ b, vecZ atmp, vecZ btmp) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp); format %{ "vpmovd2m k1,$a \n\t" "vblendmps $atmp,k1,$a,$b \n\t" "vblendmps $btmp,k1,$b,$a \n\t" "vminps $dst,$atmp,$btmp \n\t" "vcmpps.unordered k1,$atmp,$atmp \n\t" "vmovaps $dst,k1,$atmp \n\t" %} ins_encode %{ int vector_len = 2; KRegister ktmp = k1; KRegister mask = k0; __ evpmovd2m(ktmp, $a$$XMMRegister, vector_len); __ evblendmps($atmp$$XMMRegister, ktmp, $a$$XMMRegister, $b$$XMMRegister, true, vector_len); __ evblendmps($btmp$$XMMRegister, ktmp, $b$$XMMRegister, $a$$XMMRegister, true, vector_len); __ vminps($dst$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); __ evcmpps(ktmp, mask, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, $atmp$$XMMRegister, true, vector_len); %} ins_pipe( pipe_slow ); %}
// Double vector Min
instruct min1D_reg_avx(legVecD dst, legVecD a, legVecD b, legVecD tmp, legVecD atmp, legVecD btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvpd $atmp,$a,$b,$a \n\t" "blendvpd $btmp,$b,$a,$a \n\t" "vminpd $tmp,$atmp,$btmp \n\t" "cmppd.unordered $btmp, $atmp, $atmp \n\t" "blendvpd $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); __ vminpd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct min2D_reg_avx(legVecX dst, legVecX a, legVecX b, legVecX tmp, legVecX atmp, legVecX btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvpd $atmp,$a,$b,$a \n\t" "blendvpd $btmp,$b,$a,$a \n\t" "vminpd $tmp,$atmp,$btmp \n\t" "cmppd.unordered $btmp, $atmp, $atmp \n\t" "blendvpd $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); __ vminpd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct min4D_reg_avx(legVecY dst, legVecY a, legVecY b, legVecY tmp, legVecY atmp, legVecY btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvpd $atmp,$a,$b,$a \n\t" "blendvpd $btmp,$b,$a,$a \n\t" "vminpd $tmp,$atmp,$btmp \n\t" "cmppd.unordered $btmp, $atmp, $atmp \n\t" "blendvpd $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 1; __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); __ vblendvpd($btmp$$XMMRegister,
$b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); __ vminpd($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct min8D_reg_evex(vecZ dst, vecZ a, vecZ b, vecZ atmp, vecZ btmp) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MinV a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp); format %{ "vpmovq2m k1,$a \n\t" "vblendmpd $atmp,k1,$a,$b \n\t" "vblendmpd $btmp,k1,$b,$a \n\t" "vminpd $dst,$atmp,$btmp \n\t" "vcmppd.unordered k1,$atmp,$atmp \n\t" "vmovapd $dst,k1,$atmp \n\t" %} ins_encode %{ int vector_len = 2; KRegister ktmp = k1; KRegister mask = k0; __ evpmovq2m(ktmp, $a$$XMMRegister, vector_len); __ evblendmpd($atmp$$XMMRegister, ktmp, $a$$XMMRegister, $b$$XMMRegister, true, vector_len); __ evblendmpd($btmp$$XMMRegister, ktmp, $b$$XMMRegister, $a$$XMMRegister, true, vector_len); __ vminpd($dst$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); __ evcmppd(ktmp, mask, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, $atmp$$XMMRegister, true, vector_len); %} ins_pipe( pipe_slow ); %} // ------------------------------ Max --------------------------------------- // Byte vector Max instruct max8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "movsd $dst,$src1\n\t" "pmaxsb $dst,$src2\t! " %} ins_encode %{ __ movsd($dst$$XMMRegister, $src1$$XMMRegister); __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsb $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pmaxsb $dst,$src2\t! " %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsb $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsb $dst,$src1,$src2\t! 
" %} ins_encode %{ int vector_len = 1; __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsb $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 2; __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} //Short vector Max instruct max4S_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseSSE > 1 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "movsd $dst,$src1\n\t" "pmaxsw $dst,$src2\t! " %} ins_encode %{ __ movsd($dst$$XMMRegister, $src1$$XMMRegister); __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsw $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max8S_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseSSE > 1 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pmaxsw $dst,$src2\t! " %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsw $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max16S_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsw $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 1; __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsw $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 2; __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // Int vector Max instruct max2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pmaxsd $dst,$src2\t! 
" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max2I_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "movdqu $dst,$src1\n\t" "pmaxsd $dst,$src2\t! " %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 0; __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 1; __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 1; __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsd $dst,$src1,$src2\t! " %} ins_encode %{ int vector_len = 2; __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // Long Vector Max instruct maxL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); effect(TEMP dst, TEMP tmp); format %{ "movsd $tmp,$src1\n\t" "movsd $dst,$src1\n\t" "pcmpgtq $tmp,$src2\n\t" "blendvpd $dst,$src2\t! 
" %} ins_encode %{ __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); __ movsd($dst$$XMMRegister, $src2$$XMMRegister); __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max1L_reg_avx(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vblendvpd $dst,$src2,$src1,$dst\t! " %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); effect(TEMP dst, TEMP tmp); format %{ "movdqu $tmp,$src2\n\t" "movdqu $dst,$src1\n\t" "pcmpgtq $tmp,$src1\n\t" "blendvpd $dst,$src2\t! " %} ins_encode %{ __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); __ pcmpgtq($tmp$$XMMRegister, $src1$$XMMRegister); __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct max2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vblendvpd $dst,$src2,$src1,$dst\t! " %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsq $dst,$src1,src2\t! " %} ins_encode %{ __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct max4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); effect(TEMP dst); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vblendvpd $dst,$src2,$src1,$dst\t! " %} ins_encode %{ int vector_len = 1; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsq $dst,$src1,src2\t! 
" %} ins_encode %{ __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); %} ins_pipe( pipe_slow ); %} instruct max8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxV src1 src2)); format %{ "vpmaxsq $dst,$src1,src2\t! " %} ins_encode %{ __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2); %} ins_pipe( pipe_slow ); %} // Float Vector Max instruct max2F_reg_avx(legVecD dst, legVecD a, legVecD b, legVecD tmp, legVecD atmp, legVecD btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvps $btmp,$b,$a,$b \n\t" "blendvps $atmp,$a,$b,$b \n\t" "vmaxps $tmp,$atmp,$btmp \n\t" "cmpps.unordered $btmp, $atmp, $atmp \n\t" "blendvps $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); __ vmaxps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max4F_reg_avx(legVecX dst, legVecX a, legVecX b, legVecX tmp, legVecX atmp, legVecX btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvps $btmp,$b,$a,$b \n\t" "blendvps $atmp,$a,$b,$b \n\t" "vmaxps $tmp,$atmp,$btmp \n\t" "cmpps.unordered $btmp, $atmp, $atmp \n\t" "blendvps $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); __ vmaxps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max8F_reg_avx(legVecY dst, legVecY a, legVecY b, legVecY tmp, legVecY atmp, legVecY btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "blendvps $btmp,$b,$a,$b \n\t" "blendvps $atmp,$a,$b,$b \n\t" "vmaxps $tmp,$atmp,$btmp \n\t" "cmpps.unordered $btmp, $atmp, $atmp \n\t" "blendvps $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 1; __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); __ vmaxps($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ 
vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max16F_reg_evex(vecZ dst, vecZ a, vecZ b, vecZ atmp, vecZ btmp) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp); format %{ "vpmovd2m k1,$b \n\t" "vblendmps $atmp,k1,$a,$b \n\t" "vblendmps $btmp,k1,$b,$a \n\t" "vmaxps $dst,$atmp,$btmp \n\t" "vcmpps.unordered k1,$atmp,$atmp \n\t" "vmovaps $dst,k1,$atmp \n\t" %} ins_encode %{ int vector_len = 2; KRegister ktmp = k1; KRegister mask = k0; __ evpmovd2m(ktmp, $b$$XMMRegister, vector_len); __ evblendmps($atmp$$XMMRegister, ktmp, $a$$XMMRegister, $b$$XMMRegister, true, vector_len); __ evblendmps($btmp$$XMMRegister, ktmp, $b$$XMMRegister, $a$$XMMRegister, true, vector_len); __ vmaxps($dst$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); __ evcmpps(ktmp, mask, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, $atmp$$XMMRegister, true, vector_len); %} ins_pipe( pipe_slow ); %} // Double Vector Max instruct max1D_reg_avx(legVecD dst, legVecD a, legVecD b, legVecD tmp, legVecD atmp, legVecD btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); format %{ "blendvpd $btmp,$b,$a,$b \n\t" "blendvpd $atmp,$a,$b,$b \n\t" "vmaxpd $tmp,$atmp,$btmp \n\t" "cmppd.unordered $btmp, $atmp, $atmp \n\t" "blendvpd $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); __ vmaxpd($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max2D_reg_avx(legVecX dst, legVecX a, legVecX b, legVecX tmp, legVecX atmp, legVecX btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); format %{ "blendvpd $btmp,$b,$a,$b \n\t" "blendvpd $atmp,$a,$b,$b \n\t" "vmaxpd $tmp,$atmp,$btmp \n\t" "cmppd.unordered $btmp, $atmp, $atmp \n\t" "blendvpd $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 0; __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); __ vmaxpd($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct max4D_reg_avx(legVecY dst, legVecY a, legVecY b, legVecY tmp, legVecY atmp, legVecY btmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 
match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); format %{ "blendvpd $btmp,$b,$a,$b \n\t" "blendvpd $atmp,$a,$b,$b \n\t" "vmaxpd $tmp,$atmp,$btmp \n\t" "cmppd.unordered $btmp, $atmp, $atmp \n\t" "blendvpd $dst,$tmp,$atmp,$btmp \n\t" %} ins_encode %{ int vector_len = 1; __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); __ vmaxpd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct max8D_reg_evex(vecZ dst, vecZ a, vecZ b, vecZ atmp, vecZ btmp) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (MaxV a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp); format %{ "vpmovq2m k1,$b \n\t" "vblendmpd $atmp,k1,$a,$b \n\t" "vblendmpd $btmp,k1,$b,$a \n\t" "vmaxpd $dst,$atmp,$btmp \n\t" "vcmppd.unordered k1,$atmp,$atmp \n\t" "vmovapd $dst,k1,$atmp \n\t" %} ins_encode %{ int vector_len = 2; KRegister ktmp = k1; KRegister mask = k0; __ evpmovq2m(ktmp, $b$$XMMRegister, vector_len); __ evblendmpd($atmp$$XMMRegister, ktmp, $a$$XMMRegister, $b$$XMMRegister, true, vector_len); __ evblendmpd($btmp$$XMMRegister, ktmp, $b$$XMMRegister, $a$$XMMRegister, true, vector_len); __ vmaxpd($dst$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); __ evcmppd(ktmp, mask, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::UNORD_Q, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, $atmp$$XMMRegister, true, vector_len); %} ins_pipe( pipe_slow ); %}
// ------------------------------ Shift ---------------------------------------
// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{ match(Set dst (LShiftCntV cnt)); match(Set dst (RShiftCntV cnt)); format %{ "movd $dst,$cnt\t! load shift count" %} ins_encode %{ __ movdl($dst$$XMMRegister, $cnt$$Register); %} ins_pipe( pipe_slow ); %}
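// The count loaded by vshiftcnt feeds every variable-count shift pattern
// below: psllw/pslld/psllq (and their v-prefixed forms) read the shift
// count from the low quadword of the count register, so a single movd'ed
// scalar serves left and right shifts of every element size. Unlike
// scalar SHL, which masks the count, an oversized vector count zeroes the
// lane. One pslld lane under that rule (an illustrative sketch, not
// HotSpot code):
//
//   #include <stdint.h>
//   static uint32_t pslld_lane(uint32_t x, uint64_t cnt) {
//     // counts >= the element width flush the lane to zero
//     return (cnt > 31) ? 0 : (x << cnt);
//   }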
sqrt packed4D" %} ins_encode %{ int vector_len = 1; __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt4D_mem(vecY dst, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SqrtVD (LoadVector mem))); format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} ins_encode %{ int vector_len = 1; __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (SqrtVD src)); format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} ins_encode %{ int vector_len = 2; __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt8D_mem(vecZ dst, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (SqrtVD (LoadVector mem))); format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} ins_encode %{ int vector_len = 2; __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt2F_reg(vecD dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SqrtVF src)); format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} ins_encode %{ int vector_len = 0; __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt2F_mem(vecD dst, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (SqrtVF (LoadVector mem))); format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} ins_encode %{ int vector_len = 0; __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt4F_reg(vecX dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SqrtVF src)); format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} ins_encode %{ int vector_len = 0; __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt4F_mem(vecX dst, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (SqrtVF (LoadVector mem))); format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} ins_encode %{ int vector_len = 0; __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt8F_reg(vecY dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SqrtVF src)); format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} ins_encode %{ int vector_len = 1; __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt8F_mem(vecY dst, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (SqrtVF (LoadVector mem))); format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} ins_encode %{ int vector_len = 1; __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (SqrtVF src)); format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} ins_encode %{ int vector_len = 2; __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsqrt16F_mem(vecZ dst, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (SqrtVF (LoadVector mem))); format %{ "vsqrtps $dst,$mem\t! 
sqrt packed16F" %} ins_encode %{ int vector_len = 2; __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // ------------------------------ LeftShift ----------------------------------- // Byte vector left shift instruct vsll4B_reg(vecS dst, vecS src, vecS shift, vecD tmp, vecD tmp2) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 4); match(Set dst (LShiftVB src shift)); effect(TEMP tmp2, TEMP tmp); format %{"pmovsxbw $tmp,$src\n\t" "psllw $tmp,$shift\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movss $dst,$tmp\n\t! left shift packed4B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psllw($tmp$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll8B_reg(vecD dst, vecD src, vecS shift, vecX tmp, vecX tmp2) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 8); match(Set dst (LShiftVB src shift)); effect(TEMP tmp2, TEMP tmp); format %{"pmovsxbw $tmp,$src\n\t" "psllw $tmp,$shift\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movsd $dst,$tmp\n\t! left shift packed8B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psllw($tmp$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll16B_reg(vecX dst, vecX src, vecS shift, vecX tmp, vecX tmp2, vecX tmp3) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 16); match(Set dst (LShiftVB src shift)); effect(TEMP tmp2, TEMP tmp, TEMP tmp3); format %{"pmovsxbw $tmp,$src\n\t" "psllw $tmp,$shift\n\t" "pshufd $tmp2,$src\n\t" "pmovsxbw $tmp2,$tmp2\n\t" "psllw $tmp2,$shift\n\t" "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp3\n\t" "pand $tmp2,$tmp3\n\t" "packuswb $tmp,$tmp2\n\t" "modqu $dst,$tmp\n\t! left shift packed16B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psllw($tmp$$XMMRegister, $shift$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0x0E); __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); __ psllw($tmp2$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister); __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll16B_avx(vecX dst, vecX src, vecS shift, vecY tmp, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVB src shift)); effect(TEMP dst, TEMP tmp, TEMP scratch); format %{"vpmovsxbw $tmp,$src\n\t" "vpsllw $tmp,$tmp,$shift\\n\t" "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" "vextracti128_high $dst,$tmp\n\t" "vpackuswb $dst,$tmp, $dst\n\t! 
left shift packed16B" %} ins_encode %{ int vector_len = 1; __ vpmovsxbw($tmp$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsllw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct vsll32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, vecY tmp2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (LShiftVB src shift)); effect(TEMP dst, TEMP tmp2, TEMP tmp, TEMP scratch); format %{"vextracti128_high $tmp,$src\n\t" "vpmovsxbw $tmp,$tmp\n\t" "vpmovsxbw $tmp2,$src\n\t" "vpsllw $tmp,$tmp,$shift\n\t" "vpsllw $tmp2,$tmp2,$shift\n\t" "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" "vpand $tmp2,$tmp2,[0x00ff00ff0x00ff00ff]\n\t" "vpackuswb $dst,$tmp2,$tmp\n\t" "vpermq $dst,$dst, 0xD8\n\t! left shift for packed32B" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovsxbw($tmp2$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsllw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpsllw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp, vecZ tmp2, vecZ tmp3, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 64); match(Set dst (LShiftVB src shift)); effect(TEMP dst, TEMP tmp3, TEMP tmp2, TEMP tmp, TEMP scratch); format %{"vextracti64x4 $tmp,$src\n\t" "vpmovsxbw $tmp,$tmp\n\t" "vpmovsxbw $tmp2,$src\n\t" "vpsllw $tmp,$tmp,$shift\n\t" "vpsllw $tmp2,$tmp2,$shift\n\t" "vmovdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" "vpbroadcastd $tmp3,$tmp3\n\t" "vpand $tmp,$tmp,$tmp3\n\t" "vpand $tmp2,$tmp2,$tmp3\n\t" "vpackuswb $dst,$tmp,$tmp2\n\t" "evmovdquq $tmp3, [0x06040200070500301]\n\t" "vpermq $dst,$tmp3,$dst\n\t! 
left shift for packed64B" %} ins_encode %{ int vector_len = 2; __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, 1); __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovsxbw($tmp2$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsllw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpsllw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ evmovdquq($tmp3$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); __ vpermq($dst$$XMMRegister, $tmp3$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // Shorts/Chars vector left shift instruct vsll2S(vecS dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed2S" %} ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll2S_imm(vecS dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS dst (LShiftCntV shift))); format %{ "psllw $dst,$shift\t! left shift packed2S" %} ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVS src (LShiftCntV shift))); format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4S(vecD dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll4S_imm(vecD dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS dst (LShiftCntV shift))); format %{ "psllw $dst,$shift\t! left shift packed4S" %} ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVS src (LShiftCntV shift))); format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8S(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst shift)); format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ __ psllw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll8S_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS dst (LShiftCntV shift))); format %{ "psllw $dst,$shift\t! left shift packed8S" %} ins_encode %{ __ psllw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (LShiftVS src (LShiftCntV shift))); format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (LShiftVS src (LShiftCntV shift))); format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src shift)); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (LShiftVS src (LShiftCntV shift))); format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector left shift instruct vsll2I(vecD dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI dst shift)); format %{ "pslld $dst,$shift\t! left shift packed2I" %} ins_encode %{ __ pslld($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll2I_imm(vecD dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI dst (LShiftCntV shift))); format %{ "pslld $dst,$shift\t! 
left shift packed2I" %} ins_encode %{ __ pslld($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ int vector_len = 0; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVI src (LShiftCntV shift))); format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} ins_encode %{ int vector_len = 0; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4I(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI dst shift)); format %{ "pslld $dst,$shift\t! left shift packed4I" %} ins_encode %{ __ pslld($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll4I_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI dst (LShiftCntV shift))); format %{ "pslld $dst,$shift\t! left shift packed4I" %} ins_encode %{ __ pslld($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ int vector_len = 0; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (LShiftVI src (LShiftCntV shift))); format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} ins_encode %{ int vector_len = 0; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ int vector_len = 1; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (LShiftVI src (LShiftCntV shift))); format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} ins_encode %{ int vector_len = 1; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (LShiftVI src shift)); format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} ins_encode %{ int vector_len = 2; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (LShiftVI src (LShiftCntV shift))); format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %} ins_encode %{ int vector_len = 2; __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // Longs vector left shift instruct vsll2L(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL dst shift)); format %{ "psllq $dst,$shift\t! left shift packed2L" %} ins_encode %{ __ psllq($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsll2L_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL dst (LShiftCntV shift))); format %{ "psllq $dst,$shift\t! left shift packed2L" %} ins_encode %{ __ psllq($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ins_encode %{ int vector_len = 0; __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (LShiftVL src (LShiftCntV shift))); format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} ins_encode %{ int vector_len = 0; __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (LShiftVL src (LShiftCntV shift))); format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (LShiftVL src shift)); format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ins_encode %{ int vector_len = 2; __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (LShiftVL src (LShiftCntV shift))); format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} ins_encode %{ int vector_len = 2; __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // ----------------------- LogicalRightShift ----------------------------------- // Bytes vector logical right shift instruct vsrl4B_reg(vecS dst, vecS src, vecS shift, vecD tmp, vecD tmp2) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 4); match(Set dst (URShiftVB src shift)); effect(TEMP tmp2, TEMP tmp); format %{"pmovzxbw $tmp,$src\n\t" "psrlw $tmp,$shift\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movss $dst,$tmp\n\t! 
logical right shift for packed4B" %} ins_encode %{ __ pmovzxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psrlw($tmp$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsrl8B_reg(vecD dst, vecD src, vecS shift, vecX tmp, vecX tmp2) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 8); match(Set dst (URShiftVB src shift)); effect(TEMP tmp2, TEMP tmp); format %{"pmovzxbw $tmp,$src\n\t" "psrlw $tmp,$shift\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movsd $dst,$tmp\n\t! logical right shift for packed8B" %} ins_encode %{ __ pmovzxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psrlw($tmp$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsrl16B_reg(vecX dst, vecX src, vecS shift, vecX tmp, vecX tmp2, vecX tmp3) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 16); match(Set dst (URShiftVB src shift)); effect(TEMP tmp2, TEMP tmp, TEMP tmp3); format %{"pmovzxbw $tmp,$src\n\t" "psrlw $tmp,$shift\n\t" "pshufd $tmp2,$src,14\n\t" "pmovzxbw $tmp2,$tmp2\n\t" "psrlw $tmp2,$shift\n\t" "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp3\n\t" "pand $tmp2,$tmp3\n\t" "packuswb $tmp,$tmp2\n\t" "movdqu $dst,$tmp\n\t! logical right shift for packed16B" %} ins_encode %{ __ pmovzxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psrlw($tmp$$XMMRegister, $shift$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 14); __ pmovzxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); __ psrlw($tmp2$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister); __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsrl16B_avx(vecX dst, vecX src, vecS shift, vecY tmp, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVB src shift)); effect(TEMP dst, TEMP tmp, TEMP scratch); format %{"vpmovzxbw $tmp,$src\n\t" "vpsrlw $tmp,$tmp,$shift\n\t" "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" "vextracti128_high $dst,$tmp\n\t" "vpackuswb $dst,$tmp,$dst\n\t! logical right shift for packed16B" %} ins_encode %{ int vector_len = 1; __ vpmovzxbw($tmp$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsrlw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %}
instruct vsrl32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, vecY tmp2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (URShiftVB src shift)); effect(TEMP tmp2, TEMP tmp, TEMP scratch); format %{"vextracti128_high $tmp,$src\n\t" "vpmovzxbw $tmp,$tmp\n\t" "vpmovzxbw $tmp2,$src\n\t" "vpsrlw $tmp,$tmp,$shift\n\t" "vpsrlw $tmp2,$tmp2,$shift\n\t" "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" "vpand $tmp2,$tmp2,[0x00ff00ff0x00ff00ff]\n\t" "vpackuswb $dst,$tmp2,$tmp\n\t" "vpermq $dst,$dst, 0xD8\n\t! logical right shift for packed32B" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); __ vpmovzxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovzxbw($tmp2$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsrlw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpsrlw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl64B(vecZ dst, vecZ src, vecS shift, vecZ tmp, vecZ tmp2, vecZ tmp3, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 64); match(Set dst (URShiftVB src shift)); effect(TEMP dst, TEMP tmp3, TEMP tmp2, TEMP tmp, TEMP scratch); format %{"vextracti64x4 $tmp,$src\n\t" "vpmovzxbw $tmp,$tmp\n\t" "vpmovzxbw $tmp2,$src\n\t" "vpsrlw $tmp,$tmp,$shift\n\t" "vpsrlw $tmp2,$tmp2,$shift\n\t" "vmovdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" "vpbroadcastd $tmp3,$tmp3\n\t" "vpand $tmp,$tmp,$tmp3\n\t" "vpand $tmp2,$tmp2,$tmp3\n\t" "vpackuswb $dst,$tmp,$tmp2\n\t" "evmovdquq $tmp3, [0x0604020007050301]\n\t" "vpermq $dst,$tmp3,$dst\n\t! logical right shift for packed64B" %} ins_encode %{ int vector_len = 2; __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, 1); __ vpmovzxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovzxbw($tmp2$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsrlw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpsrlw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ evmovdquq($tmp3$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); __ vpermq($dst$$XMMRegister, $tmp3$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// Logical right shift of shorts produces an incorrect Java result for
// negative data, because Java code converts the short value into an int
// with sign extension before the shift. Char vectors are fine, though,
// since chars are unsigned values.
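// A minimal Java-level illustration of the problem (hypothetical values,
// not code from this file): for short s = -1, (short)(s >>> 2) first widens
// s to the int 0xffffffff, shifts it to 0x3fffffff, and truncates back to
// -1, not the 0x3fff that a true 16-bit logical shift would produce. The
// URShiftVS patterns below are therefore expected to be generated only
// where 16-bit semantics are known to be safe, e.g. for char data.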
instruct vsrl2S(vecS dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsrl2S_imm(vecS dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS dst (RShiftCntV shift))); format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %}
instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVS src (RShiftCntV shift))); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl4S(vecD dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsrl4S_imm(vecD dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS dst (RShiftCntV shift))); format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %}
instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVS src (RShiftCntV shift))); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl8S(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst shift)); format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsrl8S_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS dst (RShiftCntV shift))); format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} ins_encode %{ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %}
instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (URShiftVS src (RShiftCntV shift))); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (URShiftVS src (RShiftCntV shift))); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src shift)); format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (URShiftVS src (RShiftCntV shift))); format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector logical right shift instruct vsrl2I(vecD dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI dst shift)); format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} ins_encode %{ __ psrld($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsrl2I_imm(vecD dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI dst (RShiftCntV shift))); format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} ins_encode %{ __ psrld($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ins_encode %{ int vector_len = 0; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVI src (RShiftCntV shift))); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} ins_encode %{ int vector_len = 0; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4I(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI dst shift)); format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} ins_encode %{ __ psrld($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsrl4I_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI dst (RShiftCntV shift))); format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} ins_encode %{ __ psrld($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ins_encode %{ int vector_len = 0; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (URShiftVI src (RShiftCntV shift))); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} ins_encode %{ int vector_len = 0; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} ins_encode %{ int vector_len = 1; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (URShiftVI src (RShiftCntV shift))); format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed8I" %} ins_encode %{ int vector_len = 1; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (URShiftVI src shift)); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} ins_encode %{ int vector_len = 2; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (URShiftVI src (RShiftCntV shift))); format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} ins_encode %{ int vector_len = 2; __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // Longs vector logical right shift instruct vsrl2L(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL dst shift)); format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} ins_encode %{ __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsrl2L_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL dst (RShiftCntV shift))); format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} ins_encode %{ __ psrlq($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ins_encode %{ int vector_len = 0; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (URShiftVL src (RShiftCntV shift))); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} ins_encode %{ int vector_len = 0; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (URShiftVL src (RShiftCntV shift))); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (URShiftVL src shift)); format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed8L" %} ins_encode %{ int vector_len = 2; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (URShiftVL src (RShiftCntV shift))); format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} ins_encode %{ int vector_len = 2; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
// ------------------- ArithmeticRightShift -----------------------------------
// Byte vector arithmetic right shift
instruct vsra4B_reg(vecS dst, vecS src, vecS shift, vecD tmp, vecD tmp2) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 4); match(Set dst (RShiftVB src shift)); effect(TEMP tmp2, TEMP tmp); format %{"pmovsxbw $tmp,$src\n\t" "psraw $tmp,$shift\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movss $dst,$tmp\n\t! arithmetic right shift for packed4B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psraw($tmp$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movss($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsra8B_reg(vecD dst, vecD src, vecS shift, vecX tmp, vecX tmp2) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 8); match(Set dst (RShiftVB src shift)); effect(TEMP tmp2, TEMP tmp); format %{"pmovsxbw $tmp,$src\n\t" "psraw $tmp,$shift\n\t" "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp2\n\t" "packuswb $tmp,$tmp\n\t" "movsd $dst,$tmp\n\t! arithmetic right shift for packed8B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psraw($tmp$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); __ movsd($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsra16B_reg(vecX dst, vecX src, vecS shift, vecX tmp, vecX tmp2, vecX tmp3) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 16); match(Set dst (RShiftVB src shift)); effect(TEMP tmp2, TEMP tmp, TEMP tmp3); format %{"pmovsxbw $tmp,$src\n\t" "psraw $tmp,$shift\n\t" "pshufd $tmp2,$src,0xE\n\t" "pmovsxbw $tmp2,$tmp2\n\t" "psraw $tmp2,$shift\n\t" "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" "pand $tmp,$tmp3\n\t" "pand $tmp2,$tmp3\n\t" "packuswb $tmp,$tmp2\n\t" "movdqu $dst,$tmp\n\t! 
arithmetic right shift for packed16B" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ psraw($tmp$$XMMRegister, $shift$$XMMRegister); __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); __ psraw($tmp2$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister); __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister); __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra16B_avx(vecX dst, vecX src, vecS shift, vecY tmp, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVB src shift)); effect(TEMP dst, TEMP tmp, TEMP scratch); format %{"vpmovsxbw $tmp,$src\n\t" "vpsraw $tmp,$tmp,$shift\n\t" "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" "vextracti128_high $dst,$tmp\n\t" "vpackuswb $dst,$tmp,$dst\n\t! arithmetic right shift for packed16B" %} ins_encode %{ int vector_len = 1; __ vpmovsxbw($tmp$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsraw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct vsra32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, vecY tmp2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32); match(Set dst (RShiftVB src shift)); effect(TEMP tmp2, TEMP tmp, TEMP dst, TEMP scratch); format %{"vextracti128_high $tmp,$src\n\t" "vpmovsxbw $tmp,$tmp\n\t" "vpmovsxbw $tmp2,$src\n\t" "vpsraw $tmp,$tmp,$shift\n\t" "vpsraw $tmp2,$tmp2,$shift\n\t" "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" "vpand $tmp2,$tmp2,[0x00ff00ff0x00ff00ff]\n\t" "vpackuswb $dst,$tmp2,$tmp\n\t" "vpermq $dst,$dst,0xD8\n\t! 
arithmetic right shift for packed32B" %} ins_encode %{ int vector_len = 1; __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovsxbw($tmp2$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsraw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpsraw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra64B(vecZ dst, vecZ src, vecS shift, vecZ tmp, vecZ tmp2, vecZ tmp3, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 64); match(Set dst (RShiftVB src shift)); effect(TEMP dst, TEMP tmp3, TEMP tmp2, TEMP tmp, TEMP scratch); format %{"vextracti64x4 $tmp,$src\n\t" "vpmovsxbw $tmp,$tmp\n\t" "vpmovsxbw $tmp2,$src\n\t" "vpsraw $tmp,$tmp,$shift\n\t" "vpsraw $tmp2,$tmp2,$shift\n\t" "vmovdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" "vpbroadcastd $tmp3,$tmp3\n\t" "vpand $tmp,$tmp,$tmp3\n\t" "vpand $tmp2,$tmp2,$tmp3\n\t" "vpackuswb $dst,$tmp,$tmp2\n\t" "evmovdquq $tmp3, [0x0604020007050301]\n\t" "vpermq $dst,$tmp3,$dst\n\t! arithmetic right shift for packed64B" %} ins_encode %{ int vector_len = 2; __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, 1); __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpmovsxbw($tmp2$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsraw($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpsraw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); __ evmovdquq($tmp3$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); __ vpermq($dst$$XMMRegister, $tmp3$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ __ psraw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsra2S_imm(vecS dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS dst (RShiftCntV shift))); format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %}
instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVS src (RShiftCntV shift))); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4S(vecD dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ __ psraw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra4S_imm(vecD dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS dst (RShiftCntV shift))); format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVS src (RShiftCntV shift))); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra8S(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst shift)); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ __ psraw($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra8S_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS dst (RShiftCntV shift))); format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ __ psraw($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (RShiftVS src (RShiftCntV shift))); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} ins_encode %{ int vector_len = 0; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); match(Set dst (RShiftVS src (RShiftCntV shift))); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} ins_encode %{ int vector_len = 1; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src shift)); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); match(Set dst (RShiftVS src (RShiftCntV shift))); format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} ins_encode %{ int vector_len = 2; __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} // Integers vector arithmetic right shift instruct vsra2I(vecD dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI dst shift)); format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ __ psrad($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra2I_imm(vecD dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI dst (RShiftCntV shift))); format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ __ psrad($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %} instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ int vector_len = 0; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (RShiftVI src (RShiftCntV shift))); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} ins_encode %{ int vector_len = 0; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %} instruct vsra4I(vecX dst, vecS shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI dst shift)); format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} ins_encode %{ __ psrad($dst$$XMMRegister, $shift$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vsra4I_imm(vecX dst, immI8 shift) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI dst (RShiftCntV shift))); format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed4I" %} ins_encode %{ __ psrad($dst$$XMMRegister, (int)$shift$$constant); %} ins_pipe( pipe_slow ); %}
instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ins_encode %{ int vector_len = 0; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (RShiftVI src (RShiftCntV shift))); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} ins_encode %{ int vector_len = 0; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ins_encode %{ int vector_len = 1; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (RShiftVI src (RShiftCntV shift))); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} ins_encode %{ int vector_len = 1; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (RShiftVI src shift)); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} ins_encode %{ int vector_len = 2; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (RShiftVI src (RShiftCntV shift))); format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} ins_encode %{ int vector_len = 2; __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
// Long vector arithmetic right shift
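// Neither SSE nor AVX2 provides a packed arithmetic right shift for 64-bit
// lanes (vpsraq is EVEX-only), so the non-EVEX patterns below synthesize it
// from a logical shift plus a sign fix-up: with m = (0x8000000000000000 >>> s),
//
//   x >> s  ==  ((x >>> s) ^ m) - m
//
// The xor/subtract pair re-creates the sign bits that the logical shift
// dropped; the _evex forms use evpsraq directly when AVX-512 VL is present.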
instruct vsra1L(vecD dst, vecD src, vecS shift, vecD tmp) %{ predicate(n->as_Vector()->length() == 1); match(Set dst (RShiftVL src shift)); effect(TEMP dst, TEMP tmp); format %{ "movdqu $dst,$src\n\t" "psrlq $dst,$shift\n\t" "movdqu $tmp,[0x8000000000000000]\n\t" "psrlq $tmp,$shift\n\t" "pxor $dst,$tmp\n\t" "psubq $dst,$tmp\t! arithmetic right shift packed1L" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsra1L_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{ predicate(n->as_Vector()->length() == 1); match(Set dst (RShiftVL src (RShiftCntV shift))); effect(TEMP dst, TEMP tmp); format %{ "movdqu $dst,$src\n\t" "psrlq $dst,$shift\n\t" "movdqu $tmp,[0x8000000000000000]\n\t" "psrlq $tmp,$shift\n\t" "pxor $dst,$tmp\n\t" "psubq $dst,$tmp\t! arithmetic right shift packed1L" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ psrlq($dst$$XMMRegister, (int)$shift$$constant); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ psrlq($tmp$$XMMRegister, (int)$shift$$constant); __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsra1L_reg(vecD dst, vecD src, vecS shift, vecD tmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1); match(Set dst (RShiftVL src shift)); effect(TEMP dst, TEMP tmp); format %{ "vpsrlq $dst,$src,$shift\n\t" "vmovdqu $tmp,[0x8000000000000000]\n\t" "vpsrlq $tmp,$tmp,$shift\n\t" "vpxor $dst,$dst,$tmp\n\t" "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed1L" %} ins_encode %{ int vector_len = 0; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra1L_reg_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1); match(Set dst (RShiftVL src (RShiftCntV shift))); effect(TEMP dst, TEMP tmp); format %{ "vpsrlq $dst,$src,$shift\n\t" "vmovdqu $tmp,[0x8000000000000000]\n\t" "vpsrlq $tmp,$tmp,$shift\n\t" "vpxor $dst,$dst,$tmp\n\t" "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed1L" %} ins_encode %{ int vector_len = 0; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra1L_reg_evex(vecD dst, vecD src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 1); match(Set dst (RShiftVL src shift)); format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed1L" %} ins_encode %{ int vector_len = 0; __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra2L_reg_imm(vecX dst, vecX src, immI8 shift, vecX tmp) %{ predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); match(Set dst (RShiftVL src (RShiftCntV shift))); effect(TEMP dst, TEMP tmp); format %{ "movdqu $dst,$src\n\t" "psrlq $dst,$shift\n\t" "movdqu $tmp,[0x8000000000000000]\n\t" "psrlq $tmp,$shift\n\t" "pxor $dst,$tmp\n\t" "psubq $dst,$tmp\t! 
arithmetic right shift packed2L" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ psrlq($dst$$XMMRegister, (int)$shift$$constant); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ psrlq($tmp$$XMMRegister, (int)$shift$$constant); __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp) %{ predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); match(Set dst (RShiftVL src shift)); effect(TEMP dst, TEMP tmp); format %{ "movdqu $dst,$src\n\t" "psrlq $dst,$shift\n\t" "movdqu $tmp,[0x8000000000000000]\n\t" "psrlq $tmp,$shift\n\t" "pxor $dst,$tmp\n\t" "psubq $dst,$tmp\t! arithmetic right shift packed2L" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vsra2L_reg_evex_imm(vecX dst, vecX src, immI8 shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2); match(Set dst (RShiftVL src (RShiftCntV shift))); format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %} ins_encode %{ int vector_len = 0; __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2); match(Set dst (RShiftVL src shift)); format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %} ins_encode %{ int vector_len = 0; __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra4L_reg_imm(vecY dst, vecY src, immI8 shift, vecY tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (RShiftVL src (RShiftCntV shift))); effect(TEMP dst, TEMP tmp); format %{ "vpsrlq $dst,$src,$shift\n\t" "vmovdqu $tmp,[0x8000000000000000]\n\t" "vpsrlq $tmp,$tmp,$shift\n\t" "vpxor $dst,$dst,$tmp\n\t" "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); match(Set dst (RShiftVL src shift)); effect(TEMP dst, TEMP tmp); format %{ "vpsrlq $dst,$src,$shift\n\t" "vmovdqu $tmp,[0x8000000000000000]\n\t" "vpsrlq $tmp,$tmp,$shift\n\t" "vpxor $dst,$dst,$tmp\n\t" "vpsubq $dst,$dst,$tmp\t! 
arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra4L_reg_evex_imm(vecY dst, vecY src, immI8 shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4); match(Set dst (RShiftVL src (RShiftCntV shift))); format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4); match(Set dst (RShiftVL src shift)); format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra8L_reg_evex_imm(vecZ dst, vecZ src, immI8 shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (RShiftVL src (RShiftCntV shift))); format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed8L" %} ins_encode %{ int vector_len = 2; __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsra8L_reg_evex(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (RShiftVL src shift)); format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed8L" %} ins_encode %{ int vector_len = 2; __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// ------------------- Variable Bit Shift Left Logical -----------------------------
// Integer Variable left shift
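// Unlike the fixed-count shifts above, where every lane is shifted by the
// same scalar amount, the vpsllv*/vpsrlv* patterns below take a vector of
// independent per-lane shift counts. The Op_LShiftCntV / Op_RShiftCntV
// checks in the predicates keep these patterns from matching when the shift
// count is a broadcast scalar, which the fixed-count patterns above are
// meant to handle.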
// Integer variable left shift
instruct vsllv2I(vecD dst, vecD src, vecD shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVI src shift)); format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed2I" %} ins_encode %{ int vector_len = 0; __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv4I_reg(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVI src shift)); format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed4I" %} ins_encode %{ int vector_len = 0; __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv4I_reg_evex(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVI src shift)); format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed4I" %} ins_encode %{ int vector_len = 0; __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv8I_reg(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVI src shift)); format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed8I" %} ins_encode %{ int vector_len = 1; __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv8I_reg_evex(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVI src shift)); format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed8I" %} ins_encode %{ int vector_len = 1; __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVI src shift)); format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed16I" %} ins_encode %{ int vector_len = 2; __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// Long variable left shift
instruct vsllv1L_reg(vecD dst, vecD src, vecD shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVL src shift)); format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed1L" %} ins_encode %{ int vector_len = 0; __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv2L_reg(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVL src shift)); format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed2L" %} ins_encode %{ int vector_len = 0; __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv2L_reg_evex(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVL src shift)); format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed2L" %} ins_encode %{ int vector_len = 0; __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv4L_reg(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVL src shift)); format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed4L" %} ins_encode %{ int vector_len = 1; __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv4L_reg_evex(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVL src shift)); format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed4L" %} ins_encode %{ int vector_len = 1; __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsllv8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); match(Set dst (LShiftVL src shift)); format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed8L" %} ins_encode %{ int vector_len = 2; __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// ------------------- Variable Bit Shift Right Logical -----------------------------
// Integer variable right shift
instruct vsrlv2I_reg(vecD dst, vecD src, vecD shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVI src shift)); format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed2I" %} ins_encode %{ int vector_len = 0; __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv4I_reg(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVI src shift)); format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed4I" %} ins_encode %{ int vector_len = 0; __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv4I_reg_evex(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVI src shift)); format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed4I" %} ins_encode %{ int vector_len = 0; __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv8I_reg(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVI src shift)); format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed8I" %} ins_encode %{ int vector_len = 1; __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv8I_reg_evex(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVI src shift)); format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed8I" %} ins_encode %{ int vector_len = 1; __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVI src shift)); format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed16I" %} ins_encode %{ int vector_len = 2; __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// Long variable right shift
instruct vsrlv1L_reg(vecD dst, vecD src, vecD shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVL src shift)); format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed1L" %} ins_encode %{ int vector_len = 0; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv2L_reg(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVL src shift)); format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed2L" %} ins_encode %{ int vector_len = 0; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv2L_reg_evex(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVL src shift)); format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed2L" %} ins_encode %{ int vector_len = 0; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv4L_reg(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVL src shift)); format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv4L_reg_evex(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVL src shift)); format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrlv8L_reg(vecZ dst, vecZ src, vecZ shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (URShiftVL src shift)); format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed8L" %} ins_encode %{ int vector_len = 2; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// ------------------- Variable Bit Shift Right Arithmetic -----------------------------
// Integer variable right shift
instruct vsrav2I_reg(vecD dst, vecD src, vecD shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVI src shift)); format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed2I" %} ins_encode %{ int vector_len = 0; __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav4I_reg(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVI src shift)); format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed4I" %} ins_encode %{ int vector_len = 0; __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav4I_reg_evex(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVI src shift)); format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed4I" %} ins_encode %{ int vector_len = 0; __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav8I_reg(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVI src shift)); format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed8I" %} ins_encode %{ int vector_len = 1; __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav8I_reg_evex(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVI src shift)); format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed8I" %} ins_encode %{ int vector_len = 1; __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVI src shift)); format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed16I" %} ins_encode %{ int vector_len = 2; __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// Long variable arithmetic right shift
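// There is no variable-count arithmetic right shift for longs before AVX512
// (vpsravq), so the AVX2 rules below reuse the sign-mask trick from the
// fixed-count packed-long shifts: the 0x8000000000000000 mask is shifted by
// the same per-lane counts (vpsrlvq), then xor'ed and subtracted to
// sign-extend each lane.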
instruct vsrav1L_reg(vecD dst, vecD src, vecD shift, vecD tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVL src shift)); effect(TEMP dst, TEMP tmp); format %{ "vpsrlvq $dst,$src,$shift\n\t" "vmovdqu $tmp,[0x8000000000000000]\n\t" "vpsrlvq $tmp,$tmp,$shift\n\t" "vpxor $dst,$dst,$tmp\n\t" "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed1L" %} ins_encode %{ int vector_len = 0; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav1L_reg_evex(vecD dst, vecD src, vecD shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVL src shift)); format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed1L" %} ins_encode %{ int vector_len = 0; __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav2L_reg(vecX dst, vecX src, vecX shift, vecX tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVL src shift)); effect(TEMP dst, TEMP tmp); format %{ "vpsrlvq $dst,$src,$shift\n\t" "vmovdqu $tmp,[0x8000000000000000]\n\t" "vpsrlvq $tmp,$tmp,$shift\n\t" "vpxor $dst,$dst,$tmp\n\t" "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed2L" %} ins_encode %{ int vector_len = 0; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav2L_reg_evex(vecX dst, vecX src, vecX shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVL src shift)); format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed2L" %} ins_encode %{ int vector_len = 0; __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav4L_reg(vecY dst, vecY src, vecY shift, vecY tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVL src shift)); effect(TEMP dst, TEMP tmp); format %{ "vpsrlvq $dst,$src,$shift\n\t" "vmovdqu $tmp,[0x8000000000000000]\n\t" "vpsrlvq $tmp,$tmp,$shift\n\t" "vpxor $dst,$dst,$tmp\n\t" "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav4L_reg_evex(vecY dst, vecY src, vecY shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVL src shift)); format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vsrav8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); match(Set dst (RShiftVL src shift)); format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed8L" %} ins_encode %{ int vector_len = 2; __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
// --------------------------------- AND --------------------------------------
instruct vand4B(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (AndV dst src)); format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} ins_encode %{ __ pand($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t!
and vectors (4 bytes)" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (AndV src (LoadVector mem))); format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand8B(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (AndV dst src)); format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} ins_encode %{ __ pand($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (AndV src (LoadVector mem))); format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand16B(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (AndV dst src)); format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} ins_encode %{ __ pand($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (AndV src (LoadVector mem))); format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} ins_encode %{ int vector_len = 1; __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); match(Set dst (AndV src (LoadVector mem))); format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} ins_encode %{ int vector_len = 1; __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); match(Set dst (AndV src1 src2)); format %{ "vpand $dst,$src1,$src2\t! 
and vectors (64 bytes)" %} ins_encode %{ int vector_len = 2; __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); match(Set dst (AndV src (LoadVector mem))); format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %} ins_encode %{ int vector_len = 2; __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
// --------------------------------- OR ---------------------------------------
instruct vor4B(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (OrV dst src)); format %{ "por $dst,$src\t! or vectors (4 bytes)" %} ins_encode %{ __ por($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (OrV src1 src2)); format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} ins_encode %{ int vector_len = 0; __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (OrV src (LoadVector mem))); format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} ins_encode %{ int vector_len = 0; __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vor8B(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (OrV dst src)); format %{ "por $dst,$src\t! or vectors (8 bytes)" %} ins_encode %{ __ por($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (OrV src1 src2)); format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} ins_encode %{ int vector_len = 0; __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (OrV src (LoadVector mem))); format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} ins_encode %{ int vector_len = 0; __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vor16B(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (OrV dst src)); format %{ "por $dst,$src\t! or vectors (16 bytes)" %} ins_encode %{ __ por($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (OrV src1 src2)); format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} ins_encode %{ int vector_len = 0; __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (OrV src (LoadVector mem))); format %{ "vpor $dst,$src,$mem\t!
or vectors (16 bytes)" %} ins_encode %{ int vector_len = 0; __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); match(Set dst (OrV src1 src2)); format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} ins_encode %{ int vector_len = 1; __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); match(Set dst (OrV src (LoadVector mem))); format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} ins_encode %{ int vector_len = 1; __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); match(Set dst (OrV src1 src2)); format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} ins_encode %{ int vector_len = 2; __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); match(Set dst (OrV src (LoadVector mem))); format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %} ins_encode %{ int vector_len = 2; __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- XOR -------------------------------------- instruct vxor4B(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (XorV dst src)); format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (XorV src1 src2)); format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} ins_encode %{ int vector_len = 0; __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); match(Set dst (XorV src (LoadVector mem))); format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} ins_encode %{ int vector_len = 0; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} instruct vxor8B(vecD dst, vecD src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (XorV dst src)); format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (XorV src1 src2)); format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} ins_encode %{ int vector_len = 0; __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); match(Set dst (XorV src (LoadVector mem))); format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (8 bytes)" %} ins_encode %{ int vector_len = 0; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vxor16B(vecX dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (XorV dst src)); format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %}
instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (XorV src1 src2)); format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} ins_encode %{ int vector_len = 0; __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); match(Set dst (XorV src (LoadVector mem))); format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} ins_encode %{ int vector_len = 0; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); match(Set dst (XorV src1 src2)); format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); match(Set dst (XorV src (LoadVector mem))); format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); match(Set dst (XorV src1 src2)); format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); match(Set dst (XorV src (LoadVector mem))); format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %}
// --------------------------------- VectorCast --------------------------------
instruct vcvt4Bto4S_reg(vecD dst, vecS src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbw $dst,$src\t! convert 4B to 4S vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt8Bto8S_reg(vecX dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbw $dst,$src\t!
convert 8B to 8S vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt16Bto16S_reg(vecY dst, vecX src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbw $dst,$src\t! convert 16B to 16S vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt32Bto32S_reg(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbw $dst,$src\t! convert 32B to 32S vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Bto4I_reg(vecX dst, vecS src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\t! convert 4B to 4I vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Bto8I_reg(vecY dst, vecD src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\t! convert 8B to 8I vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt16Bto16I_reg(vecZ dst, vecX src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\t! convert 16B to 16I vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Bto4L_reg(vecY dst, vecS src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbq $dst,$src\t! convert 4B to 4L vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Bto8L_reg(vecZ dst, vecD src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbq $dst,$src\t! convert 8B to 8L vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Bto4F_reg(vecX dst, vecS src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\n\t" "vcvtdq2ps $dst,$dst\t! 
convert 4B to 4F vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Bto8F_reg(vecY dst, vecD src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\n\t" "vcvtdq2ps $dst,$dst\t! convert 8B to 8F vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt16Bto16F_reg(vecZ dst, vecX src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\n\t" "vcvtdq2ps $dst,$dst\t! convert 16B to 16F vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Bto4D_reg(vecY dst, vecS src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\n\t" "vcvtdq2pd $dst,$dst\t! convert 4B to 4D vector" %} ins_encode %{ __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, 0); __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, 1); %} ins_pipe( pipe_slow ); %} instruct vcvt8Bto8D_reg(vecZ dst, vecD src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastB2X src)); format %{ "vpmovsxbd $dst,$src\n\t" "vcvtdq2pd $dst,$dst\t! convert 8B to 8D vector" %} ins_encode %{ __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, 1); __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, 2); %} ins_pipe( pipe_slow ); %} instruct vcvt4Sto4B_reg(vecS dst, vecD src, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); effect(TEMP scratch); match(Set dst (VectorCastS2X src)); format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" "vpackuswb $dst,$dst\t! convert 4S to 4B vector" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Sto8B_reg(vecD dst, vecX src, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); effect(TEMP scratch); match(Set dst (VectorCastS2X src)); format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" "vpackuswb $dst,$dst\t! 
convert 8S to 8B vector" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt16Sto16B_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); effect(TEMP scratch, TEMP tmp); match(Set dst (VectorCastS2X src)); format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" "vextracti128 $tmp,$dst,0x1\n\t" "vpackuswb $dst,$dst,$tmp\t! convert 16S to 16B vector" %} ins_encode %{ int vector_len = 1; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt32Sto32B_reg(vecY dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorCastS2X src)); format %{ "evpmovwb $dst,$src\t! convert 32S to 32B vector" %} ins_encode %{ int vector_len = 2; __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt2Sto2I_reg(vecD dst, vecS src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\t! convert 2S to 2I vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt4Sto4I_reg(vecX dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\t! convert 4S to 4I vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt8Sto8I_reg(vecY dst, vecX src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\t! convert 8S to 8I vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt16Sto16I_reg(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\t! convert 16S to 16I vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt2Sto2L_reg(vecX dst, vecS src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwq $dst,$src\t!
convert 2S to 2L vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Sto4L_reg(vecY dst, vecD src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwq $dst,$src\t! convert 4S to 4L vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Sto8L_reg(vecZ dst, vecX src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwq $dst,$src\t! convert 8S to 8L vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Sto2F_reg(vecD dst, vecS src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\n\t" "vcvtdq2ps $dst,$dst\t! convert 2S to 2F vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Sto4F_reg(vecX dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\n\t" "vcvtdq2ps $dst,$dst\t! convert 4S to 4F vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Sto8F_reg(vecY dst, vecX src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\n\t" "vcvtdq2ps $dst,$dst\t! convert 8S to 8F vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt16Sto16F_reg(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\n\t" "vcvtdq2ps $dst,$dst\t! convert 16S to 16F vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Sto2D_reg(vecX dst, vecS src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\n\t" "vcvtdq2pd $dst,$dst\t! 
convert 2S to 2D vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Sto4D_reg(vecY dst, vecD src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\n\t" "vcvtdq2pd $dst,$dst\t! convert 4S to 4D vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Sto8D_reg(vecZ dst, vecX src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastS2X src)); format %{ "vpmovsxwd $dst,$src\n\t" "vcvtdq2pd $dst,$dst\t! convert 8S to 8D vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Ito4B_reg(vecS dst, vecX src, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); effect(TEMP scratch); match(Set dst (VectorCastI2X src)); format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t" "vpackusdw $dst,$dst\n\t" "vpackuswb $dst,$dst\t! convert 4I to 4B vector" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Ito8B_reg(vecD dst, vecY src, vecY tmp, rRegL scratch) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); effect(TEMP scratch, TEMP tmp); match(Set dst (VectorCastI2X src)); format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t" "vextracti128 $tmp,$dst,0x1\n\t" "vpackusdw $dst,$dst,$tmp\n\t" "vpackuswb $dst,$dst\t! convert 8I to 8B vector" %} ins_encode %{ int vector_len = 1; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct vcvt16Ito16B_reg(vecX dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorCastI2X src)); format %{ "evpmovdb $dst,$src\t! convert 16I to 16B vector" %} ins_encode %{ int vector_len = 2; __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Ito2S_reg(vecS dst, vecD src, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); effect(TEMP scratch); match(Set dst (VectorCastI2X src)); format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" "vpackusdw $dst,$dst\t! 
convert 2I to 2S vector" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt4Ito4S_reg(vecD dst, vecX src, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); effect(TEMP scratch); match(Set dst (VectorCastI2X src)); format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" "vpackusdw $dst,$dst\t! convert 4I to 4S vector" %} ins_encode %{ int vector_len = 0; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt8Ito8S_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); effect(TEMP scratch, TEMP tmp); match(Set dst (VectorCastI2X src)); format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" "vextracti128 $tmp,$dst,0x1\n\t" "vpackusdw $dst,$dst,$tmp\t! convert 8I to 8S vector" %} ins_encode %{ int vector_len = 1; __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt16Ito16S_reg(vecY dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastI2X src)); format %{ "evpmovdw $dst,$src\t! convert 16I to 16S vector" %} ins_encode %{ int vector_len = 2; __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt2Ito2L_reg(vecX dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastI2X src)); format %{ "vpmovsxdq $dst,$src\t! convert 2I to 2L vector" %} ins_encode %{ int vector_len = 0; __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt4Ito4L_reg(vecY dst, vecX src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastI2X src)); format %{ "vpmovsxdq $dst,$src\t! convert 4I to 4L vector" %} ins_encode %{ int vector_len = 1; __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt8Ito8L_reg(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorCastI2X src)); format %{ "vpmovsxdq $dst,$src\t! convert 8I to 8L vector" %} ins_encode %{ int vector_len = 2; __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
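// The int->short/byte rules earlier in this section have no single narrowing
// instruction below AVX512, so they mask each lane down to the target width
// and then use the unsigned saturating packs (vpackusdw/vpackuswb), which on
// masked input act as plain truncation. The int->float/double rules below
// map directly onto vcvtdq2ps/vcvtdq2pd.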
instruct vcvt2Ito2F_reg(vecD dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastI2X src)); format %{ "vcvtdq2ps $dst,$src\t! convert 2I to 2F vector" %} ins_encode %{ int vector_len = 0; __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt4Ito4F_reg(vecX dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastI2X src)); format %{ "vcvtdq2ps $dst,$src\t! convert 4I to 4F vector" %} ins_encode %{ int vector_len = 0; __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt8Ito8F_reg(vecY dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastI2X src)); format %{ "vcvtdq2ps $dst,$src\t! convert 8I to 8F vector" %} ins_encode %{ int vector_len = 1; __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt16Ito16F_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastI2X src)); format %{ "vcvtdq2ps $dst,$src\t! convert 16I to 16F vector" %} ins_encode %{ int vector_len = 2; __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt2Ito2D_reg(vecX dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastI2X src)); format %{ "vcvtdq2pd $dst,$src\t! convert 2I to 2D vector" %} ins_encode %{ int vector_len = 0; __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt4Ito4D_reg(vecY dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastI2X src)); format %{ "vcvtdq2pd $dst,$src\t! convert 4I to 4D vector" %} ins_encode %{ int vector_len = 1; __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt8Ito8D_reg(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastI2X src)); format %{ "vcvtdq2pd $dst,$src\t! convert 8I to 8D vector" %} ins_encode %{ int vector_len = 2; __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %}
instruct vcvt4Lto4B_reg(vecS dst, vecY src, rRegL scratch) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorCastL2X src)); effect(TEMP scratch); format %{ "vpermilps $dst,$src,8\n\t" "vpermpd $dst,$dst,8\n\t" "vpand $dst,$dst,[0x000000FF000000FF]\n\t" "vpackusdw $dst,$dst\n\t" "vpackuswb $dst,$dst\t! convert 4L to 4B vector" %} ins_encode %{ int vector_len = 1; __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len); // Since cast to int has been done, do rest of operations in 128.
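// The two permutes above have gathered the low dwords of the four longs into
// the bottom 128 bits, so the masked pack sequence below can run at 128-bit
// width (vector_len = 0) to finish the dword-to-byte truncation.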
vector_len = 0; __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Lto8B_reg(vecD dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorCastL2X src)); format %{ "evpmovqb $dst,$src\t! convert 8L to 8B vector" %} ins_encode %{ int vector_len = 2; __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Lto2S_reg(vecS dst, vecX src, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastL2X src)); effect(TEMP scratch); format %{ "vpshufd $dst,$src,8\n\t" "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t" "vpackusdw $dst,$dst\t! convert 2L to 2S vector" %} ins_encode %{ int vector_len = 0; __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Lto4S_reg(vecD dst, vecY src, rRegL scratch) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastL2X src)); effect(TEMP scratch); format %{ "vpermilps $dst,$src,8\n\t" "vpermpd $dst,$dst,8\n\t" "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t" "vpackusdw $dst,$dst\t! convert 4L to 4S vector" %} ins_encode %{ int vector_len = 1; __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len); // Since cast to int has been done, do rest of operations in 128. vector_len = 0; __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Lto8S_reg(vecX dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorCastL2X src)); format %{ "evpmovqw $dst,$src\t! convert 8L to 8S vector" %} ins_encode %{ int vector_len = 2; __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt1Lto1I_reg(vecS dst, vecD src) %{ predicate(n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastL2X src)); format %{ "movdqu $dst,$src\t! convert 1L to 1I vector" %} ins_encode %{ // If register is the same, then move is not needed. if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct vcvt2Lto2I_reg(vecD dst, vecX src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastL2X src)); format %{ "pshufd $dst,$src,8\t! 
convert 2L to 2I vector" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); %} ins_pipe( pipe_slow ); %} instruct vcvt2Lto2I_reg_avx(vecD dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastL2X src)); format %{ "vpshufd $dst,$src,8\t! convert 2L to 2I vector" %} ins_encode %{ int vector_len = 0; __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Lto4I_reg(vecX dst, vecY src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastL2X src)); format %{ "vpermilps $dst,$src,8\n\t" "vpermpd $dst,$dst,8\t! convert 4L to 4I vector" %} ins_encode %{ int vector_len = 1; __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Lto8I_reg(vecY dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorCastL2X src)); format %{ "evpmovqd $dst,$src\t! convert 8L to 8I vector" %} ins_encode %{ int vector_len = 2; __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Lto2F_reg(vecD dst, vecX src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastL2X src)); format %{ "vcvtqq2ps $dst,$src\t! convert 2L to 2F vector" %} ins_encode %{ int vector_len = 0; __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Lto4F_reg(vecX dst, vecY src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastL2X src)); format %{ "vcvtqq2ps $dst,$src\t! convert 4L to 4F vector" %} ins_encode %{ int vector_len = 1; __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Lto8F_reg(vecY dst, vecZ src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastL2X src)); format %{ "vcvtqq2ps $dst,$src\t! convert 8L to 8F vector" %} ins_encode %{ int vector_len = 2; __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt1Lto1D_reg(vecD dst, vecD src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastL2X src)); format %{ "vcvtqq2pd $dst,$src\t! convert 1L to 1D vector" %} ins_encode %{ int vector_len = 0; __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Lto2D_reg(vecX dst, vecX src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastL2X src)); format %{ "vcvtqq2pd $dst,$src\t! 
convert 2L to 2D vector" %} ins_encode %{ int vector_len = 0; __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Lto4D_reg(vecY dst, vecY src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastL2X src)); format %{ "vcvtqq2pd $dst,$src\t! convert 4L to 4D vector" %} ins_encode %{ int vector_len = 1; __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Lto8D_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastL2X src)); format %{ "vcvtqq2pd $dst,$src\t! convert 8L to 8D vector" %} ins_encode %{ int vector_len = 2; __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Fto2D_reg(vecX dst, vecD src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastF2X src)); format %{ "vcvtps2pd $dst,$src\t! convert 2F to 2D vector" %} ins_encode %{ int vector_len = 0; __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Fto4D_reg(vecY dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastF2X src)); format %{ "vcvtps2pd $dst,$src\t! convert 4F to 4D vector" %} ins_encode %{ int vector_len = 1; __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Fto8D_reg(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastF2X src)); format %{ "vcvtps2pd $dst,$src\t! convert 8F to 8D vector" %} ins_encode %{ int vector_len = 2; __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt2Dto2F_reg(vecD dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastD2X src)); format %{ "vcvtpd2ps $dst,$src\t! convert 2D to 2F vector" %} ins_encode %{ int vector_len = 0; __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt4Dto4F_reg(vecX dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastD2X src)); format %{ "vcvtpd2ps $dst,$src\t! convert 4D to 4F vector" %} ins_encode %{ int vector_len = 1; __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcvt8Dto8F_reg(vecY dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorCastD2X src)); format %{ "vcvtpd2ps $dst,$src\t! 
convert 8D to 8F vector" %} ins_encode %{ int vector_len = 2; __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq2F(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed2F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq4F(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed4F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq8F(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed8F" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpeqps k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16F" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmplt2F(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpltps $dst,$src1,$src2\t! 
cmplt packed2F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt4F(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed4F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt8F(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed8F" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpltps k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16F" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpgt2F(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed2F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt4F(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgtps $dst,$src1,$src2\t! 
cmpgt packed4F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt8F(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed8F" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpgtps k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16F" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge2F(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed2F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpge4F(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed4F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpge8F(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgeps $dst,$src1,$src2\t! 
cmpge packed8F" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpge16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpgeps k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16F" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple2F(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed2F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmple4F(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed4F" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmple8F(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed8F" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmple16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpleps k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16F" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
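// At 512 bits the compare result is produced in a k register rather than a
// vector register: evcmpps below sets one bit per float lane in ktmp, and
// the zero-masked evmovdqul then loads all-ones into exactly those lanes,
// expanding each mask bit back into a full -1/0 lane to match the
// vector-boolean layout of the narrower vcmpps-based rules.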
KRegister mask = k0; // The comparison itself is not being masked. __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne2F(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed2F" %} ins_encode %{ int vector_len = 0; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpne4F(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed4F" %} ins_encode %{ int vector_len = 0; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpne8F(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed8F" %} ins_encode %{ int vector_len = 1; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpne16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpneps k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed16F" %} ins_encode %{ int vector_len = 2; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpeq1D(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpeqpd $dst,$src1,$src2\t! 
cmpeq packed1D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq2D(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed2D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq4D(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed4D" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpeqpd k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8D" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmplt1D(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed1D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt2D(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpltpd $dst,$src1,$src2\t! 
cmplt packed2D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt4D(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed4D" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpltpd k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8D" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpgt1D(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed1D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt2D(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed2D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt4D(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgtpd $dst,$src1,$src2\t! 
cmpgt packed4D" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpgtpd k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8D" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge1D(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed1D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpge2D(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed2D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpge4D(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed4D" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpge8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpgepd k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8D" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
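// Same k-register expansion as the packed-float rules, but with evmovdquq so
// the zero-masking operates on 64-bit elements: each bit of ktmp fills or
// clears one whole double lane of $dst.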
KRegister mask = k0; // The comparison itself is not being masked. __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple1D(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed1D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmple2D(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed2D" %} ins_encode %{ int vector_len = 0; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmple4D(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed4D" %} ins_encode %{ int vector_len = 1; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmple8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmplepd k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8D" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne1D(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed1D" %} ins_encode %{ int vector_len = 0; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
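// The ordered (OQ) predicates used for eq/lt/gt/ge/le return false when
// either operand is NaN; NEQ_UQ is the unordered variant, so a NaN operand
// makes != come out true as the JLS requires.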
Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpne2D(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed2D" %} ins_encode %{ int vector_len = 0; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpne4D(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed4D" %} ins_encode %{ int vector_len = 1; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpne8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vcmpnepd k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed8D" %} ins_encode %{ int vector_len = 2; // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpeq2I(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed2I" %} ins_encode %{ int vector_len = 0; __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq4I(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqd $dst,$src1,$src2\n\t! 
cmpeq packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
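// For the 512-bit forms the comparison condition is passed to a single
// evpcmpd as an immediate predicate (Assembler::eq/lt/nle/nlt/le/neq),
// rather than being selected by opcode (vpcmpeqd vs. vpcmpgtd) as in the
// AVX/AVX2 rules above.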
instruct vcmplt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnled k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed2I" %} ins_encode %{ int vector_len = 0; __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtd $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4I" %} ins_encode %{ int vector_len = 0; __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtd $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8I" %} ins_encode %{ int vector_len = 1; __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpnltd k2,$src1,$src2\n\t" "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16I" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::nlt; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtd $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed2I" %} ins_encode %{ int vector_len = 0; __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtd $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4I" %} ins_encode %{ int vector_len = 0; __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtd $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8I" %} ins_encode %{ int vector_len = 1; __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpled k2,$src1,$src2\n\t" "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16I" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::le; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqd $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed2I" %} ins_encode %{ int vector_len = 0; __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqd $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4I" %} ins_encode %{ int vector_len = 0; __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqd $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8I" %} ins_encode %{ int vector_len = 1; __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpneqd k2,$src1,$src2\n\t" "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed16I" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::neq; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpeq8B(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed8B" %} ins_encode %{ int vector_len = 0; __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq16B(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqb $dst,$src1,$src2\n\t! 
cmpeq packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq32B(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() &&
            n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8B(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16B(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt32B(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
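// The 64-byte compare rules additionally require AVX512BW (note the
// VM_Version::supports_avx512bw() term in their predicates), since the
// byte-element EVEX instructions evpcmpb and evmovdqub are only available
// with AVX-512BW.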
instruct vcmplt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() &&
            n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8B(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16B(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt32B(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() &&
            n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnleb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed8B" %} ins_encode %{ int vector_len = 0; __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtb $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16B" %} ins_encode %{ int vector_len = 0; __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct extract8d(regD dst, vecZ src, vecZ tmp, immI idx) %{ predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 8); match(Set dst (ExtractD src idx)); effect(TEMP tmp); ins_encode %{ int vector_len = 2; int midx = 0x7 & $idx$$constant; if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } else if (midx == 1) { __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); } else if (midx > 1 && midx <= 7) { int extr_idx1 = midx / 2; int extr_idx2 = midx % 2; __ vextractf32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); __ vpshufpd($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, extr_idx2, vector_len); } %} ins_pipe( pipe_slow ); %} instruct extract4d(regD dst, vecY src, vecY tmp, immI idx) %{ predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); match(Set dst (ExtractD src idx)); effect(TEMP tmp); ins_encode %{ int vector_len = 1; int midx = 0x3 & $idx$$constant; if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } else if (midx == 1) { __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); } else if (midx > 1 && midx <= 3) { __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); __ vpshufpd($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, midx - 2, vector_len); } %} ins_pipe( pipe_slow ); %} instruct extract2d(regD dst, vecX src, immI idx) %{ predicate(UseSSE >= 2 && n->in(1)->bottom_type()->is_vect()->length() == 2); match(Set dst (ExtractD src idx)); ins_encode %{ int midx = 0x1 & $idx$$constant; if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } if (midx > 0) { __ pshufpd($dst$$XMMRegister, $dst$$XMMRegister, midx); } %} ins_pipe( pipe_slow ); %} instruct extract1d(regD dst, vecD src, immI idx) %{ predicate(UseSSE >= 2 && n->in(1)->bottom_type()->is_vect()->length() == 1); match(Set dst (ExtractD src idx)); ins_encode %{ int midx = 0x1 & $idx$$constant; if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct extract16f(regF dst, vecZ src, vecZ tmp, immI idx) %{ predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 16); match(Set dst (ExtractF src idx)); effect(TEMP tmp); ins_encode %{ int vector_len = 2; int midx = 0xF & $idx$$constant; if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } else
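// Extract strategy, as in extract8d/extract4d above: lane 0 is a plain
// register move, lanes that live in the low 128 bits are reached with a
// single shuffle, and higher lanes are first brought down into the low
// 128 bits with vextractf32x4/vextractf128 before the final shuffle.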
if (midx >= 1 && midx <= 3) { __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); } else { int extr_idx1 = midx / 4; int extr_idx2 = midx % 4; __ vextractf32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); __ vpshufps($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, extr_idx2, vector_len); } %} ins_pipe( pipe_slow ); %} instruct extract8f(regF dst, vecY src, vecY tmp, immI idx) %{ predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8); match(Set dst (ExtractF src idx)); effect(TEMP tmp); ins_encode %{ int vector_len = 1; int midx = 0x7 & $idx$$constant; if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); } else if (midx >= 1 && midx <= 3) { __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); } else if (midx >= 4) { __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); __ vpshufps($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, midx - 4, vector_len); } %} ins_pipe( pipe_slow ); %} instruct extract4f(regF dst, vecX src, immI idx) %{ predicate(UseSSE >= 2 && n->in(1)->bottom_type()->is_vect()->length() == 4); match(Set dst (ExtractF src idx)); ins_encode %{ int midx = 0x3 & $idx$$constant; if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } if (midx > 0) { __ pshufps($dst$$XMMRegister, $dst$$XMMRegister, midx); } %} ins_pipe( pipe_slow ); %} instruct extract2f(regF dst, vecD src, immI idx) %{ predicate(UseSSE >= 2 && n->in(1)->bottom_type()->is_vect()->length() == 2); match(Set dst (ExtractF src idx)); ins_encode %{ int midx = 0x1 & $idx$$constant; if ($dst$$XMMRegister != $src$$XMMRegister) { __ movdqu($dst$$XMMRegister, $src$$XMMRegister); } if (midx > 0) { __ pshufps($dst$$XMMRegister, $dst$$XMMRegister, midx); } %} ins_pipe( pipe_slow ); %} instruct extract1l(rRegL dst, vecD src, immI idx) %{ predicate(UseSSE > 1 && n->in(1)->bottom_type()->is_vect()->length() == 1); match(Set dst (ExtractL src idx)); ins_encode %{ int midx = 0x1 & $idx$$constant; if (midx == 0) { __ movq($dst$$Register, $src$$XMMRegister); } %} ins_pipe( pipe_slow ); %} instruct extract2l(rRegL dst, vecX src, immI idx) %{ predicate(UseSSE >= 4 && n->in(1)->bottom_type()->is_vect()->length() == 2); match(Set dst (ExtractL src idx)); ins_encode %{ int midx = 0x1 & $idx$$constant; if (midx == 0) { __ movq($dst$$Register, $src$$XMMRegister); } else { __ pextrq($dst$$Register, $src$$XMMRegister, midx); } %} ins_pipe( pipe_slow ); %} instruct extract4l(rRegL dst, vecY src, immI idx, vecX tmp) %{ predicate(UseAVX > 1 && n->in(1)->bottom_type()->is_vect()->length() == 4); match(Set dst (ExtractL src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0x3 & $idx$$constant; if (midx == 0) { __ movq($dst$$Register, $src$$XMMRegister); } else if (midx == 1) { __ pextrq($dst$$Register, $src$$XMMRegister, midx); } else { __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); __ pextrq($dst$$Register, $tmp$$XMMRegister, midx - 2); } %} ins_pipe( pipe_slow ); %} instruct extract8l(rRegL dst, vecZ src, vecX tmp, immI idx) %{ predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 8); match(Set dst (ExtractL src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0x7 & $idx$$constant; if (midx == 0) { __ movq($dst$$Register, $src$$XMMRegister); } else if (midx == 1) { __ pextrq($dst$$Register, $src$$XMMRegister, midx); } else { // Using 2 because there are 2 longs in 128-bit
int extr_idx1 = midx / 2; int
extr_idx2 = midx % 2; __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); __ pextrq($dst$$Register, $tmp$$XMMRegister, extr_idx2); } %} ins_pipe( pipe_slow ); %} instruct extract2i(rRegI dst, vecD src, immI idx) %{ predicate(UseSSE > 3 && n->in(1)->bottom_type()->is_vect()->length() == 2); match(Set dst (ExtractI src idx)); ins_encode %{ int midx = 0x1 & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); } else if (midx >= 1) { __ pextrd($dst$$Register, $src$$XMMRegister, midx); } %} ins_pipe( pipe_slow ); %} instruct extract4i(rRegI dst, vecX src, immI idx) %{ predicate(UseSSE > 3 && n->in(1)->bottom_type()->is_vect()->length() == 4); match(Set dst (ExtractI src idx)); ins_encode %{ int midx = 0x3 & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); } else if (midx >= 1 && midx <= 3) { __ pextrd($dst$$Register, $src$$XMMRegister, midx); } %} ins_pipe( pipe_slow ); %} instruct extract8i(rRegI dst, vecY src, vecX tmp, immI idx) %{ predicate(UseAVX > 1 && n->in(1)->bottom_type()->is_vect()->length() == 8); match(Set dst (ExtractI src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0x7 & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); } else if (midx >= 1 && midx <= 3) { __ pextrd($dst$$Register, $src$$XMMRegister, midx); } else if (midx >= 4) { __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); __ pextrd($dst$$Register, $tmp$$XMMRegister, midx - 4); } %} ins_pipe( pipe_slow ); %} instruct extract16i(rRegI dst, vecZ src, vecX tmp, immI idx) %{ predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 16); match(Set dst (ExtractI src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0xF & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); } else if (midx >= 1 && midx <= 3) { __ pextrd($dst$$Register, $src$$XMMRegister, midx); } else { // Using 4 because there are 4 ints in 128-bit int extr_idx1 = midx / 4; int extr_idx2 = midx % 4; __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); __ pextrd($dst$$Register, $tmp$$XMMRegister, extr_idx2); } %} ins_pipe( pipe_slow ); %} instruct extract4s(rRegI dst, vecD src, immI idx) %{ predicate(UseSSE > 1 && n->in(1)->bottom_type()->is_vect()->length() == 4); match(Set dst (ExtractS src idx)); ins_encode %{ int midx = 0x3 & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movswl($dst$$Register, $dst$$Register); } else if (midx >= 1) { __ pextrw($dst$$Register, $src$$XMMRegister, midx); __ movswl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct extract8s(rRegI dst, vecX src, immI idx) %{ predicate(UseSSE > 1 && n->in(1)->bottom_type()->is_vect()->length() == 8); match(Set dst (ExtractS src idx)); ins_encode %{ int midx = 0x7 & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movswl($dst$$Register, $dst$$Register); } else if (midx >= 1) { __ pextrw($dst$$Register, $src$$XMMRegister, midx); __ movswl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct extract16s(rRegI dst, vecY src, vecX tmp, immI idx) %{ predicate(UseAVX > 1 && n->in(1)->bottom_type()->is_vect()->length() == 16); match(Set dst (ExtractS src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0xF & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movswl($dst$$Register, $dst$$Register); } else if (midx >= 1 && midx <= 7) { __ pextrw($dst$$Register, $src$$XMMRegister, midx); __ 
movswl($dst$$Register, $dst$$Register); } else { __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); __ pextrw($dst$$Register, $tmp$$XMMRegister, midx-8); __ movswl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct extract32s(rRegI dst, vecZ src, vecX tmp, immI idx) %{ predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 32); match(Set dst (ExtractS src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0x1F & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movswl($dst$$Register, $dst$$Register); } else if (midx >= 1 && midx <= 7) { __ pextrw($dst$$Register, $src$$XMMRegister, midx); __ movswl($dst$$Register, $dst$$Register); } else { int extr_idx1 = midx / 8; int extr_idx2 = midx % 8; __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); __ pextrw($dst$$Register, $tmp$$XMMRegister, extr_idx2); __ movswl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct extract8b(rRegI dst, vecD src, immI idx) %{ predicate(UseSSE >= 4 && n->in(1)->bottom_type()->is_vect()->length() == 8); match(Set dst (ExtractB src idx)); ins_encode %{ int midx = 0x7 & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movsbl($dst$$Register, $dst$$Register); } else if (midx >= 1) { __ pextrb($dst$$Register, $src$$XMMRegister, midx); __ movsbl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct extract16b(rRegI dst, vecX src, immI idx) %{ predicate(UseSSE >= 4 && n->in(1)->bottom_type()->is_vect()->length() == 16); match(Set dst (ExtractB src idx)); ins_encode %{ int midx = 0xF & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movsbl($dst$$Register, $dst$$Register); } else if (midx >= 1) { __ pextrb($dst$$Register, $src$$XMMRegister, midx); __ movsbl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct extract32b(rRegI dst, vecY src, vecX tmp, immI idx) %{ predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 32); match(Set dst (ExtractB src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0x1F & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movsbl($dst$$Register, $dst$$Register); } else if (midx >= 1 && midx <= 15) { __ pextrb($dst$$Register, $src$$XMMRegister, midx); __ movsbl($dst$$Register, $dst$$Register); } else { int extr_idx1 = midx / 16; int extr_idx2 = midx % 16; __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); __ pextrb($dst$$Register, $tmp$$XMMRegister, extr_idx2); __ movsbl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct extract64b(rRegI dst, vecZ src, vecX tmp, immI idx) %{ predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 64); match(Set dst (ExtractB src idx)); effect(TEMP tmp); ins_encode %{ int midx = 0x3F & $idx$$constant; if (midx == 0) { __ movdl($dst$$Register, $src$$XMMRegister); __ movsbl($dst$$Register, $dst$$Register); } else if (midx >= 1 && midx <= 15) { __ pextrb($dst$$Register, $src$$XMMRegister, midx); __ movsbl($dst$$Register, $dst$$Register); } else { int extr_idx1 = midx / 16; int extr_idx2 = midx % 16; __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); __ pextrb($dst$$Register, $tmp$$XMMRegister, extr_idx2); __ movsbl($dst$$Register, $dst$$Register); } %} ins_pipe( pipe_slow ); %} instruct vcmpge32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 
n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtb $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed32B" %} ins_encode %{ int vector_len = 1; __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpnltb k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed64B" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::nlt; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
KRegister mask = k0; // The comparison itself is not being masked.
__ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtb $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8B" %} ins_encode %{ int vector_len = 0; __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtb $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16B" %} ins_encode %{ int vector_len = 0; __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtb $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t!
cmple packed32B" %} ins_encode %{ int vector_len = 1; __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpleb k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed64B" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::le; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqb $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8B" %} ins_encode %{ int vector_len = 0; __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqb $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16B" %} ins_encode %{ int vector_len = 0; __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqb $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed32B" %} ins_encode %{ int vector_len = 1; __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpneqb k2,$src1,$src2\n\t" "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed64B" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::neq; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpeq4S(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed4S" %} ins_encode %{ int vector_len = 0; __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq8S(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed8S" %} ins_encode %{ int vector_len = 0; __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq16S(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed16S" %} ins_encode %{ int vector_len = 1; __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpeqw k2,$src1,$src2\n\t" "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed32S" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::eq; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. 
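// EVEX compares produce a k mask register rather than a vector of lane
// masks, so the boolean vector expected by VectorMaskCmp is materialized
// in two steps: compare into ktmp, then do a zero-masked load of the
// all-ones constant so that true lanes become -1 and false lanes become 0.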
__ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmplt4S(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed4S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt8S(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed8S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt16S(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed16S" %} ins_encode %{ int vector_len = 1; __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpltw k2,$src1,$src2\n\t" "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed32S" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::lt; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
KRegister mask = k0; // The comparison itself is not being masked.
__ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpgt4S(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed4S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt8S(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtw $dst,$src1,$src2\t!
cmpgt packed8S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt16S(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed16S" %} ins_encode %{ int vector_len = 1; __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpnlew k2,$src1,$src2\n\t" "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed32S" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::nle; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtw $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtw $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtw $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed16S" %} ins_encode %{ int vector_len = 1; __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpnltw k2,$src1,$src2\n\t" "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed32S" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::nlt; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtw $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtw $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8S" %} ins_encode %{ int vector_len = 0; __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtw $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed16S" %} ins_encode %{ int vector_len = 1; __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmplew k2,$src1,$src2\n\t" "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed32S" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::le; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqw $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4S" %} ins_encode %{ int vector_len = 0; __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqw $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8S" %} ins_encode %{ int vector_len = 0; __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqw $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed16S" %} ins_encode %{ int vector_len = 1; __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpneqw k2,$src1,$src2\n\t" "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed32S" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::neq; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpeq1L(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed1L" %} ins_encode %{ int vector_len = 0; __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq2L(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed2L" %} ins_encode %{ int vector_len = 0; __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq4L(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed4L" %} ins_encode %{ int vector_len = 1; __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpeq8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpeqq k2,$src1,$src2\n\t" "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8L" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::eq; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. 
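// Same mask-materialization idiom as the byte and short cases above: k2 is
// pinned as the temporary because k registers are not allocated, and k0 is
// passed as the mask operand, which in EVEX encoding means "no masking".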
__ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmplt1L(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed1L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt2L(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed2L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt4L(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed4L" %} ins_encode %{ int vector_len = 1; __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmplt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpltq k2,$src1,$src2\n\t" "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8L" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::lt; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
KRegister mask = k0; // The comparison itself is not being masked.
__ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpgt1L(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed1L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt2L(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtq $dst,$src1,$src2\t!
cmpgt packed2L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt4L(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed4L" %} ins_encode %{ int vector_len = 1; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vcmpgt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpnleq k2,$src1,$src2\n\t" "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8L" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::nle; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtq $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed1L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtq $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtq $dst,$src2,$src1\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed4L" %} ins_encode %{ int vector_len = 1; __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpnltq k2,$src1,$src2\n\t" "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8L" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::nlt; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2L" %} ins_encode %{ int vector_len = 0; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpgtq $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed4L" %} ins_encode %{ int vector_len = 1; __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmple8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpleq k2,$src1,$src2\n\t" "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8L" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::le; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqq $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed1L" %} ins_encode %{ int vector_len = 0; __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqq $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2L" %} ins_encode %{ int vector_len = 0; __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP scratch); format %{ "vpcmpeqq $dst,$src1,$src2\n\t" "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed4L" %} ins_encode %{ int vector_len = 1; __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vcmpne8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorMaskCmp src1 src2)); effect(TEMP dst, TEMP scratch); format %{ "vpcmpneqq k2,$src1,$src2\n\t" "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed8L" %} ins_encode %{ int vector_len = 2; Assembler::ComparisonPredicate cmp = Assembler::neq; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. KRegister mask = k0; // The comparison itself is not being masked. __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct blendvps2F(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "blendvps $dst,$src,$mask\t! packed2F" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ blendvps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vblendvps2F(vecD dst, vecD src1, vecD src2, vecD mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed2F" %} ins_encode %{ int vector_len = 0; __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct blendvps4F(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "blendvps $dst,$src,$mask\t! packed4F" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ blendvps($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vblendvps4F(vecX dst, vecX src1, vecX src2, vecX mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed4F" %} ins_encode %{ int vector_len = 0; __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vblendvps8F(vecY dst, vecY src1, vecY src2, vecY mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vblendvps $dst,$src1,$src2,$mask\t! 
packed8F" %} ins_encode %{ int vector_len = 1; __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vblendvps16F(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorBlend (Binary src1 src2) mask)); effect(TEMP scratch); format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t" "vblendmps $dst,k2,$src1,$src2\t! blend packed16F " %} ins_encode %{ int vector_len = 2; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. __ evpcmpeqd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); __ evblendmps($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); %} ins_pipe( pipe_slow ); %} instruct vblendvpd8D(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorBlend (Binary src1 src2) mask)); effect(TEMP scratch); format %{ "evpcmpeqq k2,$mask,0xFFFFFFFF\n\t" "vblendmpd $dst,k2,$src1,$src2\t! blend packed16F " %} ins_encode %{ int vector_len = 2; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register); __ evblendmpd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); %} ins_pipe( pipe_slow ); %} instruct vpblendmb64B(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE && VM_Version::supports_avx512bw()); match(Set dst (VectorBlend (Binary src1 src2) mask)); effect(TEMP scratch); format %{ "vpcmpeqb k2,$mask,0xFFFFFFFF\n\t" "vpblendmb $dst,k2,$src1,$src2\t! blend packed64B " %} ins_encode %{ int vector_len = 2; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. __ evpcmpb(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register); __ evpblendmb($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); %} ins_pipe( pipe_slow ); %} instruct vpblendmw32S(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT && VM_Version::supports_avx512bw()); match(Set dst (VectorBlend (Binary src1 src2) mask)); effect(TEMP scratch); format %{ "vpcmpeqw k2,$mask,0xFFFFFFFF\n\t" "vpblendmw $dst,k2,$src1,$src2\t! blend packed32S " %} ins_encode %{ int vector_len = 2; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
    __ evpcmpw(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmw($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmd16I(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd  k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmd $dst,k2,$src1,$src2\t! blend packed16I" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmq8L(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq  k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmq $dst,k2,$src1,$src2\t! blend packed8L" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmq($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb2I(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed2I" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2I(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4I(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb  $dst,$src,$mask\t! 
blend packed4I" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vpblendvb4I(vecX dst, vecX src1, vecX src2, vecX mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4I" %} ins_encode %{ int vector_len = 0; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vpblendvb8I(vecY dst, vecY src1, vecY src2, vecY mask) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8I" %} ins_encode %{ int vector_len = 1; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct pblendvb8B(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "pblendvb $dst,$src,$mask\t! blend packed8B" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vpblendvb8B(vecD dst, vecD src1, vecD src2, vecD mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8B" %} ins_encode %{ int vector_len = 0; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct pblendvb16B(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "pblendvb $dst,$src,$mask\t! blend packed16B" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vpblendvb16B(vecX dst, vecX src1, vecX src2, vecX mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16B" %} ins_encode %{ int vector_len = 0; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vpblendvb32B(vecY dst, vecY src1, vecY src2, vecY mask) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed32B" %} ins_encode %{ int vector_len = 1; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct pblendvb4S(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "pblendvb $dst,$src,$mask\t! blend packed4S" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vpblendvb4S(vecD dst, vecD src1, vecD src2, vecD mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4S" %} ins_encode %{ int vector_len = 0; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct pblendvb8S(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "pblendvb $dst,$src,$mask\t! blend packed8S" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vpblendvb8S(vecX dst, vecX src1, vecX src2, vecX mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8S" %} ins_encode %{ int vector_len = 0; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vpblendvb16S(vecY dst, vecY src1, vecY src2, vecY mask) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16S" %} ins_encode %{ int vector_len = 1; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct pblendvb1L(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "pblendvb $dst,$src,$mask\t! blend packed1L" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vpblendvb1L(vecD dst, vecD src1, vecD src2, vecD mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed1L" %} ins_encode %{ int vector_len = 0; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct pblendvb2L(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "pblendvb $dst,$src,$mask\t! blend packed2L" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vpblendvb2L(vecX dst, vecX src1, vecX src2, vecX mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2L" %} ins_encode %{ int vector_len = 0; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vpblendvb4L(vecY dst, vecY src1, vecY src2, vecY mask) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4L" %} ins_encode %{ int vector_len = 1; __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct blendvpd1D(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "blendvpd $dst,$src,$mask\t! packed1D" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ blendvpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vblendvpd1D(vecD dst, vecD src1, vecD src2, vecD mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed1D" %} ins_encode %{ int vector_len = 0; __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct blendvpd2D(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorBlend (Binary dst src) mask)); effect(TEMP xmm_0); format %{ "blendvpd $dst,$src,$mask\t! packed2D" %} ins_encode %{ if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); } __ blendvpd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vblendvpd2D(vecX dst, vecX src1, vecX src2, vecX mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vblendvpd $dst,$src1,$src2,$mask\t! 
packed2D" %} ins_encode %{ int vector_len = 0; __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vblendvpd4D(vecY dst, vecY src1, vecY src2, vecY mask) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed4D" %} ins_encode %{ int vector_len = 1; __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- NEG -------------------------------------- // a = -a instruct vneg2I_reg(vecD dst, vecD src) %{ predicate(UseSSE > 1 && n->as_Vector()->length() == 2); match(Set dst (NegVI src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubd $dst, $src\t! neg packed2I" %} ins_cost(150); ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vneg4I_reg(vecX dst, vecX src) %{ predicate(UseSSE > 1 && n->as_Vector()->length() == 4); match(Set dst (NegVI src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubd $dst, $src\t! neg packed4I" %} ins_cost(150); ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vneg8I_reg(vecY dst, vecY src, vecY tmp) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (NegVI src)); effect(TEMP tmp); format %{ "vpxor $tmp,$tmp,$tmp\n\t" "vpsubd $dst,$tmp,$src\t! neg packed8I" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vneg16I_reg(vecZ dst, vecZ src, vecZ tmp) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (NegVI src)); effect(TEMP tmp); format %{ "vpxor $tmp,$tmp,$tmp\n\t" "vpsubd $dst,$tmp,$src\t! 
neg packed16I" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vneg1D(regD dst) %{ predicate((UseSSE>=2) && (UseAVX == 0)); match(Set dst (NegVD dst)); ins_cost(150); format %{ "xorpd $dst,[0x8000000000000000] \t# $dst = -$dst neg packed1D" %} ins_encode %{ __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); %} ins_pipe(pipe_slow); %} instruct vneg1D_reg(vecX dst, vecX src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 1); match(Set dst (NegVD src)); format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed1D" %} ins_cost(150); ins_encode %{ __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(double_signflip())); %} ins_pipe( pipe_slow ); %} instruct vneg2D_reg(vecX dst) %{ predicate((UseSSE>=2)); match(Set dst (NegVD dst)); ins_cost(150); format %{ "xorpd $dst,[0x8000000000000000]\t# $dst = -$dst neg packed2D" %} ins_encode %{ __ xorpd($dst$$XMMRegister, ExternalAddress(vector_double_signflip())); %} ins_pipe(pipe_slow); %} instruct vneg4D_reg(vecY dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (NegVD src)); format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed4D" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len); %} ins_pipe( pipe_slow ); %} instruct vneg8D_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (NegVD src)); format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed8D" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len); %} ins_pipe( pipe_slow ); %} instruct vneg2F_reg(vecD dst) %{ predicate(UseSSE > 0 && n->as_Vector()->length() == 2); match(Set dst (NegVF dst)); format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed2F" %} ins_cost(150); ins_encode %{ __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip())); %} ins_pipe( pipe_slow ); %} instruct vneg4F_reg(vecX dst) %{ predicate(UseSSE > 0 && n->as_Vector()->length() == 4); match(Set dst (NegVF dst)); format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed4F" %} ins_cost(150); ins_encode %{ __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip())); %} ins_pipe( pipe_slow ); %} instruct vneg8F_reg(vecY dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8); match(Set dst (NegVF src)); format %{ "vxorps $dst,$src\t# $dst = -$src neg packed8F" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len); %} ins_pipe( pipe_slow ); %} instruct vneg16F_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (NegVF src)); format %{ "vxorps $dst,$src\t# $dst = -$src neg packed16F" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- ABS -------------------------------------- // a = |a| instruct vabs8B_reg(vecD dst, vecD src) %{ predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst 
(AbsV src)); format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %} ins_cost(150); ins_encode %{ __ pabsb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vabs16B_reg(vecX dst, vecX src) %{ predicate(UseSSE > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AbsV src)); format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %} ins_cost(150); ins_encode %{ __ pabsb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vabs32B_reg(vecY dst, vecY src) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AbsV src)); format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs64B_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AbsV src)); format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs4S_reg(vecD dst, vecD src) %{ predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AbsV src)); format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %} ins_cost(150); ins_encode %{ __ pabsw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vabs8S_reg(vecX dst, vecX src) %{ predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AbsV src)); format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %} ins_cost(150); ins_encode %{ __ pabsw($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vabs16S_reg(vecY dst, vecY src) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AbsV src)); format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs32S_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AbsV src)); format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs2I_reg(vecD dst, vecD src) %{ predicate(UseSSE > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AbsV src)); format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %} ins_cost(150); ins_encode %{ __ pabsd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vabs4I_reg(vecX dst, vecX src) %{ predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AbsV src)); format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %} ins_cost(150); ins_encode %{ __ pabsd($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct 
vabs8I_reg(vecY dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AbsV src)); format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs16I_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AbsV src)); format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs2L_reg(vecX dst, vecX src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AbsV src)); format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %} ins_cost(150); ins_encode %{ int vector_len = 0; __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs4L_reg(vecY dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AbsV src)); format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs8L_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AbsV src)); format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs1D_reg(vecD dst) %{ predicate(UseSSE > 0 && n->as_Vector()->length() == 1); match(Set dst (AbsVD dst)); format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed1D" %} ins_cost(150); ins_encode %{ __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask())); %} ins_pipe( pipe_slow ); %} instruct vabs2D_reg(vecX dst) %{ predicate(UseSSE > 0 && n->as_Vector()->length() == 2); match(Set dst (AbsVD dst)); format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed2D" %} ins_cost(150); ins_encode %{ __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask())); %} ins_pipe( pipe_slow ); %} instruct vabs4D_reg(vecY dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (AbsVD src)); format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed4D" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs8D_reg(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (AbsVD src)); format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed8D" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len); %} ins_pipe( pipe_slow ); %} instruct vabs2F_reg(vecD dst) %{ predicate(UseSSE > 0 && n->as_Vector()->length() == 2); match(Set dst (AbsVF dst)); format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed2F" %} ins_cost(150); ins_encode %{ 
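    // A note on the FP abs/neg idiom used throughout this section: both are
    // bitwise operations on the sign bit rather than arithmetic ones. abs
    // ANDs each lane with a sign mask, while neg (the NEG rules above) XORs
    // it with the complementary sign-flip constant:
    //
    //   andps dst, [0x7FFFFFFF...]  // dst[i] &= 0x7FFFFFFF  -> |dst[i]|
    //   xorps dst, [0x80000000...]  // dst[i] ^= 0x80000000  -> -dst[i]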
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- NOT --------------------------------------------

instruct vnot4B(vecS dst, vecS src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot4B_reg(vecS dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B_reg(vecD dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
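// The NotV rules here all compute ~src as src XOR all-ones; there is no
// dedicated vector-not instruction. A sketch of the two flavors (operand
// names are schematic; the constant lives in memory either way):
//
//   // SSE: materialize the constant, then xor in place
//   movdqu dst, [all_ones]
//   pxor   dst, src
//
//   // AVX: fold the memory operand directly into the xor
//   vpxor  dst, src, [all_ones]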
not vectors (16 bytes)" %} ins_encode %{ int vector_len = 0; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vnot32B_reg(vecY dst, vecY src, rRegL scratch) %{ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32); match(Set dst (NotV src)); effect(TEMP scratch); format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (32 bytes)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vnot64B_reg(vecZ dst, vecZ src, rRegL scratch) %{ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); match(Set dst (NotV src)); effect(TEMP scratch); format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (64 bytes)" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct vptest4inae(rRegI dst, legVecX src1, legVecX src2) %{ predicate(UseAVX > 0 && static_cast(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2 )); format %{ "vptest $src1,$src2\n\t" "setb $dst\t!" %} ins_encode %{ int vector_len = 0; __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ setb(Assembler::carrySet, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct vptest4ieq(rRegI dst, legVecX src1, legVecX src2) %{ predicate(UseAVX > 0 && static_cast(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2 )); format %{ "vptest $src1,$src2\n\t" "setb $dst\t!" %} ins_encode %{ int vector_len = 0; __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ setb(Assembler::notZero, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct vptest8inae(rRegI dst, legVecY src1, legVecY src2) %{ predicate(UseAVX > 0 && static_cast(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2 )); format %{ "vptest $src1,$src2\n\t" "setb $dst\t!" %} ins_encode %{ int vector_len = 1; __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ setb(Assembler::carrySet, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct vptest8ieq(rRegI dst, legVecY src1, legVecY src2) %{ predicate(UseAVX > 0 && static_cast(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2 )); format %{ "vptest $src1,$src2\n\t" "setb $dst\t!" %} ins_encode %{ int vector_len = 1; __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len); __ setb(Assembler::notZero, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); %} ins_pipe( pipe_slow ); %} instruct loadmask8b(vecD dst, vecD src) %{ predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubb $dst,$src\t! load mask (8B to 8B)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask16b(vecX dst, vecX src) %{ predicate(UseSSE >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vpxor $dst,$dst\n\t" "vpsubb $dst,$src\t! 
load mask (16B to 16B)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask32b(vecY dst, vecY src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vpxor $dst,$dst\n\t" "vpsubb $dst,$src\t! load mask (32B to 32B)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadmask64b(vecZ dst, vecZ src) %{ predicate(UseAVX > 0 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vpxor $dst,$dst\n\t" "vpsubb $dst,$src\t! load mask (64B to 64B)" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadmask4s(vecD dst, vecS src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubb $dst,$src\n\t" "pmovsxbw $dst\t! load mask (4B to 4S)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask8s(vecX dst, vecD src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubb $dst,$src\n\t" "pmovsxbw $dst\t! load mask (8B to 8S)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask16s(vecY dst, vecX src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vpxor $dst,$dst\n\t" "vpsubb $dst,$src\n\t" "vpmovsxbw $dst\t! load mask (16B to 16S)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0); __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadmask32s(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vpxor $dst,$dst\n\t" "vpsubb $dst,$src\n\t" "vpmovsxbw $dst\t! 
load mask (32B to 32S)" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 1); __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1); __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadmask2i(vecD dst, vecS src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubb $dst,$src\n\t" "pmovsxbd $dst\t! load mask (2B to 2I)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask4i(vecX dst, vecS src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubb $dst,$src\n\t" "pmovsxbd $dst\t! load mask (4B to 4I)" %} ins_encode %{ int vector_len = 0; __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask8i(vecY dst, vecD src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vpxor $dst,$dst\n\t" "vpsubb $dst,$src\n\t" "vpmovsxbd $dst\t! load mask (8B to 8I)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0); __ vpmovsxbd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadmask16i(vecZ dst, vecX src, vecZ tmp) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (VectorLoadMask src)); effect(TEMP dst, TEMP tmp); format %{ "vpxor $dst,$dst\n\t" "vpmovzxbd $tmp,$src\n\t" "vpsubd $dst,$tmp\t! load mask (16B to 16I)" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vpmovzxbd($tmp$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadmask1l(vecD dst, vecS src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 && (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubb $dst,$src\n\t" "pmovsxbq $dst\t! 
load mask (1B to 1L)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask2l(vecX dst, vecS src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "pxor $dst,$dst\n\t" "psubb $dst,$src\n\t" "pmovsxbq $dst\t! load mask (2B to 2L)" %} ins_encode %{ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); __ psubb($dst$$XMMRegister, $src$$XMMRegister); __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadmask4l(vecY dst, vecS src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vpxor $dst,$dst\n\t" "vpsubb $dst,$src\n\t" "vpmovsxbq $dst\t! load mask (4B to 4L)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0); __ vpmovsxbq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadmask8l(vecZ dst, vecD src, vecZ tmp) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (VectorLoadMask src)); effect(TEMP dst, TEMP tmp); format %{ "vpxor $dst,$dst\n\t" "vpmovzxbq $tmp,$src\n\t" "vpsubq $dst,$tmp\t! load mask (8B to 8L)" %} ins_encode %{ int vector_len = 2; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len); __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct storemask8b(vecD dst, vecD src) %{ predicate(UseSSE >= 3 && n->as_Vector()->length() == 8 && static_cast(n)->GetInputMaskSize() == 1); match(Set dst (VectorStoreMask src)); format %{ "pabsb $dst,$src\t! store mask (8B to 8B)" %} ins_encode %{ __ pabsb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask16b(vecX dst, vecX src) %{ predicate(UseSSE >=3 && n->as_Vector()->length() == 16 && static_cast(n)->GetInputMaskSize() == 1); match(Set dst (VectorStoreMask src)); format %{ "pabsb $dst,$src\t! store mask (16B to 16B)" %} ins_encode %{ __ pabsb($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask32b(vecY dst, vecY src) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && static_cast(n)->GetInputMaskSize() == 1); match(Set dst (VectorStoreMask src)); format %{ "vpabsb $dst,$src\t! store mask (32B to 32B)" %} ins_encode %{ int vector_len = 1; __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct storemask64b(vecZ dst, vecZ src, rRegL scratch) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && static_cast(n)->GetInputMaskSize() == 1); match(Set dst (VectorStoreMask src)); effect(TEMP scratch); format %{ "vpcmpeqb k2,$src,0xFFFFFFFF\n\t" "vmovdqub $dst,k2,0x01010101\t! 
store mask (64B to 64B)" %} ins_encode %{ int vector_len = 2; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. Assembler::ComparisonPredicate cp = Assembler::eq; __ evpcmpb(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct storemask4s(vecS dst, vecD src) %{ predicate(UseSSE >= 3 && n->as_Vector()->length() == 4 && static_cast(n)->GetInputMaskSize() == 2); match(Set dst (VectorStoreMask src)); format %{ "pabsw $dst,$src\n\t" "packuswb $dst,$dst\t! store mask (4S to 4B)" %} ins_encode %{ __ pabsw($dst$$XMMRegister, $src$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask8s(vecD dst, vecX src) %{ predicate(UseSSE >=3 && n->as_Vector()->length() == 8 && static_cast(n)->GetInputMaskSize() == 2); match(Set dst (VectorStoreMask src)); format %{ "pabsw $dst,$src\n\t" "packuswb $dst,$dst\t! store mask (8S to 8B)" %} ins_encode %{ __ pabsw($dst$$XMMRegister, $src$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask16s(vecX dst, vecY src, vecY tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && static_cast(n)->GetInputMaskSize() == 2); match(Set dst (VectorStoreMask src)); effect(TEMP dst, TEMP tmp); format %{ "vpabsw $dst,$src\n\t" "vextracti128 $tmp,$dst\n\t" "vpackuswb $dst,$dst,$tmp\t! store mask (16S to 16B)" %} ins_encode %{ int vector_len = 1; __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct storemask32s(vecY dst, vecZ src, rRegL scratch) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && static_cast(n)->GetInputMaskSize() == 2); match(Set dst (VectorStoreMask src)); effect(TEMP scratch); format %{ "vpcmpeqw k2,$src,0xFFFFFFFF\n\t" "vmovdqub $dst,k2,0x01010101\t! store mask (32S to 32B)" %} ins_encode %{ int vector_len = 2; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. Assembler::ComparisonPredicate cp = Assembler::eq; __ evpcmpw(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); // The dst is 256-bit - thus we can do a smaller move. __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 1, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct storemask2i(vecS dst, vecD src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && static_cast(n)->GetInputMaskSize() == 4); match(Set dst (VectorStoreMask src)); format %{ "pabsd $dst,$src\n\t" "packusdw $dst,$dst\n\t" "packuswb $dst,$dst\t! store mask (2I to 2B)" %} ins_encode %{ __ pabsd($dst$$XMMRegister, $src$$XMMRegister); __ packusdw($dst$$XMMRegister, $dst$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask4i(vecS dst, vecX src) %{ predicate(UseSSE >=4 && n->as_Vector()->length() == 4 && static_cast(n)->GetInputMaskSize() == 4); match(Set dst (VectorStoreMask src)); format %{ "pabsd $dst,$src\n\t" "packusdw $dst,$dst\n\t" "packuswb $dst,$dst\t! 
store mask (4I to 4B)" %} ins_encode %{ __ pabsd($dst$$XMMRegister, $src$$XMMRegister); __ packusdw($dst$$XMMRegister, $dst$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask8i(vecD dst, vecY src, vecY tmp) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && static_cast(n)->GetInputMaskSize() == 4); match(Set dst (VectorStoreMask src)); effect(TEMP dst, TEMP tmp); format %{ "vpxor $dst,$dst\n\t" "vpsubd $dst,$src\n\t" "vextracti128 $tmp,$dst\n\t" "vpackusdw $dst,$dst,$tmp\n\t" "vpackuswb $dst,$dst,$dst\t! store mask (8I to 8B)" %} ins_encode %{ int vector_len = 1; __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} instruct storemask16i(vecX dst, vecZ src, rRegL scratch) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16 && static_cast(n)->GetInputMaskSize() == 4); match(Set dst (VectorStoreMask src)); effect(TEMP scratch); format %{ "vpcmpeqd k2,$src,0xFFFFFFFF\n\t" "vmovdqub $dst,k2,0x01010101\t! store mask (16I to 16B)" %} ins_encode %{ int vector_len = 2; KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. __ evpcmpeqd(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); // The dst is only 128-bit - thus we can do a smaller move. __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 0, $scratch$$Register); %} ins_pipe( pipe_slow ); %} instruct storemask1l(vecS dst, vecD src) %{ predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 && static_cast(n)->GetInputMaskSize() == 8); match(Set dst (VectorStoreMask src)); format %{ "pabsd $dst,$src\n\t" "packusdw $dst,$dst\n\t" "packuswb $dst,$dst\t! store mask (1L to 1B)" %} ins_encode %{ __ pabsd($dst$$XMMRegister, $src$$XMMRegister); __ packusdw($dst$$XMMRegister, $dst$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask2l(vecS dst, vecX src) %{ predicate(UseSSE >=4 && n->as_Vector()->length() == 2 && static_cast(n)->GetInputMaskSize() == 8); match(Set dst (VectorStoreMask src)); format %{ "pshufd $dst,$src,0x8\n\t" "pabsd $dst,$dst\n\t" "packusdw $dst,$dst\n\t" "packuswb $dst,$dst\t! store mask (2L to 2B)" %} ins_encode %{ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); __ packusdw($dst$$XMMRegister, $dst$$XMMRegister); __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct storemask4l(vecS dst, vecY src, rRegL scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && static_cast(n)->GetInputMaskSize() == 8); match(Set dst (VectorStoreMask src)); effect(TEMP scratch, TEMP dst); format %{ "vmovdqu $dst,[0,2,4,6,1,3,5,7]\n\t" "vpermd $dst,$dst,$src," "vpabsd $dst,$dst\n\t" "vpackusdw $dst,$dst,$dst\n\t" "vpackuswb $dst,$dst,$dst\t! store mask (4L to 4B)" %} ins_encode %{ // vpermd and load are 256-bit, but all others are 128-bit instructions. 
    int vector_len = 0;
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_long_perm_mask()), $scratch$$Register);
    __ vpermd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (8L to 8B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    // The dst is only 128-bit - thus we can do a smaller move.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- LOAD_IOTA_INDICES----------------------------------

instruct loadcon4b(vecS dst, immI0 src, rRegI scratch) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "movdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadcon8b(vecD dst, immI0 src, rRegI scratch) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "movdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadcon16b(vecX dst, immI0 src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "vmovdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadcon32b(vecY dst, immI0 src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "vmovdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadcon64b(vecZ dst, immI0 src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "vmovdqub $dst,k0, CONSTANT_MEMORY\t! 
load iota indices" %} ins_encode %{ int vector_len = 2; __ evmovdqub($dst$$XMMRegister, k0, ExternalAddress(vector_iota_indices()), false, vector_len, $scratch$$Register); %} ins_pipe( pipe_slow ); %} //-------------------------------- LOAD_SHUFFLE ---------------------------------- instruct loadshuffle8b(vecD dst, vecD src) %{ predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadShuffle src)); format %{ "movdqu $dst, $src\t! load shuffle (load 8B for 8BRearrange)" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadshuffle16b(vecX dst, vecX src) %{ predicate(UseSSE > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadShuffle src)); format %{ "movdqu $dst, $src\t! load shuffle (load 16B for 16BRearrange)" %} ins_encode %{ __ movdqu($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadshuffle32b(vecY dst, vecY src) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadShuffle src)); format %{ "vmovdqu $dst, $src\t! load shuffle (load 32B for 32BRearrange)" %} ins_encode %{ __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadshuffle64b(vecZ dst, vecZ src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadShuffle src)); format %{ "vmovdqu $dst, $src\t! load shuffle (load 64B for 64BRearrange)" %} ins_encode %{ __ evmovdqul($dst$$XMMRegister, $src$$XMMRegister, 2); %} ins_pipe( pipe_slow ); %} instruct loadshuffle4s(vecD dst, vecS src, vecD tmp, vecD tmp2, rRegI scratch) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadShuffle src)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); format %{ "pmovsxbw $tmp, $src \n\t" "movdqu $tmp2,0x0002000200020002\n\t" "pmullw $tmp,$tmp2\n\t" "movdqu $tmp2,$tmp\n\t" "psllw $tmp2,0x8\n\t" "paddb $tmp2,$tmp\n\t" "movdqu $tmp, 0x0100010001000100 \n\t" "paddb $tmp2,$tmp\n\t" "movdqu $dst, $tmp2\t! load shuffle (load 4B for 4SRearrange)" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_sizemask()), $scratch$$Register); __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($tmp2$$XMMRegister, $tmp$$XMMRegister); __ psllw($tmp2$$XMMRegister, 0x8); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadshuffle8s(vecX dst, vecD src, vecX tmp, vecX tmp2, rRegI scratch) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadShuffle src)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); format %{ "pmovsxbw $tmp, $src \n\t" "movdqu $tmp2,0x0002000200020002\n\t" "pmullw $tmp,$tmp2\n\t" "movdqu $tmp2,$tmp\n\t" "psllw $tmp2,0x8\n\t" "paddb $tmp2,$tmp\n\t" "movdqu $tmp, 0x0100010001000100 \n\t" "paddb $tmp2,$tmp\n\t" "movdqu $dst, $tmp2\t! 
load shuffle (load 8B for 8SRearrange)" %} ins_encode %{ __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_sizemask()), $scratch$$Register); __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); __ movdqu($tmp2$$XMMRegister, $tmp$$XMMRegister); __ psllw($tmp2$$XMMRegister, 0x8); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadshuffle16s(vecY dst, vecX src) %{ predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadShuffle src)); format %{ "vpmovsxbw $dst,$src\t! load shuffle (load 16B for 16SRearrange)" %} ins_encode %{ int vector_len = 1; __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadshuffle32s(vecZ dst, vecY src) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadShuffle src)); format %{ "vpmovsxbw $dst,$src\t! load shuffle (load 32B for 32SRearrange)" %} ins_encode %{ int vector_len = 2; __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct loadshuffle4i(vecX dst, vecS src, vecX tmp, vecX tmp2, rRegI scratch) %{ predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (VectorLoadShuffle src)); effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); format %{ "pmovsxbd $tmp, $src \n\t" "movdqu $tmp2, 0x0000000400000004 \n\t" "pmulld $tmp2, $tmp \n\t" "movdqu $tmp,$tmp2\n\t" "pslld $tmp2,0x8\n\t" "paddb $tmp2,$tmp\n\t" "pslld $tmp2,0x8\n\t" "paddb $tmp2,$tmp\n\t" "pslld $tmp2,0x8\n\t" "paddb $tmp2,$tmp\n\t" "movdqu $tmp, 0x0302010003020100 \n\t" "paddb $tmp2,$tmp\n\t" "movdqu $dst, $tmp2\t! load shuffle (load 4B for 4IRearrange)" %} ins_encode %{ __ pmovsxbd($tmp$$XMMRegister, $src$$XMMRegister); __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_int_sizemask()), $scratch$$Register); __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdqu($tmp$$XMMRegister, $tmp2$$XMMRegister); __ pslld($tmp2$$XMMRegister, 0x8); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ pslld($tmp2$$XMMRegister, 0x8); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ pslld($tmp2$$XMMRegister, 0x8); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct loadshuffle8i(vecY dst, vecD src) %{ predicate(UseAVX >= 1 && n->as_Vector()->length() == 8 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (VectorLoadShuffle src)); format %{ "vpmovsxbd $dst, $src\t! 
instruct loadshuffle8i(vecY dst, vecD src) %{
  predicate(UseAVX >= 1 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vpmovsxbd $dst, $src\t! load shuffle (load 8B for 8IRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadshuffle16i(vecZ dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vpmovsxbd $dst, $src\t! load shuffle (load 16B for 16IRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadshuffle4l(vecY dst, vecS src, vecY tmp, vecY tmp2, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch);
  format %{ "vpmovsxbd  $tmp2, $src\n\t"
            "movdqu     $tmp, 0x0000000200000002\n\t"
            "pmulld     $tmp, $tmp2\n\t"
            "vpmovsxdq  $tmp2, $tmp\n\t"
            "vpsllq     $tmp, $tmp2, 0x20\n\t"
            "vpaddd     $tmp2, $tmp2, $tmp\n\t"
            "vmovdqu    $tmp, 0x0000000100000000\n\t"
            "vpaddd     $tmp2, $tmp2, $tmp\n\t"
            "vmovdqu    $dst, $tmp2\t! load shuffle (load 4B for 4LRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbd($tmp2$$XMMRegister, $src$$XMMRegister, 0);
    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sizemask()), $scratch$$Register);
    __ pmulld($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmovsxdq($tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x20, vector_len);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_shufflemask()), $scratch$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
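// Note: loadshuffle4l above (and loadshuffle8l below) produce *dword* indices
// rather than byte masks: each 64-bit lane index i is expanded into the dword
// pair (2*i, 2*i+1), because the matching 4L/8L rearrange rules permute with
// VPERMD, a dword permute.  Scalar sketch (illustrative only, not JVM code):
//
//   for (int k = 0; k < n; k++) {
//     dmask[2*k]     = 2*idx[k];      // low  dword of qword lane idx[k]
//     dmask[2*k + 1] = 2*idx[k] + 1;  // high dword of qword lane idx[k]
//   }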
instruct loadshuffle8l(vecZ dst, vecD src, vecZ tmp, vecZ tmp2, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch);
  format %{ "vpmovsxbd  $tmp2, $src\n\t"
            "vmovdqu    $tmp, 0x0000000200000002\n\t"
            "vpmulld    $tmp, $tmp2\n\t"
            "vpmovsxdq  $tmp2, $tmp\n\t"
            "vpsllq     $tmp, $tmp2, 0x20\n\t"
            "vpaddd     $tmp2, $tmp2, $tmp\n\t"
            "vmovdqu    $tmp, 0x0000000100000000\n\t"
            "vpaddd     $tmp2, $tmp2, $tmp\n\t"
            "vmovdqu    $dst, $tmp2\t! load shuffle (load 8B for 8LRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxbd($tmp2$$XMMRegister, $src$$XMMRegister, 1);
    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sizemask()), $scratch$$Register);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 1);
    __ vpmovsxdq($tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x20, vector_len);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ evmovdqul($tmp$$XMMRegister, k0, ExternalAddress(vector_long_shufflemask()), false, vector_len, $scratch$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ evmovdqul($dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Rearrange -------------------------------------

instruct rearrange8b(vecD dst, vecD shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (8BRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange16b(vecX dst, vecX shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (16BRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange32b(vecY dst, vecY src, vecY shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vbmi() &&
            n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermb $dst, $shuffle, $src\t! rearrange (32BRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange64b(vecZ dst, vecZ src, vecZ shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vbmi() &&
            n->as_Vector()->length() == 64 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermb $dst, $shuffle, $src\t! rearrange (64BRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
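// Note: VectorRearrange gathers lanes within one register: dst[j] =
// src[shuffle[j]] for every lane j.  The byte flavors above map directly
// onto PSHUFB/VPERMB; the short and int flavors below reuse PSHUFB with the
// widened byte masks built by the LOAD_SHUFFLE rules.  Scalar model
// (illustrative only, not JVM code):
//
//   for (int j = 0; j < n; j++) {
//     dst[j] = src[shuffle[j]];   // in-register lane gather
//   }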
instruct rearrange4s(vecD dst, vecD shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (4SRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange8s(vecX dst, vecX shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (8SRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange16s(vecY dst, vecY src, vecY shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
            n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermw $dst, $shuffle, $src\t! rearrange (16SRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange32s(vecZ dst, vecZ src, vecZ shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
            n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermw $dst, $shuffle, $src\t! rearrange (32SRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange4i(vecX dst, vecX shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (4IRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange8i(vecY dst, vecY src, vecY shuffle) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermd $dst, $shuffle, $src\t! rearrange (8IRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange16i(vecZ dst, vecZ src, vecZ shuffle) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermd $dst, $shuffle, $src\t! rearrange (16IRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange4l(vecY dst, vecY src, vecY shuffle) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermd $dst, $shuffle, $src\t! rearrange (4LRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
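// Note: AVX2 has no qword permute with a variable index vector (VPERMQ takes
// an immediate), so the 4L/8L rules here use VPERMD with the doubled dword
// indices prepared by loadshuffle4l/loadshuffle8l: moving both dword halves
// of a qword lane together is equivalent to moving the qword itself.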
instruct rearrange8l(vecZ dst, vecZ src, vecZ shuffle) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermd $dst, $shuffle, $src\t! rearrange (8LRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
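// Note: every FmaVD/FmaVF rule below emits a true fused multiply-add:
// c = a * b + c with a single rounding at the end, which is why the result
// can differ in the last bit from a separate vector multiply plus add (two
// roundings).  Scalar reference (illustrative only, using the C library fma):
//
//   #include <math.h>
//   for (int j = 0; j < n; j++) {
//     c[j] = fma(a[j], b[j], c[j]);   // one rounding, like vfmadd231pd
//   }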
packed4F" %} ins_cost(150); ins_encode %{ int vector_len = 0; __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // a * b + c instruct vfma4F_mem(vecX a, memory b, vecX c) %{ predicate(UseFMA && n->as_Vector()->length() == 4); match(Set c (FmaVF c (Binary a (LoadVector b)))); format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} ins_cost(150); ins_encode %{ int vector_len = 0; __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // a * b + c instruct vfma8F_reg(vecY a, vecY b, vecY c) %{ predicate(UseFMA && n->as_Vector()->length() == 8); match(Set c (FmaVF c (Binary a b))); format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // a * b + c instruct vfma8F_mem(vecY a, memory b, vecY c) %{ predicate(UseFMA && n->as_Vector()->length() == 8); match(Set c (FmaVF c (Binary a (LoadVector b)))); format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} ins_cost(150); ins_encode %{ int vector_len = 1; __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // a * b + c instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{ predicate(UseFMA && n->as_Vector()->length() == 16); match(Set c (FmaVF c (Binary a b))); format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // a * b + c instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{ predicate(UseFMA && n->as_Vector()->length() == 16); match(Set c (FmaVF c (Binary a (LoadVector b)))); format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} ins_cost(150); ins_encode %{ int vector_len = 2; __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- Vector Multiply Add -------------------------------------- instruct smuladd4S2I_reg(vecD dst, vecD src1) %{ predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2); match(Set dst (MulAddVS2VI dst src1)); format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %} ins_encode %{ __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 2); match(Set dst (MulAddVS2VI src1 src2)); format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %} ins_encode %{ int vector_len = 0; __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct smuladd8S4I_reg(vecX dst, vecX src1) %{ predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4); match(Set dst (MulAddVS2VI dst src1)); format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %} ins_encode %{ __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 0 && n->as_Vector()->length() == 4); match(Set dst (MulAddVS2VI src1 src2)); format %{ "vpmaddwd $dst,$src1,$src2\t! 
muladd packed8Sto4I" %} ins_encode %{ int vector_len = 0; __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulAddVS2VI src1 src2)); format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %} ins_encode %{ int vector_len = 1; __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (MulAddVS2VI src1 src2)); format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %} ins_encode %{ int vector_len = 2; __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} // --------------------------------- Vector Multiply Add Add ---------------------------------- instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{ predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2); match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %} ins_encode %{ int vector_len = 0; __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); ins_cost(10); %} instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{ predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4); match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %} ins_encode %{ int vector_len = 0; __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); ins_cost(10); %} instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8); match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %} ins_encode %{ int vector_len = 1; __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); ins_cost(10); %} instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %} ins_encode %{ int vector_len = 2; __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); ins_cost(10); %} // --------------------------------- PopCount -------------------------------------- instruct vpopcount2I(vecD dst, vecD src) %{ predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2); match(Set dst (PopCountVI src)); format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %} ins_encode %{ int vector_len = 0; __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} instruct vpopcount4I(vecX dst, vecX src) %{ predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4); match(Set dst (PopCountVI src)); format %{ "vpopcntd $dst,$src\t! 
instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
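// Note: the PopCountVI rules above emit VPOPCNTD (AVX512_VPOPCNTDQ), one
// bit count per 32-bit lane.  Scalar model (illustrative only, e.g. with the
// GCC/Clang builtin):
//
//   for (int j = 0; j < n; j++) {
//     dst[j] = __builtin_popcount((unsigned)src[j]);  // bits set in lane j
//   }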