ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store a 64-byte vector with the EVEX-encoded move.
instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2; // 2 selects the 512-bit encoding (see sibling 512-bit rules)
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB src));
  // Note: fixed format text "replicate632B" -> "replicate32B" (debug output only).
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  // Note: fixed format text "lreplicate32B" -> "replicate32B" (debug output only),
  // matching the naming used by the other *_imm rules.
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
int vector_len = 1; // 1 selects the 256-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl64B_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2; // 2 selects the 512-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
int vector_len = 1; // 1 selects the 256-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32S_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2; // 2 selects the 512-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\n\t"
            "vinserti64x4h $dst k0,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  // Note: fixed stray ')' in format text ("replicate4I zero)" -> "replicate4I zero").
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16I_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3491 int vector_len = 2; 3492 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3493 %} 3494 ins_pipe( fpu_reg_reg ); 3495 %} 3496 3497 // Replicate long (8 byte) scalar to be vector 3498 #ifdef _LP64 3499 instruct Repl2L(vecX dst, rRegL src) %{ 3500 predicate(n->as_Vector()->length() == 2); 3501 match(Set dst (ReplicateL src)); 3502 format %{ "movdq $dst,$src\n\t" 3503 "punpcklqdq $dst,$dst\t! replicate2L" %} 3504 ins_encode %{ 3505 __ movdq($dst$$XMMRegister, $src$$Register); 3506 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3507 %} 3508 ins_pipe( pipe_slow ); 3509 %} 3510 3511 instruct Repl4L(vecY dst, rRegL src) %{ 3512 predicate(n->as_Vector()->length() == 4); 3513 match(Set dst (ReplicateL src)); 3514 format %{ "movdq $dst,$src\n\t" 3515 "punpcklqdq $dst,$dst\n\t" 3516 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3517 ins_encode %{ 3518 __ movdq($dst$$XMMRegister, $src$$Register); 3519 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3520 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3521 %} 3522 ins_pipe( pipe_slow ); 3523 %} 3524 3525 instruct Repl8L(vecZ dst, rRegL src) %{ 3526 predicate(n->as_Vector()->length() == 8); 3527 match(Set dst (ReplicateL src)); 3528 format %{ "movdq $dst,$src\n\t" 3529 "punpcklqdq $dst,$dst\n\t" 3530 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3531 "vinserti64x4h $dst k0,$dst,$dst\t! 
upper replicate4L" %} 3532 ins_encode %{ 3533 __ movdq($dst$$XMMRegister, $src$$Register); 3534 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3535 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3536 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3537 %} 3538 ins_pipe( pipe_slow ); 3539 %} 3540 #else // _LP64 3541 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3542 predicate(n->as_Vector()->length() == 2); 3543 match(Set dst (ReplicateL src)); 3544 effect(TEMP dst, USE src, TEMP tmp); 3545 format %{ "movdl $dst,$src.lo\n\t" 3546 "movdl $tmp,$src.hi\n\t" 3547 "punpckldq $dst,$tmp\n\t" 3548 "punpcklqdq $dst,$dst\t! replicate2L"%} 3549 ins_encode %{ 3550 __ movdl($dst$$XMMRegister, $src$$Register); 3551 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3552 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3553 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3554 %} 3555 ins_pipe( pipe_slow ); 3556 %} 3557 3558 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3559 predicate(n->as_Vector()->length() == 4); 3560 match(Set dst (ReplicateL src)); 3561 effect(TEMP dst, USE src, TEMP tmp); 3562 format %{ "movdl $dst,$src.lo\n\t" 3563 "movdl $tmp,$src.hi\n\t" 3564 "punpckldq $dst,$tmp\n\t" 3565 "punpcklqdq $dst,$dst\n\t" 3566 "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 3567 ins_encode %{ 3568 __ movdl($dst$$XMMRegister, $src$$Register); 3569 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3570 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3571 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3572 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3573 %} 3574 ins_pipe( pipe_slow ); 3575 %} 3576 3577 instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{ 3578 predicate(n->as_Vector()->length() == 4); 3579 match(Set dst (ReplicateL src)); 3580 effect(TEMP dst, USE src, TEMP tmp); 3581 format %{ "movdl $dst,$src.lo\n\t" 3582 "movdl $tmp,$src.hi\n\t" 3583 "punpckldq $dst,$tmp\n\t" 3584 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3585 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3586 ins_encode %{ 3587 __ movdl($dst$$XMMRegister, $src$$Register); 3588 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3589 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3590 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3591 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3592 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3593 %} 3594 ins_pipe( pipe_slow ); 3595 %} 3596 #endif // _LP64 3597 3598 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3599 instruct Repl2L_imm(vecX dst, immL con) %{ 3600 predicate(n->as_Vector()->length() == 2); 3601 match(Set dst (ReplicateL con)); 3602 format %{ "movq $dst,[$constantaddress]\n\t" 3603 "punpcklqdq $dst,$dst\t! 
replicate2L($con)" %} 3604 ins_encode %{ 3605 __ movq($dst$$XMMRegister, $constantaddress($con)); 3606 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3607 %} 3608 ins_pipe( pipe_slow ); 3609 %} 3610 3611 instruct Repl4L_imm(vecY dst, immL con) %{ 3612 predicate(n->as_Vector()->length() == 4); 3613 match(Set dst (ReplicateL con)); 3614 format %{ "movq $dst,[$constantaddress]\n\t" 3615 "punpcklqdq $dst,$dst\n\t" 3616 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3617 ins_encode %{ 3618 __ movq($dst$$XMMRegister, $constantaddress($con)); 3619 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3620 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3621 %} 3622 ins_pipe( pipe_slow ); 3623 %} 3624 3625 instruct Repl8L_imm(vecZ dst, immL con) %{ 3626 predicate(n->as_Vector()->length() == 8); 3627 match(Set dst (ReplicateL con)); 3628 format %{ "movq $dst,[$constantaddress]\n\t" 3629 "punpcklqdq $dst,$dst\n\t" 3630 "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t" 3631 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %} 3632 ins_encode %{ 3633 __ movq($dst$$XMMRegister, $constantaddress($con)); 3634 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3635 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3636 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3637 %} 3638 ins_pipe( pipe_slow ); 3639 %} 3640 3641 // Long could be loaded into xmm register directly from memory. 3642 instruct Repl2L_mem(vecX dst, memory mem) %{ 3643 predicate(n->as_Vector()->length() == 2); 3644 match(Set dst (ReplicateL (LoadL mem))); 3645 format %{ "movq $dst,$mem\n\t" 3646 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3647 ins_encode %{ 3648 __ movq($dst$$XMMRegister, $mem$$Address); 3649 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct Repl4L_mem(vecY dst, memory mem) %{ 3655 predicate(n->as_Vector()->length() == 4); 3656 match(Set dst (ReplicateL (LoadL mem))); 3657 format %{ "movq $dst,$mem\n\t" 3658 "punpcklqdq $dst,$dst\n\t" 3659 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3660 ins_encode %{ 3661 __ movq($dst$$XMMRegister, $mem$$Address); 3662 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3663 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3664 %} 3665 ins_pipe( pipe_slow ); 3666 %} 3667 3668 instruct Repl8L_mem(vecZ dst, memory mem) %{ 3669 predicate(n->as_Vector()->length() == 8); 3670 match(Set dst (ReplicateL (LoadL mem))); 3671 format %{ "movq $dst,$mem\n\t" 3672 "punpcklqdq $dst,$dst\n\t" 3673 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3674 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3675 ins_encode %{ 3676 __ movq($dst$$XMMRegister, $mem$$Address); 3677 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3678 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3679 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3680 %} 3681 ins_pipe( pipe_slow ); 3682 %} 3683 3684 // Replicate long (8 byte) scalar zero to be vector 3685 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3686 predicate(n->as_Vector()->length() == 2); 3687 match(Set dst (ReplicateL zero)); 3688 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3689 ins_encode %{ 3690 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3691 %} 3692 ins_pipe( fpu_reg_reg ); 3693 %} 3694 3695 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3696 predicate(n->as_Vector()->length() == 4); 3697 match(Set dst (ReplicateL zero)); 3698 format %{ "vpxor $dst,$dst,$dst\t! 
replicate4L zero" %} 3699 ins_encode %{ 3700 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3701 int vector_len = 1; 3702 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3703 %} 3704 ins_pipe( fpu_reg_reg ); 3705 %} 3706 3707 instruct Repl8L_zero(vecZ dst, immL0 zero) %{ 3708 predicate(n->as_Vector()->length() == 8); 3709 match(Set dst (ReplicateL zero)); 3710 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 3711 ins_encode %{ 3712 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3713 int vector_len = 2; 3714 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3715 %} 3716 ins_pipe( fpu_reg_reg ); 3717 %} 3718 3719 // Replicate float (4 byte) scalar to be vector 3720 instruct Repl2F(vecD dst, regF src) %{ 3721 predicate(n->as_Vector()->length() == 2); 3722 match(Set dst (ReplicateF src)); 3723 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 3724 ins_encode %{ 3725 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3726 %} 3727 ins_pipe( fpu_reg_reg ); 3728 %} 3729 3730 instruct Repl4F(vecX dst, regF src) %{ 3731 predicate(n->as_Vector()->length() == 4); 3732 match(Set dst (ReplicateF src)); 3733 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 3734 ins_encode %{ 3735 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3736 %} 3737 ins_pipe( pipe_slow ); 3738 %} 3739 3740 instruct Repl8F(vecY dst, regF src) %{ 3741 predicate(n->as_Vector()->length() == 8); 3742 match(Set dst (ReplicateF src)); 3743 format %{ "pshufd $dst,$src,0x00\n\t" 3744 "vinsertf128h $dst,$dst,$dst\t! 
replicate8F" %} 3745 ins_encode %{ 3746 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3747 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3748 %} 3749 ins_pipe( pipe_slow ); 3750 %} 3751 3752 instruct Repl16F(vecZ dst, regF src) %{ 3753 predicate(n->as_Vector()->length() == 16); 3754 match(Set dst (ReplicateF src)); 3755 format %{ "pshufd $dst,$src,0x00\n\t" 3756 "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t" 3757 "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate8F" %} 3758 ins_encode %{ 3759 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3760 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3761 __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3762 %} 3763 ins_pipe( pipe_slow ); 3764 %} 3765 3766 // Replicate float (4 byte) scalar zero to be vector 3767 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3768 predicate(n->as_Vector()->length() == 2); 3769 match(Set dst (ReplicateF zero)); 3770 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3771 ins_encode %{ 3772 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3773 %} 3774 ins_pipe( fpu_reg_reg ); 3775 %} 3776 3777 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3778 predicate(n->as_Vector()->length() == 4); 3779 match(Set dst (ReplicateF zero)); 3780 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3781 ins_encode %{ 3782 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3783 %} 3784 ins_pipe( fpu_reg_reg ); 3785 %} 3786 3787 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3788 predicate(n->as_Vector()->length() == 8); 3789 match(Set dst (ReplicateF zero)); 3790 format %{ "vxorps $dst,$dst,$dst\t! 
replicate8F zero" %} 3791 ins_encode %{ 3792 int vector_len = 1; 3793 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3794 %} 3795 ins_pipe( fpu_reg_reg ); 3796 %} 3797 3798 instruct Repl16F_zero(vecZ dst, immF0 zero) %{ 3799 predicate(n->as_Vector()->length() == 16); 3800 match(Set dst (ReplicateF zero)); 3801 format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} 3802 ins_encode %{ 3803 int vector_len = 2; 3804 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3805 %} 3806 ins_pipe( fpu_reg_reg ); 3807 %} 3808 3809 // Replicate double (8 bytes) scalar to be vector 3810 instruct Repl2D(vecX dst, regD src) %{ 3811 predicate(n->as_Vector()->length() == 2); 3812 match(Set dst (ReplicateD src)); 3813 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3814 ins_encode %{ 3815 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 instruct Repl4D(vecY dst, regD src) %{ 3821 predicate(n->as_Vector()->length() == 4); 3822 match(Set dst (ReplicateD src)); 3823 format %{ "pshufd $dst,$src,0x44\n\t" 3824 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3825 ins_encode %{ 3826 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3827 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3828 %} 3829 ins_pipe( pipe_slow ); 3830 %} 3831 3832 instruct Repl8D(vecZ dst, regD src) %{ 3833 predicate(n->as_Vector()->length() == 8); 3834 match(Set dst (ReplicateD src)); 3835 format %{ "pshufd $dst,$src,0x44\n\t" 3836 "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t" 3837 "vinsertf64x4h $dst k0,$dst,$dst\t! 
upper replicate4D" %} 3838 ins_encode %{ 3839 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3840 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3841 __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 // Replicate double (8 byte) scalar zero to be vector 3847 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3848 predicate(n->as_Vector()->length() == 2); 3849 match(Set dst (ReplicateD zero)); 3850 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3851 ins_encode %{ 3852 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3853 %} 3854 ins_pipe( fpu_reg_reg ); 3855 %} 3856 3857 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3858 predicate(n->as_Vector()->length() == 4); 3859 match(Set dst (ReplicateD zero)); 3860 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3861 ins_encode %{ 3862 int vector_len = 1; 3863 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3864 %} 3865 ins_pipe( fpu_reg_reg ); 3866 %} 3867 3868 instruct Repl8D_zero(vecZ dst, immD0 zero) %{ 3869 predicate(n->as_Vector()->length() == 8); 3870 match(Set dst (ReplicateD zero)); 3871 format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 3872 ins_encode %{ 3873 int vector_len = 2; 3874 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3875 %} 3876 ins_pipe( fpu_reg_reg ); 3877 %} 3878 3879 // ====================REDUCTION ARITHMETIC======================================= 3880 3881 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 3882 predicate(UseSSE > 2 && UseAVX == 0); 3883 match(Set dst (AddReductionVI src1 src2)); 3884 effect(TEMP tmp2, TEMP tmp); 3885 format %{ "movdqu $tmp2,$src2\n\t" 3886 "phaddd $tmp2,$tmp2\n\t" 3887 "movd $tmp,$src1\n\t" 3888 "paddd $tmp,$tmp2\n\t" 3889 "movd $dst,$tmp\t! 
add reduction2I" %} 4946 ins_pipe( pipe_slow ); 4947 %} 4948 4949 // ====================VECTOR ARITHMETIC======================================= 4950 4951 // --------------------------------- ADD -------------------------------------- 4952 4953 // Bytes vector add 4954 instruct vadd4B(vecS dst, vecS src) %{ 4955 predicate(n->as_Vector()->length() == 4); 4956 match(Set dst (AddVB dst src)); 4957 format %{ "paddb $dst,$src\t! add packed4B" %} 4958 ins_encode %{ 4959 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 4965 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4966 match(Set dst (AddVB src1 src2)); 4967 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 4968 ins_encode %{ 4969 int vector_len = 0; 4970 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4971 %} 4972 ins_pipe( pipe_slow ); 4973 %} 4974 4975 instruct vadd8B(vecD dst, vecD src) %{ 4976 predicate(n->as_Vector()->length() == 8); 4977 match(Set dst (AddVB dst src)); 4978 format %{ "paddb $dst,$src\t! add packed8B" %} 4979 ins_encode %{ 4980 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 4981 %} 4982 ins_pipe( pipe_slow ); 4983 %} 4984 4985 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 4986 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4987 match(Set dst (AddVB src1 src2)); 4988 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 4989 ins_encode %{ 4990 int vector_len = 0; 4991 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4992 %} 4993 ins_pipe( pipe_slow ); 4994 %} 4995 4996 instruct vadd16B(vecX dst, vecX src) %{ 4997 predicate(n->as_Vector()->length() == 16); 4998 match(Set dst (AddVB dst src)); 4999 format %{ "paddb $dst,$src\t! 
add packed16B" %} 5000 ins_encode %{ 5001 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5002 %} 5003 ins_pipe( pipe_slow ); 5004 %} 5005 5006 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5007 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5008 match(Set dst (AddVB src1 src2)); 5009 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5010 ins_encode %{ 5011 int vector_len = 0; 5012 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5013 %} 5014 ins_pipe( pipe_slow ); 5015 %} 5074 predicate(n->as_Vector()->length() == 2); 5075 match(Set dst (AddVS dst src)); 5076 format %{ "paddw $dst,$src\t! add packed2S" %} 5077 ins_encode %{ 5078 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5079 %} 5080 ins_pipe( pipe_slow ); 5081 %} 5082 5083 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5084 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5085 match(Set dst (AddVS src1 src2)); 5086 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5087 ins_encode %{ 5088 int vector_len = 0; 5089 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5090 %} 5091 ins_pipe( pipe_slow ); 5092 %} 5093 5094 instruct vadd4S(vecD dst, vecD src) %{ 5095 predicate(n->as_Vector()->length() == 4); 5096 match(Set dst (AddVS dst src)); 5097 format %{ "paddw $dst,$src\t! add packed4S" %} 5098 ins_encode %{ 5099 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5100 %} 5101 ins_pipe( pipe_slow ); 5102 %} 5103 5104 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 5105 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5106 match(Set dst (AddVS src1 src2)); 5107 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed4S" %} 5108 ins_encode %{ 5109 int vector_len = 0; 5110 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5111 %} 5112 ins_pipe( pipe_slow ); 5113 %} 5114 5115 instruct vadd8S(vecX dst, vecX src) %{ 5116 predicate(n->as_Vector()->length() == 8); 5117 match(Set dst (AddVS dst src)); 5118 format %{ "paddw $dst,$src\t! add packed8S" %} 5119 ins_encode %{ 5120 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5121 %} 5122 ins_pipe( pipe_slow ); 5123 %} 5124 5125 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 5126 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5127 match(Set dst (AddVS src1 src2)); 5128 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5129 ins_encode %{ 5130 int vector_len = 0; 5131 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5132 %} 5133 ins_pipe( pipe_slow ); 5134 %} 5193 predicate(n->as_Vector()->length() == 2); 5194 match(Set dst (AddVI dst src)); 5195 format %{ "paddd $dst,$src\t! add packed2I" %} 5196 ins_encode %{ 5197 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5198 %} 5199 ins_pipe( pipe_slow ); 5200 %} 5201 5202 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 5203 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5204 match(Set dst (AddVI src1 src2)); 5205 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 5206 ins_encode %{ 5207 int vector_len = 0; 5208 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5209 %} 5210 ins_pipe( pipe_slow ); 5211 %} 5212 5213 instruct vadd4I(vecX dst, vecX src) %{ 5214 predicate(n->as_Vector()->length() == 4); 5215 match(Set dst (AddVI dst src)); 5216 format %{ "paddd $dst,$src\t! 
add packed4I" %} 5217 ins_encode %{ 5218 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5219 %} 5220 ins_pipe( pipe_slow ); 5221 %} 5222 5223 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 5224 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5225 match(Set dst (AddVI src1 src2)); 5226 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 5227 ins_encode %{ 5228 int vector_len = 0; 5229 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5230 %} 5231 ins_pipe( pipe_slow ); 5232 %} 5368 predicate(n->as_Vector()->length() == 2); 5369 match(Set dst (AddVF dst src)); 5370 format %{ "addps $dst,$src\t! add packed2F" %} 5371 ins_encode %{ 5372 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 5378 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5379 match(Set dst (AddVF src1 src2)); 5380 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 5381 ins_encode %{ 5382 int vector_len = 0; 5383 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5384 %} 5385 ins_pipe( pipe_slow ); 5386 %} 5387 5388 instruct vadd4F(vecX dst, vecX src) %{ 5389 predicate(n->as_Vector()->length() == 4); 5390 match(Set dst (AddVF dst src)); 5391 format %{ "addps $dst,$src\t! add packed4F" %} 5392 ins_encode %{ 5393 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5394 %} 5395 ins_pipe( pipe_slow ); 5396 %} 5397 5398 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 5399 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5400 match(Set dst (AddVF src1 src2)); 5401 format %{ "vaddps $dst,$src1,$src2\t! 
add packed4F" %} 5402 ins_encode %{ 5403 int vector_len = 0; 5404 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5405 %} 5406 ins_pipe( pipe_slow ); 5407 %} 5545 predicate(n->as_Vector()->length() == 4); 5546 match(Set dst (SubVB dst src)); 5547 format %{ "psubb $dst,$src\t! sub packed4B" %} 5548 ins_encode %{ 5549 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5550 %} 5551 ins_pipe( pipe_slow ); 5552 %} 5553 5554 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 5555 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5556 match(Set dst (SubVB src1 src2)); 5557 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 5558 ins_encode %{ 5559 int vector_len = 0; 5560 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5561 %} 5562 ins_pipe( pipe_slow ); 5563 %} 5564 5565 instruct vsub8B(vecD dst, vecD src) %{ 5566 predicate(n->as_Vector()->length() == 8); 5567 match(Set dst (SubVB dst src)); 5568 format %{ "psubb $dst,$src\t! sub packed8B" %} 5569 ins_encode %{ 5570 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5571 %} 5572 ins_pipe( pipe_slow ); 5573 %} 5574 5575 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 5576 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5577 match(Set dst (SubVB src1 src2)); 5578 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 5579 ins_encode %{ 5580 int vector_len = 0; 5581 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5582 %} 5583 ins_pipe( pipe_slow ); 5584 %} 5585 5586 instruct vsub16B(vecX dst, vecX src) %{ 5587 predicate(n->as_Vector()->length() == 16); 5588 match(Set dst (SubVB dst src)); 5589 format %{ "psubb $dst,$src\t! 
sub packed16B" %} 5590 ins_encode %{ 5591 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5592 %} 5593 ins_pipe( pipe_slow ); 5594 %} 5595 5596 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 5597 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5598 match(Set dst (SubVB src1 src2)); 5599 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 5600 ins_encode %{ 5601 int vector_len = 0; 5602 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5603 %} 5604 ins_pipe( pipe_slow ); 5605 %} 5664 predicate(n->as_Vector()->length() == 2); 5665 match(Set dst (SubVS dst src)); 5666 format %{ "psubw $dst,$src\t! sub packed2S" %} 5667 ins_encode %{ 5668 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5669 %} 5670 ins_pipe( pipe_slow ); 5671 %} 5672 5673 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 5674 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5675 match(Set dst (SubVS src1 src2)); 5676 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 5677 ins_encode %{ 5678 int vector_len = 0; 5679 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5680 %} 5681 ins_pipe( pipe_slow ); 5682 %} 5683 5684 instruct vsub4S(vecD dst, vecD src) %{ 5685 predicate(n->as_Vector()->length() == 4); 5686 match(Set dst (SubVS dst src)); 5687 format %{ "psubw $dst,$src\t! sub packed4S" %} 5688 ins_encode %{ 5689 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 5695 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5696 match(Set dst (SubVS src1 src2)); 5697 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 5698 ins_encode %{ 5699 int vector_len = 0; 5700 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 instruct vsub8S(vecX dst, vecX src) %{ 5706 predicate(n->as_Vector()->length() == 8); 5707 match(Set dst (SubVS dst src)); 5708 format %{ "psubw $dst,$src\t! sub packed8S" %} 5709 ins_encode %{ 5710 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5711 %} 5712 ins_pipe( pipe_slow ); 5713 %} 5714 5715 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 5716 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5717 match(Set dst (SubVS src1 src2)); 5718 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 5719 ins_encode %{ 5720 int vector_len = 0; 5721 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5722 %} 5723 ins_pipe( pipe_slow ); 5724 %} 5783 predicate(n->as_Vector()->length() == 2); 5784 match(Set dst (SubVI dst src)); 5785 format %{ "psubd $dst,$src\t! sub packed2I" %} 5786 ins_encode %{ 5787 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 5793 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5794 match(Set dst (SubVI src1 src2)); 5795 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 5796 ins_encode %{ 5797 int vector_len = 0; 5798 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5799 %} 5800 ins_pipe( pipe_slow ); 5801 %} 5802 5803 instruct vsub4I(vecX dst, vecX src) %{ 5804 predicate(n->as_Vector()->length() == 4); 5805 match(Set dst (SubVI dst src)); 5806 format %{ "psubd $dst,$src\t! 
sub packed4I" %} 5807 ins_encode %{ 5808 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5809 %} 5810 ins_pipe( pipe_slow ); 5811 %} 5812 5813 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 5814 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5815 match(Set dst (SubVI src1 src2)); 5816 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 5817 ins_encode %{ 5818 int vector_len = 0; 5819 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5820 %} 5821 ins_pipe( pipe_slow ); 5822 %} 5958 predicate(n->as_Vector()->length() == 2); 5959 match(Set dst (SubVF dst src)); 5960 format %{ "subps $dst,$src\t! sub packed2F" %} 5961 ins_encode %{ 5962 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 5968 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5969 match(Set dst (SubVF src1 src2)); 5970 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 5971 ins_encode %{ 5972 int vector_len = 0; 5973 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5974 %} 5975 ins_pipe( pipe_slow ); 5976 %} 5977 5978 instruct vsub4F(vecX dst, vecX src) %{ 5979 predicate(n->as_Vector()->length() == 4); 5980 match(Set dst (SubVF dst src)); 5981 format %{ "subps $dst,$src\t! sub packed4F" %} 5982 ins_encode %{ 5983 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5984 %} 5985 ins_pipe( pipe_slow ); 5986 %} 5987 5988 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 5989 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5990 match(Set dst (SubVF src1 src2)); 5991 format %{ "vsubps $dst,$src1,$src2\t! 
sub packed4F" %} 5992 ins_encode %{ 5993 int vector_len = 0; 5994 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5995 %} 5996 ins_pipe( pipe_slow ); 5997 %} 6135 predicate(n->as_Vector()->length() == 2); 6136 match(Set dst (MulVS dst src)); 6137 format %{ "pmullw $dst,$src\t! mul packed2S" %} 6138 ins_encode %{ 6139 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6140 %} 6141 ins_pipe( pipe_slow ); 6142 %} 6143 6144 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 6145 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6146 match(Set dst (MulVS src1 src2)); 6147 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 6148 ins_encode %{ 6149 int vector_len = 0; 6150 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6151 %} 6152 ins_pipe( pipe_slow ); 6153 %} 6154 6155 instruct vmul4S(vecD dst, vecD src) %{ 6156 predicate(n->as_Vector()->length() == 4); 6157 match(Set dst (MulVS dst src)); 6158 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6159 ins_encode %{ 6160 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6161 %} 6162 ins_pipe( pipe_slow ); 6163 %} 6164 6165 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6166 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6167 match(Set dst (MulVS src1 src2)); 6168 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6169 ins_encode %{ 6170 int vector_len = 0; 6171 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6172 %} 6173 ins_pipe( pipe_slow ); 6174 %} 6175 6176 instruct vmul8S(vecX dst, vecX src) %{ 6177 predicate(n->as_Vector()->length() == 8); 6178 match(Set dst (MulVS dst src)); 6179 format %{ "pmullw $dst,$src\t! 
mul packed8S" %} 6180 ins_encode %{ 6181 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6182 %} 6183 ins_pipe( pipe_slow ); 6184 %} 6185 6186 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6187 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6188 match(Set dst (MulVS src1 src2)); 6189 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 6190 ins_encode %{ 6191 int vector_len = 0; 6192 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6193 %} 6194 ins_pipe( pipe_slow ); 6195 %} 6254 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 6255 match(Set dst (MulVI dst src)); 6256 format %{ "pmulld $dst,$src\t! mul packed2I" %} 6257 ins_encode %{ 6258 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6259 %} 6260 ins_pipe( pipe_slow ); 6261 %} 6262 6263 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 6264 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6265 match(Set dst (MulVI src1 src2)); 6266 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 6267 ins_encode %{ 6268 int vector_len = 0; 6269 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6270 %} 6271 ins_pipe( pipe_slow ); 6272 %} 6273 6274 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 6275 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6276 match(Set dst (MulVL src1 src2)); 6277 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 6278 ins_encode %{ 6279 int vector_len = 0; 6280 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6281 %} 6282 ins_pipe( pipe_slow ); 6283 %} 6284 6285 instruct vmul4I(vecX dst, vecX src) %{ 6286 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 6287 match(Set dst (MulVI dst src)); 6288 format %{ "pmulld $dst,$src\t! 
mul packed4I" %} 6289 ins_encode %{ 6290 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6291 %} 6292 ins_pipe( pipe_slow ); 6293 %} 6294 6295 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 6296 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6297 match(Set dst (MulVI src1 src2)); 6298 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 6299 ins_encode %{ 6300 int vector_len = 0; 6301 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6302 %} 6303 ins_pipe( pipe_slow ); 6304 %} 6305 6306 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 6307 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6308 match(Set dst (MulVI src (LoadVector mem))); 6309 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 6310 ins_encode %{ 6311 int vector_len = 0; 6312 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6313 %} 6314 ins_pipe( pipe_slow ); 6315 %} 6316 6317 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 6318 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6319 match(Set dst (MulVL src1 src2)); 6320 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 6321 ins_encode %{ 6322 int vector_len = 1; 6323 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6324 %} 6325 ins_pipe( pipe_slow ); 6326 %} 6327 6328 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 6329 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6330 match(Set dst (MulVL src (LoadVector mem))); 6331 format %{ "vpmullq $dst,$src,$mem\t! 
mul packed4L" %} 6332 ins_encode %{ 6333 int vector_len = 1; 6334 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6335 %} 6336 ins_pipe( pipe_slow ); 6337 %} 6338 6339 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 6340 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6341 match(Set dst (MulVI src1 src2)); 6342 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 6343 ins_encode %{ 6344 int vector_len = 1; 6345 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6346 %} 6347 ins_pipe( pipe_slow ); 6348 %} 6349 6350 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6351 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6352 match(Set dst (MulVL src1 src2)); 6353 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 6354 ins_encode %{ 6355 int vector_len = 2; 6356 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6357 %} 6358 ins_pipe( pipe_slow ); 6359 %} 6360 6361 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6362 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6363 match(Set dst (MulVI src1 src2)); 6364 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 6365 ins_encode %{ 6366 int vector_len = 2; 6367 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6368 %} 6369 ins_pipe( pipe_slow ); 6370 %} 6371 6372 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 6373 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6374 match(Set dst (MulVI src (LoadVector mem))); 6375 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed8I" %} 6376 ins_encode %{ 6377 int vector_len = 1; 6378 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6379 %} 6380 ins_pipe( pipe_slow ); 6381 %} 6382 6383 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 6384 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6385 match(Set dst (MulVL src (LoadVector mem))); 6386 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 6387 ins_encode %{ 6388 int vector_len = 2; 6389 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6390 %} 6391 ins_pipe( pipe_slow ); 6392 %} 6393 6394 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 6395 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6396 match(Set dst (MulVI src (LoadVector mem))); 6397 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 6398 ins_encode %{ 6399 int vector_len = 2; 6400 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6401 %} 6402 ins_pipe( pipe_slow ); 6403 %} 6404 6405 // Floats vector mul 6406 instruct vmul2F(vecD dst, vecD src) %{ 6407 predicate(n->as_Vector()->length() == 2); 6408 match(Set dst (MulVF dst src)); 6409 format %{ "mulps $dst,$src\t! mul packed2F" %} 6410 ins_encode %{ 6411 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 6417 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6418 match(Set dst (MulVF src1 src2)); 6419 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 6420 ins_encode %{ 6421 int vector_len = 0; 6422 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 instruct vmul4F(vecX dst, vecX src) %{ 6428 predicate(n->as_Vector()->length() == 4); 6429 match(Set dst (MulVF dst src)); 6430 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 6431 ins_encode %{ 6432 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6433 %} 6434 ins_pipe( pipe_slow ); 6435 %} 6436 6437 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 6438 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6439 match(Set dst (MulVF src1 src2)); 6440 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 6441 ins_encode %{ 6442 int vector_len = 0; 6443 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6444 %} 6445 ins_pipe( pipe_slow ); 6446 %} 6584 predicate(n->as_Vector()->length() == 2); 6585 match(Set dst (DivVF dst src)); 6586 format %{ "divps $dst,$src\t! div packed2F" %} 6587 ins_encode %{ 6588 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6589 %} 6590 ins_pipe( pipe_slow ); 6591 %} 6592 6593 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 6594 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6595 match(Set dst (DivVF src1 src2)); 6596 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 6597 ins_encode %{ 6598 int vector_len = 0; 6599 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6600 %} 6601 ins_pipe( pipe_slow ); 6602 %} 6603 6604 instruct vdiv4F(vecX dst, vecX src) %{ 6605 predicate(n->as_Vector()->length() == 4); 6606 match(Set dst (DivVF dst src)); 6607 format %{ "divps $dst,$src\t! div packed4F" %} 6608 ins_encode %{ 6609 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6610 %} 6611 ins_pipe( pipe_slow ); 6612 %} 6613 6614 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 6615 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6616 match(Set dst (DivVF src1 src2)); 6617 format %{ "vdivps $dst,$src1,$src2\t! 
div packed4F" %} 6618 ins_encode %{ 6619 int vector_len = 0; 6620 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6621 %} 6622 ins_pipe( pipe_slow ); 6623 %} 7861 predicate(n->as_Vector()->length_in_bytes() == 4); 7862 match(Set dst (AndV dst src)); 7863 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 7864 ins_encode %{ 7865 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7866 %} 7867 ins_pipe( pipe_slow ); 7868 %} 7869 7870 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 7871 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 7872 match(Set dst (AndV src1 src2)); 7873 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 7874 ins_encode %{ 7875 int vector_len = 0; 7876 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7877 %} 7878 ins_pipe( pipe_slow ); 7879 %} 7880 7881 instruct vand8B(vecD dst, vecD src) %{ 7882 predicate(n->as_Vector()->length_in_bytes() == 8); 7883 match(Set dst (AndV dst src)); 7884 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 7885 ins_encode %{ 7886 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7887 %} 7888 ins_pipe( pipe_slow ); 7889 %} 7890 7891 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 7892 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 7893 match(Set dst (AndV src1 src2)); 7894 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 7895 ins_encode %{ 7896 int vector_len = 0; 7897 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7898 %} 7899 ins_pipe( pipe_slow ); 7900 %} 7901 7902 instruct vand16B(vecX dst, vecX src) %{ 7903 predicate(n->as_Vector()->length_in_bytes() == 16); 7904 match(Set dst (AndV dst src)); 7905 format %{ "pand $dst,$src\t! 
and vectors (16 bytes)" %} 7906 ins_encode %{ 7907 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7908 %} 7909 ins_pipe( pipe_slow ); 7910 %} 7911 7912 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 7913 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 7914 match(Set dst (AndV src1 src2)); 7915 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 7916 ins_encode %{ 7917 int vector_len = 0; 7918 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7919 %} 7920 ins_pipe( pipe_slow ); 7921 %} 7981 predicate(n->as_Vector()->length_in_bytes() == 4); 7982 match(Set dst (OrV dst src)); 7983 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 7984 ins_encode %{ 7985 __ por($dst$$XMMRegister, $src$$XMMRegister); 7986 %} 7987 ins_pipe( pipe_slow ); 7988 %} 7989 7990 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 7991 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 7992 match(Set dst (OrV src1 src2)); 7993 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 7994 ins_encode %{ 7995 int vector_len = 0; 7996 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7997 %} 7998 ins_pipe( pipe_slow ); 7999 %} 8000 8001 instruct vor8B(vecD dst, vecD src) %{ 8002 predicate(n->as_Vector()->length_in_bytes() == 8); 8003 match(Set dst (OrV dst src)); 8004 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 8005 ins_encode %{ 8006 __ por($dst$$XMMRegister, $src$$XMMRegister); 8007 %} 8008 ins_pipe( pipe_slow ); 8009 %} 8010 8011 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8012 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8013 match(Set dst (OrV src1 src2)); 8014 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %} 8015 ins_encode %{ 8016 int vector_len = 0; 8017 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8018 %} 8019 ins_pipe( pipe_slow ); 8020 %} 8021 8022 instruct vor16B(vecX dst, vecX src) %{ 8023 predicate(n->as_Vector()->length_in_bytes() == 16); 8024 match(Set dst (OrV dst src)); 8025 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 8026 ins_encode %{ 8027 __ por($dst$$XMMRegister, $src$$XMMRegister); 8028 %} 8029 ins_pipe( pipe_slow ); 8030 %} 8031 8032 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 8033 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8034 match(Set dst (OrV src1 src2)); 8035 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 8036 ins_encode %{ 8037 int vector_len = 0; 8038 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8039 %} 8040 ins_pipe( pipe_slow ); 8041 %} 8101 predicate(n->as_Vector()->length_in_bytes() == 4); 8102 match(Set dst (XorV dst src)); 8103 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 8104 ins_encode %{ 8105 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8106 %} 8107 ins_pipe( pipe_slow ); 8108 %} 8109 8110 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 8111 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8112 match(Set dst (XorV src1 src2)); 8113 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 8114 ins_encode %{ 8115 int vector_len = 0; 8116 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8117 %} 8118 ins_pipe( pipe_slow ); 8119 %} 8120 8121 instruct vxor8B(vecD dst, vecD src) %{ 8122 predicate(n->as_Vector()->length_in_bytes() == 8); 8123 match(Set dst (XorV dst src)); 8124 format %{ "pxor $dst,$src\t! 
xor vectors (8 bytes)" %} 8125 ins_encode %{ 8126 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8127 %} 8128 ins_pipe( pipe_slow ); 8129 %} 8130 8131 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8132 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8133 match(Set dst (XorV src1 src2)); 8134 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 8135 ins_encode %{ 8136 int vector_len = 0; 8137 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8138 %} 8139 ins_pipe( pipe_slow ); 8140 %} 8141 8142 instruct vxor16B(vecX dst, vecX src) %{ 8143 predicate(n->as_Vector()->length_in_bytes() == 16); 8144 match(Set dst (XorV dst src)); 8145 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 8146 ins_encode %{ 8147 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8148 %} 8149 ins_pipe( pipe_slow ); 8150 %} 8151 8152 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 8153 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8154 match(Set dst (XorV src1 src2)); 8155 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 8156 ins_encode %{ 8157 int vector_len = 0; | 2877 ins_cost(145); 2878 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2879 ins_encode %{ 2880 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2881 %} 2882 ins_pipe( pipe_slow ); 2883 %} 2884 2885 instruct storeV64(memory mem, vecZ src) %{ 2886 predicate(n->as_StoreVector()->memory_size() == 64); 2887 match(Set mem (StoreVector mem src)); 2888 ins_cost(145); 2889 format %{ "vmovdqu $mem k0,$src\t! 
store vector (64 bytes)" %} 2890 ins_encode %{ 2891 int vector_len = 2; 2892 __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len); 2893 %} 2894 ins_pipe( pipe_slow ); 2895 %} 2896 2897 // ====================LEGACY REPLICATE======================================= 2898 2899 instruct Repl16B(vecX dst, rRegI src) %{ 2900 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2901 match(Set dst (ReplicateB src)); 2902 format %{ "movd $dst,$src\n\t" 2903 "punpcklbw $dst,$dst\n\t" 2904 "pshuflw $dst,$dst,0x00\n\t" 2905 "punpcklqdq $dst,$dst\t! replicate16B" %} 2906 ins_encode %{ 2907 __ movdl($dst$$XMMRegister, $src$$Register); 2908 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2909 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2910 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2911 %} 2912 ins_pipe( pipe_slow ); 2913 %} 2914 2915 instruct Repl16B_mem(vecX dst, memory mem) %{ 2916 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2917 match(Set dst (ReplicateB (LoadB mem))); 2918 format %{ "punpcklbw $dst,$mem\n\t" 2919 "pshuflw $dst,$dst,0x00\n\t" 2920 "punpcklqdq $dst,$dst\t! replicate16B" %} 2921 ins_encode %{ 2922 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2923 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2924 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2925 %} 2926 ins_pipe( pipe_slow ); 2927 %} 2928 2929 instruct Repl32B(vecY dst, rRegI src) %{ 2930 predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2931 match(Set dst (ReplicateB src)); 2932 format %{ "movd $dst,$src\n\t" 2933 "punpcklbw $dst,$dst\n\t" 2934 "pshuflw $dst,$dst,0x00\n\t" 2935 "punpcklqdq $dst,$dst\n\t" 2936 "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %} 2937 ins_encode %{ 2938 __ movdl($dst$$XMMRegister, $src$$Register); 2939 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2940 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2941 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2942 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2943 %} 2944 ins_pipe( pipe_slow ); 2945 %} 2946 2947 instruct Repl32B_mem(vecY dst, memory mem) %{ 2948 predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2949 match(Set dst (ReplicateB (LoadB mem))); 2950 format %{ "punpcklbw $dst,$mem\n\t" 2951 "pshuflw $dst,$dst,0x00\n\t" 2952 "punpcklqdq $dst,$dst\n\t" 2953 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 2954 ins_encode %{ 2955 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2956 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2957 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2958 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2959 %} 2960 ins_pipe( pipe_slow ); 2961 %} 2962 2963 instruct Repl16B_imm(vecX dst, immI con) %{ 2964 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2965 match(Set dst (ReplicateB con)); 2966 format %{ "movq $dst,[$constantaddress]\n\t" 2967 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 2968 ins_encode %{ 2969 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2970 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2971 %} 2972 ins_pipe( pipe_slow ); 2973 %} 2974 2975 instruct Repl32B_imm(vecY dst, immI con) %{ 2976 predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2977 match(Set dst (ReplicateB con)); 2978 format %{ "movq $dst,[$constantaddress]\n\t" 2979 "punpcklqdq $dst,$dst\n\t" 2980 "vinserti128h $dst,$dst,$dst\t! 
replicate32B($con)" %} 2981 ins_encode %{ 2982 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2983 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2984 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2985 %} 2986 ins_pipe( pipe_slow ); 2987 %} 2988 2989 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 2990 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3); 2991 match(Set dst (ReplicateB zero)); 2992 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 2993 ins_encode %{ 2994 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2995 %} 2996 ins_pipe( fpu_reg_reg ); 2997 %} 2998 2999 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3000 predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && UseAVX < 3); 3001 match(Set dst (ReplicateB zero)); 3002 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3003 ins_encode %{ 3004 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3005 int vector_len = 1; 3006 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3007 %} 3008 ins_pipe( fpu_reg_reg ); 3009 %} 3010 3011 instruct Repl8S(vecX dst, rRegI src) %{ 3012 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3013 match(Set dst (ReplicateS src)); 3014 format %{ "movd $dst,$src\n\t" 3015 "pshuflw $dst,$dst,0x00\n\t" 3016 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3017 ins_encode %{ 3018 __ movdl($dst$$XMMRegister, $src$$Register); 3019 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3020 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3021 %} 3022 ins_pipe( pipe_slow ); 3023 %} 3024 3025 instruct Repl16S(vecY dst, rRegI src) %{ 3026 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3027 match(Set dst (ReplicateS src)); 3028 format %{ "movd $dst,$src\n\t" 3029 "pshuflw $dst,$dst,0x00\n\t" 3030 "punpcklqdq $dst,$dst\n\t" 3031 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3032 ins_encode %{ 3033 __ movdl($dst$$XMMRegister, $src$$Register); 3034 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3035 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3036 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3037 %} 3038 ins_pipe( pipe_slow ); 3039 %} 3040 3041 instruct Repl8S_imm(vecX dst, immI con) %{ 3042 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3043 match(Set dst (ReplicateS con)); 3044 format %{ "movq $dst,[$constantaddress]\n\t" 3045 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3046 ins_encode %{ 3047 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3048 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3049 %} 3050 ins_pipe( pipe_slow ); 3051 %} 3052 3053 instruct Repl16S_imm(vecY dst, immI con) %{ 3054 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3055 match(Set dst (ReplicateS con)); 3056 format %{ "movq $dst,[$constantaddress]\n\t" 3057 "punpcklqdq $dst,$dst\n\t" 3058 "vinserti128h $dst,$dst,$dst\t! 
replicate16S($con)" %} 3059 ins_encode %{ 3060 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3061 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3062 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3063 %} 3064 ins_pipe( pipe_slow ); 3065 %} 3066 3067 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3068 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3); 3069 match(Set dst (ReplicateS zero)); 3070 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3071 ins_encode %{ 3072 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3073 %} 3074 ins_pipe( fpu_reg_reg ); 3075 %} 3076 3077 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3078 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3); 3079 match(Set dst (ReplicateS zero)); 3080 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3081 ins_encode %{ 3082 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3083 int vector_len = 1; 3084 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3085 %} 3086 ins_pipe( fpu_reg_reg ); 3087 %} 3088 3089 instruct Repl4I(vecX dst, rRegI src) %{ 3090 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3091 match(Set dst (ReplicateI src)); 3092 format %{ "movd $dst,$src\n\t" 3093 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3094 ins_encode %{ 3095 __ movdl($dst$$XMMRegister, $src$$Register); 3096 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3097 %} 3098 ins_pipe( pipe_slow ); 3099 %} 3100 3101 instruct Repl4I_mem(vecX dst, memory mem) %{ 3102 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3103 match(Set dst (ReplicateI (LoadI mem))); 3104 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4I" %} 3105 ins_encode %{ 3106 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3107 %} 3108 ins_pipe( pipe_slow ); 3109 %} 3110 3111 instruct Repl8I(vecY dst, rRegI src) %{ 3112 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3113 match(Set dst (ReplicateI src)); 3114 format %{ "movd $dst,$src\n\t" 3115 "pshufd $dst,$dst,0x00\n\t" 3116 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3117 ins_encode %{ 3118 __ movdl($dst$$XMMRegister, $src$$Register); 3119 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3120 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3121 %} 3122 ins_pipe( pipe_slow ); 3123 %} 3124 3125 instruct Repl8I_mem(vecY dst, memory mem) %{ 3126 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3127 match(Set dst (ReplicateI (LoadI mem))); 3128 format %{ "pshufd $dst,$mem,0x00\n\t" 3129 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3130 ins_encode %{ 3131 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3132 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3133 %} 3134 ins_pipe( pipe_slow ); 3135 %} 3136 3137 instruct Repl4I_imm(vecX dst, immI con) %{ 3138 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3139 match(Set dst (ReplicateI con)); 3140 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3141 "punpcklqdq $dst,$dst" %} 3142 ins_encode %{ 3143 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3144 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3145 %} 3146 ins_pipe( pipe_slow ); 3147 %} 3148 3149 instruct Repl8I_imm(vecY dst, immI con) %{ 3150 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3151 match(Set dst (ReplicateI con)); 3152 format %{ "movq $dst,[$constantaddress]\t! 
replicate8I($con)\n\t" 3153 "punpcklqdq $dst,$dst\n\t" 3154 "vinserti128h $dst,$dst,$dst" %} 3155 ins_encode %{ 3156 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3157 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3158 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3159 %} 3160 ins_pipe( pipe_slow ); 3161 %} 3162 3163 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 3164 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && UseAVX < 3); 3165 match(Set dst (ReplicateI zero)); 3166 format %{ "pxor $dst,$dst\t! replicate4I zero" %} 3167 ins_encode %{ 3168 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3169 %} 3170 ins_pipe( fpu_reg_reg ); 3171 %} 3172 3173 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 3174 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3); 3175 match(Set dst (ReplicateI zero)); 3176 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 3177 ins_encode %{ 3178 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3179 int vector_len = 1; 3180 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3181 %} 3182 ins_pipe( fpu_reg_reg ); 3183 %} 3184 3185 // Replicate long (8 byte) scalar to be vector 3186 #ifdef _LP64 3187 instruct Repl4L(vecY dst, rRegL src) %{ 3188 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3189 match(Set dst (ReplicateL src)); 3190 format %{ "movdq $dst,$src\n\t" 3191 "punpcklqdq $dst,$dst\n\t" 3192 "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 3193 ins_encode %{ 3194 __ movdq($dst$$XMMRegister, $src$$Register); 3195 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3196 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3197 %} 3198 ins_pipe( pipe_slow ); 3199 %} 3200 #else // _LP64 3201 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3202 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3203 match(Set dst (ReplicateL src)); 3204 effect(TEMP dst, USE src, TEMP tmp); 3205 format %{ "movdl $dst,$src.lo\n\t" 3206 "movdl $tmp,$src.hi\n\t" 3207 "punpckldq $dst,$tmp\n\t" 3208 "punpcklqdq $dst,$dst\n\t" 3209 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3210 ins_encode %{ 3211 __ movdl($dst$$XMMRegister, $src$$Register); 3212 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3213 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3214 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3215 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3216 %} 3217 ins_pipe( pipe_slow ); 3218 %} 3219 #endif // _LP64 3220 3221 instruct Repl4L_imm(vecY dst, immL con) %{ 3222 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3223 match(Set dst (ReplicateL con)); 3224 format %{ "movq $dst,[$constantaddress]\n\t" 3225 "punpcklqdq $dst,$dst\n\t" 3226 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3227 ins_encode %{ 3228 __ movq($dst$$XMMRegister, $constantaddress($con)); 3229 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3230 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3231 %} 3232 ins_pipe( pipe_slow ); 3233 %} 3234 3235 instruct Repl4L_mem(vecY dst, memory mem) %{ 3236 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3237 match(Set dst (ReplicateL (LoadL mem))); 3238 format %{ "movq $dst,$mem\n\t" 3239 "punpcklqdq $dst,$dst\n\t" 3240 "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 3241 ins_encode %{ 3242 __ movq($dst$$XMMRegister, $mem$$Address); 3243 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3244 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3245 %} 3246 ins_pipe( pipe_slow ); 3247 %} 3248 3249 instruct Repl8L_mem(vecZ dst, memory mem) %{ 3250 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3); 3251 match(Set dst (ReplicateL (LoadL mem))); 3252 format %{ "movq $dst,$mem\n\t" 3253 "punpcklqdq $dst,$dst\n\t" 3254 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3255 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3256 ins_encode %{ 3257 __ movq($dst$$XMMRegister, $mem$$Address); 3258 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3259 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3260 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3261 %} 3262 ins_pipe( pipe_slow ); 3263 %} 3264 3265 instruct Repl8F(vecY dst, regF src) %{ 3266 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3267 match(Set dst (ReplicateF src)); 3268 format %{ "pshufd $dst,$src,0x00\n\t" 3269 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3270 ins_encode %{ 3271 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3272 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3273 %} 3274 ins_pipe( pipe_slow ); 3275 %} 3276 3277 instruct Repl8F_mem(vecY dst, memory mem) %{ 3278 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3279 match(Set dst (ReplicateF (LoadF mem))); 3280 format %{ "pshufd $dst,$mem,0x00\n\t" 3281 "vinsertf128h $dst,$dst,$dst\t! 
replicate8F" %} 3282 ins_encode %{ 3283 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3284 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3285 %} 3286 ins_pipe( pipe_slow ); 3287 %} 3288 3289 instruct Repl4D(vecY dst, regD src) %{ 3290 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3291 match(Set dst (ReplicateD src)); 3292 format %{ "pshufd $dst,$src,0x44\n\t" 3293 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3294 ins_encode %{ 3295 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3296 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3297 %} 3298 ins_pipe( pipe_slow ); 3299 %} 3300 3301 instruct Repl4D_mem(vecY dst, memory mem) %{ 3302 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3303 match(Set dst (ReplicateD (LoadD mem))); 3304 format %{ "pshufd $dst,$mem,0x44\n\t" 3305 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3306 ins_encode %{ 3307 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3308 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3309 %} 3310 ins_pipe( pipe_slow ); 3311 %} 3312 3313 // ====================GENERIC REPLICATE========================================== 3314 3315 // Replicate byte scalar to be vector 3316 instruct Repl4B(vecS dst, rRegI src) %{ 3317 predicate(n->as_Vector()->length() == 4); 3318 match(Set dst (ReplicateB src)); 3319 format %{ "movd $dst,$src\n\t" 3320 "punpcklbw $dst,$dst\n\t" 3321 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 3322 ins_encode %{ 3323 __ movdl($dst$$XMMRegister, $src$$Register); 3324 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3325 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3326 %} 3327 ins_pipe( pipe_slow ); 3328 %} 3329 3330 instruct Repl4B_mem(vecS dst, memory mem) %{ 3331 predicate(n->as_Vector()->length() == 4); 3332 match(Set dst (ReplicateB (LoadB mem))); 3333 format %{ "punpcklbw $dst,$mem\n\t" 3334 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3335 ins_encode %{ 3336 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3337 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3338 %} 3339 ins_pipe( pipe_slow ); 3340 %} 3341 3342 instruct Repl8B(vecD dst, rRegI src) %{ 3343 predicate(n->as_Vector()->length() == 8); 3344 match(Set dst (ReplicateB src)); 3345 format %{ "movd $dst,$src\n\t" 3346 "punpcklbw $dst,$dst\n\t" 3347 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3348 ins_encode %{ 3349 __ movdl($dst$$XMMRegister, $src$$Register); 3350 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3351 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3352 %} 3353 ins_pipe( pipe_slow ); 3354 %} 3355 3356 instruct Repl8B_mem(vecD dst, memory mem) %{ 3357 predicate(n->as_Vector()->length() == 8); 3358 match(Set dst (ReplicateB (LoadB mem))); 3359 format %{ "punpcklbw $dst,$mem\n\t" 3360 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3361 ins_encode %{ 3362 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3363 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3364 %} 3365 ins_pipe( pipe_slow ); 3366 %} 3367 3368 // Replicate byte scalar immediate to be vector by loading from const table. 3369 instruct Repl4B_imm(vecS dst, immI con) %{ 3370 predicate(n->as_Vector()->length() == 4); 3371 match(Set dst (ReplicateB con)); 3372 format %{ "movdl $dst,[$constantaddress]\t! 
replicate4B($con)" %} 3373 ins_encode %{ 3374 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3375 %} 3376 ins_pipe( pipe_slow ); 3377 %} 3378 3379 instruct Repl8B_imm(vecD dst, immI con) %{ 3380 predicate(n->as_Vector()->length() == 8); 3381 match(Set dst (ReplicateB con)); 3382 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3383 ins_encode %{ 3384 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3385 %} 3386 ins_pipe( pipe_slow ); 3387 %} 3388 3389 // Replicate byte scalar zero to be vector 3390 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3391 predicate(n->as_Vector()->length() == 4); 3392 match(Set dst (ReplicateB zero)); 3393 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3394 ins_encode %{ 3395 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3396 %} 3397 ins_pipe( fpu_reg_reg ); 3398 %} 3399 3400 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3401 predicate(n->as_Vector()->length() == 8); 3402 match(Set dst (ReplicateB zero)); 3403 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3404 ins_encode %{ 3405 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3406 %} 3407 ins_pipe( fpu_reg_reg ); 3408 %} 3409 3410 // Replicate char/short (2 byte) scalar to be vector 3411 instruct Repl2S(vecS dst, rRegI src) %{ 3412 predicate(n->as_Vector()->length() == 2); 3413 match(Set dst (ReplicateS src)); 3414 format %{ "movd $dst,$src\n\t" 3415 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3416 ins_encode %{ 3417 __ movdl($dst$$XMMRegister, $src$$Register); 3418 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3419 %} 3420 ins_pipe( fpu_reg_reg ); 3421 %} 3422 3423 instruct Repl4S(vecD dst, rRegI src) %{ 3424 predicate(n->as_Vector()->length() == 4); 3425 match(Set dst (ReplicateS src)); 3426 format %{ "movd $dst,$src\n\t" 3427 "pshuflw $dst,$dst,0x00\t! 
replicate4S" %} 3428 ins_encode %{ 3429 __ movdl($dst$$XMMRegister, $src$$Register); 3430 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3431 %} 3432 ins_pipe( fpu_reg_reg ); 3433 %} 3434 3435 instruct Repl4S_mem(vecD dst, memory mem) %{ 3436 predicate(n->as_Vector()->length() == 4); 3437 match(Set dst (ReplicateS (LoadS mem))); 3438 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3439 ins_encode %{ 3440 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3441 %} 3442 ins_pipe( fpu_reg_reg ); 3443 %} 3444 3445 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3446 instruct Repl2S_imm(vecS dst, immI con) %{ 3447 predicate(n->as_Vector()->length() == 2); 3448 match(Set dst (ReplicateS con)); 3449 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3450 ins_encode %{ 3451 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3452 %} 3453 ins_pipe( fpu_reg_reg ); 3454 %} 3455 3456 instruct Repl4S_imm(vecD dst, immI con) %{ 3457 predicate(n->as_Vector()->length() == 4); 3458 match(Set dst (ReplicateS con)); 3459 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3460 ins_encode %{ 3461 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3462 %} 3463 ins_pipe( fpu_reg_reg ); 3464 %} 3465 3466 // Replicate char/short (2 byte) scalar zero to be vector 3467 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3468 predicate(n->as_Vector()->length() == 2); 3469 match(Set dst (ReplicateS zero)); 3470 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3471 ins_encode %{ 3472 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3473 %} 3474 ins_pipe( fpu_reg_reg ); 3475 %} 3476 3477 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3478 predicate(n->as_Vector()->length() == 4); 3479 match(Set dst (ReplicateS zero)); 3480 format %{ "pxor $dst,$dst\t! 
replicate4S zero" %} 3481 ins_encode %{ 3482 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3483 %} 3484 ins_pipe( fpu_reg_reg ); 3485 %} 3486 3487 // Replicate integer (4 byte) scalar to be vector 3488 instruct Repl2I(vecD dst, rRegI src) %{ 3489 predicate(n->as_Vector()->length() == 2); 3490 match(Set dst (ReplicateI src)); 3491 format %{ "movd $dst,$src\n\t" 3492 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3493 ins_encode %{ 3494 __ movdl($dst$$XMMRegister, $src$$Register); 3495 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3496 %} 3497 ins_pipe( fpu_reg_reg ); 3498 %} 3499 3500 // Integer could be loaded into xmm register directly from memory. 3501 instruct Repl2I_mem(vecD dst, memory mem) %{ 3502 predicate(n->as_Vector()->length() == 2); 3503 match(Set dst (ReplicateI (LoadI mem))); 3504 format %{ "movd $dst,$mem\n\t" 3505 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3506 ins_encode %{ 3507 __ movdl($dst$$XMMRegister, $mem$$Address); 3508 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3509 %} 3510 ins_pipe( fpu_reg_reg ); 3511 %} 3512 3513 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 3514 instruct Repl2I_imm(vecD dst, immI con) %{ 3515 predicate(n->as_Vector()->length() == 2); 3516 match(Set dst (ReplicateI con)); 3517 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 3518 ins_encode %{ 3519 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3520 %} 3521 ins_pipe( fpu_reg_reg ); 3522 %} 3523 3524 // Replicate integer (4 byte) scalar zero to be vector 3525 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 3526 predicate(n->as_Vector()->length() == 2); 3527 match(Set dst (ReplicateI zero)); 3528 format %{ "pxor $dst,$dst\t! 
replicate2I" %} 3529 ins_encode %{ 3530 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3531 %} 3532 ins_pipe( fpu_reg_reg ); 3533 %} 3534 3535 // Replicate long (8 byte) scalar to be vector 3536 #ifdef _LP64 3537 instruct Repl2L(vecX dst, rRegL src) %{ 3538 predicate(n->as_Vector()->length() == 2); 3539 match(Set dst (ReplicateL src)); 3540 format %{ "movdq $dst,$src\n\t" 3541 "punpcklqdq $dst,$dst\t! replicate2L" %} 3542 ins_encode %{ 3543 __ movdq($dst$$XMMRegister, $src$$Register); 3544 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3545 %} 3546 ins_pipe( pipe_slow ); 3547 %} 3548 #else // _LP64 3549 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3550 predicate(n->as_Vector()->length() == 2); 3551 match(Set dst (ReplicateL src)); 3552 effect(TEMP dst, USE src, TEMP tmp); 3553 format %{ "movdl $dst,$src.lo\n\t" 3554 "movdl $tmp,$src.hi\n\t" 3555 "punpckldq $dst,$tmp\n\t" 3556 "punpcklqdq $dst,$dst\t! replicate2L"%} 3557 ins_encode %{ 3558 __ movdl($dst$$XMMRegister, $src$$Register); 3559 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3560 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3561 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3562 %} 3563 ins_pipe( pipe_slow ); 3564 %} 3565 #endif // _LP64 3566 3567 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3568 instruct Repl2L_imm(vecX dst, immL con) %{ 3569 predicate(n->as_Vector()->length() == 2); 3570 match(Set dst (ReplicateL con)); 3571 format %{ "movq $dst,[$constantaddress]\n\t" 3572 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3573 ins_encode %{ 3574 __ movq($dst$$XMMRegister, $constantaddress($con)); 3575 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3576 %} 3577 ins_pipe( pipe_slow ); 3578 %} 3579 3580 // Long could be loaded into xmm register directly from memory. 
3581 instruct Repl2L_mem(vecX dst, memory mem) %{ 3582 predicate(n->as_Vector()->length() == 2); 3583 match(Set dst (ReplicateL (LoadL mem))); 3584 format %{ "movq $dst,$mem\n\t" 3585 "punpcklqdq $dst,$dst\t! replicate2L" %} 3586 ins_encode %{ 3587 __ movq($dst$$XMMRegister, $mem$$Address); 3588 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3589 %} 3590 ins_pipe( pipe_slow ); 3591 %} 3592 3593 // Replicate long (8 byte) scalar zero to be vector 3594 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3595 predicate(n->as_Vector()->length() == 2); 3596 match(Set dst (ReplicateL zero)); 3597 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3598 ins_encode %{ 3599 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3600 %} 3601 ins_pipe( fpu_reg_reg ); 3602 %} 3603 3604 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3605 predicate(n->as_Vector()->length() == 4); 3606 match(Set dst (ReplicateL zero)); 3607 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3608 ins_encode %{ 3609 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3610 int vector_len = 1; 3611 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3612 %} 3613 ins_pipe( fpu_reg_reg ); 3614 %} 3615 3616 // Replicate float (4 byte) scalar to be vector 3617 instruct Repl2F(vecD dst, regF src) %{ 3618 predicate(n->as_Vector()->length() == 2); 3619 match(Set dst (ReplicateF src)); 3620 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 3621 ins_encode %{ 3622 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3623 %} 3624 ins_pipe( fpu_reg_reg ); 3625 %} 3626 3627 instruct Repl2F_mem(vecD dst, memory mem) %{ 3628 predicate(n->as_Vector()->length() == 2); 3629 match(Set dst (ReplicateF (LoadF mem))); 3630 format %{ "pshufd $dst,$mem,0x00\t! 
replicate2F" %} 3631 ins_encode %{ 3632 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3633 %} 3634 ins_pipe( pipe_slow ); 3635 %} 3636 3637 instruct Repl4F(vecX dst, regF src) %{ 3638 predicate(n->as_Vector()->length() == 4); 3639 match(Set dst (ReplicateF src)); 3640 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 3641 ins_encode %{ 3642 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3643 %} 3644 ins_pipe( pipe_slow ); 3645 %} 3646 3647 instruct Repl4F_mem(vecX dst, memory mem) %{ 3648 predicate(n->as_Vector()->length() == 4); 3649 match(Set dst (ReplicateF (LoadF mem))); 3650 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3651 ins_encode %{ 3652 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3653 %} 3654 ins_pipe( pipe_slow ); 3655 %} 3656 3657 // Replicate float (4 byte) scalar zero to be vector 3658 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3659 predicate(n->as_Vector()->length() == 2); 3660 match(Set dst (ReplicateF zero)); 3661 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3662 ins_encode %{ 3663 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3664 %} 3665 ins_pipe( fpu_reg_reg ); 3666 %} 3667 3668 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3669 predicate(n->as_Vector()->length() == 4); 3670 match(Set dst (ReplicateF zero)); 3671 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3672 ins_encode %{ 3673 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3674 %} 3675 ins_pipe( fpu_reg_reg ); 3676 %} 3677 3678 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3679 predicate(n->as_Vector()->length() == 8); 3680 match(Set dst (ReplicateF zero)); 3681 format %{ "vxorps $dst,$dst,$dst\t! 
replicate8F zero" %} 3682 ins_encode %{ 3683 int vector_len = 1; 3684 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3685 %} 3686 ins_pipe( fpu_reg_reg ); 3687 %} 3688 3689 // Replicate double (8 bytes) scalar to be vector 3690 instruct Repl2D(vecX dst, regD src) %{ 3691 predicate(n->as_Vector()->length() == 2); 3692 match(Set dst (ReplicateD src)); 3693 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3694 ins_encode %{ 3695 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3696 %} 3697 ins_pipe( pipe_slow ); 3698 %} 3699 3700 instruct Repl2D_mem(vecX dst, memory mem) %{ 3701 predicate(n->as_Vector()->length() == 2); 3702 match(Set dst (ReplicateD (LoadD mem))); 3703 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3704 ins_encode %{ 3705 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3706 %} 3707 ins_pipe( pipe_slow ); 3708 %} 3709 3710 // Replicate double (8 byte) scalar zero to be vector 3711 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3712 predicate(n->as_Vector()->length() == 2); 3713 match(Set dst (ReplicateD zero)); 3714 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3715 ins_encode %{ 3716 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3717 %} 3718 ins_pipe( fpu_reg_reg ); 3719 %} 3720 3721 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3722 predicate(n->as_Vector()->length() == 4); 3723 match(Set dst (ReplicateD zero)); 3724 format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %} 3725 ins_encode %{ 3726 int vector_len = 1; 3727 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3728 %} 3729 ins_pipe( fpu_reg_reg ); 3730 %} 3731 3732 // ====================EVEX REPLICATE============================================= 3733 3734 // Note: some of the legacy forms are applicable to EVEX 3735 3736 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3737 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3738 match(Set dst (ReplicateB src)); 3739 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3740 ins_encode %{ 3741 int vector_len = 0; 3742 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3743 %} 3744 ins_pipe( pipe_slow ); 3745 %} 3746 3747 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3748 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3749 match(Set dst (ReplicateB (LoadB mem))); 3750 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3751 ins_encode %{ 3752 int vector_len = 0; 3753 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3754 %} 3755 ins_pipe( pipe_slow ); 3756 %} 3757 3758 instruct Repl32B_evex(vecY dst, rRegI src) %{ 3759 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3760 match(Set dst (ReplicateB src)); 3761 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 3762 ins_encode %{ 3763 int vector_len = 1; 3764 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3765 %} 3766 ins_pipe( pipe_slow ); 3767 %} 3768 3769 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 3770 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3771 match(Set dst (ReplicateB (LoadB mem))); 3772 format %{ "vpbroadcastb $dst,$mem\t! 
replicate32B" %} 3773 ins_encode %{ 3774 int vector_len = 1; 3775 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3776 %} 3777 ins_pipe( pipe_slow ); 3778 %} 3779 3780 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3781 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3782 match(Set dst (ReplicateB src)); 3783 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 3784 ins_encode %{ 3785 int vector_len = 2; 3786 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3787 %} 3788 ins_pipe( pipe_slow ); 3789 %} 3790 3791 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3792 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw()); 3793 match(Set dst (ReplicateB (LoadB mem))); 3794 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3795 ins_encode %{ 3796 int vector_len = 2; 3797 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3798 %} 3799 ins_pipe( pipe_slow ); 3800 %} 3801 3802 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3803 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3804 match(Set dst (ReplicateB con)); 3805 format %{ "movq $dst,[$constantaddress]\n\t" 3806 "vpbroadcastb $dst,$dst\t! replicate16B" %} 3807 ins_encode %{ 3808 int vector_len = 0; 3809 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3810 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3811 %} 3812 ins_pipe( pipe_slow ); 3813 %} 3814 3815 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3816 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3817 match(Set dst (ReplicateB con)); 3818 format %{ "movq $dst,[$constantaddress]\n\t" 3819 "vpbroadcastb $dst,$dst\t! 
replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  // Fixed: a duplicated ins_pipe( pipe_slow ) declaration was removed here.
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! 
replicate8S" %} 3869 ins_encode %{ 3870 int vector_len = 0; 3871 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3872 %} 3873 ins_pipe( pipe_slow ); 3874 %} 3875 3876 instruct Repl16S_evex(vecY dst, rRegI src) %{ 3877 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3878 match(Set dst (ReplicateS src)); 3879 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 3880 ins_encode %{ 3881 int vector_len = 1; 3882 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3883 %} 3884 ins_pipe( pipe_slow ); 3885 %} 3886 3887 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 3888 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3889 match(Set dst (ReplicateS (LoadS mem))); 3890 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 3891 ins_encode %{ 3892 int vector_len = 1; 3893 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3894 %} 3895 ins_pipe( pipe_slow ); 3896 %} 3897 3898 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 3899 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3900 match(Set dst (ReplicateS src)); 3901 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 3902 ins_encode %{ 3903 int vector_len = 2; 3904 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3905 %} 3906 ins_pipe( pipe_slow ); 3907 %} 3908 3909 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 3910 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3911 match(Set dst (ReplicateS (LoadS mem))); 3912 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 3913 ins_encode %{ 3914 int vector_len = 2; 3915 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3916 %} 3917 ins_pipe( pipe_slow ); 3918 %} 3919 3920 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 3921 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3922 match(Set dst (ReplicateS con)); 3923 format %{ "movq $dst,[$constantaddress]\n\t" 3924 "vpbroadcastw $dst,$dst\t! 
replicate8S" %} 3925 ins_encode %{ 3926 int vector_len = 0; 3927 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3928 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3929 %} 3930 ins_pipe( pipe_slow ); 3931 %} 3932 3933 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 3934 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3935 match(Set dst (ReplicateS con)); 3936 format %{ "movq $dst,[$constantaddress]\n\t" 3937 "vpbroadcastw $dst,$dst\t! replicate16S" %} 3938 ins_encode %{ 3939 int vector_len = 1; 3940 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3941 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3942 %} 3943 ins_pipe( pipe_slow ); 3944 %} 3945 3946 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 3947 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3948 match(Set dst (ReplicateS con)); 3949 format %{ "movq $dst,[$constantaddress]\n\t" 3950 "vpbroadcastw $dst,$dst\t! replicate32S" %} 3951 ins_encode %{ 3952 int vector_len = 2; 3953 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3954 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3955 %} 3956 ins_pipe( pipe_slow ); 3957 %} 3958 3959 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 3960 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3961 match(Set dst (ReplicateS zero)); 3962 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 3963 ins_encode %{ 3964 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3965 int vector_len = 2; 3966 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3967 %} 3968 ins_pipe( fpu_reg_reg ); 3969 %} 3970 3971 instruct Repl4I_evex(vecX dst, rRegI src) %{ 3972 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3973 match(Set dst (ReplicateI src)); 3974 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 3975 ins_encode %{ 3976 int vector_len = 0; 3977 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 3983 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3984 match(Set dst (ReplicateI (LoadI mem))); 3985 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 3986 ins_encode %{ 3987 int vector_len = 0; 3988 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3989 %} 3990 ins_pipe( pipe_slow ); 3991 %} 3992 3993 instruct Repl8I_evex(vecY dst, rRegI src) %{ 3994 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 3995 match(Set dst (ReplicateI src)); 3996 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 3997 ins_encode %{ 3998 int vector_len = 1; 3999 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4000 %} 4001 ins_pipe( pipe_slow ); 4002 %} 4003 4004 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4005 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4006 match(Set dst (ReplicateI (LoadI mem))); 4007 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4008 ins_encode %{ 4009 int vector_len = 1; 4010 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4011 %} 4012 ins_pipe( pipe_slow ); 4013 %} 4014 4015 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4016 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4017 match(Set dst (ReplicateI src)); 4018 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4019 ins_encode %{ 4020 int vector_len = 2; 4021 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4022 %} 4023 ins_pipe( pipe_slow ); 4024 %} 4025 4026 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4027 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4028 match(Set dst (ReplicateI (LoadI mem))); 4029 format %{ "vpbroadcastd $dst,$mem\t! 
replicate16I" %} 4030 ins_encode %{ 4031 int vector_len = 2; 4032 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4033 %} 4034 ins_pipe( pipe_slow ); 4035 %} 4036 4037 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4038 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4039 match(Set dst (ReplicateI con)); 4040 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4041 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4042 ins_encode %{ 4043 int vector_len = 0; 4044 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4045 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4046 %} 4047 ins_pipe( pipe_slow ); 4048 %} 4049 4050 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4051 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4052 match(Set dst (ReplicateI con)); 4053 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4054 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4055 ins_encode %{ 4056 int vector_len = 1; 4057 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4058 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4059 %} 4060 ins_pipe( pipe_slow ); 4061 %} 4062 4063 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4064 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4065 match(Set dst (ReplicateI con)); 4066 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4067 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4068 ins_encode %{ 4069 int vector_len = 2; 4070 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4071 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4072 %} 4073 ins_pipe( pipe_slow ); 4074 %} 4075 4076 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4077 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4078 match(Set dst (ReplicateI zero)); 4079 format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate16I zero" %} 4080 ins_encode %{ 4081 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4082 int vector_len = 2; 4083 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4084 %} 4085 ins_pipe( fpu_reg_reg ); 4086 %} 4087 4088 // Replicate long (8 byte) scalar to be vector 4089 #ifdef _LP64 4090 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4091 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4092 match(Set dst (ReplicateL src)); 4093 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4094 ins_encode %{ 4095 int vector_len = 1; 4096 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4097 %} 4098 ins_pipe( pipe_slow ); 4099 %} 4100 4101 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4102 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4103 match(Set dst (ReplicateL src)); 4104 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4105 ins_encode %{ 4106 int vector_len = 2; 4107 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4108 %} 4109 ins_pipe( pipe_slow ); 4110 %} 4111 #else // _LP64 4112 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4113 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4114 match(Set dst (ReplicateL src)); 4115 effect(TEMP dst, USE src, TEMP tmp); 4116 format %{ "movdl $dst,$src.lo\n\t" 4117 "movdl $tmp,$src.hi\n\t" 4118 "punpckldq $dst,$tmp\n\t" 4119 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4120 ins_encode %{ 4121 int vector_len = 1; 4122 __ movdl($dst$$XMMRegister, $src$$Register); 4123 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4124 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4125 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4126 %} 4127 ins_pipe( pipe_slow ); 4128 %} 4129 4130 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4131 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4132 match(Set dst (ReplicateL src)); 4133 effect(TEMP dst, USE src, TEMP tmp); 4134 format %{ "movdl $dst,$src.lo\n\t" 4135 "movdl $tmp,$src.hi\n\t" 4136 "punpckldq $dst,$tmp\n\t" 4137 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4138 ins_encode %{ 4139 int vector_len = 2; 4140 __ movdl($dst$$XMMRegister, $src$$Register); 4141 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4142 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4143 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4144 %} 4145 ins_pipe( pipe_slow ); 4146 %} 4147 #endif // _LP64 4148 4149 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4150 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4151 match(Set dst (ReplicateL con)); 4152 format %{ "movq $dst,[$constantaddress]\n\t" 4153 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4154 ins_encode %{ 4155 int vector_len = 1; 4156 __ movq($dst$$XMMRegister, $constantaddress($con)); 4157 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4158 %} 4159 ins_pipe( pipe_slow ); 4160 %} 4161 4162 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4163 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4164 match(Set dst (ReplicateL con)); 4165 format %{ "movq $dst,[$constantaddress]\n\t" 4166 "vpbroadcastq $dst,$dst\t! 
replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  // Fixed: format said "vpbroadcastd" but the encoding emits a quadword broadcast.
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  // Fixed: format said "vpbroadcastd" but the encoding emits a quadword broadcast.
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! 
replicate8F" %} 4213 ins_encode %{ 4214 int vector_len = 1; 4215 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4216 %} 4217 ins_pipe( pipe_slow ); 4218 %} 4219 4220 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4221 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4222 match(Set dst (ReplicateF (LoadF mem))); 4223 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4224 ins_encode %{ 4225 int vector_len = 1; 4226 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4227 %} 4228 ins_pipe( pipe_slow ); 4229 %} 4230 4231 instruct Repl16F_evex(vecZ dst, regF src) %{ 4232 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4233 match(Set dst (ReplicateF src)); 4234 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4235 ins_encode %{ 4236 int vector_len = 2; 4237 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4238 %} 4239 ins_pipe( pipe_slow ); 4240 %} 4241 4242 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4243 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4244 match(Set dst (ReplicateF (LoadF mem))); 4245 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4246 ins_encode %{ 4247 int vector_len = 2; 4248 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4249 %} 4250 ins_pipe( pipe_slow ); 4251 %} 4252 4253 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4254 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4255 match(Set dst (ReplicateF zero)); 4256 format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} 4257 ins_encode %{ 4258 int vector_len = 2; 4259 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4260 %} 4261 ins_pipe( fpu_reg_reg ); 4262 %} 4263 4264 instruct Repl4D_evex(vecY dst, regD src) %{ 4265 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4266 match(Set dst (ReplicateD src)); 4267 format %{ "vbroadcastsd $dst,$src\t! 
replicate4D" %} 4268 ins_encode %{ 4269 int vector_len = 1; 4270 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4271 %} 4272 ins_pipe( pipe_slow ); 4273 %} 4274 4275 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4276 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4277 match(Set dst (ReplicateD (LoadD mem))); 4278 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4279 ins_encode %{ 4280 int vector_len = 1; 4281 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4282 %} 4283 ins_pipe( pipe_slow ); 4284 %} 4285 4286 instruct Repl8D_evex(vecZ dst, regD src) %{ 4287 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4288 match(Set dst (ReplicateD src)); 4289 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4290 ins_encode %{ 4291 int vector_len = 2; 4292 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4293 %} 4294 ins_pipe( pipe_slow ); 4295 %} 4296 4297 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4298 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4299 match(Set dst (ReplicateD (LoadD mem))); 4300 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4301 ins_encode %{ 4302 int vector_len = 2; 4303 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4304 %} 4305 ins_pipe( pipe_slow ); 4306 %} 4307 4308 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4309 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4310 match(Set dst (ReplicateD zero)); 4311 format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! 
replicate8D zero" %} 4312 ins_encode %{ 4313 int vector_len = 2; 4314 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4315 %} 4316 ins_pipe( fpu_reg_reg ); 4317 %} 4318 4319 // ====================REDUCTION ARITHMETIC======================================= 4320 4321 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4322 predicate(UseSSE > 2 && UseAVX == 0); 4323 match(Set dst (AddReductionVI src1 src2)); 4324 effect(TEMP tmp2, TEMP tmp); 4325 format %{ "movdqu $tmp2,$src2\n\t" 4326 "phaddd $tmp2,$tmp2\n\t" 4327 "movd $tmp,$src1\n\t" 4328 "paddd $tmp,$tmp2\n\t" 4329 "movd $dst,$tmp\t! add reduction2I" %} 5386 ins_pipe( pipe_slow ); 5387 %} 5388 5389 // ====================VECTOR ARITHMETIC======================================= 5390 5391 // --------------------------------- ADD -------------------------------------- 5392 5393 // Bytes vector add 5394 instruct vadd4B(vecS dst, vecS src) %{ 5395 predicate(n->as_Vector()->length() == 4); 5396 match(Set dst (AddVB dst src)); 5397 format %{ "paddb $dst,$src\t! add packed4B" %} 5398 ins_encode %{ 5399 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402 %} 5403 5404 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 5405 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5406 match(Set dst (AddVB src1 src2)); 5407 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5408 ins_encode %{ 5409 int vector_len = 0; 5410 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5411 %} 5412 ins_pipe( pipe_slow ); 5413 %} 5414 5415 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ 5416 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5417 match(Set dst (AddVB src (LoadVector mem))); 5418 format %{ "vpaddb $dst,$src,$mem\t! 
add packed4B" %} 5419 ins_encode %{ 5420 int vector_len = 0; 5421 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5422 %} 5423 ins_pipe( pipe_slow ); 5424 %} 5425 5426 instruct vadd8B(vecD dst, vecD src) %{ 5427 predicate(n->as_Vector()->length() == 8); 5428 match(Set dst (AddVB dst src)); 5429 format %{ "paddb $dst,$src\t! add packed8B" %} 5430 ins_encode %{ 5431 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5432 %} 5433 ins_pipe( pipe_slow ); 5434 %} 5435 5436 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 5437 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5438 match(Set dst (AddVB src1 src2)); 5439 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5440 ins_encode %{ 5441 int vector_len = 0; 5442 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5443 %} 5444 ins_pipe( pipe_slow ); 5445 %} 5446 5447 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ 5448 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5449 match(Set dst (AddVB src (LoadVector mem))); 5450 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5451 ins_encode %{ 5452 int vector_len = 0; 5453 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5454 %} 5455 ins_pipe( pipe_slow ); 5456 %} 5457 5458 instruct vadd16B(vecX dst, vecX src) %{ 5459 predicate(n->as_Vector()->length() == 16); 5460 match(Set dst (AddVB dst src)); 5461 format %{ "paddb $dst,$src\t! add packed16B" %} 5462 ins_encode %{ 5463 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5464 %} 5465 ins_pipe( pipe_slow ); 5466 %} 5467 5468 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5469 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5470 match(Set dst (AddVB src1 src2)); 5471 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed16B" %} 5472 ins_encode %{ 5473 int vector_len = 0; 5474 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5475 %} 5476 ins_pipe( pipe_slow ); 5477 %} 5536 predicate(n->as_Vector()->length() == 2); 5537 match(Set dst (AddVS dst src)); 5538 format %{ "paddw $dst,$src\t! add packed2S" %} 5539 ins_encode %{ 5540 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5541 %} 5542 ins_pipe( pipe_slow ); 5543 %} 5544 5545 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5546 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5547 match(Set dst (AddVS src1 src2)); 5548 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5549 ins_encode %{ 5550 int vector_len = 0; 5551 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5552 %} 5553 ins_pipe( pipe_slow ); 5554 %} 5555 5556 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ 5557 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5558 match(Set dst (AddVS src (LoadVector mem))); 5559 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5560 ins_encode %{ 5561 int vector_len = 0; 5562 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5563 %} 5564 ins_pipe( pipe_slow ); 5565 %} 5566 5567 instruct vadd4S(vecD dst, vecD src) %{ 5568 predicate(n->as_Vector()->length() == 4); 5569 match(Set dst (AddVS dst src)); 5570 format %{ "paddw $dst,$src\t! add packed4S" %} 5571 ins_encode %{ 5572 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5573 %} 5574 ins_pipe( pipe_slow ); 5575 %} 5576 5577 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 5578 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5579 match(Set dst (AddVS src1 src2)); 5580 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed4S" %} 5581 ins_encode %{ 5582 int vector_len = 0; 5583 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5584 %} 5585 ins_pipe( pipe_slow ); 5586 %} 5587 5588 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ 5589 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5590 match(Set dst (AddVS src (LoadVector mem))); 5591 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 5592 ins_encode %{ 5593 int vector_len = 0; 5594 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5595 %} 5596 ins_pipe( pipe_slow ); 5597 %} 5598 5599 instruct vadd8S(vecX dst, vecX src) %{ 5600 predicate(n->as_Vector()->length() == 8); 5601 match(Set dst (AddVS dst src)); 5602 format %{ "paddw $dst,$src\t! add packed8S" %} 5603 ins_encode %{ 5604 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5605 %} 5606 ins_pipe( pipe_slow ); 5607 %} 5608 5609 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 5610 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5611 match(Set dst (AddVS src1 src2)); 5612 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5613 ins_encode %{ 5614 int vector_len = 0; 5615 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5616 %} 5617 ins_pipe( pipe_slow ); 5618 %} 5677 predicate(n->as_Vector()->length() == 2); 5678 match(Set dst (AddVI dst src)); 5679 format %{ "paddd $dst,$src\t! add packed2I" %} 5680 ins_encode %{ 5681 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5682 %} 5683 ins_pipe( pipe_slow ); 5684 %} 5685 5686 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 5687 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5688 match(Set dst (AddVI src1 src2)); 5689 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed2I" %} 5690 ins_encode %{ 5691 int vector_len = 0; 5692 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5693 %} 5694 ins_pipe( pipe_slow ); 5695 %} 5696 5697 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 5698 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5699 match(Set dst (AddVI src (LoadVector mem))); 5700 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 5701 ins_encode %{ 5702 int vector_len = 0; 5703 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5704 %} 5705 ins_pipe( pipe_slow ); 5706 %} 5707 5708 instruct vadd4I(vecX dst, vecX src) %{ 5709 predicate(n->as_Vector()->length() == 4); 5710 match(Set dst (AddVI dst src)); 5711 format %{ "paddd $dst,$src\t! add packed4I" %} 5712 ins_encode %{ 5713 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5714 %} 5715 ins_pipe( pipe_slow ); 5716 %} 5717 5718 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 5719 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5720 match(Set dst (AddVI src1 src2)); 5721 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 5722 ins_encode %{ 5723 int vector_len = 0; 5724 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5725 %} 5726 ins_pipe( pipe_slow ); 5727 %} 5863 predicate(n->as_Vector()->length() == 2); 5864 match(Set dst (AddVF dst src)); 5865 format %{ "addps $dst,$src\t! add packed2F" %} 5866 ins_encode %{ 5867 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5868 %} 5869 ins_pipe( pipe_slow ); 5870 %} 5871 5872 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 5873 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5874 match(Set dst (AddVF src1 src2)); 5875 format %{ "vaddps $dst,$src1,$src2\t! 
add packed2F" %} 5876 ins_encode %{ 5877 int vector_len = 0; 5878 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5879 %} 5880 ins_pipe( pipe_slow ); 5881 %} 5882 5883 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 5884 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5885 match(Set dst (AddVF src (LoadVector mem))); 5886 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 5887 ins_encode %{ 5888 int vector_len = 0; 5889 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5890 %} 5891 ins_pipe( pipe_slow ); 5892 %} 5893 5894 instruct vadd4F(vecX dst, vecX src) %{ 5895 predicate(n->as_Vector()->length() == 4); 5896 match(Set dst (AddVF dst src)); 5897 format %{ "addps $dst,$src\t! add packed4F" %} 5898 ins_encode %{ 5899 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5900 %} 5901 ins_pipe( pipe_slow ); 5902 %} 5903 5904 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 5905 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5906 match(Set dst (AddVF src1 src2)); 5907 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 5908 ins_encode %{ 5909 int vector_len = 0; 5910 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5911 %} 5912 ins_pipe( pipe_slow ); 5913 %} 6051 predicate(n->as_Vector()->length() == 4); 6052 match(Set dst (SubVB dst src)); 6053 format %{ "psubb $dst,$src\t! sub packed4B" %} 6054 ins_encode %{ 6055 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6056 %} 6057 ins_pipe( pipe_slow ); 6058 %} 6059 6060 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 6061 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6062 match(Set dst (SubVB src1 src2)); 6063 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed4B" %} 6064 ins_encode %{ 6065 int vector_len = 0; 6066 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6067 %} 6068 ins_pipe( pipe_slow ); 6069 %} 6070 6071 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ 6072 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6073 match(Set dst (SubVB src (LoadVector mem))); 6074 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6075 ins_encode %{ 6076 int vector_len = 0; 6077 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6078 %} 6079 ins_pipe( pipe_slow ); 6080 %} 6081 6082 instruct vsub8B(vecD dst, vecD src) %{ 6083 predicate(n->as_Vector()->length() == 8); 6084 match(Set dst (SubVB dst src)); 6085 format %{ "psubb $dst,$src\t! sub packed8B" %} 6086 ins_encode %{ 6087 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6088 %} 6089 ins_pipe( pipe_slow ); 6090 %} 6091 6092 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 6093 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6094 match(Set dst (SubVB src1 src2)); 6095 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6096 ins_encode %{ 6097 int vector_len = 0; 6098 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6099 %} 6100 ins_pipe( pipe_slow ); 6101 %} 6102 6103 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ 6104 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6105 match(Set dst (SubVB src (LoadVector mem))); 6106 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6107 ins_encode %{ 6108 int vector_len = 0; 6109 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6110 %} 6111 ins_pipe( pipe_slow ); 6112 %} 6113 6114 instruct vsub16B(vecX dst, vecX src) %{ 6115 predicate(n->as_Vector()->length() == 16); 6116 match(Set dst (SubVB dst src)); 6117 format %{ "psubb $dst,$src\t! 
sub packed16B" %} 6118 ins_encode %{ 6119 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6120 %} 6121 ins_pipe( pipe_slow ); 6122 %} 6123 6124 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 6125 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6126 match(Set dst (SubVB src1 src2)); 6127 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6128 ins_encode %{ 6129 int vector_len = 0; 6130 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6131 %} 6132 ins_pipe( pipe_slow ); 6133 %} 6192 predicate(n->as_Vector()->length() == 2); 6193 match(Set dst (SubVS dst src)); 6194 format %{ "psubw $dst,$src\t! sub packed2S" %} 6195 ins_encode %{ 6196 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6197 %} 6198 ins_pipe( pipe_slow ); 6199 %} 6200 6201 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 6202 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6203 match(Set dst (SubVS src1 src2)); 6204 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6205 ins_encode %{ 6206 int vector_len = 0; 6207 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6208 %} 6209 ins_pipe( pipe_slow ); 6210 %} 6211 6212 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ 6213 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6214 match(Set dst (SubVS src (LoadVector mem))); 6215 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6216 ins_encode %{ 6217 int vector_len = 0; 6218 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6219 %} 6220 ins_pipe( pipe_slow ); 6221 %} 6222 6223 instruct vsub4S(vecD dst, vecD src) %{ 6224 predicate(n->as_Vector()->length() == 4); 6225 match(Set dst (SubVS dst src)); 6226 format %{ "psubw $dst,$src\t! 
sub packed4S" %} 6227 ins_encode %{ 6228 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6229 %} 6230 ins_pipe( pipe_slow ); 6231 %} 6232 6233 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 6234 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6235 match(Set dst (SubVS src1 src2)); 6236 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 6237 ins_encode %{ 6238 int vector_len = 0; 6239 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6240 %} 6241 ins_pipe( pipe_slow ); 6242 %} 6243 6244 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ 6245 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6246 match(Set dst (SubVS src (LoadVector mem))); 6247 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6248 ins_encode %{ 6249 int vector_len = 0; 6250 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6251 %} 6252 ins_pipe( pipe_slow ); 6253 %} 6254 6255 instruct vsub8S(vecX dst, vecX src) %{ 6256 predicate(n->as_Vector()->length() == 8); 6257 match(Set dst (SubVS dst src)); 6258 format %{ "psubw $dst,$src\t! sub packed8S" %} 6259 ins_encode %{ 6260 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6261 %} 6262 ins_pipe( pipe_slow ); 6263 %} 6264 6265 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 6266 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6267 match(Set dst (SubVS src1 src2)); 6268 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6269 ins_encode %{ 6270 int vector_len = 0; 6271 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6272 %} 6273 ins_pipe( pipe_slow ); 6274 %} 6333 predicate(n->as_Vector()->length() == 2); 6334 match(Set dst (SubVI dst src)); 6335 format %{ "psubd $dst,$src\t! 
sub packed2I" %} 6336 ins_encode %{ 6337 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6338 %} 6339 ins_pipe( pipe_slow ); 6340 %} 6341 6342 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 6343 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6344 match(Set dst (SubVI src1 src2)); 6345 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 6346 ins_encode %{ 6347 int vector_len = 0; 6348 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6349 %} 6350 ins_pipe( pipe_slow ); 6351 %} 6352 6353 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 6354 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6355 match(Set dst (SubVI src (LoadVector mem))); 6356 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 6357 ins_encode %{ 6358 int vector_len = 0; 6359 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6360 %} 6361 ins_pipe( pipe_slow ); 6362 %} 6363 6364 instruct vsub4I(vecX dst, vecX src) %{ 6365 predicate(n->as_Vector()->length() == 4); 6366 match(Set dst (SubVI dst src)); 6367 format %{ "psubd $dst,$src\t! sub packed4I" %} 6368 ins_encode %{ 6369 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6370 %} 6371 ins_pipe( pipe_slow ); 6372 %} 6373 6374 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 6375 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6376 match(Set dst (SubVI src1 src2)); 6377 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 6378 ins_encode %{ 6379 int vector_len = 0; 6380 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6381 %} 6382 ins_pipe( pipe_slow ); 6383 %} 6519 predicate(n->as_Vector()->length() == 2); 6520 match(Set dst (SubVF dst src)); 6521 format %{ "subps $dst,$src\t! 
sub packed2F" %} 6522 ins_encode %{ 6523 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6524 %} 6525 ins_pipe( pipe_slow ); 6526 %} 6527 6528 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 6529 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6530 match(Set dst (SubVF src1 src2)); 6531 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 6532 ins_encode %{ 6533 int vector_len = 0; 6534 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6535 %} 6536 ins_pipe( pipe_slow ); 6537 %} 6538 6539 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 6540 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6541 match(Set dst (SubVF src (LoadVector mem))); 6542 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 6543 ins_encode %{ 6544 int vector_len = 0; 6545 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6546 %} 6547 ins_pipe( pipe_slow ); 6548 %} 6549 6550 instruct vsub4F(vecX dst, vecX src) %{ 6551 predicate(n->as_Vector()->length() == 4); 6552 match(Set dst (SubVF dst src)); 6553 format %{ "subps $dst,$src\t! sub packed4F" %} 6554 ins_encode %{ 6555 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6556 %} 6557 ins_pipe( pipe_slow ); 6558 %} 6559 6560 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 6561 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6562 match(Set dst (SubVF src1 src2)); 6563 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 6564 ins_encode %{ 6565 int vector_len = 0; 6566 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6567 %} 6568 ins_pipe( pipe_slow ); 6569 %} 6707 predicate(n->as_Vector()->length() == 2); 6708 match(Set dst (MulVS dst src)); 6709 format %{ "pmullw $dst,$src\t! 
mul packed2S" %} 6710 ins_encode %{ 6711 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6712 %} 6713 ins_pipe( pipe_slow ); 6714 %} 6715 6716 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 6717 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6718 match(Set dst (MulVS src1 src2)); 6719 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 6720 ins_encode %{ 6721 int vector_len = 0; 6722 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6723 %} 6724 ins_pipe( pipe_slow ); 6725 %} 6726 6727 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 6728 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6729 match(Set dst (MulVS src (LoadVector mem))); 6730 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 6731 ins_encode %{ 6732 int vector_len = 0; 6733 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6734 %} 6735 ins_pipe( pipe_slow ); 6736 %} 6737 6738 instruct vmul4S(vecD dst, vecD src) %{ 6739 predicate(n->as_Vector()->length() == 4); 6740 match(Set dst (MulVS dst src)); 6741 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6742 ins_encode %{ 6743 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6744 %} 6745 ins_pipe( pipe_slow ); 6746 %} 6747 6748 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6749 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6750 match(Set dst (MulVS src1 src2)); 6751 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6752 ins_encode %{ 6753 int vector_len = 0; 6754 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6755 %} 6756 ins_pipe( pipe_slow ); 6757 %} 6758 6759 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 6760 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6761 match(Set dst (MulVS src (LoadVector mem))); 6762 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 6763 ins_encode %{ 6764 int vector_len = 0; 6765 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6766 %} 6767 ins_pipe( pipe_slow ); 6768 %} 6769 6770 instruct vmul8S(vecX dst, vecX src) %{ 6771 predicate(n->as_Vector()->length() == 8); 6772 match(Set dst (MulVS dst src)); 6773 format %{ "pmullw $dst,$src\t! mul packed8S" %} 6774 ins_encode %{ 6775 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6776 %} 6777 ins_pipe( pipe_slow ); 6778 %} 6779 6780 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6781 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6782 match(Set dst (MulVS src1 src2)); 6783 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 6784 ins_encode %{ 6785 int vector_len = 0; 6786 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6787 %} 6788 ins_pipe( pipe_slow ); 6789 %} 6848 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 6849 match(Set dst (MulVI dst src)); 6850 format %{ "pmulld $dst,$src\t! mul packed2I" %} 6851 ins_encode %{ 6852 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6853 %} 6854 ins_pipe( pipe_slow ); 6855 %} 6856 6857 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 6858 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6859 match(Set dst (MulVI src1 src2)); 6860 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 6861 ins_encode %{ 6862 int vector_len = 0; 6863 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6864 %} 6865 ins_pipe( pipe_slow ); 6866 %} 6867 6868 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 6869 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6870 match(Set dst (MulVI src (LoadVector mem))); 6871 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 6872 ins_encode %{ 6873 int vector_len = 0; 6874 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6875 %} 6876 ins_pipe( pipe_slow ); 6877 %} 6878 6879 instruct vmul4I(vecX dst, vecX src) %{ 6880 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 6881 match(Set dst (MulVI dst src)); 6882 format %{ "pmulld $dst,$src\t! mul packed4I" %} 6883 ins_encode %{ 6884 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6885 %} 6886 ins_pipe( pipe_slow ); 6887 %} 6888 6889 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 6890 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6891 match(Set dst (MulVI src1 src2)); 6892 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 6893 ins_encode %{ 6894 int vector_len = 0; 6895 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6896 %} 6897 ins_pipe( pipe_slow ); 6898 %} 6899 6900 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 6901 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6902 match(Set dst (MulVI src (LoadVector mem))); 6903 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 6904 ins_encode %{ 6905 int vector_len = 0; 6906 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6907 %} 6908 ins_pipe( pipe_slow ); 6909 %} 6910 6911 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 6912 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6913 match(Set dst (MulVL src1 src2)); 6914 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 6915 ins_encode %{ 6916 int vector_len = 0; 6917 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6918 %} 6919 ins_pipe( pipe_slow ); 6920 %} 6921 6922 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 6923 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6924 match(Set dst (MulVL src (LoadVector mem))); 6925 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 6926 ins_encode %{ 6927 int vector_len = 0; 6928 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6929 %} 6930 ins_pipe( pipe_slow ); 6931 %} 6932 6933 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 6934 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6935 match(Set dst (MulVL src1 src2)); 6936 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 6937 ins_encode %{ 6938 int vector_len = 1; 6939 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6940 %} 6941 ins_pipe( pipe_slow ); 6942 %} 6943 6944 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 6945 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6946 match(Set dst (MulVL src (LoadVector mem))); 6947 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 6948 ins_encode %{ 6949 int vector_len = 1; 6950 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6951 %} 6952 ins_pipe( pipe_slow ); 6953 %} 6954 6955 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6956 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6957 match(Set dst (MulVL src1 src2)); 6958 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %} 6959 ins_encode %{ 6960 int vector_len = 2; 6961 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6962 %} 6963 ins_pipe( pipe_slow ); 6964 %} 6965 6966 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 6967 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6968 match(Set dst (MulVL src (LoadVector mem))); 6969 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 6970 ins_encode %{ 6971 int vector_len = 2; 6972 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6973 %} 6974 ins_pipe( pipe_slow ); 6975 %} 6976 6977 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 6978 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6979 match(Set dst (MulVI src1 src2)); 6980 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 6981 ins_encode %{ 6982 int vector_len = 1; 6983 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6984 %} 6985 ins_pipe( pipe_slow ); 6986 %} 6987 6988 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 6989 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6990 match(Set dst (MulVI src (LoadVector mem))); 6991 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 6992 ins_encode %{ 6993 int vector_len = 1; 6994 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6995 %} 6996 ins_pipe( pipe_slow ); 6997 %} 6998 6999 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7000 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7001 match(Set dst (MulVI src1 src2)); 7002 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7003 ins_encode %{ 7004 int vector_len = 2; 7005 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7006 %} 7007 ins_pipe( pipe_slow ); 7008 %} 7009 7010 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7011 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7012 match(Set dst (MulVI src (LoadVector mem))); 7013 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7014 ins_encode %{ 7015 int vector_len = 2; 7016 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7017 %} 7018 ins_pipe( pipe_slow ); 7019 %} 7020 7021 // Floats vector mul 7022 instruct vmul2F(vecD dst, vecD src) %{ 7023 predicate(n->as_Vector()->length() == 2); 7024 match(Set dst (MulVF dst src)); 7025 format %{ "mulps $dst,$src\t! mul packed2F" %} 7026 ins_encode %{ 7027 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7028 %} 7029 ins_pipe( pipe_slow ); 7030 %} 7031 7032 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7033 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7034 match(Set dst (MulVF src1 src2)); 7035 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7036 ins_encode %{ 7037 int vector_len = 0; 7038 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7039 %} 7040 ins_pipe( pipe_slow ); 7041 %} 7042 7043 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7044 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7045 match(Set dst (MulVF src (LoadVector mem))); 7046 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7047 ins_encode %{ 7048 int vector_len = 0; 7049 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7050 %} 7051 ins_pipe( pipe_slow ); 7052 %} 7053 7054 instruct vmul4F(vecX dst, vecX src) %{ 7055 predicate(n->as_Vector()->length() == 4); 7056 match(Set dst (MulVF dst src)); 7057 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 7058 ins_encode %{ 7059 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7060 %} 7061 ins_pipe( pipe_slow ); 7062 %} 7063 7064 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7065 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7066 match(Set dst (MulVF src1 src2)); 7067 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7068 ins_encode %{ 7069 int vector_len = 0; 7070 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7071 %} 7072 ins_pipe( pipe_slow ); 7073 %} 7211 predicate(n->as_Vector()->length() == 2); 7212 match(Set dst (DivVF dst src)); 7213 format %{ "divps $dst,$src\t! div packed2F" %} 7214 ins_encode %{ 7215 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7216 %} 7217 ins_pipe( pipe_slow ); 7218 %} 7219 7220 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 7221 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7222 match(Set dst (DivVF src1 src2)); 7223 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 7224 ins_encode %{ 7225 int vector_len = 0; 7226 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7227 %} 7228 ins_pipe( pipe_slow ); 7229 %} 7230 7231 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 7232 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7233 match(Set dst (DivVF src (LoadVector mem))); 7234 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 7235 ins_encode %{ 7236 int vector_len = 0; 7237 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7238 %} 7239 ins_pipe( pipe_slow ); 7240 %} 7241 7242 instruct vdiv4F(vecX dst, vecX src) %{ 7243 predicate(n->as_Vector()->length() == 4); 7244 match(Set dst (DivVF dst src)); 7245 format %{ "divps $dst,$src\t! 
div packed4F" %} 7246 ins_encode %{ 7247 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7248 %} 7249 ins_pipe( pipe_slow ); 7250 %} 7251 7252 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 7253 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7254 match(Set dst (DivVF src1 src2)); 7255 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 7256 ins_encode %{ 7257 int vector_len = 0; 7258 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7259 %} 7260 ins_pipe( pipe_slow ); 7261 %} 8499 predicate(n->as_Vector()->length_in_bytes() == 4); 8500 match(Set dst (AndV dst src)); 8501 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 8502 ins_encode %{ 8503 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8504 %} 8505 ins_pipe( pipe_slow ); 8506 %} 8507 8508 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 8509 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8510 match(Set dst (AndV src1 src2)); 8511 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 8512 ins_encode %{ 8513 int vector_len = 0; 8514 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8515 %} 8516 ins_pipe( pipe_slow ); 8517 %} 8518 8519 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 8520 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8521 match(Set dst (AndV src (LoadVector mem))); 8522 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 8523 ins_encode %{ 8524 int vector_len = 0; 8525 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8526 %} 8527 ins_pipe( pipe_slow ); 8528 %} 8529 8530 instruct vand8B(vecD dst, vecD src) %{ 8531 predicate(n->as_Vector()->length_in_bytes() == 8); 8532 match(Set dst (AndV dst src)); 8533 format %{ "pand $dst,$src\t! 
and vectors (8 bytes)" %} 8534 ins_encode %{ 8535 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8536 %} 8537 ins_pipe( pipe_slow ); 8538 %} 8539 8540 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 8541 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8542 match(Set dst (AndV src1 src2)); 8543 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 8544 ins_encode %{ 8545 int vector_len = 0; 8546 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8547 %} 8548 ins_pipe( pipe_slow ); 8549 %} 8550 8551 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 8552 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8553 match(Set dst (AndV src (LoadVector mem))); 8554 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 8555 ins_encode %{ 8556 int vector_len = 0; 8557 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8558 %} 8559 ins_pipe( pipe_slow ); 8560 %} 8561 8562 instruct vand16B(vecX dst, vecX src) %{ 8563 predicate(n->as_Vector()->length_in_bytes() == 16); 8564 match(Set dst (AndV dst src)); 8565 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 8566 ins_encode %{ 8567 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8568 %} 8569 ins_pipe( pipe_slow ); 8570 %} 8571 8572 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 8573 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8574 match(Set dst (AndV src1 src2)); 8575 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 8576 ins_encode %{ 8577 int vector_len = 0; 8578 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8579 %} 8580 ins_pipe( pipe_slow ); 8581 %} 8641 predicate(n->as_Vector()->length_in_bytes() == 4); 8642 match(Set dst (OrV dst src)); 8643 format %{ "por $dst,$src\t! 
or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX reg-reg bitwise OR, 4-byte vectors.
instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX reg-mem bitwise OR, 4-byte vectors.
instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand bitwise OR, 8-byte vectors.
instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX reg-reg bitwise OR, 8-byte vectors.
instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX reg-mem bitwise OR, 8-byte vectors.
instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  // FIX: predicate previously tested length_in_bytes() == 4 (copy-paste from
  // vor4B_mem). This is the 8-byte (vecD) variant — every sibling *8B* rule
  // in this file tests == 8 — so with == 4 this rule could never match an
  // 8-byte OrV-from-memory node.
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// SSE two-operand bitwise OR, 16-byte vectors.
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX reg-reg bitwise OR, 16-byte vectors.
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): the paste jumps from original line 8723 to 8783 here; the
// enclosing `instruct vxor4B(vecS dst, vecS src) %{` header falls inside the
// omitted range, so the following body fragment is headerless in this chunk.
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX reg-reg bitwise XOR, 4-byte vectors.
instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX reg-mem bitwise XOR, 4-byte vectors (format string continues on the
// next, unedited line of the paste).
instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (4 bytes)" %} 8807 ins_encode %{ 8808 int vector_len = 0; 8809 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8810 %} 8811 ins_pipe( pipe_slow ); 8812 %} 8813 8814 instruct vxor8B(vecD dst, vecD src) %{ 8815 predicate(n->as_Vector()->length_in_bytes() == 8); 8816 match(Set dst (XorV dst src)); 8817 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 8818 ins_encode %{ 8819 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8820 %} 8821 ins_pipe( pipe_slow ); 8822 %} 8823 8824 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8825 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8826 match(Set dst (XorV src1 src2)); 8827 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 8828 ins_encode %{ 8829 int vector_len = 0; 8830 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8831 %} 8832 ins_pipe( pipe_slow ); 8833 %} 8834 8835 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 8836 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8837 match(Set dst (XorV src (LoadVector mem))); 8838 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 8839 ins_encode %{ 8840 int vector_len = 0; 8841 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8842 %} 8843 ins_pipe( pipe_slow ); 8844 %} 8845 8846 instruct vxor16B(vecX dst, vecX src) %{ 8847 predicate(n->as_Vector()->length_in_bytes() == 16); 8848 match(Set dst (XorV dst src)); 8849 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 8850 ins_encode %{ 8851 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8852 %} 8853 ins_pipe( pipe_slow ); 8854 %} 8855 8856 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 8857 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8858 match(Set dst (XorV src1 src2)); 8859 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 8860 ins_encode %{ 8861 int vector_len = 0; |