ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store a 64-byte (512-bit) vector to memory; uses the EVEX-encoded move.
instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2; // 512-bit vector length encoding
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB src));
  // Fixed typo in the last format line: "replicate632B" -> "replicate32B".
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  // Fixed typo in the last format line: "lreplicate32B" -> "replicate32B".
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
int vector_len = 1; // 256-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl64B_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2; // 512-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
int vector_len = 1; // 256-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32S_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2; // 512-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\n\t"
            "vinserti64x4h $dst k0,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  // Added missing "zero" to the format comment for consistency with siblings.
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  // Dropped stray ')' from the format comment ("replicate4I zero)").
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16I_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3491 int vector_len = 2; 3492 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3493 %} 3494 ins_pipe( fpu_reg_reg ); 3495 %} 3496 3497 // Replicate long (8 byte) scalar to be vector 3498 #ifdef _LP64 3499 instruct Repl2L(vecX dst, rRegL src) %{ 3500 predicate(n->as_Vector()->length() == 2); 3501 match(Set dst (ReplicateL src)); 3502 format %{ "movdq $dst,$src\n\t" 3503 "punpcklqdq $dst,$dst\t! replicate2L" %} 3504 ins_encode %{ 3505 __ movdq($dst$$XMMRegister, $src$$Register); 3506 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3507 %} 3508 ins_pipe( pipe_slow ); 3509 %} 3510 3511 instruct Repl4L(vecY dst, rRegL src) %{ 3512 predicate(n->as_Vector()->length() == 4); 3513 match(Set dst (ReplicateL src)); 3514 format %{ "movdq $dst,$src\n\t" 3515 "punpcklqdq $dst,$dst\n\t" 3516 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3517 ins_encode %{ 3518 __ movdq($dst$$XMMRegister, $src$$Register); 3519 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3520 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3521 %} 3522 ins_pipe( pipe_slow ); 3523 %} 3524 3525 instruct Repl8L(vecZ dst, rRegL src) %{ 3526 predicate(n->as_Vector()->length() == 8); 3527 match(Set dst (ReplicateL src)); 3528 format %{ "movdq $dst,$src\n\t" 3529 "punpcklqdq $dst,$dst\n\t" 3530 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3531 "vinserti64x4h $dst k0,$dst,$dst\t! 
upper replicate4L" %} 3532 ins_encode %{ 3533 __ movdq($dst$$XMMRegister, $src$$Register); 3534 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3535 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3536 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3537 %} 3538 ins_pipe( pipe_slow ); 3539 %} 3540 #else // _LP64 3541 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3542 predicate(n->as_Vector()->length() == 2); 3543 match(Set dst (ReplicateL src)); 3544 effect(TEMP dst, USE src, TEMP tmp); 3545 format %{ "movdl $dst,$src.lo\n\t" 3546 "movdl $tmp,$src.hi\n\t" 3547 "punpckldq $dst,$tmp\n\t" 3548 "punpcklqdq $dst,$dst\t! replicate2L"%} 3549 ins_encode %{ 3550 __ movdl($dst$$XMMRegister, $src$$Register); 3551 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3552 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3553 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3554 %} 3555 ins_pipe( pipe_slow ); 3556 %} 3557 3558 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3559 predicate(n->as_Vector()->length() == 4); 3560 match(Set dst (ReplicateL src)); 3561 effect(TEMP dst, USE src, TEMP tmp); 3562 format %{ "movdl $dst,$src.lo\n\t" 3563 "movdl $tmp,$src.hi\n\t" 3564 "punpckldq $dst,$tmp\n\t" 3565 "punpcklqdq $dst,$dst\n\t" 3566 "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 3567 ins_encode %{ 3568 __ movdl($dst$$XMMRegister, $src$$Register); 3569 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3570 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3571 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3572 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3573 %} 3574 ins_pipe( pipe_slow ); 3575 %} 3576 3577 instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{ 3578 predicate(n->as_Vector()->length() == 4); 3579 match(Set dst (ReplicateL src)); 3580 effect(TEMP dst, USE src, TEMP tmp); 3581 format %{ "movdl $dst,$src.lo\n\t" 3582 "movdl $tmp,$src.hi\n\t" 3583 "punpckldq $dst,$tmp\n\t" 3584 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3585 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3586 ins_encode %{ 3587 __ movdl($dst$$XMMRegister, $src$$Register); 3588 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3589 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3590 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3591 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3592 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3593 %} 3594 ins_pipe( pipe_slow ); 3595 %} 3596 #endif // _LP64 3597 3598 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3599 instruct Repl2L_imm(vecX dst, immL con) %{ 3600 predicate(n->as_Vector()->length() == 2); 3601 match(Set dst (ReplicateL con)); 3602 format %{ "movq $dst,[$constantaddress]\n\t" 3603 "punpcklqdq $dst,$dst\t! 
replicate2L($con)" %} 3604 ins_encode %{ 3605 __ movq($dst$$XMMRegister, $constantaddress($con)); 3606 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3607 %} 3608 ins_pipe( pipe_slow ); 3609 %} 3610 3611 instruct Repl4L_imm(vecY dst, immL con) %{ 3612 predicate(n->as_Vector()->length() == 4); 3613 match(Set dst (ReplicateL con)); 3614 format %{ "movq $dst,[$constantaddress]\n\t" 3615 "punpcklqdq $dst,$dst\n\t" 3616 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3617 ins_encode %{ 3618 __ movq($dst$$XMMRegister, $constantaddress($con)); 3619 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3620 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3621 %} 3622 ins_pipe( pipe_slow ); 3623 %} 3624 3625 instruct Repl8L_imm(vecZ dst, immL con) %{ 3626 predicate(n->as_Vector()->length() == 8); 3627 match(Set dst (ReplicateL con)); 3628 format %{ "movq $dst,[$constantaddress]\n\t" 3629 "punpcklqdq $dst,$dst\n\t" 3630 "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t" 3631 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %} 3632 ins_encode %{ 3633 __ movq($dst$$XMMRegister, $constantaddress($con)); 3634 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3635 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3636 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3637 %} 3638 ins_pipe( pipe_slow ); 3639 %} 3640 3641 // Long could be loaded into xmm register directly from memory. 3642 instruct Repl2L_mem(vecX dst, memory mem) %{ 3643 predicate(n->as_Vector()->length() == 2); 3644 match(Set dst (ReplicateL (LoadL mem))); 3645 format %{ "movq $dst,$mem\n\t" 3646 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3647 ins_encode %{ 3648 __ movq($dst$$XMMRegister, $mem$$Address); 3649 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct Repl4L_mem(vecY dst, memory mem) %{ 3655 predicate(n->as_Vector()->length() == 4); 3656 match(Set dst (ReplicateL (LoadL mem))); 3657 format %{ "movq $dst,$mem\n\t" 3658 "punpcklqdq $dst,$dst\n\t" 3659 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3660 ins_encode %{ 3661 __ movq($dst$$XMMRegister, $mem$$Address); 3662 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3663 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3664 %} 3665 ins_pipe( pipe_slow ); 3666 %} 3667 3668 instruct Repl8L_mem(vecZ dst, memory mem) %{ 3669 predicate(n->as_Vector()->length() == 8); 3670 match(Set dst (ReplicateL (LoadL mem))); 3671 format %{ "movq $dst,$mem\n\t" 3672 "punpcklqdq $dst,$dst\n\t" 3673 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3674 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3675 ins_encode %{ 3676 __ movq($dst$$XMMRegister, $mem$$Address); 3677 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3678 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3679 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3680 %} 3681 ins_pipe( pipe_slow ); 3682 %} 3683 3684 // Replicate long (8 byte) scalar zero to be vector 3685 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3686 predicate(n->as_Vector()->length() == 2); 3687 match(Set dst (ReplicateL zero)); 3688 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3689 ins_encode %{ 3690 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3691 %} 3692 ins_pipe( fpu_reg_reg ); 3693 %} 3694 3695 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3696 predicate(n->as_Vector()->length() == 4); 3697 match(Set dst (ReplicateL zero)); 3698 format %{ "vpxor $dst,$dst,$dst\t! 
replicate4L zero" %} 3699 ins_encode %{ 3700 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3701 int vector_len = 1; 3702 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3703 %} 3704 ins_pipe( fpu_reg_reg ); 3705 %} 3706 3707 instruct Repl8L_zero(vecZ dst, immL0 zero) %{ 3708 predicate(n->as_Vector()->length() == 8); 3709 match(Set dst (ReplicateL zero)); 3710 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 3711 ins_encode %{ 3712 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3713 int vector_len = 2; 3714 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3715 %} 3716 ins_pipe( fpu_reg_reg ); 3717 %} 3718 3719 // Replicate float (4 byte) scalar to be vector 3720 instruct Repl2F(vecD dst, regF src) %{ 3721 predicate(n->as_Vector()->length() == 2); 3722 match(Set dst (ReplicateF src)); 3723 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 3724 ins_encode %{ 3725 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3726 %} 3727 ins_pipe( fpu_reg_reg ); 3728 %} 3729 3730 instruct Repl4F(vecX dst, regF src) %{ 3731 predicate(n->as_Vector()->length() == 4); 3732 match(Set dst (ReplicateF src)); 3733 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 3734 ins_encode %{ 3735 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3736 %} 3737 ins_pipe( pipe_slow ); 3738 %} 3739 3740 instruct Repl8F(vecY dst, regF src) %{ 3741 predicate(n->as_Vector()->length() == 8); 3742 match(Set dst (ReplicateF src)); 3743 format %{ "pshufd $dst,$src,0x00\n\t" 3744 "vinsertf128h $dst,$dst,$dst\t! 
replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Format fixed: the vinsertf64x4h step fills the UPPER 256 bits (it previously
// said "lower" for both steps, unlike the correctly-labelled Repl8D pattern).
instruct Repl16F(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
            "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps  $dst,$dst,$dst\t! 
replicate8F zero" %} 3791 ins_encode %{ 3792 int vector_len = 1; 3793 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3794 %} 3795 ins_pipe( fpu_reg_reg ); 3796 %} 3797 3798 instruct Repl16F_zero(vecZ dst, immF0 zero) %{ 3799 predicate(n->as_Vector()->length() == 16); 3800 match(Set dst (ReplicateF zero)); 3801 format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} 3802 ins_encode %{ 3803 int vector_len = 2; 3804 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3805 %} 3806 ins_pipe( fpu_reg_reg ); 3807 %} 3808 3809 // Replicate double (8 bytes) scalar to be vector 3810 instruct Repl2D(vecX dst, regD src) %{ 3811 predicate(n->as_Vector()->length() == 2); 3812 match(Set dst (ReplicateD src)); 3813 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3814 ins_encode %{ 3815 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 instruct Repl4D(vecY dst, regD src) %{ 3821 predicate(n->as_Vector()->length() == 4); 3822 match(Set dst (ReplicateD src)); 3823 format %{ "pshufd $dst,$src,0x44\n\t" 3824 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3825 ins_encode %{ 3826 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3827 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3828 %} 3829 ins_pipe( pipe_slow ); 3830 %} 3831 3832 instruct Repl8D(vecZ dst, regD src) %{ 3833 predicate(n->as_Vector()->length() == 8); 3834 match(Set dst (ReplicateD src)); 3835 format %{ "pshufd $dst,$src,0x44\n\t" 3836 "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t" 3837 "vinsertf64x4h $dst k0,$dst,$dst\t! 
upper replicate4D" %} 3838 ins_encode %{ 3839 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3840 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3841 __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 // Replicate double (8 byte) scalar zero to be vector 3847 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3848 predicate(n->as_Vector()->length() == 2); 3849 match(Set dst (ReplicateD zero)); 3850 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3851 ins_encode %{ 3852 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3853 %} 3854 ins_pipe( fpu_reg_reg ); 3855 %} 3856 3857 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3858 predicate(n->as_Vector()->length() == 4); 3859 match(Set dst (ReplicateD zero)); 3860 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3861 ins_encode %{ 3862 int vector_len = 1; 3863 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3864 %} 3865 ins_pipe( fpu_reg_reg ); 3866 %} 3867 3868 instruct Repl8D_zero(vecZ dst, immD0 zero) %{ 3869 predicate(n->as_Vector()->length() == 8); 3870 match(Set dst (ReplicateD zero)); 3871 format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 3872 ins_encode %{ 3873 int vector_len = 2; 3874 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3875 %} 3876 ins_pipe( fpu_reg_reg ); 3877 %} 3878 3879 // ====================REDUCTION ARITHMETIC======================================= 3880 3881 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 3882 predicate(UseSSE > 2 && UseAVX == 0); 3883 match(Set dst (AddReductionVI src1 src2)); 3884 effect(TEMP tmp2, TEMP tmp); 3885 format %{ "movdqu $tmp2,$src2\n\t" 3886 "phaddd $tmp2,$tmp2\n\t" 3887 "movd $tmp,$src1\n\t" 3888 "paddd $tmp,$tmp2\n\t" 3889 "movd $dst,$tmp\t! 
add reduction2I" %} 4946 ins_pipe( pipe_slow ); 4947 %} 4948 4949 // ====================VECTOR ARITHMETIC======================================= 4950 4951 // --------------------------------- ADD -------------------------------------- 4952 4953 // Bytes vector add 4954 instruct vadd4B(vecS dst, vecS src) %{ 4955 predicate(n->as_Vector()->length() == 4); 4956 match(Set dst (AddVB dst src)); 4957 format %{ "paddb $dst,$src\t! add packed4B" %} 4958 ins_encode %{ 4959 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 4965 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4966 match(Set dst (AddVB src1 src2)); 4967 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 4968 ins_encode %{ 4969 int vector_len = 0; 4970 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4971 %} 4972 ins_pipe( pipe_slow ); 4973 %} 4974 4975 instruct vadd8B(vecD dst, vecD src) %{ 4976 predicate(n->as_Vector()->length() == 8); 4977 match(Set dst (AddVB dst src)); 4978 format %{ "paddb $dst,$src\t! add packed8B" %} 4979 ins_encode %{ 4980 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 4981 %} 4982 ins_pipe( pipe_slow ); 4983 %} 4984 4985 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 4986 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4987 match(Set dst (AddVB src1 src2)); 4988 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 4989 ins_encode %{ 4990 int vector_len = 0; 4991 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 4992 %} 4993 ins_pipe( pipe_slow ); 4994 %} 4995 4996 instruct vadd16B(vecX dst, vecX src) %{ 4997 predicate(n->as_Vector()->length() == 16); 4998 match(Set dst (AddVB dst src)); 4999 format %{ "paddb $dst,$src\t! 
add packed16B" %} 5000 ins_encode %{ 5001 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5002 %} 5003 ins_pipe( pipe_slow ); 5004 %} 5005 5006 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5007 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5008 match(Set dst (AddVB src1 src2)); 5009 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5010 ins_encode %{ 5011 int vector_len = 0; 5012 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5013 %} 5014 ins_pipe( pipe_slow ); 5015 %} 5074 predicate(n->as_Vector()->length() == 2); 5075 match(Set dst (AddVS dst src)); 5076 format %{ "paddw $dst,$src\t! add packed2S" %} 5077 ins_encode %{ 5078 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5079 %} 5080 ins_pipe( pipe_slow ); 5081 %} 5082 5083 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5084 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5085 match(Set dst (AddVS src1 src2)); 5086 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5087 ins_encode %{ 5088 int vector_len = 0; 5089 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5090 %} 5091 ins_pipe( pipe_slow ); 5092 %} 5093 5094 instruct vadd4S(vecD dst, vecD src) %{ 5095 predicate(n->as_Vector()->length() == 4); 5096 match(Set dst (AddVS dst src)); 5097 format %{ "paddw $dst,$src\t! add packed4S" %} 5098 ins_encode %{ 5099 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5100 %} 5101 ins_pipe( pipe_slow ); 5102 %} 5103 5104 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 5105 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5106 match(Set dst (AddVS src1 src2)); 5107 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed4S" %} 5108 ins_encode %{ 5109 int vector_len = 0; 5110 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5111 %} 5112 ins_pipe( pipe_slow ); 5113 %} 5114 5115 instruct vadd8S(vecX dst, vecX src) %{ 5116 predicate(n->as_Vector()->length() == 8); 5117 match(Set dst (AddVS dst src)); 5118 format %{ "paddw $dst,$src\t! add packed8S" %} 5119 ins_encode %{ 5120 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5121 %} 5122 ins_pipe( pipe_slow ); 5123 %} 5124 5125 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 5126 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5127 match(Set dst (AddVS src1 src2)); 5128 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5129 ins_encode %{ 5130 int vector_len = 0; 5131 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5132 %} 5133 ins_pipe( pipe_slow ); 5134 %} 5193 predicate(n->as_Vector()->length() == 2); 5194 match(Set dst (AddVI dst src)); 5195 format %{ "paddd $dst,$src\t! add packed2I" %} 5196 ins_encode %{ 5197 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5198 %} 5199 ins_pipe( pipe_slow ); 5200 %} 5201 5202 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 5203 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5204 match(Set dst (AddVI src1 src2)); 5205 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 5206 ins_encode %{ 5207 int vector_len = 0; 5208 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5209 %} 5210 ins_pipe( pipe_slow ); 5211 %} 5212 5213 instruct vadd4I(vecX dst, vecX src) %{ 5214 predicate(n->as_Vector()->length() == 4); 5215 match(Set dst (AddVI dst src)); 5216 format %{ "paddd $dst,$src\t! 
add packed4I" %} 5217 ins_encode %{ 5218 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5219 %} 5220 ins_pipe( pipe_slow ); 5221 %} 5222 5223 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 5224 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5225 match(Set dst (AddVI src1 src2)); 5226 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 5227 ins_encode %{ 5228 int vector_len = 0; 5229 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5230 %} 5231 ins_pipe( pipe_slow ); 5232 %} 5368 predicate(n->as_Vector()->length() == 2); 5369 match(Set dst (AddVF dst src)); 5370 format %{ "addps $dst,$src\t! add packed2F" %} 5371 ins_encode %{ 5372 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 5378 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5379 match(Set dst (AddVF src1 src2)); 5380 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 5381 ins_encode %{ 5382 int vector_len = 0; 5383 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5384 %} 5385 ins_pipe( pipe_slow ); 5386 %} 5387 5388 instruct vadd4F(vecX dst, vecX src) %{ 5389 predicate(n->as_Vector()->length() == 4); 5390 match(Set dst (AddVF dst src)); 5391 format %{ "addps $dst,$src\t! add packed4F" %} 5392 ins_encode %{ 5393 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5394 %} 5395 ins_pipe( pipe_slow ); 5396 %} 5397 5398 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 5399 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5400 match(Set dst (AddVF src1 src2)); 5401 format %{ "vaddps $dst,$src1,$src2\t! 
add packed4F" %} 5402 ins_encode %{ 5403 int vector_len = 0; 5404 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5405 %} 5406 ins_pipe( pipe_slow ); 5407 %} 5545 predicate(n->as_Vector()->length() == 4); 5546 match(Set dst (SubVB dst src)); 5547 format %{ "psubb $dst,$src\t! sub packed4B" %} 5548 ins_encode %{ 5549 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5550 %} 5551 ins_pipe( pipe_slow ); 5552 %} 5553 5554 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 5555 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5556 match(Set dst (SubVB src1 src2)); 5557 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 5558 ins_encode %{ 5559 int vector_len = 0; 5560 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5561 %} 5562 ins_pipe( pipe_slow ); 5563 %} 5564 5565 instruct vsub8B(vecD dst, vecD src) %{ 5566 predicate(n->as_Vector()->length() == 8); 5567 match(Set dst (SubVB dst src)); 5568 format %{ "psubb $dst,$src\t! sub packed8B" %} 5569 ins_encode %{ 5570 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5571 %} 5572 ins_pipe( pipe_slow ); 5573 %} 5574 5575 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 5576 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5577 match(Set dst (SubVB src1 src2)); 5578 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 5579 ins_encode %{ 5580 int vector_len = 0; 5581 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5582 %} 5583 ins_pipe( pipe_slow ); 5584 %} 5585 5586 instruct vsub16B(vecX dst, vecX src) %{ 5587 predicate(n->as_Vector()->length() == 16); 5588 match(Set dst (SubVB dst src)); 5589 format %{ "psubb $dst,$src\t! 
sub packed16B" %} 5590 ins_encode %{ 5591 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5592 %} 5593 ins_pipe( pipe_slow ); 5594 %} 5595 5596 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 5597 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5598 match(Set dst (SubVB src1 src2)); 5599 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 5600 ins_encode %{ 5601 int vector_len = 0; 5602 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5603 %} 5604 ins_pipe( pipe_slow ); 5605 %} 5664 predicate(n->as_Vector()->length() == 2); 5665 match(Set dst (SubVS dst src)); 5666 format %{ "psubw $dst,$src\t! sub packed2S" %} 5667 ins_encode %{ 5668 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5669 %} 5670 ins_pipe( pipe_slow ); 5671 %} 5672 5673 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 5674 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5675 match(Set dst (SubVS src1 src2)); 5676 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 5677 ins_encode %{ 5678 int vector_len = 0; 5679 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5680 %} 5681 ins_pipe( pipe_slow ); 5682 %} 5683 5684 instruct vsub4S(vecD dst, vecD src) %{ 5685 predicate(n->as_Vector()->length() == 4); 5686 match(Set dst (SubVS dst src)); 5687 format %{ "psubw $dst,$src\t! sub packed4S" %} 5688 ins_encode %{ 5689 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 5695 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5696 match(Set dst (SubVS src1 src2)); 5697 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 5698 ins_encode %{ 5699 int vector_len = 0; 5700 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 instruct vsub8S(vecX dst, vecX src) %{ 5706 predicate(n->as_Vector()->length() == 8); 5707 match(Set dst (SubVS dst src)); 5708 format %{ "psubw $dst,$src\t! sub packed8S" %} 5709 ins_encode %{ 5710 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5711 %} 5712 ins_pipe( pipe_slow ); 5713 %} 5714 5715 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 5716 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5717 match(Set dst (SubVS src1 src2)); 5718 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 5719 ins_encode %{ 5720 int vector_len = 0; 5721 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5722 %} 5723 ins_pipe( pipe_slow ); 5724 %} 5783 predicate(n->as_Vector()->length() == 2); 5784 match(Set dst (SubVI dst src)); 5785 format %{ "psubd $dst,$src\t! sub packed2I" %} 5786 ins_encode %{ 5787 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 5793 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5794 match(Set dst (SubVI src1 src2)); 5795 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 5796 ins_encode %{ 5797 int vector_len = 0; 5798 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5799 %} 5800 ins_pipe( pipe_slow ); 5801 %} 5802 5803 instruct vsub4I(vecX dst, vecX src) %{ 5804 predicate(n->as_Vector()->length() == 4); 5805 match(Set dst (SubVI dst src)); 5806 format %{ "psubd $dst,$src\t! 
sub packed4I" %} 5807 ins_encode %{ 5808 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5809 %} 5810 ins_pipe( pipe_slow ); 5811 %} 5812 5813 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 5814 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5815 match(Set dst (SubVI src1 src2)); 5816 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 5817 ins_encode %{ 5818 int vector_len = 0; 5819 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5820 %} 5821 ins_pipe( pipe_slow ); 5822 %} 5958 predicate(n->as_Vector()->length() == 2); 5959 match(Set dst (SubVF dst src)); 5960 format %{ "subps $dst,$src\t! sub packed2F" %} 5961 ins_encode %{ 5962 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 5968 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5969 match(Set dst (SubVF src1 src2)); 5970 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 5971 ins_encode %{ 5972 int vector_len = 0; 5973 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5974 %} 5975 ins_pipe( pipe_slow ); 5976 %} 5977 5978 instruct vsub4F(vecX dst, vecX src) %{ 5979 predicate(n->as_Vector()->length() == 4); 5980 match(Set dst (SubVF dst src)); 5981 format %{ "subps $dst,$src\t! sub packed4F" %} 5982 ins_encode %{ 5983 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5984 %} 5985 ins_pipe( pipe_slow ); 5986 %} 5987 5988 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 5989 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5990 match(Set dst (SubVF src1 src2)); 5991 format %{ "vsubps $dst,$src1,$src2\t! 
sub packed4F" %} 5992 ins_encode %{ 5993 int vector_len = 0; 5994 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5995 %} 5996 ins_pipe( pipe_slow ); 5997 %} 6135 predicate(n->as_Vector()->length() == 2); 6136 match(Set dst (MulVS dst src)); 6137 format %{ "pmullw $dst,$src\t! mul packed2S" %} 6138 ins_encode %{ 6139 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6140 %} 6141 ins_pipe( pipe_slow ); 6142 %} 6143 6144 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 6145 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6146 match(Set dst (MulVS src1 src2)); 6147 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 6148 ins_encode %{ 6149 int vector_len = 0; 6150 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6151 %} 6152 ins_pipe( pipe_slow ); 6153 %} 6154 6155 instruct vmul4S(vecD dst, vecD src) %{ 6156 predicate(n->as_Vector()->length() == 4); 6157 match(Set dst (MulVS dst src)); 6158 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6159 ins_encode %{ 6160 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6161 %} 6162 ins_pipe( pipe_slow ); 6163 %} 6164 6165 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6166 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6167 match(Set dst (MulVS src1 src2)); 6168 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6169 ins_encode %{ 6170 int vector_len = 0; 6171 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6172 %} 6173 ins_pipe( pipe_slow ); 6174 %} 6175 6176 instruct vmul8S(vecX dst, vecX src) %{ 6177 predicate(n->as_Vector()->length() == 8); 6178 match(Set dst (MulVS dst src)); 6179 format %{ "pmullw $dst,$src\t! 
mul packed8S" %} 6180 ins_encode %{ 6181 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6182 %} 6183 ins_pipe( pipe_slow ); 6184 %} 6185 6186 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6187 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6188 match(Set dst (MulVS src1 src2)); 6189 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 6190 ins_encode %{ 6191 int vector_len = 0; 6192 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6193 %} 6194 ins_pipe( pipe_slow ); 6195 %} 6254 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 6255 match(Set dst (MulVI dst src)); 6256 format %{ "pmulld $dst,$src\t! mul packed2I" %} 6257 ins_encode %{ 6258 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6259 %} 6260 ins_pipe( pipe_slow ); 6261 %} 6262 6263 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 6264 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6265 match(Set dst (MulVI src1 src2)); 6266 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 6267 ins_encode %{ 6268 int vector_len = 0; 6269 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6270 %} 6271 ins_pipe( pipe_slow ); 6272 %} 6273 6274 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 6275 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6276 match(Set dst (MulVL src1 src2)); 6277 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 6278 ins_encode %{ 6279 int vector_len = 0; 6280 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6281 %} 6282 ins_pipe( pipe_slow ); 6283 %} 6284 6285 instruct vmul4I(vecX dst, vecX src) %{ 6286 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 6287 match(Set dst (MulVI dst src)); 6288 format %{ "pmulld $dst,$src\t! 
mul packed4I" %} 6289 ins_encode %{ 6290 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6291 %} 6292 ins_pipe( pipe_slow ); 6293 %} 6294 6295 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 6296 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6297 match(Set dst (MulVI src1 src2)); 6298 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 6299 ins_encode %{ 6300 int vector_len = 0; 6301 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6302 %} 6303 ins_pipe( pipe_slow ); 6304 %} 6305 6306 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 6307 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6308 match(Set dst (MulVI src (LoadVector mem))); 6309 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 6310 ins_encode %{ 6311 int vector_len = 0; 6312 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6313 %} 6314 ins_pipe( pipe_slow ); 6315 %} 6316 6317 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 6318 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6319 match(Set dst (MulVL src1 src2)); 6320 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 6321 ins_encode %{ 6322 int vector_len = 1; 6323 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6324 %} 6325 ins_pipe( pipe_slow ); 6326 %} 6327 6328 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 6329 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6330 match(Set dst (MulVL src (LoadVector mem))); 6331 format %{ "vpmullq $dst,$src,$mem\t! 
mul packed4L" %} 6332 ins_encode %{ 6333 int vector_len = 1; 6334 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6335 %} 6336 ins_pipe( pipe_slow ); 6337 %} 6338 6339 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 6340 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6341 match(Set dst (MulVI src1 src2)); 6342 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 6343 ins_encode %{ 6344 int vector_len = 1; 6345 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6346 %} 6347 ins_pipe( pipe_slow ); 6348 %} 6349 6350 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6351 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6352 match(Set dst (MulVL src1 src2)); 6353 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 6354 ins_encode %{ 6355 int vector_len = 2; 6356 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6357 %} 6358 ins_pipe( pipe_slow ); 6359 %} 6360 6361 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6362 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6363 match(Set dst (MulVI src1 src2)); 6364 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 6365 ins_encode %{ 6366 int vector_len = 2; 6367 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6368 %} 6369 ins_pipe( pipe_slow ); 6370 %} 6371 6372 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 6373 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6374 match(Set dst (MulVI src (LoadVector mem))); 6375 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed8I" %} 6376 ins_encode %{ 6377 int vector_len = 1; 6378 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6379 %} 6380 ins_pipe( pipe_slow ); 6381 %} 6382 6383 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 6384 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6385 match(Set dst (MulVL src (LoadVector mem))); 6386 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 6387 ins_encode %{ 6388 int vector_len = 2; 6389 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6390 %} 6391 ins_pipe( pipe_slow ); 6392 %} 6393 6394 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 6395 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6396 match(Set dst (MulVI src (LoadVector mem))); 6397 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 6398 ins_encode %{ 6399 int vector_len = 2; 6400 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6401 %} 6402 ins_pipe( pipe_slow ); 6403 %} 6404 6405 // Floats vector mul 6406 instruct vmul2F(vecD dst, vecD src) %{ 6407 predicate(n->as_Vector()->length() == 2); 6408 match(Set dst (MulVF dst src)); 6409 format %{ "mulps $dst,$src\t! mul packed2F" %} 6410 ins_encode %{ 6411 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 6417 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6418 match(Set dst (MulVF src1 src2)); 6419 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 6420 ins_encode %{ 6421 int vector_len = 0; 6422 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 instruct vmul4F(vecX dst, vecX src) %{ 6428 predicate(n->as_Vector()->length() == 4); 6429 match(Set dst (MulVF dst src)); 6430 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 6431 ins_encode %{ 6432 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6433 %} 6434 ins_pipe( pipe_slow ); 6435 %} 6436 6437 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 6438 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6439 match(Set dst (MulVF src1 src2)); 6440 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 6441 ins_encode %{ 6442 int vector_len = 0; 6443 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6444 %} 6445 ins_pipe( pipe_slow ); 6446 %} 6584 predicate(n->as_Vector()->length() == 2); 6585 match(Set dst (DivVF dst src)); 6586 format %{ "divps $dst,$src\t! div packed2F" %} 6587 ins_encode %{ 6588 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6589 %} 6590 ins_pipe( pipe_slow ); 6591 %} 6592 6593 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 6594 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6595 match(Set dst (DivVF src1 src2)); 6596 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 6597 ins_encode %{ 6598 int vector_len = 0; 6599 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6600 %} 6601 ins_pipe( pipe_slow ); 6602 %} 6603 6604 instruct vdiv4F(vecX dst, vecX src) %{ 6605 predicate(n->as_Vector()->length() == 4); 6606 match(Set dst (DivVF dst src)); 6607 format %{ "divps $dst,$src\t! div packed4F" %} 6608 ins_encode %{ 6609 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6610 %} 6611 ins_pipe( pipe_slow ); 6612 %} 6613 6614 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 6615 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6616 match(Set dst (DivVF src1 src2)); 6617 format %{ "vdivps $dst,$src1,$src2\t! 
div packed4F" %} 6618 ins_encode %{ 6619 int vector_len = 0; 6620 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6621 %} 6622 ins_pipe( pipe_slow ); 6623 %} 7861 predicate(n->as_Vector()->length_in_bytes() == 4); 7862 match(Set dst (AndV dst src)); 7863 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 7864 ins_encode %{ 7865 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7866 %} 7867 ins_pipe( pipe_slow ); 7868 %} 7869 7870 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 7871 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 7872 match(Set dst (AndV src1 src2)); 7873 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 7874 ins_encode %{ 7875 int vector_len = 0; 7876 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7877 %} 7878 ins_pipe( pipe_slow ); 7879 %} 7880 7881 instruct vand8B(vecD dst, vecD src) %{ 7882 predicate(n->as_Vector()->length_in_bytes() == 8); 7883 match(Set dst (AndV dst src)); 7884 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 7885 ins_encode %{ 7886 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7887 %} 7888 ins_pipe( pipe_slow ); 7889 %} 7890 7891 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 7892 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 7893 match(Set dst (AndV src1 src2)); 7894 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 7895 ins_encode %{ 7896 int vector_len = 0; 7897 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7898 %} 7899 ins_pipe( pipe_slow ); 7900 %} 7901 7902 instruct vand16B(vecX dst, vecX src) %{ 7903 predicate(n->as_Vector()->length_in_bytes() == 16); 7904 match(Set dst (AndV dst src)); 7905 format %{ "pand $dst,$src\t! 
and vectors (16 bytes)" %} 7906 ins_encode %{ 7907 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7908 %} 7909 ins_pipe( pipe_slow ); 7910 %} 7911 7912 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 7913 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 7914 match(Set dst (AndV src1 src2)); 7915 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 7916 ins_encode %{ 7917 int vector_len = 0; 7918 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7919 %} 7920 ins_pipe( pipe_slow ); 7921 %} 7981 predicate(n->as_Vector()->length_in_bytes() == 4); 7982 match(Set dst (OrV dst src)); 7983 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 7984 ins_encode %{ 7985 __ por($dst$$XMMRegister, $src$$XMMRegister); 7986 %} 7987 ins_pipe( pipe_slow ); 7988 %} 7989 7990 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 7991 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 7992 match(Set dst (OrV src1 src2)); 7993 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 7994 ins_encode %{ 7995 int vector_len = 0; 7996 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7997 %} 7998 ins_pipe( pipe_slow ); 7999 %} 8000 8001 instruct vor8B(vecD dst, vecD src) %{ 8002 predicate(n->as_Vector()->length_in_bytes() == 8); 8003 match(Set dst (OrV dst src)); 8004 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 8005 ins_encode %{ 8006 __ por($dst$$XMMRegister, $src$$XMMRegister); 8007 %} 8008 ins_pipe( pipe_slow ); 8009 %} 8010 8011 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8012 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8013 match(Set dst (OrV src1 src2)); 8014 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %} 8015 ins_encode %{ 8016 int vector_len = 0; 8017 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8018 %} 8019 ins_pipe( pipe_slow ); 8020 %} 8021 8022 instruct vor16B(vecX dst, vecX src) %{ 8023 predicate(n->as_Vector()->length_in_bytes() == 16); 8024 match(Set dst (OrV dst src)); 8025 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 8026 ins_encode %{ 8027 __ por($dst$$XMMRegister, $src$$XMMRegister); 8028 %} 8029 ins_pipe( pipe_slow ); 8030 %} 8031 8032 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 8033 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8034 match(Set dst (OrV src1 src2)); 8035 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 8036 ins_encode %{ 8037 int vector_len = 0; 8038 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8039 %} 8040 ins_pipe( pipe_slow ); 8041 %} 8101 predicate(n->as_Vector()->length_in_bytes() == 4); 8102 match(Set dst (XorV dst src)); 8103 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 8104 ins_encode %{ 8105 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8106 %} 8107 ins_pipe( pipe_slow ); 8108 %} 8109 8110 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 8111 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8112 match(Set dst (XorV src1 src2)); 8113 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 8114 ins_encode %{ 8115 int vector_len = 0; 8116 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8117 %} 8118 ins_pipe( pipe_slow ); 8119 %} 8120 8121 instruct vxor8B(vecD dst, vecD src) %{ 8122 predicate(n->as_Vector()->length_in_bytes() == 8); 8123 match(Set dst (XorV dst src)); 8124 format %{ "pxor $dst,$src\t! 
xor vectors (8 bytes)" %} 8125 ins_encode %{ 8126 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8127 %} 8128 ins_pipe( pipe_slow ); 8129 %} 8130 8131 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8132 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8133 match(Set dst (XorV src1 src2)); 8134 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 8135 ins_encode %{ 8136 int vector_len = 0; 8137 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8138 %} 8139 ins_pipe( pipe_slow ); 8140 %} 8141 8142 instruct vxor16B(vecX dst, vecX src) %{ 8143 predicate(n->as_Vector()->length_in_bytes() == 16); 8144 match(Set dst (XorV dst src)); 8145 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 8146 ins_encode %{ 8147 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8148 %} 8149 ins_pipe( pipe_slow ); 8150 %} 8151 8152 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 8153 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8154 match(Set dst (XorV src1 src2)); 8155 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 8156 ins_encode %{ 8157 int vector_len = 0; | 2877 ins_cost(145); 2878 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2879 ins_encode %{ 2880 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2881 %} 2882 ins_pipe( pipe_slow ); 2883 %} 2884 2885 instruct storeV64(memory mem, vecZ src) %{ 2886 predicate(n->as_StoreVector()->memory_size() == 64); 2887 match(Set mem (StoreVector mem src)); 2888 ins_cost(145); 2889 format %{ "vmovdqu $mem k0,$src\t! 
store vector (64 bytes)" %} 2890 ins_encode %{ 2891 int vector_len = 2; 2892 __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len); 2893 %} 2894 ins_pipe( pipe_slow ); 2895 %} 2896 2897 // ====================LEGACY REPLICATE======================================= 2898 2899 instruct Repl4B_mem(vecS dst, memory mem) %{ 2900 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2901 match(Set dst (ReplicateB (LoadB mem))); 2902 format %{ "punpcklbw $dst,$mem\n\t" 2903 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 2904 ins_encode %{ 2905 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2906 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2907 %} 2908 ins_pipe( pipe_slow ); 2909 %} 2910 2911 instruct Repl8B_mem(vecD dst, memory mem) %{ 2912 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2913 match(Set dst (ReplicateB (LoadB mem))); 2914 format %{ "punpcklbw $dst,$mem\n\t" 2915 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 2916 ins_encode %{ 2917 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2918 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2919 %} 2920 ins_pipe( pipe_slow ); 2921 %} 2922 2923 instruct Repl16B(vecX dst, rRegI src) %{ 2924 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2925 match(Set dst (ReplicateB src)); 2926 format %{ "movd $dst,$src\n\t" 2927 "punpcklbw $dst,$dst\n\t" 2928 "pshuflw $dst,$dst,0x00\n\t" 2929 "punpcklqdq $dst,$dst\t! 
replicate16B" %} 2930 ins_encode %{ 2931 __ movdl($dst$$XMMRegister, $src$$Register); 2932 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2933 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2934 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2935 %} 2936 ins_pipe( pipe_slow ); 2937 %} 2938 2939 instruct Repl16B_mem(vecX dst, memory mem) %{ 2940 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2941 match(Set dst (ReplicateB (LoadB mem))); 2942 format %{ "punpcklbw $dst,$mem\n\t" 2943 "pshuflw $dst,$dst,0x00\n\t" 2944 "punpcklqdq $dst,$dst\t! replicate16B" %} 2945 ins_encode %{ 2946 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2947 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2948 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2949 %} 2950 ins_pipe( pipe_slow ); 2951 %} 2952 2953 instruct Repl32B(vecY dst, rRegI src) %{ 2954 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2955 match(Set dst (ReplicateB src)); 2956 format %{ "movd $dst,$src\n\t" 2957 "punpcklbw $dst,$dst\n\t" 2958 "pshuflw $dst,$dst,0x00\n\t" 2959 "punpcklqdq $dst,$dst\n\t" 2960 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 2961 ins_encode %{ 2962 __ movdl($dst$$XMMRegister, $src$$Register); 2963 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2964 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2965 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2966 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2967 %} 2968 ins_pipe( pipe_slow ); 2969 %} 2970 2971 instruct Repl32B_mem(vecY dst, memory mem) %{ 2972 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2973 match(Set dst (ReplicateB (LoadB mem))); 2974 format %{ "punpcklbw $dst,$mem\n\t" 2975 "pshuflw $dst,$dst,0x00\n\t" 2976 "punpcklqdq $dst,$dst\n\t" 2977 "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %} 2978 ins_encode %{ 2979 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2980 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2981 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2982 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2983 %} 2984 ins_pipe( pipe_slow ); 2985 %} 2986 2987 instruct Repl16B_imm(vecX dst, immI con) %{ 2988 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2989 match(Set dst (ReplicateB con)); 2990 format %{ "movq $dst,[$constantaddress]\n\t" 2991 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 2992 ins_encode %{ 2993 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2994 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2995 %} 2996 ins_pipe( pipe_slow ); 2997 %} 2998 2999 instruct Repl32B_imm(vecY dst, immI con) %{ 3000 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3001 match(Set dst (ReplicateB con)); 3002 format %{ "movq $dst,[$constantaddress]\n\t" 3003 "punpcklqdq $dst,$dst\n\t" 3004 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 3005 ins_encode %{ 3006 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3007 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3008 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3009 %} 3010 ins_pipe( pipe_slow ); 3011 %} 3012 3013 instruct Repl4S(vecD dst, rRegI src) %{ 3014 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3015 match(Set dst (ReplicateS src)); 3016 format %{ "movd $dst,$src\n\t" 3017 "pshuflw $dst,$dst,0x00\t! 
replicate4S" %} 3018 ins_encode %{ 3019 __ movdl($dst$$XMMRegister, $src$$Register); 3020 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3021 %} 3022 ins_pipe( pipe_slow ); 3023 %} 3024 3025 instruct Repl4S_mem(vecD dst, memory mem) %{ 3026 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3027 match(Set dst (ReplicateS (LoadS mem))); 3028 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3029 ins_encode %{ 3030 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3031 %} 3032 ins_pipe( pipe_slow ); 3033 %} 3034 3035 instruct Repl8S(vecX dst, rRegI src) %{ 3036 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3037 match(Set dst (ReplicateS src)); 3038 format %{ "movd $dst,$src\n\t" 3039 "pshuflw $dst,$dst,0x00\n\t" 3040 "punpcklqdq $dst,$dst\t! replicate8S" %} 3041 ins_encode %{ 3042 __ movdl($dst$$XMMRegister, $src$$Register); 3043 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3044 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3045 %} 3046 ins_pipe( pipe_slow ); 3047 %} 3048 3049 instruct Repl8S_mem(vecX dst, memory mem) %{ 3050 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3051 match(Set dst (ReplicateS (LoadS mem))); 3052 format %{ "pshuflw $dst,$mem,0x00\n\t" 3053 "punpcklqdq $dst,$dst\t! replicate8S" %} 3054 ins_encode %{ 3055 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3056 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3057 %} 3058 ins_pipe( pipe_slow ); 3059 %} 3060 3061 instruct Repl8S_imm(vecX dst, immI con) %{ 3062 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3063 match(Set dst (ReplicateS con)); 3064 format %{ "movq $dst,[$constantaddress]\n\t" 3065 "punpcklqdq $dst,$dst\t! 
replicate8S($con)" %} 3066 ins_encode %{ 3067 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3068 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3069 %} 3070 ins_pipe( pipe_slow ); 3071 %} 3072 3073 instruct Repl16S(vecY dst, rRegI src) %{ 3074 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3075 match(Set dst (ReplicateS src)); 3076 format %{ "movd $dst,$src\n\t" 3077 "pshuflw $dst,$dst,0x00\n\t" 3078 "punpcklqdq $dst,$dst\n\t" 3079 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3080 ins_encode %{ 3081 __ movdl($dst$$XMMRegister, $src$$Register); 3082 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3083 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3084 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3085 %} 3086 ins_pipe( pipe_slow ); 3087 %} 3088 3089 instruct Repl16S_mem(vecY dst, memory mem) %{ 3090 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3091 match(Set dst (ReplicateS (LoadS mem))); 3092 format %{ "pshuflw $dst,$mem,0x00\n\t" 3093 "punpcklqdq $dst,$dst\n\t" 3094 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3095 ins_encode %{ 3096 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3097 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3098 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3099 %} 3100 ins_pipe( pipe_slow ); 3101 %} 3102 3103 instruct Repl16S_imm(vecY dst, immI con) %{ 3104 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3105 match(Set dst (ReplicateS con)); 3106 format %{ "movq $dst,[$constantaddress]\n\t" 3107 "punpcklqdq $dst,$dst\n\t" 3108 "vinserti128h $dst,$dst,$dst\t! 
replicate16S($con)" %} 3109 ins_encode %{ 3110 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3111 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3112 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3113 %} 3114 ins_pipe( pipe_slow ); 3115 %} 3116 3117 instruct Repl4I(vecX dst, rRegI src) %{ 3118 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3119 match(Set dst (ReplicateI src)); 3120 format %{ "movd $dst,$src\n\t" 3121 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3122 ins_encode %{ 3123 __ movdl($dst$$XMMRegister, $src$$Register); 3124 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3125 %} 3126 ins_pipe( pipe_slow ); 3127 %} 3128 3129 instruct Repl4I_mem(vecX dst, memory mem) %{ 3130 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3131 match(Set dst (ReplicateI (LoadI mem))); 3132 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3133 ins_encode %{ 3134 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3135 %} 3136 ins_pipe( pipe_slow ); 3137 %} 3138 3139 instruct Repl8I(vecY dst, rRegI src) %{ 3140 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3141 match(Set dst (ReplicateI src)); 3142 format %{ "movd $dst,$src\n\t" 3143 "pshufd $dst,$dst,0x00\n\t" 3144 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3145 ins_encode %{ 3146 __ movdl($dst$$XMMRegister, $src$$Register); 3147 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3148 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3149 %} 3150 ins_pipe( pipe_slow ); 3151 %} 3152 3153 instruct Repl8I_mem(vecY dst, memory mem) %{ 3154 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3155 match(Set dst (ReplicateI (LoadI mem))); 3156 format %{ "pshufd $dst,$mem,0x00\n\t" 3157 "vinserti128h $dst,$dst,$dst\t! 
replicate8I" %} 3158 ins_encode %{ 3159 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3160 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3161 %} 3162 ins_pipe( pipe_slow ); 3163 %} 3164 3165 instruct Repl4I_imm(vecX dst, immI con) %{ 3166 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3167 match(Set dst (ReplicateI con)); 3168 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3169 "punpcklqdq $dst,$dst" %} 3170 ins_encode %{ 3171 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3172 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3173 %} 3174 ins_pipe( pipe_slow ); 3175 %} 3176 3177 instruct Repl8I_imm(vecY dst, immI con) %{ 3178 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3179 match(Set dst (ReplicateI con)); 3180 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3181 "punpcklqdq $dst,$dst\n\t" 3182 "vinserti128h $dst,$dst,$dst" %} 3183 ins_encode %{ 3184 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3185 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3186 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3187 %} 3188 ins_pipe( pipe_slow ); 3189 %} 3190 3191 // Long could be loaded into xmm register directly from memory. 3192 instruct Repl2L_mem(vecX dst, memory mem) %{ 3193 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3194 match(Set dst (ReplicateL (LoadL mem))); 3195 format %{ "movq $dst,$mem\n\t" 3196 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3197 ins_encode %{ 3198 __ movq($dst$$XMMRegister, $mem$$Address); 3199 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3200 %} 3201 ins_pipe( pipe_slow ); 3202 %} 3203 3204 // Replicate long (8 byte) scalar to be vector 3205 #ifdef _LP64 3206 instruct Repl4L(vecY dst, rRegL src) %{ 3207 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3208 match(Set dst (ReplicateL src)); 3209 format %{ "movdq $dst,$src\n\t" 3210 "punpcklqdq $dst,$dst\n\t" 3211 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3212 ins_encode %{ 3213 __ movdq($dst$$XMMRegister, $src$$Register); 3214 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3215 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3216 %} 3217 ins_pipe( pipe_slow ); 3218 %} 3219 #else // _LP64 3220 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3221 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3222 match(Set dst (ReplicateL src)); 3223 effect(TEMP dst, USE src, TEMP tmp); 3224 format %{ "movdl $dst,$src.lo\n\t" 3225 "movdl $tmp,$src.hi\n\t" 3226 "punpckldq $dst,$tmp\n\t" 3227 "punpcklqdq $dst,$dst\n\t" 3228 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3229 ins_encode %{ 3230 __ movdl($dst$$XMMRegister, $src$$Register); 3231 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3232 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3233 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3234 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3235 %} 3236 ins_pipe( pipe_slow ); 3237 %} 3238 #endif // _LP64 3239 3240 instruct Repl4L_imm(vecY dst, immL con) %{ 3241 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3242 match(Set dst (ReplicateL con)); 3243 format %{ "movq $dst,[$constantaddress]\n\t" 3244 "punpcklqdq $dst,$dst\n\t" 3245 "vinserti128h $dst,$dst,$dst\t! 
replicate4L($con)" %} 3246 ins_encode %{ 3247 __ movq($dst$$XMMRegister, $constantaddress($con)); 3248 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3249 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3250 %} 3251 ins_pipe( pipe_slow ); 3252 %} 3253 3254 instruct Repl4L_mem(vecY dst, memory mem) %{ 3255 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3256 match(Set dst (ReplicateL (LoadL mem))); 3257 format %{ "movq $dst,$mem\n\t" 3258 "punpcklqdq $dst,$dst\n\t" 3259 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3260 ins_encode %{ 3261 __ movq($dst$$XMMRegister, $mem$$Address); 3262 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3263 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3264 %} 3265 ins_pipe( pipe_slow ); 3266 %} 3267 3268 instruct Repl2F_mem(vecD dst, memory mem) %{ 3269 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3270 match(Set dst (ReplicateF (LoadF mem))); 3271 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3272 ins_encode %{ 3273 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3274 %} 3275 ins_pipe( pipe_slow ); 3276 %} 3277 3278 instruct Repl4F_mem(vecX dst, memory mem) %{ 3279 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3280 match(Set dst (ReplicateF (LoadF mem))); 3281 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3282 ins_encode %{ 3283 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3284 %} 3285 ins_pipe( pipe_slow ); 3286 %} 3287 3288 instruct Repl8F(vecY dst, regF src) %{ 3289 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3290 match(Set dst (ReplicateF src)); 3291 format %{ "pshufd $dst,$src,0x00\n\t" 3292 "vinsertf128h $dst,$dst,$dst\t! 
replicate8F" %} 3293 ins_encode %{ 3294 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3295 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3296 %} 3297 ins_pipe( pipe_slow ); 3298 %} 3299 3300 instruct Repl8F_mem(vecY dst, memory mem) %{ 3301 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3302 match(Set dst (ReplicateF (LoadF mem))); 3303 format %{ "pshufd $dst,$mem,0x00\n\t" 3304 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3305 ins_encode %{ 3306 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3307 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3308 %} 3309 ins_pipe( pipe_slow ); 3310 %} 3311 3312 instruct Repl2D_mem(vecX dst, memory mem) %{ 3313 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3314 match(Set dst (ReplicateD (LoadD mem))); 3315 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3316 ins_encode %{ 3317 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3318 %} 3319 ins_pipe( pipe_slow ); 3320 %} 3321 3322 instruct Repl4D(vecY dst, regD src) %{ 3323 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3324 match(Set dst (ReplicateD src)); 3325 format %{ "pshufd $dst,$src,0x44\n\t" 3326 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3327 ins_encode %{ 3328 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3329 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3330 %} 3331 ins_pipe( pipe_slow ); 3332 %} 3333 3334 instruct Repl4D_mem(vecY dst, memory mem) %{ 3335 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3336 match(Set dst (ReplicateD (LoadD mem))); 3337 format %{ "pshufd $dst,$mem,0x44\n\t" 3338 "vinsertf128h $dst,$dst,$dst\t! 
replicate4D" %} 3339 ins_encode %{ 3340 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3341 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3342 %} 3343 ins_pipe( pipe_slow ); 3344 %} 3345 3346 // ====================GENERIC REPLICATE========================================== 3347 3348 // Replicate byte scalar to be vector 3349 instruct Repl4B(vecS dst, rRegI src) %{ 3350 predicate(n->as_Vector()->length() == 4); 3351 match(Set dst (ReplicateB src)); 3352 format %{ "movd $dst,$src\n\t" 3353 "punpcklbw $dst,$dst\n\t" 3354 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3355 ins_encode %{ 3356 __ movdl($dst$$XMMRegister, $src$$Register); 3357 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3358 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3359 %} 3360 ins_pipe( pipe_slow ); 3361 %} 3362 3363 instruct Repl8B(vecD dst, rRegI src) %{ 3364 predicate(n->as_Vector()->length() == 8); 3365 match(Set dst (ReplicateB src)); 3366 format %{ "movd $dst,$src\n\t" 3367 "punpcklbw $dst,$dst\n\t" 3368 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3369 ins_encode %{ 3370 __ movdl($dst$$XMMRegister, $src$$Register); 3371 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3372 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3373 %} 3374 ins_pipe( pipe_slow ); 3375 %} 3376 3377 // Replicate byte scalar immediate to be vector by loading from const table. 3378 instruct Repl4B_imm(vecS dst, immI con) %{ 3379 predicate(n->as_Vector()->length() == 4); 3380 match(Set dst (ReplicateB con)); 3381 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3382 ins_encode %{ 3383 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3384 %} 3385 ins_pipe( pipe_slow ); 3386 %} 3387 3388 instruct Repl8B_imm(vecD dst, immI con) %{ 3389 predicate(n->as_Vector()->length() == 8); 3390 match(Set dst (ReplicateB con)); 3391 format %{ "movq $dst,[$constantaddress]\t! 
replicate8B($con)" %} 3392 ins_encode %{ 3393 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3394 %} 3395 ins_pipe( pipe_slow ); 3396 %} 3397 3398 // Replicate byte scalar zero to be vector 3399 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3400 predicate(n->as_Vector()->length() == 4); 3401 match(Set dst (ReplicateB zero)); 3402 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3403 ins_encode %{ 3404 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3405 %} 3406 ins_pipe( fpu_reg_reg ); 3407 %} 3408 3409 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3410 predicate(n->as_Vector()->length() == 8); 3411 match(Set dst (ReplicateB zero)); 3412 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3413 ins_encode %{ 3414 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3415 %} 3416 ins_pipe( fpu_reg_reg ); 3417 %} 3418 3419 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3420 predicate(n->as_Vector()->length() == 16); 3421 match(Set dst (ReplicateB zero)); 3422 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3423 ins_encode %{ 3424 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3425 %} 3426 ins_pipe( fpu_reg_reg ); 3427 %} 3428 3429 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3430 predicate(n->as_Vector()->length() == 32); 3431 match(Set dst (ReplicateB zero)); 3432 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3433 ins_encode %{ 3434 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3435 int vector_len = 1; 3436 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3437 %} 3438 ins_pipe( fpu_reg_reg ); 3439 %} 3440 3441 // Replicate char/short (2 byte) scalar to be vector 3442 instruct Repl2S(vecS dst, rRegI src) %{ 3443 predicate(n->as_Vector()->length() == 2); 3444 match(Set dst (ReplicateS src)); 3445 format %{ "movd $dst,$src\n\t" 3446 "pshuflw $dst,$dst,0x00\t! 
replicate2S" %} 3447 ins_encode %{ 3448 __ movdl($dst$$XMMRegister, $src$$Register); 3449 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3450 %} 3451 ins_pipe( fpu_reg_reg ); 3452 %} 3453 3454 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3455 instruct Repl2S_imm(vecS dst, immI con) %{ 3456 predicate(n->as_Vector()->length() == 2); 3457 match(Set dst (ReplicateS con)); 3458 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3459 ins_encode %{ 3460 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3461 %} 3462 ins_pipe( fpu_reg_reg ); 3463 %} 3464 3465 instruct Repl4S_imm(vecD dst, immI con) %{ 3466 predicate(n->as_Vector()->length() == 4); 3467 match(Set dst (ReplicateS con)); 3468 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3469 ins_encode %{ 3470 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3471 %} 3472 ins_pipe( fpu_reg_reg ); 3473 %} 3474 3475 // Replicate char/short (2 byte) scalar zero to be vector 3476 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3477 predicate(n->as_Vector()->length() == 2); 3478 match(Set dst (ReplicateS zero)); 3479 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3480 ins_encode %{ 3481 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3482 %} 3483 ins_pipe( fpu_reg_reg ); 3484 %} 3485 3486 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3487 predicate(n->as_Vector()->length() == 4); 3488 match(Set dst (ReplicateS zero)); 3489 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3490 ins_encode %{ 3491 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3492 %} 3493 ins_pipe( fpu_reg_reg ); 3494 %} 3495 3496 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3497 predicate(n->as_Vector()->length() == 8); 3498 match(Set dst (ReplicateS zero)); 3499 format %{ "pxor $dst,$dst\t! 
replicate8S zero" %} 3500 ins_encode %{ 3501 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3502 %} 3503 ins_pipe( fpu_reg_reg ); 3504 %} 3505 3506 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3507 predicate(n->as_Vector()->length() == 16); 3508 match(Set dst (ReplicateS zero)); 3509 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3510 ins_encode %{ 3511 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3512 int vector_len = 1; 3513 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3514 %} 3515 ins_pipe( fpu_reg_reg ); 3516 %} 3517 3518 // Replicate integer (4 byte) scalar to be vector 3519 instruct Repl2I(vecD dst, rRegI src) %{ 3520 predicate(n->as_Vector()->length() == 2); 3521 match(Set dst (ReplicateI src)); 3522 format %{ "movd $dst,$src\n\t" 3523 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3524 ins_encode %{ 3525 __ movdl($dst$$XMMRegister, $src$$Register); 3526 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3527 %} 3528 ins_pipe( fpu_reg_reg ); 3529 %} 3530 3531 // Integer could be loaded into xmm register directly from memory. 3532 instruct Repl2I_mem(vecD dst, memory mem) %{ 3533 predicate(n->as_Vector()->length() == 2); 3534 match(Set dst (ReplicateI (LoadI mem))); 3535 format %{ "movd $dst,$mem\n\t" 3536 "pshufd $dst,$dst,0x00\t! replicate2I" %} 3537 ins_encode %{ 3538 __ movdl($dst$$XMMRegister, $mem$$Address); 3539 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3540 %} 3541 ins_pipe( fpu_reg_reg ); 3542 %} 3543 3544 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 3545 instruct Repl2I_imm(vecD dst, immI con) %{ 3546 predicate(n->as_Vector()->length() == 2); 3547 match(Set dst (ReplicateI con)); 3548 format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  // Fixed: stray ')' removed from the format annotation.
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2 (stale original note referred to vxorpd).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2 (stale original note referred to vxorpd).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  // Fixed: format showed "$dst,$dst" but the encode shuffles from $src.
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  // Fixed: format showed "$dst,$dst" but the encode shuffles from $src.
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================EVEX REPLICATE=============================================

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  // NOTE(review): predicate differs from Repl64B_mem_evex, which additionally
  // requires supports_avx512vlbw(); confirm whether AVX512BW should be
  // required here as well for a 512-bit byte broadcast.
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // 512-bit vpxor requires the EVEX encoding (stale original note referred to vxorpd).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // 512-bit vpxor requires the EVEX encoding (stale original note referred to vxorpd).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  // Fixed: annotation said "replicate8I($con)" in this 4I instruct.
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // 512-bit vpxor requires the EVEX encoding (stale original note said "AVX2").
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  // Fixed: format said "vpbroadcastd" but the encode emits evpbroadcastq.
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  // Fixed: format said "vpbroadcastd" but the encode emits evpbroadcastq.
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  // Fixed: format said "vpbroadcastd" but the encode emits evpbroadcastq.
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // 512-bit vpxor requires the EVEX encoding (stale original note referred to vxorpd).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    int vector_len = 2;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    int vector_len = 2;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================

instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  // NOTE(review): original lines 4393-5448 are absent from this chunk; the
  // ins_encode block of this instruct (and the reduction instructs that
  // followed it) was lost.  The dangling closer below is preserved as found.
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): original lines 5541-5598 are absent from this chunk (the wider
// byte-add forms and the header of the next instruct).  The statements below
// are the body of the 2-short add instruct (vadd2S) whose header was lost;
// they are preserved as found.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): original lines 5682-5739 are absent from this chunk (the wider
// short-add forms and the header of the next instruct).  The statements below
// are the body of the 2-int add instruct (vadd2I) whose header was lost; they
// are preserved as found.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): original lines 5791-5925 are absent from this chunk (wider
// int-add forms, long adds, and the header of the next instruct).  The
// statements below are the body of the 2-float add instruct (vadd2F) whose
// header was lost; they are preserved as found.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! 
add packed2F" %} 5939 ins_encode %{ 5940 int vector_len = 0; 5941 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5942 %} 5943 ins_pipe( pipe_slow ); 5944 %} 5945 5946 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 5947 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5948 match(Set dst (AddVF src (LoadVector mem))); 5949 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 5950 ins_encode %{ 5951 int vector_len = 0; 5952 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5953 %} 5954 ins_pipe( pipe_slow ); 5955 %} 5956 5957 instruct vadd4F(vecX dst, vecX src) %{ 5958 predicate(n->as_Vector()->length() == 4); 5959 match(Set dst (AddVF dst src)); 5960 format %{ "addps $dst,$src\t! add packed4F" %} 5961 ins_encode %{ 5962 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 5968 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5969 match(Set dst (AddVF src1 src2)); 5970 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 5971 ins_encode %{ 5972 int vector_len = 0; 5973 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5974 %} 5975 ins_pipe( pipe_slow ); 5976 %} 6114 predicate(n->as_Vector()->length() == 4); 6115 match(Set dst (SubVB dst src)); 6116 format %{ "psubb $dst,$src\t! sub packed4B" %} 6117 ins_encode %{ 6118 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6119 %} 6120 ins_pipe( pipe_slow ); 6121 %} 6122 6123 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 6124 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6125 match(Set dst (SubVB src1 src2)); 6126 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed4B" %} 6127 ins_encode %{ 6128 int vector_len = 0; 6129 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6130 %} 6131 ins_pipe( pipe_slow ); 6132 %} 6133 6134 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ 6135 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6136 match(Set dst (SubVB src (LoadVector mem))); 6137 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6138 ins_encode %{ 6139 int vector_len = 0; 6140 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6141 %} 6142 ins_pipe( pipe_slow ); 6143 %} 6144 6145 instruct vsub8B(vecD dst, vecD src) %{ 6146 predicate(n->as_Vector()->length() == 8); 6147 match(Set dst (SubVB dst src)); 6148 format %{ "psubb $dst,$src\t! sub packed8B" %} 6149 ins_encode %{ 6150 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6151 %} 6152 ins_pipe( pipe_slow ); 6153 %} 6154 6155 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 6156 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6157 match(Set dst (SubVB src1 src2)); 6158 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6159 ins_encode %{ 6160 int vector_len = 0; 6161 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6162 %} 6163 ins_pipe( pipe_slow ); 6164 %} 6165 6166 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ 6167 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6168 match(Set dst (SubVB src (LoadVector mem))); 6169 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6170 ins_encode %{ 6171 int vector_len = 0; 6172 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6173 %} 6174 ins_pipe( pipe_slow ); 6175 %} 6176 6177 instruct vsub16B(vecX dst, vecX src) %{ 6178 predicate(n->as_Vector()->length() == 16); 6179 match(Set dst (SubVB dst src)); 6180 format %{ "psubb $dst,$src\t! 
sub packed16B" %} 6181 ins_encode %{ 6182 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6183 %} 6184 ins_pipe( pipe_slow ); 6185 %} 6186 6187 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 6188 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6189 match(Set dst (SubVB src1 src2)); 6190 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6191 ins_encode %{ 6192 int vector_len = 0; 6193 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6194 %} 6195 ins_pipe( pipe_slow ); 6196 %} 6255 predicate(n->as_Vector()->length() == 2); 6256 match(Set dst (SubVS dst src)); 6257 format %{ "psubw $dst,$src\t! sub packed2S" %} 6258 ins_encode %{ 6259 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6260 %} 6261 ins_pipe( pipe_slow ); 6262 %} 6263 6264 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 6265 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6266 match(Set dst (SubVS src1 src2)); 6267 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6268 ins_encode %{ 6269 int vector_len = 0; 6270 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6271 %} 6272 ins_pipe( pipe_slow ); 6273 %} 6274 6275 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ 6276 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6277 match(Set dst (SubVS src (LoadVector mem))); 6278 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6279 ins_encode %{ 6280 int vector_len = 0; 6281 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6282 %} 6283 ins_pipe( pipe_slow ); 6284 %} 6285 6286 instruct vsub4S(vecD dst, vecD src) %{ 6287 predicate(n->as_Vector()->length() == 4); 6288 match(Set dst (SubVS dst src)); 6289 format %{ "psubw $dst,$src\t! 
sub packed4S" %} 6290 ins_encode %{ 6291 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6292 %} 6293 ins_pipe( pipe_slow ); 6294 %} 6295 6296 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 6297 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6298 match(Set dst (SubVS src1 src2)); 6299 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 6300 ins_encode %{ 6301 int vector_len = 0; 6302 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6303 %} 6304 ins_pipe( pipe_slow ); 6305 %} 6306 6307 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ 6308 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6309 match(Set dst (SubVS src (LoadVector mem))); 6310 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6311 ins_encode %{ 6312 int vector_len = 0; 6313 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6314 %} 6315 ins_pipe( pipe_slow ); 6316 %} 6317 6318 instruct vsub8S(vecX dst, vecX src) %{ 6319 predicate(n->as_Vector()->length() == 8); 6320 match(Set dst (SubVS dst src)); 6321 format %{ "psubw $dst,$src\t! sub packed8S" %} 6322 ins_encode %{ 6323 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6324 %} 6325 ins_pipe( pipe_slow ); 6326 %} 6327 6328 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 6329 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6330 match(Set dst (SubVS src1 src2)); 6331 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6332 ins_encode %{ 6333 int vector_len = 0; 6334 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6335 %} 6336 ins_pipe( pipe_slow ); 6337 %} 6396 predicate(n->as_Vector()->length() == 2); 6397 match(Set dst (SubVI dst src)); 6398 format %{ "psubd $dst,$src\t! 
sub packed2I" %} 6399 ins_encode %{ 6400 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6401 %} 6402 ins_pipe( pipe_slow ); 6403 %} 6404 6405 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 6406 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6407 match(Set dst (SubVI src1 src2)); 6408 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 6409 ins_encode %{ 6410 int vector_len = 0; 6411 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 6417 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6418 match(Set dst (SubVI src (LoadVector mem))); 6419 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 6420 ins_encode %{ 6421 int vector_len = 0; 6422 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 instruct vsub4I(vecX dst, vecX src) %{ 6428 predicate(n->as_Vector()->length() == 4); 6429 match(Set dst (SubVI dst src)); 6430 format %{ "psubd $dst,$src\t! sub packed4I" %} 6431 ins_encode %{ 6432 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6433 %} 6434 ins_pipe( pipe_slow ); 6435 %} 6436 6437 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 6438 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6439 match(Set dst (SubVI src1 src2)); 6440 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 6441 ins_encode %{ 6442 int vector_len = 0; 6443 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6444 %} 6445 ins_pipe( pipe_slow ); 6446 %} 6582 predicate(n->as_Vector()->length() == 2); 6583 match(Set dst (SubVF dst src)); 6584 format %{ "subps $dst,$src\t! 
sub packed2F" %} 6585 ins_encode %{ 6586 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6587 %} 6588 ins_pipe( pipe_slow ); 6589 %} 6590 6591 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 6592 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6593 match(Set dst (SubVF src1 src2)); 6594 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 6595 ins_encode %{ 6596 int vector_len = 0; 6597 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6598 %} 6599 ins_pipe( pipe_slow ); 6600 %} 6601 6602 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 6603 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6604 match(Set dst (SubVF src (LoadVector mem))); 6605 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 6606 ins_encode %{ 6607 int vector_len = 0; 6608 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6609 %} 6610 ins_pipe( pipe_slow ); 6611 %} 6612 6613 instruct vsub4F(vecX dst, vecX src) %{ 6614 predicate(n->as_Vector()->length() == 4); 6615 match(Set dst (SubVF dst src)); 6616 format %{ "subps $dst,$src\t! sub packed4F" %} 6617 ins_encode %{ 6618 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6619 %} 6620 ins_pipe( pipe_slow ); 6621 %} 6622 6623 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 6624 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6625 match(Set dst (SubVF src1 src2)); 6626 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 6627 ins_encode %{ 6628 int vector_len = 0; 6629 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6770 predicate(n->as_Vector()->length() == 2); 6771 match(Set dst (MulVS dst src)); 6772 format %{ "pmullw $dst,$src\t! 
mul packed2S" %} 6773 ins_encode %{ 6774 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6775 %} 6776 ins_pipe( pipe_slow ); 6777 %} 6778 6779 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 6780 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6781 match(Set dst (MulVS src1 src2)); 6782 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 6783 ins_encode %{ 6784 int vector_len = 0; 6785 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6786 %} 6787 ins_pipe( pipe_slow ); 6788 %} 6789 6790 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 6791 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6792 match(Set dst (MulVS src (LoadVector mem))); 6793 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 6794 ins_encode %{ 6795 int vector_len = 0; 6796 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6797 %} 6798 ins_pipe( pipe_slow ); 6799 %} 6800 6801 instruct vmul4S(vecD dst, vecD src) %{ 6802 predicate(n->as_Vector()->length() == 4); 6803 match(Set dst (MulVS dst src)); 6804 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6805 ins_encode %{ 6806 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6807 %} 6808 ins_pipe( pipe_slow ); 6809 %} 6810 6811 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6812 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6813 match(Set dst (MulVS src1 src2)); 6814 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6815 ins_encode %{ 6816 int vector_len = 0; 6817 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6818 %} 6819 ins_pipe( pipe_slow ); 6820 %} 6821 6822 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 6823 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6824 match(Set dst (MulVS src (LoadVector mem))); 6825 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 6826 ins_encode %{ 6827 int vector_len = 0; 6828 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6829 %} 6830 ins_pipe( pipe_slow ); 6831 %} 6832 6833 instruct vmul8S(vecX dst, vecX src) %{ 6834 predicate(n->as_Vector()->length() == 8); 6835 match(Set dst (MulVS dst src)); 6836 format %{ "pmullw $dst,$src\t! mul packed8S" %} 6837 ins_encode %{ 6838 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6839 %} 6840 ins_pipe( pipe_slow ); 6841 %} 6842 6843 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6844 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6845 match(Set dst (MulVS src1 src2)); 6846 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 6847 ins_encode %{ 6848 int vector_len = 0; 6849 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6850 %} 6851 ins_pipe( pipe_slow ); 6852 %} 6911 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 6912 match(Set dst (MulVI dst src)); 6913 format %{ "pmulld $dst,$src\t! mul packed2I" %} 6914 ins_encode %{ 6915 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6916 %} 6917 ins_pipe( pipe_slow ); 6918 %} 6919 6920 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 6921 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6922 match(Set dst (MulVI src1 src2)); 6923 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 6924 ins_encode %{ 6925 int vector_len = 0; 6926 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6927 %} 6928 ins_pipe( pipe_slow ); 6929 %} 6930 6931 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 6932 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6933 match(Set dst (MulVI src (LoadVector mem))); 6934 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 6935 ins_encode %{ 6936 int vector_len = 0; 6937 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6938 %} 6939 ins_pipe( pipe_slow ); 6940 %} 6941 6942 instruct vmul4I(vecX dst, vecX src) %{ 6943 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 6944 match(Set dst (MulVI dst src)); 6945 format %{ "pmulld $dst,$src\t! mul packed4I" %} 6946 ins_encode %{ 6947 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6948 %} 6949 ins_pipe( pipe_slow ); 6950 %} 6951 6952 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 6953 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6954 match(Set dst (MulVI src1 src2)); 6955 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 6956 ins_encode %{ 6957 int vector_len = 0; 6958 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6959 %} 6960 ins_pipe( pipe_slow ); 6961 %} 6962 6963 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 6964 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6965 match(Set dst (MulVI src (LoadVector mem))); 6966 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 6967 ins_encode %{ 6968 int vector_len = 0; 6969 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6970 %} 6971 ins_pipe( pipe_slow ); 6972 %} 6973 6974 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 6975 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6976 match(Set dst (MulVL src1 src2)); 6977 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 6978 ins_encode %{ 6979 int vector_len = 0; 6980 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6981 %} 6982 ins_pipe( pipe_slow ); 6983 %} 6984 6985 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 6986 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6987 match(Set dst (MulVL src (LoadVector mem))); 6988 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 6989 ins_encode %{ 6990 int vector_len = 0; 6991 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6992 %} 6993 ins_pipe( pipe_slow ); 6994 %} 6995 6996 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 6997 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6998 match(Set dst (MulVL src1 src2)); 6999 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 7000 ins_encode %{ 7001 int vector_len = 1; 7002 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7003 %} 7004 ins_pipe( pipe_slow ); 7005 %} 7006 7007 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7008 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7009 match(Set dst (MulVL src (LoadVector mem))); 7010 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7011 ins_encode %{ 7012 int vector_len = 1; 7013 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7019 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7020 match(Set dst (MulVL src1 src2)); 7021 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %} 7022 ins_encode %{ 7023 int vector_len = 2; 7024 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7030 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7031 match(Set dst (MulVL src (LoadVector mem))); 7032 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7033 ins_encode %{ 7034 int vector_len = 2; 7035 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7036 %} 7037 ins_pipe( pipe_slow ); 7038 %} 7039 7040 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7041 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7042 match(Set dst (MulVI src1 src2)); 7043 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7044 ins_encode %{ 7045 int vector_len = 1; 7046 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7047 %} 7048 ins_pipe( pipe_slow ); 7049 %} 7050 7051 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7052 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7053 match(Set dst (MulVI src (LoadVector mem))); 7054 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7055 ins_encode %{ 7056 int vector_len = 1; 7057 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7058 %} 7059 ins_pipe( pipe_slow ); 7060 %} 7061 7062 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7063 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7064 match(Set dst (MulVI src1 src2)); 7065 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7066 ins_encode %{ 7067 int vector_len = 2; 7068 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7069 %} 7070 ins_pipe( pipe_slow ); 7071 %} 7072 7073 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7074 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7075 match(Set dst (MulVI src (LoadVector mem))); 7076 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7077 ins_encode %{ 7078 int vector_len = 2; 7079 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7080 %} 7081 ins_pipe( pipe_slow ); 7082 %} 7083 7084 // Floats vector mul 7085 instruct vmul2F(vecD dst, vecD src) %{ 7086 predicate(n->as_Vector()->length() == 2); 7087 match(Set dst (MulVF dst src)); 7088 format %{ "mulps $dst,$src\t! mul packed2F" %} 7089 ins_encode %{ 7090 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7091 %} 7092 ins_pipe( pipe_slow ); 7093 %} 7094 7095 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7096 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7097 match(Set dst (MulVF src1 src2)); 7098 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7099 ins_encode %{ 7100 int vector_len = 0; 7101 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7102 %} 7103 ins_pipe( pipe_slow ); 7104 %} 7105 7106 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7107 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7108 match(Set dst (MulVF src (LoadVector mem))); 7109 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7110 ins_encode %{ 7111 int vector_len = 0; 7112 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7113 %} 7114 ins_pipe( pipe_slow ); 7115 %} 7116 7117 instruct vmul4F(vecX dst, vecX src) %{ 7118 predicate(n->as_Vector()->length() == 4); 7119 match(Set dst (MulVF dst src)); 7120 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 7121 ins_encode %{ 7122 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7123 %} 7124 ins_pipe( pipe_slow ); 7125 %} 7126 7127 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7128 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7129 match(Set dst (MulVF src1 src2)); 7130 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7131 ins_encode %{ 7132 int vector_len = 0; 7133 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7134 %} 7135 ins_pipe( pipe_slow ); 7136 %} 7274 predicate(n->as_Vector()->length() == 2); 7275 match(Set dst (DivVF dst src)); 7276 format %{ "divps $dst,$src\t! div packed2F" %} 7277 ins_encode %{ 7278 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7279 %} 7280 ins_pipe( pipe_slow ); 7281 %} 7282 7283 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 7284 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7285 match(Set dst (DivVF src1 src2)); 7286 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 7287 ins_encode %{ 7288 int vector_len = 0; 7289 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7290 %} 7291 ins_pipe( pipe_slow ); 7292 %} 7293 7294 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 7295 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7296 match(Set dst (DivVF src (LoadVector mem))); 7297 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 7298 ins_encode %{ 7299 int vector_len = 0; 7300 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7301 %} 7302 ins_pipe( pipe_slow ); 7303 %} 7304 7305 instruct vdiv4F(vecX dst, vecX src) %{ 7306 predicate(n->as_Vector()->length() == 4); 7307 match(Set dst (DivVF dst src)); 7308 format %{ "divps $dst,$src\t! 
div packed4F" %} 7309 ins_encode %{ 7310 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7311 %} 7312 ins_pipe( pipe_slow ); 7313 %} 7314 7315 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 7316 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7317 match(Set dst (DivVF src1 src2)); 7318 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 7319 ins_encode %{ 7320 int vector_len = 0; 7321 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7322 %} 7323 ins_pipe( pipe_slow ); 7324 %} 8562 predicate(n->as_Vector()->length_in_bytes() == 4); 8563 match(Set dst (AndV dst src)); 8564 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 8565 ins_encode %{ 8566 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8567 %} 8568 ins_pipe( pipe_slow ); 8569 %} 8570 8571 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 8572 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8573 match(Set dst (AndV src1 src2)); 8574 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 8575 ins_encode %{ 8576 int vector_len = 0; 8577 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8578 %} 8579 ins_pipe( pipe_slow ); 8580 %} 8581 8582 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 8583 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8584 match(Set dst (AndV src (LoadVector mem))); 8585 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 8586 ins_encode %{ 8587 int vector_len = 0; 8588 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8589 %} 8590 ins_pipe( pipe_slow ); 8591 %} 8592 8593 instruct vand8B(vecD dst, vecD src) %{ 8594 predicate(n->as_Vector()->length_in_bytes() == 8); 8595 match(Set dst (AndV dst src)); 8596 format %{ "pand $dst,$src\t! 
and vectors (8 bytes)" %} 8597 ins_encode %{ 8598 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8599 %} 8600 ins_pipe( pipe_slow ); 8601 %} 8602 8603 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 8604 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8605 match(Set dst (AndV src1 src2)); 8606 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 8607 ins_encode %{ 8608 int vector_len = 0; 8609 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8610 %} 8611 ins_pipe( pipe_slow ); 8612 %} 8613 8614 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 8615 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8616 match(Set dst (AndV src (LoadVector mem))); 8617 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 8618 ins_encode %{ 8619 int vector_len = 0; 8620 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8621 %} 8622 ins_pipe( pipe_slow ); 8623 %} 8624 8625 instruct vand16B(vecX dst, vecX src) %{ 8626 predicate(n->as_Vector()->length_in_bytes() == 16); 8627 match(Set dst (AndV dst src)); 8628 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 8629 ins_encode %{ 8630 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8631 %} 8632 ins_pipe( pipe_slow ); 8633 %} 8634 8635 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 8636 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8637 match(Set dst (AndV src1 src2)); 8638 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 8639 ins_encode %{ 8640 int vector_len = 0; 8641 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8642 %} 8643 ins_pipe( pipe_slow ); 8644 %} 8704 predicate(n->as_Vector()->length_in_bytes() == 4); 8705 match(Set dst (OrV dst src)); 8706 format %{ "por $dst,$src\t! 
or vectors (4 bytes)" %} 8707 ins_encode %{ 8708 __ por($dst$$XMMRegister, $src$$XMMRegister); 8709 %} 8710 ins_pipe( pipe_slow ); 8711 %} 8712 8713 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 8714 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8715 match(Set dst (OrV src1 src2)); 8716 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 8717 ins_encode %{ 8718 int vector_len = 0; 8719 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8720 %} 8721 ins_pipe( pipe_slow ); 8722 %} 8723 8724 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 8725 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8726 match(Set dst (OrV src (LoadVector mem))); 8727 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 8728 ins_encode %{ 8729 int vector_len = 0; 8730 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8731 %} 8732 ins_pipe( pipe_slow ); 8733 %} 8734 8735 instruct vor8B(vecD dst, vecD src) %{ 8736 predicate(n->as_Vector()->length_in_bytes() == 8); 8737 match(Set dst (OrV dst src)); 8738 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 8739 ins_encode %{ 8740 __ por($dst$$XMMRegister, $src$$XMMRegister); 8741 %} 8742 ins_pipe( pipe_slow ); 8743 %} 8744 8745 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8746 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8747 match(Set dst (OrV src1 src2)); 8748 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} 8749 ins_encode %{ 8750 int vector_len = 0; 8751 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8752 %} 8753 ins_pipe( pipe_slow ); 8754 %} 8755 8756 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ 8757 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8758 match(Set dst (OrV src (LoadVector mem))); 8759 format %{ "vpor $dst,$src,$mem\t! 
or vectors (8 bytes)" %} 8760 ins_encode %{ 8761 int vector_len = 0; 8762 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8763 %} 8764 ins_pipe( pipe_slow ); 8765 %} 8766 8767 instruct vor16B(vecX dst, vecX src) %{ 8768 predicate(n->as_Vector()->length_in_bytes() == 16); 8769 match(Set dst (OrV dst src)); 8770 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 8771 ins_encode %{ 8772 __ por($dst$$XMMRegister, $src$$XMMRegister); 8773 %} 8774 ins_pipe( pipe_slow ); 8775 %} 8776 8777 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 8778 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8779 match(Set dst (OrV src1 src2)); 8780 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 8781 ins_encode %{ 8782 int vector_len = 0; 8783 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8784 %} 8785 ins_pipe( pipe_slow ); 8786 %} 8846 predicate(n->as_Vector()->length_in_bytes() == 4); 8847 match(Set dst (XorV dst src)); 8848 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 8849 ins_encode %{ 8850 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8851 %} 8852 ins_pipe( pipe_slow ); 8853 %} 8854 8855 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 8856 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8857 match(Set dst (XorV src1 src2)); 8858 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 8859 ins_encode %{ 8860 int vector_len = 0; 8861 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8862 %} 8863 ins_pipe( pipe_slow ); 8864 %} 8865 8866 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 8867 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8868 match(Set dst (XorV src (LoadVector mem))); 8869 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (4 bytes)" %} 8870 ins_encode %{ 8871 int vector_len = 0; 8872 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8873 %} 8874 ins_pipe( pipe_slow ); 8875 %} 8876 8877 instruct vxor8B(vecD dst, vecD src) %{ 8878 predicate(n->as_Vector()->length_in_bytes() == 8); 8879 match(Set dst (XorV dst src)); 8880 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 8881 ins_encode %{ 8882 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8883 %} 8884 ins_pipe( pipe_slow ); 8885 %} 8886 8887 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8888 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8889 match(Set dst (XorV src1 src2)); 8890 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 8891 ins_encode %{ 8892 int vector_len = 0; 8893 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8894 %} 8895 ins_pipe( pipe_slow ); 8896 %} 8897 8898 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 8899 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8900 match(Set dst (XorV src (LoadVector mem))); 8901 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 8902 ins_encode %{ 8903 int vector_len = 0; 8904 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8905 %} 8906 ins_pipe( pipe_slow ); 8907 %} 8908 8909 instruct vxor16B(vecX dst, vecX src) %{ 8910 predicate(n->as_Vector()->length_in_bytes() == 16); 8911 match(Set dst (XorV dst src)); 8912 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 8913 ins_encode %{ 8914 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 8915 %} 8916 ins_pipe( pipe_slow ); 8917 %} 8918 8919 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 8920 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8921 match(Set dst (XorV src1 src2)); 8922 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 8923 ins_encode %{ 8924 int vector_len = 0; |