--- old/src/cpu/x86/vm/x86.ad	2016-03-01 16:13:30.668282244 -0800
+++ new/src/cpu/x86/vm/x86.ad	2016-03-01 16:13:30.564280497 -0800
@@ -3179,13 +3179,13 @@
             "punpcklbw $dst,$dst\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3196,12 +3196,12 @@
   format %{ "punpcklbw $dst,$mem\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate32B" %}
   ins_encode %{
     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3223,11 +3223,11 @@
   match(Set dst (ReplicateB con));
   format %{ "movq $dst,[$constantaddress]\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! lreplicate32B($con)" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3298,12 +3298,12 @@
   format %{ "movd $dst,$src\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3313,11 +3313,11 @@
   match(Set dst (ReplicateS (LoadS mem)));
   format %{ "pshuflw $dst,$mem,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S" %}
   ins_encode %{
     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3327,11 +3327,11 @@
   match(Set dst (ReplicateS con));
   format %{ "movq $dst,[$constantaddress]\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate16S($con)" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3363,11 +3363,11 @@
   match(Set dst (ReplicateI src));
   format %{ "movd $dst,$src\n\t"
             "pshufd $dst,$dst,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3376,10 +3376,10 @@
   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI (LoadI mem)));
   format %{ "pshufd $dst,$mem,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate8I" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3401,11 +3401,11 @@
   match(Set dst (ReplicateI con));
   format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst" %}
+            "vinserti128 $dst,$dst,$dst,0x1" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3430,11 +3430,11 @@
   match(Set dst (ReplicateL src));
   format %{ "movdq $dst,$src\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %}
   ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3447,13 +3447,13 @@
             "movdl $tmp,$src.hi\n\t"
             "punpckldq $dst,$tmp\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3464,11 +3464,11 @@
   match(Set dst (ReplicateL con));
   format %{ "movq $dst,[$constantaddress]\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L($con)" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress($con));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3478,11 +3478,11 @@
   match(Set dst (ReplicateL (LoadL mem)));
   format %{ "movq $dst,$mem\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+            "vinserti128 $dst,$dst,$dst,0x1\t! replicate4L" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $mem$$Address);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3511,10 +3511,10 @@
   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF src));
   format %{ "pshufd $dst,$src,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+            "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3523,10 +3523,10 @@
   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF (LoadF mem)));
   format %{ "pshufd $dst,$mem,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+            "vinsertf128 $dst,$dst,$dst,0x1\t! replicate8F" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3576,10 +3576,10 @@
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateD src));
   format %{ "pshufd $dst,$src,0x44\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+            "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3588,10 +3588,10 @@
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateD (LoadD mem)));
   format %{ "pshufd $dst,$mem,0x44\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+            "vinsertf128 $dst,$dst,$dst,0x1\t! replicate4D" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4791,7 +4791,7 @@
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd $tmp,$src2,$src2\n\t"
             "vphaddd $tmp,$tmp,$tmp2\n\t"
-            "vextracti128 $tmp2,$tmp\n\t"
+            "vextracti128 $tmp2,$tmp,0x1\n\t"
             "vpaddd $tmp,$tmp,$tmp2\n\t"
             "movd $tmp2,$src1\n\t"
             "vpaddd $tmp2,$tmp2,$tmp\n\t"
@@ -4800,7 +4800,7 @@
     int vector_len = 1;
     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
-    __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vextracti128($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
     __ movdl($tmp2$$XMMRegister, $src1$$Register);
     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4813,7 +4813,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128 $tmp,$src2\n\t"
+  format %{ "vextracti128 $tmp,$src2,0x1\n\t"
             "vpaddd $tmp,$tmp,$src2\n\t"
             "pshufd $tmp2,$tmp,0xE\n\t"
             "vpaddd $tmp,$tmp,$tmp2\n\t"
@@ -4824,7 +4824,7 @@
             "movd $dst,$tmp2\t! add reduction8I" %}
   ins_encode %{
     int vector_len = 0;
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -4843,7 +4843,7 @@
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
   format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
             "vpaddd $tmp3,$tmp3,$src2\n\t"
-            "vextracti128 $tmp,$tmp3\n\t"
+            "vextracti128 $tmp,$tmp3,0x1\n\t"
             "vpaddd $tmp,$tmp,$tmp3\n\t"
             "pshufd $tmp2,$tmp,0xE\n\t"
             "vpaddd $tmp,$tmp,$tmp2\n\t"
@@ -4853,9 +4853,9 @@
             "vpaddd $tmp2,$tmp,$tmp2\n\t"
             "movd $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -4892,7 +4892,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128 $tmp,$src2\n\t"
+  format %{ "vextracti128 $tmp,$src2,0x1\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
@@ -4900,7 +4900,7 @@
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
   ins_encode %{
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4917,7 +4917,7 @@
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
-           "vextracti128 $tmp,$tmp2\n\t"
+           "vextracti128 $tmp,$tmp2,0x1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
@@ -4925,9 +4925,9 @@
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5026,7 +5026,7 @@
             "vaddss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$src2,0x03\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
-            "vextractf128 $tmp2,$src2\n\t"
+            "vextractf128 $tmp2,$src2,0x1\n\t"
             "vaddss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
@@ -5042,7 +5042,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5065,7 +5065,7 @@
             "vaddss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$src2,0x03\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4 $tmp2,$src2,0x1\n\t"
             "vaddss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
@@ -5073,7 +5073,7 @@
             "vaddss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$tmp2,0x03\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4 $tmp2,$src2,0x2\n\t"
             "vaddss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
@@ -5081,7 +5081,7 @@
             "vaddss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$tmp2,0x03\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4 $tmp2,$src2,0x3\n\t"
             "vaddss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vaddss $dst,$dst,$tmp\n\t"
@@ -5097,7 +5097,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5105,7 +5105,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5113,7 +5113,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5162,7 +5162,7 @@
   format %{ "vaddsd $dst,$dst,$src2\n\t"
             "pshufd $tmp,$src2,0xE\n\t"
             "vaddsd $dst,$dst,$tmp\n\t"
-            "vextractf32x4h $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4 $tmp2,$src2,0x1\n\t"
             "vaddsd $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0xE\n\t"
             "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
@@ -5170,7 +5170,7 @@
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5185,15 +5185,15 @@
   format %{ "vaddsd $dst,$dst,$src2\n\t"
             "pshufd $tmp,$src2,0xE\n\t"
             "vaddsd $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4 $tmp2,$src2,0x1\n\t"
             "vaddsd $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0xE\n\t"
             "vaddsd $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4 $tmp2,$src2,0x2\n\t"
             "vaddsd $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0xE\n\t"
             "vaddsd $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4 $tmp2,$src2,0x3\n\t"
             "vaddsd $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0xE\n\t"
             "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
@@ -5201,15 +5201,15 @@
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5307,7 +5307,7 @@
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128 $tmp,$src2\n\t"
+  format %{ "vextracti128 $tmp,$src2,0x1\n\t"
             "vpmulld $tmp,$tmp,$src2\n\t"
             "pshufd $tmp2,$tmp,0xE\n\t"
             "vpmulld $tmp,$tmp,$tmp2\n\t"
@@ -5318,7 +5318,7 @@
             "movd $dst,$tmp2\t! mul reduction8I" %}
   ins_encode %{
     int vector_len = 0;
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -5337,7 +5337,7 @@
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
   format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
             "vpmulld $tmp3,$tmp3,$src2\n\t"
-            "vextracti128 $tmp,$tmp3\n\t"
+            "vextracti128 $tmp,$tmp3,0x1\n\t"
             "vpmulld $tmp,$tmp,$src2\n\t"
             "pshufd $tmp2,$tmp,0xE\n\t"
             "vpmulld $tmp,$tmp,$tmp2\n\t"
@@ -5347,9 +5347,9 @@
             "vpmulld $tmp2,$tmp,$tmp2\n\t"
             "movd $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x1);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -5386,7 +5386,7 @@
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128 $tmp,$src2\n\t"
+  format %{ "vextracti128 $tmp,$src2,0x1\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
@@ -5394,7 +5394,7 @@
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
   ins_encode %{
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5411,7 +5411,7 @@
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
-           "vextracti128 $tmp,$tmp2\n\t"
+           "vextracti128 $tmp,$tmp2,0x1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
@@ -5419,9 +5419,9 @@
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+    __ vextracti128($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5520,7 +5520,7 @@
             "vmulss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$src2,0x03\n\t"
             "vmulss $dst,$dst,$tmp\n\t"
-            "vextractf128 $tmp2,$src2\n\t"
+            "vextractf128 $tmp2,$src2,0x1\n\t"
             "vmulss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vmulss $dst,$dst,$tmp\n\t"
@@ -5536,7 +5536,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5559,7 +5559,7 @@
             "vmulss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$src2,0x03\n\t"
             "vmulss $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4 $tmp2,$src2,0x1\n\t"
             "vmulss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vmulss $dst,$dst,$tmp\n\t"
@@ -5567,7 +5567,7 @@
             "vmulss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4 $tmp2,$src2,0x2\n\t"
             "vmulss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vmulss $dst,$dst,$tmp\n\t"
@@ -5575,7 +5575,7 @@
             "vmulss $dst,$dst,$tmp\n\t"
             "pshufd $tmp,$tmp2,0x03\n\t"
             "vmulss $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4 $tmp2,$src2,0x3\n\t"
             "vmulss $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0x01\n\t"
             "vmulss $dst,$dst,$tmp\n\t"
@@ -5591,7 +5591,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5599,7 +5599,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5607,7 +5607,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5656,7 +5656,7 @@
   format %{ "vmulsd $dst,$dst,$src2\n\t"
             "pshufd $tmp,$src2,0xE\n\t"
             "vmulsd $dst,$dst,$tmp\n\t"
-            "vextractf128 $tmp2,$src2\n\t"
+            "vextractf128 $tmp2,$src2,0x1\n\t"
             "vmulsd $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0xE\n\t"
             "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
@@ -5664,7 +5664,7 @@
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5679,15 +5679,15 @@
   format %{ "vmulsd $dst,$dst,$src2\n\t"
             "pshufd $tmp,$src2,0xE\n\t"
             "vmulsd $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4 $tmp2,$src2,0x1\n\t"
             "vmulsd $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$src2,0xE\n\t"
             "vmulsd $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
-            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4 $tmp2,$src2,0x3\n\t"
             "vmulsd $dst,$dst,$tmp2\n\t"
             "pshufd $tmp,$tmp2,0xE\n\t"
             "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
@@ -5695,15 +5695,15 @@
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);