--- old/src/cpu/sparc/vm/sparc.ad 2009-04-22 20:27:20.371139337 +0200 +++ new/src/cpu/sparc/vm/sparc.ad 2009-04-22 20:27:20.241481159 +0200 @@ -1712,6 +1712,23 @@ return as_DoubleFloatRegister(register_encoding); } +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + if (!UsePopCountInstruction) + return false; + break; + } + + return true; // Per default match rules are supported. +} + int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk } @@ -9184,6 +9201,148 @@ ins_pipe(long_memory_op); %} + +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountLeadingZerosI src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // x |= (x >> 16); + // return (WORDBITS - popc(x)); + format %{ "SRL $src,1,$tmp\t! 
count leading zeros (int)\n\t" + "OR $src,$tmp,$dst\n\t" + "SRL $dst,2,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,4,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,8,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,16,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "POPC $dst,$dst\n\t" + "MOV 32,$tmp\n\t" + "SUB $tmp,$dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + __ srl(Rsrc, 1, Rtmp); + __ or3(Rsrc, Rtmp, Rdst); + __ srl(Rdst, 2, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 4, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 8, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 16, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ popc(Rdst, Rdst); + __ mov(BitsPerInt, Rtmp); + __ sub(Rtmp, Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(iRegI dst, iRegL src, iRegL tmp, iRegL tmp2, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountLeadingZerosL src)); + effect(TEMP tmp, TEMP tmp2, KILL cr); + + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // x |= (x >> 16); + // x |= (x >> 32); + // return (WORDBITS - popc(x)); + format %{ "SRLX $src,1,$tmp2\t! 
count leading zeros (long)\n\t" + "OR $src,$tmp2,$tmp\n\t" + "SRLX $tmp,2,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,4,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,8,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "POPC $tmp,$dst\n\t" + "MOV 64,$tmp2\n\t" + "SUB $tmp2,$dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ srlx(Rsrc, 1, Rtmp2); + __ or3(Rsrc, Rtmp2, Rtmp); + __ srlx(Rtmp, 2, Rtmp2); + __ or3(Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 4, Rtmp2); + __ or3(Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 8, Rtmp2); + __ or3(Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3(Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3(Rtmp, Rtmp2, Rtmp); + __ popc(Rtmp, Rdst); + __ mov(BitsPerLong, Rtmp2); + __ sub(Rtmp2, Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountTrailingZerosI src)); + effect(TEMP tmp, KILL cr); + + // return popc(~x & (x - 1)); + format %{ "SUB $src, 1, $tmp\t! count trailing zeros (int)\n\t" + "ANDN $tmp, $src, $tmp\n\t" + "SRL $tmp, R_G0, $tmp\n\t" + "POPC $tmp, $dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + __ sub(Rsrc, 1, Rtmp); + __ andn(Rtmp, Rsrc, Rtmp); + __ srl(Rtmp, G0, Rtmp); + __ popc(Rtmp, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountTrailingZerosL src)); + effect(TEMP tmp, KILL cr); + + // return popc(~x & (x - 1)); + format %{ "SUB $src, 1, $tmp\t! 
count trailing zeros (long)\n\t" + "ANDN $tmp, $src, $tmp\n\t" + "POPC $tmp, $dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + __ sub(Rsrc, 1, Rtmp); + __ andn(Rtmp, Rsrc, Rtmp); + __ popc(Rtmp, Rdst); + %} + ins_pipe(ialu_reg); +%} + + //---------- Population Count Instructions ------------------------------------- instruct popCountI(iRegI dst, iRegI src) %{ --- old/src/cpu/x86/vm/assembler_x86.cpp 2009-04-22 20:27:21.246995468 +0200 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2009-04-22 20:27:21.117176826 +0200 @@ -952,6 +952,21 @@ emit_operand(dst, src); } +void Assembler::bsfl(Register dst, Register src) { + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBC); + emit_byte(0xC0 | encode); +} + +void Assembler::bsrl(Register dst, Register src) { + assert(UseCountLeadingZerosInstruction == false, "encoding is treated as LZCNT"); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + void Assembler::bswapl(Register reg) { // bswap int encode = prefix_and_encode(reg->encoding()); emit_byte(0x0F); @@ -1438,6 +1453,15 @@ } } +void Assembler::lzcntl(Register dst, Register src) { + assert(UseCountLeadingZerosInstruction, "encoding is treated as BSR"); + emit_byte(0xF3); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + // Emit mfence instruction void Assembler::mfence() { NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) @@ -3688,6 +3712,21 @@ emit_arith(0x23, 0xC0, dst, src); } +void Assembler::bsfq(Register dst, Register src) { + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBC); + emit_byte(0xC0 | encode); +} + +void Assembler::bsrq(Register dst, Register src) { + assert(UseCountLeadingZerosInstruction == 
false, "encoding is treated as LZCNT"); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + void Assembler::bswapq(Register reg) { int encode = prefixq_and_encode(reg->encoding()); emit_byte(0x0F); @@ -3941,6 +3980,15 @@ emit_data((int)imm32, rspec, narrow_oop_operand); } +void Assembler::lzcntq(Register dst, Register src) { + assert(UseCountLeadingZerosInstruction, "encoding is treated as BSR"); + emit_byte(0xF3); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); --- old/src/cpu/x86/vm/assembler_x86.hpp 2009-04-22 20:27:22.097490923 +0200 +++ new/src/cpu/x86/vm/assembler_x86.hpp 2009-04-22 20:27:21.970862006 +0200 @@ -757,6 +757,18 @@ void andpd(XMMRegister dst, Address src); void andpd(XMMRegister dst, XMMRegister src); + void bsfl(Register dst, Register src); + +#ifdef _LP64 + void bsfq(Register dst, Register src); +#endif + + void bsrl(Register dst, Register src); + +#ifdef _LP64 + void bsrq(Register dst, Register src); +#endif + void bswapl(Register reg); void bswapq(Register reg); @@ -1061,6 +1073,12 @@ void lock(); + void lzcntl(Register dst, Register src); + +#ifdef _LP64 + void lzcntq(Register dst, Register src); +#endif + enum Membar_mask_bits { StoreStore = 1 << 3, LoadStore = 1 << 2, --- old/src/cpu/x86/vm/vm_version_x86.cpp 2009-04-22 20:27:22.809470494 +0200 +++ new/src/cpu/x86/vm/vm_version_x86.cpp 2009-04-22 20:27:22.684438250 +0200 @@ -284,7 +284,7 @@ } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping 
%d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -301,6 +301,7 @@ (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow() ? ", 3dnow" : ""), (supports_3dnow2() ? ", 3dnowext" : ""), + (supports_lzcnt() ? ", lzcnt": ""), (supports_sse4a() ? ", sse4a": ""), (supports_ht() ? ", ht": "")); _features_str = strdup(buf); @@ -364,6 +365,13 @@ UseXmmI2D = false; } } + + // Use count leading zeros instruction if available. + if (supports_lzcnt()) { + if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { + UseCountLeadingZerosInstruction = true; + } + } } if( is_intel() ) { // Intel cpus specific settings --- old/src/cpu/x86/vm/vm_version_x86.hpp 2009-04-22 20:27:23.480326927 +0200 +++ new/src/cpu/x86/vm/vm_version_x86.hpp 2009-04-22 20:27:23.352411123 +0200 @@ -120,7 +120,7 @@ uint32_t LahfSahf : 1, CmpLegacy : 1, : 4, - abm : 1, + lzcnt : 1, sse4a : 1, misalignsse : 1, prefetchw : 1, @@ -182,7 +182,8 @@ CPU_SSE4A = (1 << 10), CPU_SSE4_1 = (1 << 11), CPU_SSE4_2 = (1 << 12), - CPU_POPCNT = (1 << 13) + CPU_POPCNT = (1 << 13), + CPU_LZCNT = (1 << 14) } cpuFeatureFlags; // cpuid information block. 
All info derived from executing cpuid with @@ -277,8 +278,6 @@ if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) result |= CPU_MMX; - if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) - result |= CPU_3DNOW; if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) result |= CPU_SSE; if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) @@ -287,14 +286,23 @@ result |= CPU_SSE3; if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) result |= CPU_SSSE3; - if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) - result |= CPU_SSE4A; if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) result |= CPU_SSE4_1; if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) result |= CPU_SSE4_2; if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) result |= CPU_POPCNT; + + // AMD features. + if (is_amd()) { + if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) + result |= CPU_3DNOW; + if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) + result |= CPU_LZCNT; + if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) + result |= CPU_SSE4A; + } + return result; } @@ -391,6 +399,7 @@ static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } + static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } static bool supports_compare_and_exchange() { return true; } --- old/src/cpu/x86/vm/x86_32.ad 2009-04-22 20:27:24.165389448 +0200 +++ new/src/cpu/x86/vm/x86_32.ad 2009-04-22 20:27:24.033513041 +0200 @@ -1281,6 +1281,13 @@ } +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + return true; // Per default match rules are supported. 
+} + int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk } @@ -6644,6 +6651,156 @@ %} +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(KILL cr); + + format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ lzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eRegI tmp, eFlagsReg cr) %{ + predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + format %{ "BSR $tmp, $src\t# count leading zeros (int)\n\t" + "JNZ skip\n\t" + "MOV $tmp, -1\n" + "skip:\n\t" + "MOV $dst, 31\n\t" + "SUB $dst, $tmp" %} + ins_encode %{ + Label skip; + __ bsrl($tmp$$Register, $src$$Register); + __ jccb(Assembler::notZero, skip); + __ movl($tmp$$Register, -1); + __ bind(skip); + __ movl($dst$$Register, BitsPerInt - 1); + __ subl($dst$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" + "JNC done\n\t" + "LZCNT $tmp, $src.lo\n\t" + "ADD $dst, $tmp\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Label done; + __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); + __ jccb(Assembler::carryClear, done); + __ lzcntl(Rtmp, Rsrc); + __ addl(Rdst, Rtmp); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ + 
predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + format %{ "BSR $tmp, $src.hi\t# count leading zeros (long)\n\t" + "JZ msw_is_zero\n\t" + "MOV $dst, 31\n\t" + "SUB $dst, $tmp\n\t" + "JMP done\n" + "msw_is_zero:\n\t" + "BSR $tmp, $src.lo\n\t" + "JNZ lsw_is_not_zero\n\t" + "MOV $tmp, -1\n" + "lsw_is_not_zero:\n\t" + "MOV $dst, 63\n\t" + "SUB $dst, $tmp\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Label msw_is_zero; + Label lsw_is_not_zero; + Label done; + __ bsrl(Rtmp, HIGH_FROM_LOW(Rsrc)); + __ jccb(Assembler::zero, msw_is_zero); + __ movl(Rdst, BitsPerLong - 1 - BitsPerInt); // Subtract 32 bit positions for LSW + __ subl(Rdst, Rtmp); + __ jmpb(done); + __ bind(msw_is_zero); + __ bsrl(Rtmp, Rsrc); + __ jccb(Assembler::notZero, lsw_is_not_zero); + __ movl(Rtmp, -1); + __ bind(lsw_is_not_zero); + __ movl(Rdst, BitsPerLong - 1); + __ subl(Rdst, Rtmp); + __ bind(done); + + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" + "JNZ done\n\t" + "MOV $dst, 32\n" + "done:" %} + ins_encode %{ + Label done; + __ bsfl($dst$$Register, $src$$Register); + __ jccb(Assembler::notZero, done); + __ movl($dst$$Register, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ + match(Set dst (CountTrailingZerosL src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" + "JNZ done\n\t" + "BSF $tmp, $src.hi\n\t" + "MOV $dst, 32\n\t" + "CMOVZ $tmp, $dst\n\t" + "ADD $dst, $tmp\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + 
Register Rtmp = $tmp$$Register; + Label done; + __ bsfl(Rdst, Rsrc); + __ jccb(Assembler::notZero, done); + __ bsfl(Rtmp, HIGH_FROM_LOW(Rsrc)); + __ movl(Rdst, BitsPerInt); + __ cmovl(Assembler::zero, Rtmp, Rdst); + __ addl(Rdst, Rtmp); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + + //---------- Population Count Instructions ------------------------------------- instruct popCountI(eRegI dst, eRegI src) %{ --- old/src/cpu/x86/vm/x86_64.ad 2009-04-22 20:27:25.140600087 +0200 +++ new/src/cpu/x86/vm/x86_64.ad 2009-04-22 20:27:25.009565672 +0200 @@ -1980,6 +1980,13 @@ } +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + return true; // Per default match rules are supported. +} + int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk @@ -7656,6 +7663,115 @@ %} +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(KILL cr); + + format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ lzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr) %{ + predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + format %{ "bsrl $tmp, $src\t# count leading zeros (int)\n\t" + "jnz skip\n\t" + "movl $tmp, -1\n" + "skip:\n\t" + "movl $dst, 31\n\t" + "subl $dst, $tmp" %} + ins_encode %{ + Label skip; + __ bsrl($tmp$$Register, $src$$Register); + __ jccb(Assembler::notZero, skip); + __ movl($tmp$$Register, -1); + __ bind(skip); + __ movl($dst$$Register, BitsPerInt - 1); + __ subl($dst$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(rRegI dst, rRegL 
src, rFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(KILL cr); + + format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ lzcntq($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rRegI tmp, rFlagsReg cr) %{ + predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + format %{ "bsrq $tmp, $src\t# count leading zeros (long)\n\t" + "jnz skip\n\t" + "movl $tmp, -1\n" + "skip:\n\t" + "movl $dst, 63\n\t" + "subl $dst, $tmp" %} + ins_encode %{ + Label skip; + __ bsrq($tmp$$Register, $src$$Register); + __ jccb(Assembler::notZero, skip); + __ movl($tmp$$Register, -1); + __ bind(skip); + __ movl($dst$$Register, BitsPerLong - 1); + __ subl($dst$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t" + "jnz done\n\t" + "movl $dst, 32\n" + "done:" %} + ins_encode %{ + Label done; + __ bsfl($dst$$Register, $src$$Register); + __ jccb(Assembler::notZero, done); + __ movl($dst$$Register, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ + match(Set dst (CountTrailingZerosL src)); + effect(KILL cr); + + format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t" + "jnz done\n\t" + "movl $dst, 64\n" + "done:" %} + ins_encode %{ + Label done; + __ bsfq($dst$$Register, $src$$Register); + __ jccb(Assembler::notZero, done); + __ movl($dst$$Register, BitsPerLong); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + + //---------- Population Count Instructions ------------------------------------- instruct popCountI(rRegI dst, rRegI src) %{ --- 
old/src/share/vm/classfile/vmSymbols.hpp 2009-04-22 20:27:26.080322707 +0200 +++ new/src/share/vm/classfile/vmSymbols.hpp 2009-04-22 20:27:25.951931922 +0200 @@ -313,6 +313,8 @@ template(value_name, "value") \ template(frontCacheEnabled_name, "frontCacheEnabled") \ template(stringCacheEnabled_name, "stringCacheEnabled") \ + template(numberOfLeadingZeros_name, "numberOfLeadingZeros") \ + template(numberOfTrailingZeros_name, "numberOfTrailingZeros") \ template(bitCount_name, "bitCount") \ template(profile_name, "profile") \ template(equals_name, "equals") \ @@ -559,6 +561,12 @@ do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \ do_name( longBitsToDouble_name, "longBitsToDouble") \ \ + do_intrinsic(_numberOfLeadingZeros_i, java_lang_Integer, numberOfLeadingZeros_name,int_int_signature, F_S) \ + do_intrinsic(_numberOfLeadingZeros_l, java_lang_Long, numberOfLeadingZeros_name,long_int_signature, F_S) \ + \ + do_intrinsic(_numberOfTrailingZeros_i, java_lang_Integer, numberOfTrailingZeros_name,int_int_signature, F_S) \ + do_intrinsic(_numberOfTrailingZeros_l, java_lang_Long, numberOfTrailingZeros_name,long_int_signature, F_S) \ + \ do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \ do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \ \ --- old/src/share/vm/opto/classes.hpp 2009-04-22 20:27:26.782848866 +0200 +++ new/src/share/vm/opto/classes.hpp 2009-04-22 20:27:26.653387358 +0200 @@ -104,6 +104,10 @@ macro(CosD) macro(CountedLoop) macro(CountedLoopEnd) +macro(CountLeadingZerosI) +macro(CountLeadingZerosL) +macro(CountTrailingZerosI) +macro(CountTrailingZerosL) macro(CreateEx) macro(DecodeN) macro(DivD) --- old/src/share/vm/opto/connode.hpp 2009-04-22 20:27:27.446190874 +0200 +++ new/src/share/vm/opto/connode.hpp 2009-04-22 20:27:27.318765345 +0200 @@ -636,6 +636,46 @@ virtual const Type* Value( PhaseTransform *phase ) const; }; +//---------- 
CountLeadingZerosINode -------------------------------------------- +// Count leading zeros (0-bit count starting from MSB) of an integer. +class CountLeadingZerosINode : public Node { +public: + CountLeadingZerosINode(Node* in1) : Node(0, in1) {} + virtual int Opcode() const; + const Type* bottom_type() const { return TypeInt::INT; } + virtual uint ideal_reg() const { return Op_RegI; } +}; + +//---------- CountLeadingZerosLNode -------------------------------------------- +// Count leading zeros (0-bit count starting from MSB) of a long. +class CountLeadingZerosLNode : public Node { +public: + CountLeadingZerosLNode(Node* in1) : Node(0, in1) {} + virtual int Opcode() const; + const Type* bottom_type() const { return TypeInt::INT; } + virtual uint ideal_reg() const { return Op_RegI; } +}; + +//---------- CountTrailingZerosINode ------------------------------------------- +// Count trailing zeros (0-bit count starting from LSB) of an integer. +class CountTrailingZerosINode : public Node { +public: + CountTrailingZerosINode(Node* in1) : Node(0, in1) {} + virtual int Opcode() const; + const Type* bottom_type() const { return TypeInt::INT; } + virtual uint ideal_reg() const { return Op_RegI; } +}; + +//---------- CountTrailingZerosLNode ------------------------------------------- +// Count trailing zeros (0-bit count starting from LSB) of a long. +class CountTrailingZerosLNode : public Node { +public: + CountTrailingZerosLNode(Node* in1) : Node(0, in1) {} + virtual int Opcode() const; + const Type* bottom_type() const { return TypeInt::INT; } + virtual uint ideal_reg() const { return Op_RegI; } +}; + //---------- PopCountINode ----------------------------------------------------- // Population count (bit count) of an integer. 
class PopCountINode : public Node { --- old/src/share/vm/opto/library_call.cpp 2009-04-22 20:27:28.122823684 +0200 +++ new/src/share/vm/opto/library_call.cpp 2009-04-22 20:27:27.990370718 +0200 @@ -222,6 +222,8 @@ bool inline_unsafe_CAS(BasicType type); bool inline_unsafe_ordered_store(BasicType type); bool inline_fp_conversions(vmIntrinsics::ID id); + bool inline_numberOfLeadingZeros(vmIntrinsics::ID id); + bool inline_numberOfTrailingZeros(vmIntrinsics::ID id); bool inline_bitCount(vmIntrinsics::ID id); bool inline_reverseBytes(vmIntrinsics::ID id); }; @@ -630,6 +632,14 @@ case vmIntrinsics::_longBitsToDouble: return inline_fp_conversions(intrinsic_id()); + case vmIntrinsics::_numberOfLeadingZeros_i: + case vmIntrinsics::_numberOfLeadingZeros_l: + return inline_numberOfLeadingZeros(intrinsic_id()); + + case vmIntrinsics::_numberOfTrailingZeros_i: + case vmIntrinsics::_numberOfTrailingZeros_l: + return inline_numberOfTrailingZeros(intrinsic_id()); + case vmIntrinsics::_bitCount_i: case vmIntrinsics::_bitCount_l: return inline_bitCount(intrinsic_id()); @@ -1844,6 +1854,48 @@ } } +//-------------------inline_numberOfLeadingZeros_int/long----------------------- +// inline int Integer.numberOfLeadingZeros(int) +// inline int Long.numberOfLeadingZeros(long) +bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) { + assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros"); + if (id == vmIntrinsics::_numberOfLeadingZeros_i && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false; + if (id == vmIntrinsics::_numberOfLeadingZeros_l && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false; + _sp += arg_size(); // restore stack pointer + switch (id) { + case vmIntrinsics::_numberOfLeadingZeros_i: + push(_gvn.transform(new (C, 2) CountLeadingZerosINode(pop()))); + break; + case vmIntrinsics::_numberOfLeadingZeros_l: + push(_gvn.transform(new (C, 2) 
CountLeadingZerosLNode(pop_pair()))); + break; + default: + ShouldNotReachHere(); + } + return true; +} + +//-------------------inline_numberOfTrailingZeros_int/long---------------------- +// inline int Integer.numberOfTrailingZeros(int) +// inline int Long.numberOfTrailingZeros(long) +bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) { + assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros"); + if (id == vmIntrinsics::_numberOfTrailingZeros_i && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false; + if (id == vmIntrinsics::_numberOfTrailingZeros_l && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false; + _sp += arg_size(); // restore stack pointer + switch (id) { + case vmIntrinsics::_numberOfTrailingZeros_i: + push(_gvn.transform(new (C, 2) CountTrailingZerosINode(pop()))); + break; + case vmIntrinsics::_numberOfTrailingZeros_l: + push(_gvn.transform(new (C, 2) CountTrailingZerosLNode(pop_pair()))); + break; + default: + ShouldNotReachHere(); + } + return true; +} + //----------------------------inline_bitCount_int/long----------------------- // inline int Integer.bitCount(int) // inline int Long.bitCount(long) --- old/src/share/vm/opto/matcher.hpp 2009-04-22 20:27:28.912245848 +0200 +++ new/src/share/vm/opto/matcher.hpp 2009-04-22 20:27:28.785667108 +0200 @@ -220,10 +220,16 @@ OptoRegPair *_parm_regs; // Array of machine registers per argument RegMask *_calling_convention_mask; // Array of RegMasks per argument - // Does matcher support this ideal node? + // Does matcher have a match rule for this ideal node? static const bool has_match_rule(int opcode); static const bool _hasMatchRule[_last_opcode]; + // Does matcher have a match rule for this ideal node and is the + // predicate (if there is one) true? + // NOTE: If this function is used more commonly in the future, ADLC + // should generate this one. 
+ static const bool match_rule_supported(int opcode); + // Used to determine if we have fast l2f conversion // USII has it, USIII doesn't static const bool convL2FSupported(void); --- old/src/share/vm/runtime/globals.hpp 2009-04-22 20:27:29.588609473 +0200 +++ new/src/share/vm/runtime/globals.hpp 2009-04-22 20:27:29.455257258 +0200 @@ -2185,6 +2185,9 @@ diagnostic(bool, PrintIntrinsics, false, \ "prints attempted and successful inlining of intrinsics") \ \ + product(bool, UseCountLeadingZerosInstruction, false, \ + "Use count leading zeros instruction") \ + \ product(bool, UsePopCountInstruction, false, \ "Use population count instruction") \ \ --- /dev/null 2009-04-22 20:27:30.000000000 +0200 +++ new/test/compiler/6823354/Test6823354.java 2009-04-22 20:27:30.131169837 +0200 @@ -0,0 +1,120 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ + +/** + * @test + * @bug 6823354 + * @summary These methods can be intrinsified by using bit scan, bit test, and population count instructions. + * + * @run main/othervm -Xcomp -XX:CompileOnly=Test6823354.lzcomp,Test6823354.tzcomp Test6823354 + */ + +public class Test6823354 { + // Arrays of corner case values. + static final int[] ia = new int[] { 0, -1, Integer.MIN_VALUE, Integer.MAX_VALUE }; + static final long[] la = new long[] { 0L, -1L, Long.MIN_VALUE, Long.MAX_VALUE }; + + public static void main(String[] args) { + // Load the class and the methods. + Integer.numberOfLeadingZeros(0); + Integer.numberOfTrailingZeros(0); + Long.numberOfLeadingZeros(0); + Long.numberOfTrailingZeros(0); + + lz(); + tz(); + } + + static void lz() { + // int + // Test corner cases. + for (int i = 0; i < ia.length; i++) { + int x = ia[i]; + check(x, lzcomp(x), lzint(x)); + } + for (int i = 0; i < Integer.SIZE; i++) { + int x = 1 << i; + check(x, lzcomp(x), lzint(x)); + } + + // long + // Test corner cases. + for (int i = 0; i < la.length; i++) { + long x = la[i]; + check(x, lzcomp(x), lzint(x)); + } + for (int i = 0; i < Long.SIZE; i++) { + long x = 1L << i; + check(x, lzcomp(x), lzint(x)); + } + } + + static void tz() { + // int + // Test corner cases. + for (int i = 0; i < ia.length; i++) { + int x = ia[i]; + check(x, tzcomp(x), tzint(x)); + } + for (int i = 0; i < Integer.SIZE; i++) { + int x = 1 << i; + check(x, tzcomp(x), tzint(x)); + } + + // long + // Test corner cases. 
+ for (int i = 0; i < la.length; i++) { + long x = la[i]; + check(x, tzcomp(x), tzint(x)); + } + for (int i = 0; i < Long.SIZE; i++) { + long x = 1L << i; + check(x, tzcomp(x), tzint(x)); + } + } + + static void check(int value, int result, int expected) { + //System.out.println(value + ": " + result + ", " + expected); + if (result != expected) + throw new InternalError(value + " failed: " + result + " != " + expected); + } + + static void check(long value, long result, long expected) { + //System.out.println(value + ": " + result + ", " + expected); + if (result != expected) + throw new InternalError(value + " failed: " + result + " != " + expected); + } + + static int lzint (int i) { return Integer.numberOfLeadingZeros(i); } + static int lzcomp(int i) { return Integer.numberOfLeadingZeros(i); } + + static int lzint (long l) { return Long.numberOfLeadingZeros(l); } + static int lzcomp(long l) { return Long.numberOfLeadingZeros(l); } + + static int tzint (int i) { return Integer.numberOfTrailingZeros(i); } + static int tzcomp(int i) { return Integer.numberOfTrailingZeros(i); } + + static int tzint (long l) { return Long.numberOfTrailingZeros(l); } + static int tzcomp(long l) { return Long.numberOfTrailingZeros(l); } +} +