--- old/src/cpu/sparc/vm/sparc.ad 2009-05-04 21:36:22.805085987 +0200 +++ new/src/cpu/sparc/vm/sparc.ad 2009-05-04 21:36:22.674728622 +0200 @@ -1712,6 +1712,23 @@ return as_DoubleFloatRegister(register_encoding); } +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + if (!UsePopCountInstruction) + return false; + break; + } + + return true; // Per default match rules are supported. +} + int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk } @@ -9188,6 +9205,145 @@ ins_pipe(long_memory_op); %} + +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountLeadingZerosI src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // x |= (x >> 16); + // return (WORDBITS - popc(x)); + format %{ "SRL $src,1,$dst\t! count leading zeros (int)\n\t" + "OR $src,$tmp,$dst\n\t" + "SRL $dst,2,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,4,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,8,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRL $dst,16,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "POPC $dst,$dst\n\t" + "MOV 32,$tmp\n\t" + "SUB $tmp,$dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + __ srl(Rsrc, 1, Rtmp); + __ or3(Rsrc, Rtmp, Rdst); + __ srl(Rdst, 2, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 4, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 8, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srl(Rdst, 16, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ popc(Rdst, Rdst); + __ mov(BitsPerInt, Rtmp); + __ sub(Rtmp, Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountLeadingZerosL src)); + effect(TEMP dst, TEMP tmp, KILL cr); + + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // x |= (x >> 16); + // x |= (x >> 32); + // return (WORDBITS - popc(x)); + format %{ "SRLX $src,1,$dst\t! count leading zeros (long)\n\t" + "OR $src,$tmp,$dst\n\t" + "SRLX $dst,2,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,4,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,8,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,16,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "SRLX $dst,32,$tmp\n\t" + "OR $dst,$tmp,$dst\n\t" + "POPC $dst,$dst\n\t" + "MOV 64,$tmp\n\t" + "SUB $tmp,$dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + __ srlx(Rsrc, 1, Rtmp); + __ or3(Rsrc, Rtmp, Rdst); + __ srlx(Rdst, 2, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srlx(Rdst, 4, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srlx(Rdst, 8, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srlx(Rdst, 16, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ srlx(Rdst, 32, Rtmp); + __ or3(Rdst, Rtmp, Rdst); + __ popc(Rdst, Rdst); + __ mov(BitsPerLong, Rtmp); + __ sub(Rtmp, Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountTrailingZerosI src)); + effect(TEMP dst, KILL cr); + + // return popc(~x & (x - 1)); + format %{ "SUB $src,1,$dst\t! count trailing zeros (int)\n\t" + "ANDN $dst,$src,$dst\n\t" + "SRL $dst,R_G0,$dst\n\t" + "POPC $dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ sub(Rsrc, 1, Rdst); + __ andn(Rdst, Rsrc, Rdst); + __ srl(Rdst, G0, Rdst); + __ popc(Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(iRegI dst, iRegL src, flagsReg cr) %{ + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported + match(Set dst (CountTrailingZerosL src)); + effect(TEMP dst, KILL cr); + + // return popc(~x & (x - 1)); + format %{ "SUB $src,1,$dst\t! count trailing zeros (long)\n\t" + "ANDN $dst,$src,$dst\n\t" + "POPC $dst,$dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ sub(Rsrc, 1, Rdst); + __ andn(Rdst, Rsrc, Rdst); + __ popc(Rdst, Rdst); + %} + ins_pipe(ialu_reg); +%} + + //---------- Population Count Instructions ------------------------------------- instruct popCountI(iRegI dst, iRegI src) %{ --- old/src/cpu/x86/vm/assembler_x86.cpp 2009-05-04 21:36:23.714231155 +0200 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2009-05-04 21:36:23.591203786 +0200 @@ -952,6 +952,21 @@ emit_operand(dst, src); } +void Assembler::bsfl(Register dst, Register src) { + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBC); + emit_byte(0xC0 | encode); +} + +void Assembler::bsrl(Register dst, Register src) { + assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + void Assembler::bswapl(Register reg) { // bswap int encode = prefix_and_encode(reg->encoding()); emit_byte(0x0F); @@ -1438,6 +1453,15 @@ } } +void Assembler::lzcntl(Register dst, Register src) { + assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); + emit_byte(0xF3); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + // Emit mfence instruction void Assembler::mfence() { NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) @@ -3688,6 +3712,21 @@ emit_arith(0x23, 0xC0, dst, src); } +void Assembler::bsfq(Register dst, Register src) { + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBC); + emit_byte(0xC0 | encode); +} + +void Assembler::bsrq(Register dst, Register src) { + assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + void Assembler::bswapq(Register reg) { int encode = prefixq_and_encode(reg->encoding()); emit_byte(0x0F); @@ -3941,6 +3980,15 @@ emit_data((int)imm32, rspec, narrow_oop_operand); } +void Assembler::lzcntq(Register dst, Register src) { + assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); + emit_byte(0xF3); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBD); + emit_byte(0xC0 | encode); +} + void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); --- old/src/cpu/x86/vm/assembler_x86.hpp 2009-05-04 21:36:24.551883005 +0200 +++ new/src/cpu/x86/vm/assembler_x86.hpp 2009-05-04 21:36:24.424707665 +0200 @@ -757,6 +757,18 @@ void andpd(XMMRegister dst, Address src); void andpd(XMMRegister dst, XMMRegister src); + void bsfl(Register dst, Register src); + +#ifdef _LP64 + void bsfq(Register dst, Register src); +#endif + + void bsrl(Register dst, Register src); + +#ifdef _LP64 + void bsrq(Register dst, Register src); +#endif + void bswapl(Register reg); void bswapq(Register reg); @@ -1061,6 +1073,12 @@ void lock(); + void lzcntl(Register dst, Register src); + +#ifdef _LP64 + void lzcntq(Register dst, Register src); +#endif + enum Membar_mask_bits { StoreStore = 1 << 3, LoadStore = 1 << 2, --- old/src/cpu/x86/vm/vm_version_x86.cpp 2009-05-04 21:36:25.258080885 +0200 +++ new/src/cpu/x86/vm/vm_version_x86.cpp 2009-05-04 21:36:25.133526799 +0200 @@ -284,7 +284,7 @@ } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -301,6 +301,7 @@ (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow() ? ", 3dnow" : ""), (supports_3dnow2() ? ", 3dnowext" : ""), + (supports_lzcnt() ? ", lzcnt": ""), (supports_sse4a() ? ", sse4a": ""), (supports_ht() ? ", ht": "")); _features_str = strdup(buf); @@ -364,6 +365,13 @@ UseXmmI2D = false; } } + + // Use count leading zeros count instruction if available. + if (supports_lzcnt()) { + if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { + UseCountLeadingZerosInstruction = true; + } + } } if( is_intel() ) { // Intel cpus specific settings --- old/src/cpu/x86/vm/vm_version_x86.hpp 2009-05-04 21:36:25.930976502 +0200 +++ new/src/cpu/x86/vm/vm_version_x86.hpp 2009-05-04 21:36:25.805402720 +0200 @@ -120,7 +120,7 @@ uint32_t LahfSahf : 1, CmpLegacy : 1, : 4, - abm : 1, + lzcnt : 1, sse4a : 1, misalignsse : 1, prefetchw : 1, @@ -182,7 +182,8 @@ CPU_SSE4A = (1 << 10), CPU_SSE4_1 = (1 << 11), CPU_SSE4_2 = (1 << 12), - CPU_POPCNT = (1 << 13) + CPU_POPCNT = (1 << 13), + CPU_LZCNT = (1 << 14) } cpuFeatureFlags; // cpuid information block. All info derived from executing cpuid with @@ -277,8 +278,6 @@ if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) result |= CPU_MMX; - if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) - result |= CPU_3DNOW; if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) result |= CPU_SSE; if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) @@ -287,14 +286,23 @@ result |= CPU_SSE3; if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) result |= CPU_SSSE3; - if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) - result |= CPU_SSE4A; if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) result |= CPU_SSE4_1; if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) result |= CPU_SSE4_2; if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) result |= CPU_POPCNT; + + // AMD features. + if (is_amd()) { + if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) + result |= CPU_3DNOW; + if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) + result |= CPU_LZCNT; + if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) + result |= CPU_SSE4A; + } + return result; } @@ -391,6 +399,7 @@ static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } + static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } static bool supports_compare_and_exchange() { return true; } --- old/src/cpu/x86/vm/x86_32.ad 2009-05-04 21:36:26.630419815 +0200 +++ new/src/cpu/x86/vm/x86_32.ad 2009-05-04 21:36:26.496388541 +0200 @@ -1281,6 +1281,13 @@ } +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + return true; // Per default match rules are supported. +} + int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk } @@ -6644,6 +6651,153 @@ %} +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(KILL cr); + + format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ lzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{ + predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(KILL cr); + + format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" + "JNZ skip\n\t" + "MOV $dst, -1\n" + "skip:\n\t" + "NEG $dst\n\t" + "ADD $dst, 31" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label skip; + __ bsrl(Rdst, Rsrc); + __ jccb(Assembler::notZero, skip); + __ movl(Rdst, -1); + __ bind(skip); + __ negl(Rdst); + __ addl(Rdst, BitsPerInt - 1); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(TEMP dst, KILL cr); + + format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" + "JNC done\n\t" + "LZCNT $dst, $src.lo\n\t" + "ADD $dst, 32\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label done; + __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); + __ jccb(Assembler::carryClear, done); + __ lzcntl(Rdst, Rsrc); + __ addl(Rdst, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{ + predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(TEMP dst, KILL cr); + + format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" + "JZ msw_is_zero\n\t" + "ADD $dst, 32\n\t" + "JMP not_zero\n" + "msw_is_zero:\n\t" + "BSR $dst, $src.lo\n\t" + "JNZ not_zero\n\t" + "MOV $dst, -1\n" + "not_zero:\n\t" + "NEG $dst\n\t" + "ADD $dst, 63\n" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label msw_is_zero; + Label not_zero; + __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); + __ jccb(Assembler::zero, msw_is_zero); + __ addl(Rdst, BitsPerInt); + __ jmpb(not_zero); + __ bind(msw_is_zero); + __ bsrl(Rdst, Rsrc); + __ jccb(Assembler::notZero, not_zero); + __ movl(Rdst, -1); + __ bind(not_zero); + __ negl(Rdst); + __ addl(Rdst, BitsPerLong - 1); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" + "JNZ done\n\t" + "MOV $dst, 32\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Label done; + __ bsfl(Rdst, $src$$Register); + __ jccb(Assembler::notZero, done); + __ movl(Rdst, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ + match(Set dst (CountTrailingZerosL src)); + effect(TEMP dst, KILL cr); + + format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" + "JNZ done\n\t" + "BSF $dst, $src.hi\n\t" + "JNZ msw_not_zero\n\t" + "MOV $dst, 32\n" + "msw_not_zero:\n\t" + "ADD $dst, 32\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label msw_not_zero; + Label done; + __ bsfl(Rdst, Rsrc); + __ jccb(Assembler::notZero, done); + __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); + __ jccb(Assembler::notZero, msw_not_zero); + __ movl(Rdst, BitsPerInt); + __ bind(msw_not_zero); + __ addl(Rdst, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + + //---------- Population Count Instructions ------------------------------------- instruct popCountI(eRegI dst, eRegI src) %{ --- old/src/cpu/x86/vm/x86_64.ad 2009-05-04 21:36:27.622217068 +0200 +++ new/src/cpu/x86/vm/x86_64.ad 2009-05-04 21:36:27.497316346 +0200 @@ -1980,6 +1980,13 @@ } +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + return true; // Per default match rules are supported. +} + int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk @@ -7656,6 +7663,121 @@ %} +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(KILL cr); + + format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ lzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{ + predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + effect(KILL cr); + + format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t" + "jnz skip\n\t" + "movl $dst, -1\n" + "skip:\n\t" + "negl $dst\n\t" + "addl $dst, 31" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label skip; + __ bsrl(Rdst, Rsrc); + __ jccb(Assembler::notZero, skip); + __ movl(Rdst, -1); + __ bind(skip); + __ negl(Rdst); + __ addl(Rdst, BitsPerInt - 1); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(KILL cr); + + format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ lzcntq($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{ + predicate(!UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + effect(KILL cr); + + format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t" + "jnz skip\n\t" + "movl $dst, -1\n" + "skip:\n\t" + "negl $dst\n\t" + "addl $dst, 63" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label skip; + __ bsrq(Rdst, Rsrc); + __ jccb(Assembler::notZero, skip); + __ movl(Rdst, -1); + __ bind(skip); + __ negl(Rdst); + __ addl(Rdst, BitsPerLong - 1); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t" + "jnz done\n\t" + "movl $dst, 32\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Label done; + __ bsfl(Rdst, $src$$Register); + __ jccb(Assembler::notZero, done); + __ movl(Rdst, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ + match(Set dst (CountTrailingZerosL src)); + effect(KILL cr); + + format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t" + "jnz done\n\t" + "movl $dst, 64\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Label done; + __ bsfq(Rdst, $src$$Register); + __ jccb(Assembler::notZero, done); + __ movl(Rdst, BitsPerLong); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + + //---------- Population Count Instructions ------------------------------------- instruct popCountI(rRegI dst, rRegI src) %{ --- old/src/share/vm/classfile/vmSymbols.hpp 2009-05-04 21:36:28.552440001 +0200 +++ new/src/share/vm/classfile/vmSymbols.hpp 2009-05-04 21:36:28.429423727 +0200 @@ -313,6 +313,8 @@ template(value_name, "value") \ template(frontCacheEnabled_name, "frontCacheEnabled") \ template(stringCacheEnabled_name, "stringCacheEnabled") \ + template(numberOfLeadingZeros_name, "numberOfLeadingZeros") \ + template(numberOfTrailingZeros_name, "numberOfTrailingZeros") \ template(bitCount_name, "bitCount") \ template(profile_name, "profile") \ template(equals_name, "equals") \ @@ -559,6 +561,12 @@ do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \ do_name( longBitsToDouble_name, "longBitsToDouble") \ \ + do_intrinsic(_numberOfLeadingZeros_i, java_lang_Integer, numberOfLeadingZeros_name,int_int_signature, F_S) \ + do_intrinsic(_numberOfLeadingZeros_l, java_lang_Long, numberOfLeadingZeros_name,long_int_signature, F_S) \ + \ + do_intrinsic(_numberOfTrailingZeros_i, java_lang_Integer, numberOfTrailingZeros_name,int_int_signature, F_S) \ + do_intrinsic(_numberOfTrailingZeros_l, java_lang_Long, numberOfTrailingZeros_name,long_int_signature, F_S) \ + \ do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \ do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \ \ --- old/src/share/vm/opto/classes.hpp 2009-05-04 21:36:29.237301928 +0200 +++ new/src/share/vm/opto/classes.hpp 2009-05-04 21:36:29.116159559 +0200 @@ -104,6 +104,10 @@ macro(CosD) macro(CountedLoop) macro(CountedLoopEnd) +macro(CountLeadingZerosI) +macro(CountLeadingZerosL) +macro(CountTrailingZerosI) +macro(CountTrailingZerosL) macro(CreateEx) macro(DecodeN) macro(DivD) --- old/src/share/vm/opto/connode.cpp 2009-05-04 21:36:29.895185379 +0200 +++ new/src/share/vm/opto/connode.cpp 2009-05-04 21:36:29.773650179 +0200 @@ -1251,3 +1251,85 @@ v.set_jdouble(td->getd()); return TypeLong::make( v.get_jlong() ); } + +//------------------------------Ideal------------------------------------------ +Node* CountLeadingZerosINode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* in1 = in(1); + if (in1->is_Con()) { + jint i = phase->type(in1)->is_int()->get_con(); + // HD, Figure 5-6 + if (i == 0) + return ConINode::make(phase->C, BitsPerInt); + int n = 1; + unsigned int x = i; + if (x >> 16 == 0) { n += 16; x <<= 16; } + if (x >> 24 == 0) { n += 8; x <<= 8; } + if (x >> 28 == 0) { n += 4; x <<= 4; } + if (x >> 30 == 0) { n += 2; x <<= 2; } + n -= x >> 31; + return ConINode::make(phase->C, n); + } + return NULL; +} + +//------------------------------Ideal------------------------------------------ +Node* CountLeadingZerosLNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* in1 = in(1); + if (in1->is_Con()) { + jlong l = phase->type(in1)->is_long()->get_con(); + // HD, Figure 5-6 + if (l == 0) + return ConINode::make(phase->C, BitsPerLong); + int n = 1; + unsigned int x = (((julong) l) >> 32); + if (x == 0) { n += 32; x = (int) l; } + if (x >> 16 == 0) { n += 16; x <<= 16; } + if (x >> 24 == 0) { n += 8; x <<= 8; } + if (x >> 28 == 0) { n += 4; x <<= 4; } + if (x >> 30 == 0) { n += 2; x <<= 2; } + n -= x >> 31; + return ConINode::make(phase->C, n); + } + return NULL; +} + +//------------------------------Ideal------------------------------------------ +Node* CountTrailingZerosINode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* in1 = in(1); + if (in1->is_Con()) { + jint i = phase->type(in1)->is_int()->get_con(); + // HD, Figure 5-14 + int y; + if (i == 0) + return ConINode::make(phase->C, BitsPerInt); + int n = 31; + y = i << 16; if (y != 0) { n = n - 16; i = y; } + y = i << 8; if (y != 0) { n = n - 8; i = y; } + y = i << 4; if (y != 0) { n = n - 4; i = y; } + y = i << 2; if (y != 0) { n = n - 2; i = y; } + y = i << 1; if (y != 0) { n = n - 1; } + return ConINode::make(phase->C, n); + } + return NULL; +} + +//------------------------------Ideal------------------------------------------ +Node* CountTrailingZerosLNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* in1 = in(1); + if (in1->is_Con()) { + jlong l = phase->type(in1)->is_long()->get_con(); + // HD, Figure 5-14 + int x, y; + if (l == 0) + return ConINode::make(phase->C, BitsPerLong); + int n = 63; + y = (int) l; if (y != 0) { n = n - 32; x = y; } else x = (((julong) l) >> 32); + y = x << 16; if (y != 0) { n = n - 16; x = y; } + y = x << 8; if (y != 0) { n = n - 8; x = y; } + y = x << 4; if (y != 0) { n = n - 4; x = y; } + y = x << 2; if (y != 0) { n = n - 2; x = y; } + y = x << 1; if (y != 0) { n = n - 1; } + return ConINode::make(phase->C, n); + } + return NULL; +} --- old/src/share/vm/opto/connode.hpp 2009-05-04 21:36:30.609441416 +0200 +++ new/src/share/vm/opto/connode.hpp 2009-05-04 21:36:30.470839943 +0200 @@ -636,22 +636,62 @@ virtual const Type* Value( PhaseTransform *phase ) const; }; +//---------- CountBitsNode ----------------------------------------------------- +class CountBitsNode : public Node { +public: + CountBitsNode(Node* in1) : Node(0, in1) {} + const Type* bottom_type() const { return TypeInt::INT; } + virtual uint ideal_reg() const { return Op_RegI; } +}; + +//---------- CountLeadingZerosINode -------------------------------------------- +// Count leading zeros (0-bit count starting from MSB) of an integer. +class CountLeadingZerosINode : public CountBitsNode { +public: + CountLeadingZerosINode(Node* in1) : CountBitsNode(in1) {} + virtual int Opcode() const; + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + +//---------- CountLeadingZerosLNode -------------------------------------------- +// Count leading zeros (0-bit count starting from MSB) of a long. +class CountLeadingZerosLNode : public CountBitsNode { +public: + CountLeadingZerosLNode(Node* in1) : CountBitsNode(in1) {} + virtual int Opcode() const; + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + +//---------- CountTrailingZerosINode ------------------------------------------- +// Count trailing zeros (0-bit count starting from LSB) of an integer. +class CountTrailingZerosINode : public CountBitsNode { +public: + CountTrailingZerosINode(Node* in1) : CountBitsNode(in1) {} + virtual int Opcode() const; + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + +//---------- CountTrailingZerosLNode ------------------------------------------- +// Count trailing zeros (0-bit count starting from LSB) of a long. +class CountTrailingZerosLNode : public CountBitsNode { +public: + CountTrailingZerosLNode(Node* in1) : CountBitsNode(in1) {} + virtual int Opcode() const; + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + //---------- PopCountINode ----------------------------------------------------- // Population count (bit count) of an integer. -class PopCountINode : public Node { +class PopCountINode : public CountBitsNode { public: - PopCountINode(Node* in1) : Node(0, in1) {} + PopCountINode(Node* in1) : CountBitsNode(in1) {} virtual int Opcode() const; - const Type* bottom_type() const { return TypeInt::INT; } - virtual uint ideal_reg() const { return Op_RegI; } }; //---------- PopCountLNode ----------------------------------------------------- // Population count (bit count) of a long. -class PopCountLNode : public Node { +class PopCountLNode : public CountBitsNode { public: - PopCountLNode(Node* in1) : Node(0, in1) {} + PopCountLNode(Node* in1) : CountBitsNode(in1) {} virtual int Opcode() const; - const Type* bottom_type() const { return TypeInt::INT; } - virtual uint ideal_reg() const { return Op_RegI; } }; --- old/src/share/vm/opto/library_call.cpp 2009-05-04 21:36:31.315780664 +0200 +++ new/src/share/vm/opto/library_call.cpp 2009-05-04 21:36:31.186060040 +0200 @@ -222,6 +222,8 @@ bool inline_unsafe_CAS(BasicType type); bool inline_unsafe_ordered_store(BasicType type); bool inline_fp_conversions(vmIntrinsics::ID id); + bool inline_numberOfLeadingZeros(vmIntrinsics::ID id); + bool inline_numberOfTrailingZeros(vmIntrinsics::ID id); bool inline_bitCount(vmIntrinsics::ID id); bool inline_reverseBytes(vmIntrinsics::ID id); }; @@ -630,6 +632,14 @@ case vmIntrinsics::_longBitsToDouble: return inline_fp_conversions(intrinsic_id()); + case vmIntrinsics::_numberOfLeadingZeros_i: + case vmIntrinsics::_numberOfLeadingZeros_l: + return inline_numberOfLeadingZeros(intrinsic_id()); + + case vmIntrinsics::_numberOfTrailingZeros_i: + case vmIntrinsics::_numberOfTrailingZeros_l: + return inline_numberOfTrailingZeros(intrinsic_id()); + case vmIntrinsics::_bitCount_i: case vmIntrinsics::_bitCount_l: return inline_bitCount(intrinsic_id()); @@ -1844,6 +1854,48 @@ } } +//-------------------inline_numberOfLeadingZeros_int/long----------------------- +// inline int Integer.numberOfLeadingZeros(int) +// inline int Long.numberOfLeadingZeros(long) +bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) { + assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros"); + if (id == vmIntrinsics::_numberOfLeadingZeros_i && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false; + if (id == vmIntrinsics::_numberOfLeadingZeros_l && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false; + _sp += arg_size(); // restore stack pointer + switch (id) { + case vmIntrinsics::_numberOfLeadingZeros_i: + push(_gvn.transform(new (C, 2) CountLeadingZerosINode(pop()))); + break; + case vmIntrinsics::_numberOfLeadingZeros_l: + push(_gvn.transform(new (C, 2) CountLeadingZerosLNode(pop_pair()))); + break; + default: + ShouldNotReachHere(); + } + return true; +} + +//-------------------inline_numberOfTrailingZeros_int/long---------------------- +// inline int Integer.numberOfTrailingZeros(int) +// inline int Long.numberOfTrailingZeros(long) +bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) { + assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros"); + if (id == vmIntrinsics::_numberOfTrailingZeros_i && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false; + if (id == vmIntrinsics::_numberOfTrailingZeros_l && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false; + _sp += arg_size(); // restore stack pointer + switch (id) { + case vmIntrinsics::_numberOfTrailingZeros_i: + push(_gvn.transform(new (C, 2) CountTrailingZerosINode(pop()))); + break; + case vmIntrinsics::_numberOfTrailingZeros_l: + push(_gvn.transform(new (C, 2) CountTrailingZerosLNode(pop_pair()))); + break; + default: + ShouldNotReachHere(); + } + return true; +} + //----------------------------inline_bitCount_int/long----------------------- // inline int Integer.bitCount(int) // inline int Long.bitCount(long) --- old/src/share/vm/opto/matcher.hpp 2009-05-04 21:36:32.163845628 +0200 +++ new/src/share/vm/opto/matcher.hpp 2009-05-04 21:36:32.043298903 +0200 @@ -225,10 +225,16 @@ OptoRegPair *_parm_regs; // Array of machine registers per argument RegMask *_calling_convention_mask; // Array of RegMasks per argument - // Does matcher support this ideal node? + // Does matcher have a match rule for this ideal node? static const bool has_match_rule(int opcode); static const bool _hasMatchRule[_last_opcode]; + // Does matcher have a match rule for this ideal node and is the + // predicate (if there is one) true? + // NOTE: If this function is used more commonly in the future, ADLC + // should generate this one. + static const bool match_rule_supported(int opcode); + // Used to determine if we have fast l2f conversion // USII has it, USIII doesn't static const bool convL2FSupported(void); --- old/src/share/vm/runtime/globals.hpp 2009-05-04 21:36:32.841258763 +0200 +++ new/src/share/vm/runtime/globals.hpp 2009-05-04 21:36:32.712869176 +0200 @@ -2185,6 +2185,9 @@ diagnostic(bool, PrintIntrinsics, false, \ "prints attempted and successful inlining of intrinsics") \ \ + product(bool, UseCountLeadingZerosInstruction, false, \ + "Use count leading zeros instruction") \ + \ product(bool, UsePopCountInstruction, false, \ "Use population count instruction") \ \ --- /dev/null 2009-05-04 21:36:33.000000000 +0200 +++ new/test/compiler/6823354/Test6823354.java 2009-05-04 21:36:33.470999449 +0200 @@ -0,0 +1,266 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/** + * @test + * @bug 6823354 + * @summary These methods can be instrinsified by using bit scan, bit test, and population count instructions. + * + * @run main/othervm -Xcomp -XX:CompileOnly=Test6823354.lzcomp,Test6823354.tzcomp Test6823354 + */ + +import java.net.URLClassLoader; + +public class Test6823354 { + // Arrays of corner case values. + static final int[] ia = new int[] { 0, 1, -1, Integer.MIN_VALUE, Integer.MAX_VALUE }; + static final long[] la = new long[] { 0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE }; + + public static void main(String[] args) throws Exception { + // Load the classes and the methods. + Integer.numberOfLeadingZeros(0); + Integer.numberOfTrailingZeros(0); + Long.numberOfLeadingZeros(0); + Long.numberOfTrailingZeros(0); + + lz(); + tz(); + } + + static void lz() throws Exception { + // int + + // Test corner cases. + for (int i = 0; i < ia.length; i++) { + int x = ia[i]; + check(x, lzcomp(x), lzint(x)); + } + + // Test all possible return values. + for (int i = 0; i < Integer.SIZE; i++) { + int x = 1 << i; + check(x, lzcomp(x), lzint(x)); + } + + String classname = "Test6823354$lzconI"; + + // Test Ideal optimizations (constant values). + for (int i = 0; i < ia.length; i++) { + testclass(classname, ia[i]); + } + + // Test Ideal optimizations (constant values). + for (int i = 0; i < Integer.SIZE; i++) { + int x = 1 << i; + testclass(classname, x); + } + + + // long + + // Test corner cases. + for (int i = 0; i < ia.length; i++) { + long x = la[i]; + check(x, lzcomp(x), lzint(x)); + } + + // Test all possible return values. + for (int i = 0; i < Long.SIZE; i++) { + long x = 1L << i; + check(x, lzcomp(x), lzint(x)); + } + + classname = "Test6823354$lzconL"; + + // Test Ideal optimizations (constant values). + for (int i = 0; i < la.length; i++) { + testclass(classname, la[i]); + } + + // Test Ideal optimizations (constant values). + for (int i = 0; i < Long.SIZE; i++) { + long x = 1L << i; + testclass(classname, x); + } + } + + static void tz() throws Exception { + // int + + // Test corner cases. + for (int i = 0; i < ia.length; i++) { + int x = ia[i]; + check(x, tzcomp(x), tzint(x)); + } + + // Test all possible return values. + for (int i = 0; i < Integer.SIZE; i++) { + int x = 1 << i; + check(x, tzcomp(x), tzint(x)); + } + + String classname = "Test6823354$tzconI"; + + // Test Ideal optimizations (constant values). + for (int i = 0; i < ia.length; i++) { + testclass(classname, ia[i]); + } + + // Test Ideal optimizations (constant values). + for (int i = 0; i < Integer.SIZE; i++) { + int x = 1 << i; + testclass(classname, x); + } + + + // long + + // Test corner cases. + for (int i = 0; i < la.length; i++) { + long x = la[i]; + check(x, tzcomp(x), tzint(x)); + } + + // Test all possible return values. + for (int i = 0; i < Long.SIZE; i++) { + long x = 1L << i; + check(x, tzcomp(x), tzint(x)); + } + + classname = "Test6823354$tzconL"; + + // Test Ideal optimizations (constant values). + for (int i = 0; i < la.length; i++) { + testclass(classname, la[i]); + } + + // Test Ideal optimizations (constant values). + for (int i = 0; i < Long.SIZE; i++) { + long x = 1L << i; + testclass(classname, x); + } + } + + static void check(int value, int result, int expected) { + //System.out.println(value + ": " + result + ", " + expected); + if (result != expected) + throw new InternalError(value + " failed: " + result + " != " + expected); + } + + static void check(long value, long result, long expected) { + //System.out.println(value + ": " + result + ", " + expected); + if (result != expected) + throw new InternalError(value + " failed: " + result + " != " + expected); + } + + static int lzint( int i) { return Integer.numberOfLeadingZeros(i); } + static int lzcomp(int i) { return Integer.numberOfLeadingZeros(i); } + + static int lzint( long l) { return Long.numberOfLeadingZeros(l); } + static int lzcomp(long l) { return Long.numberOfLeadingZeros(l); } + + static int tzint( int i) { return Integer.numberOfTrailingZeros(i); } + static int tzcomp(int i) { return Integer.numberOfTrailingZeros(i); } + + static int tzint( long l) { return Long.numberOfTrailingZeros(l); } + static int tzcomp(long l) { return Long.numberOfTrailingZeros(l); } + + static void testclass(String classname, int x) throws Exception { + System.setProperty("value", "" + x); + loadandrunclass(classname); + } + + static void testclass(String classname, long x) throws Exception { + System.setProperty("value", "" + x); + loadandrunclass(classname); + } + + static void loadandrunclass(String classname) throws Exception { + Class cl = Class.forName(classname); + URLClassLoader apploader = (URLClassLoader) cl.getClassLoader(); + ClassLoader loader = new URLClassLoader(apploader.getURLs(), apploader.getParent()); + Class c = loader.loadClass(classname); + Runnable r = (Runnable) c.newInstance(); + r.run(); + } + + public static class lzconI implements Runnable { + static final int VALUE; + + static { + int value = 0; + try { + value = Integer.decode(System.getProperty("value")); + } catch (Throwable e) {} + VALUE = value; + } + + public void run() { check(VALUE, lzint(VALUE), dolzcomp()); } + static int dolzcomp() { return lzcomp(VALUE); } + } + + public static class lzconL implements Runnable { + static final long VALUE; + + static { + long value = 0; + try { + value = Long.decode(System.getProperty("value")); + } catch (Throwable e) {} + VALUE = value; + } + + public void run() { check(VALUE, lzint(VALUE), dolzcomp()); } + static int dolzcomp() { return lzcomp(VALUE); } + } + + public static class tzconI implements Runnable { + static final int VALUE; + + static { + int value = 0; + try { + value = Integer.decode(System.getProperty("value")); + } catch (Throwable e) {} + VALUE = value; + } + + public void run() { check(VALUE, tzint(VALUE), dotzcomp()); } + static int dotzcomp() { return tzcomp(VALUE); } + } + + public static class tzconL implements Runnable { + static final long VALUE; + + static { + long value = 0; + try { + value = Long.decode(System.getProperty("value")); + } catch (Throwable e) {} + VALUE = value; + } + + public void run() { check(VALUE, tzint(VALUE), dotzcomp()); } + static int dotzcomp() { return tzcomp(VALUE); } + } +}