# HG changeset patch # User enevill # Date 1435239689 0 # Thu Jun 25 13:41:29 2015 +0000 # Node ID 1fd5f7e18c3a8ac430291e82192eeb985b13f083 # Parent 0b76cb81b1651620ba18443cb36ab0fd50bd6f2d 8129426: aarch64: add support for PopCount in C2 Summary: Add support for PopCount using SIMD cnt and addv inst Reviewed-by: duke Contributed-by: alexander.alexeev@caviumnetworks.com diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad --- a/src/cpu/aarch64/vm/aarch64.ad +++ b/src/cpu/aarch64/vm/aarch64.ad @@ -7402,6 +7402,96 @@ ins_pipe(ialu_reg); %} +//---------- Population Count Instructions ------------------------------------- +// + +instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "movw $src, $src\n\t" + "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0 + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI (LoadI mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrs $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +// Note: Long.bitCount(long) returns an int. +instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL (LoadL mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrd $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + // ============================================================================ // MemBar Instruction diff --git a/src/cpu/aarch64/vm/assembler_aarch64.hpp b/src/cpu/aarch64/vm/assembler_aarch64.hpp --- a/src/cpu/aarch64/vm/assembler_aarch64.hpp +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp @@ -2050,6 +2050,9 @@ INSN(negr, 1, 0b100000101110); INSN(notr, 1, 0b100000010110); INSN(addv, 0, 0b110001101110); + INSN(cls, 0, 0b100000010010); + INSN(clz, 1, 0b100000010010); + INSN(cnt, 0, 0b100000010110); #undef INSN diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -36,6 +36,7 @@ class MacroAssembler: public Assembler { friend class LIR_Assembler; + public: using Assembler::mov; using Assembler::movi; diff --git a/src/cpu/aarch64/vm/vm_version_aarch64.cpp b/src/cpu/aarch64/vm/vm_version_aarch64.cpp --- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp +++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp @@ -251,6 +251,10 @@ UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0; } + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + UsePopCountInstruction = true; + } + #ifdef COMPILER2 if (FLAG_IS_DEFAULT(OptoScheduling)) { OptoScheduling = true;