< prev index next >

src/cpu/s390/vm/s390.ad

Print this page
rev 12273 : 8169317: [s390] Various minor bug fixes and adaptions.


1472   return offset;
1473 }
1474 
1475 //=============================================================================
1476 
1477 
1478 // Given a register encoding, produce an Integer Register object.
     // The encoding is handed to as_Register() unchanged. The assert is only a
     // spot check (on Z_R12) that Z_R12->encoding() agrees with Z_R12_enc; it
     // does not validate register_encoding itself.
1479 static Register reg_to_register_object(int register_encoding) {
1480   assert(Z_R12->encoding() == Z_R12_enc, "wrong coding");
1481   return as_Register(register_encoding);
1482 }
1483 
1484 const bool Matcher::match_rule_supported(int opcode) {
1485   if (!has_match_rule(opcode)) return false;
1486 
1487   switch (opcode) {
1488     case Op_CountLeadingZerosI:
1489     case Op_CountLeadingZerosL:
1490     case Op_CountTrailingZerosI:
1491     case Op_CountTrailingZerosL:
1492       // Implementation requires FLOGR instruction.
1493       return UseCountLeadingZerosInstruction;
1494 
1495     case Op_ReverseBytesI:
1496     case Op_ReverseBytesL:
1497       return UseByteReverseInstruction;
1498 
1499     // PopCount supported by H/W from z/Architecture G5 (z196) on.
1500     case Op_PopCountI:
1501     case Op_PopCountL:
1502       return UsePopCountInstruction && VM_Version::has_PopCount();
1503 
1504     case Op_StrComp:
1505       return SpecialStringCompareTo;
1506     case Op_StrEquals:
1507       return SpecialStringEquals;
1508     case Op_StrIndexOf:
1509     case Op_StrIndexOfChar:
1510       return SpecialStringIndexOf;
1511 
1512     case Op_GetAndAddI:
1513     case Op_GetAndAddL:


9880   ins_pipe(pipe_class_dummy);
9881 %}
9882 
9883 instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
     // Matches StrComp nodes whose encoding is StrIntrinsicNode::UL (see predicate).
     // cnt1 and cnt2 are consumed (USE_KILL); oddReg/evenReg serve as temporaries.
9884   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
9885   effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
9886   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
9887   ins_cost(300);
9888   format %{ "String Compare char[],byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
9889   ins_encode %{
         // NOTE(review): str2/cnt2 are passed before str1/cnt1, i.e. in swapped
         // order relative to the match rule. The reason is not visible here --
         // confirm against the string_compare() helper.
9890     __ string_compare($str2$$Register, $str1$$Register,
9891                       $cnt2$$Register, $cnt1$$Register,
9892                       $oddReg$$Register, $evenReg$$Register,
9893                       $result$$Register, StrIntrinsicNode::UL);
9894   %}
9895   ins_pipe(pipe_class_dummy);
9896 %}
9897 
9898 // String IndexOfChar
     // Matches StrIndexOfChar on (haystack, haycnt) with the search character in
     // register ch; only selected when CompactStrings is set (see predicate).
     // Because ch lives in a register, the immediate-character argument of
     // string_indexof_char() is passed as 0 (unused) and is_byte is false.
9899 instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
9900   predicate(CompactStrings);
9901   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
9902   effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
9903   ins_cost(200);
9904   format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
9905   ins_encode %{
9906     __ string_indexof_char($result$$Register,
9907                            $haystack$$Register, $haycnt$$Register,
9908                            $ch$$Register, 0 /* unused, ch is in register */,
9909                            $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
9910   %}
9911   ins_pipe(pipe_class_dummy);
9912 %}
9913 
9914 instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
9915   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
9916   effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
9917   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
9918   ins_cost(200);
9919   format %{ "String IndexOf UL [0..$haycnt]($haystack), [0]($needle) -> $result" %}
9920   ins_encode %{


10573   ins_cost(DEFAULT_COST);
10574   // TODO: s390 port size(FIXED_SIZE);
10575   format %{ "LRVGR   $dst,$src\t# byte reverse long" %}
10576   opcode(LRVGR_ZOPC);
10577   ins_encode(z_rreform(dst, src));
10578   ins_pipe(pipe_class_dummy);
10579 %}
10580 
10581 // Leading zeroes
10582 
10583 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10584 // returns the bit position of the leftmost 1 in the 64bit source register.
10585 // As the bits are numbered from left to right (0..63), the returned
10586 // position index is equivalent to the number of leading zeroes.
10587 // If no 1-bit is found (i.e. the register contains zero), the instruction
10588 // returns position 64. That's exactly what we need.
10589 
10590 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
      // 32-bit count-leading-zeros built on the 64-bit FLOGR: src is shifted into
      // the upper word and a stop bit is inserted so that a zero src yields 32.
      // tmp is the odd register paired with dst and is declared KILL (the
      // trailing-zeros rules in this file note that FLOGR kills the odd register).
10591   match(Set dst (CountLeadingZerosI src));
10592   effect(KILL tmp, KILL cr);
10593   predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
10594   ins_cost(3 * DEFAULT_COST);
10595   size(14);
10596   format %{ "SLLG    $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
10597             "IILH    $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
10598             "FLOGR   $dst,$dst"
10599          %}
10600   ins_encode %{
10601     // Performance experiments indicate that "FLOGR" is using some kind of
10602     // iteration to find the leftmost "1" bit.
10603     //
10604     // The prior implementation zero-extended the 32-bit argument to 64 bit,
10605     // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10606     // We could gain measurable speedup in micro benchmark:
10607     //
10608     //               leading   trailing
10609     //   z10:   int     2.04       1.68
10610     //         long     1.00       1.02
10611     //   z196:  int     0.99       1.23
10612     //         long     1.00       1.11
10613     //
10614     // By shifting the argument into the high-word instead of zero-extending it.
10615     // The add'l branch on condition (taken for a zero argument, very infrequent,
10616     // good prediction) is well compensated for by the savings.
10617     //
10618     // We leave the previous implementation in for some time in the future when
10619     // the "FLOGR" instruction may become less iterative.
          //
          // NOTE(review): the sequence emitted below is branch-free (SLLG, IILH,
          // FLOGR); the remark about an additional branch appears to describe a
          // different variant -- confirm and update the text above if stale.
10620 
10621     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10622     __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10623     __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
10624     __ z_flogr($dst$$Register, $dst$$Register);
10625   %}
10626   ins_pipe(pipe_class_dummy);
10627 %}
10628 
10629 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
      // 64-bit count-leading-zeros: a single FLOGR suffices, since it yields the
      // bit position of the leftmost one (0..63) and 64 for a zero source.
      // tmp is the odd register paired with dst and is declared KILL.
10630   match(Set dst (CountLeadingZerosL src));
10631   effect(KILL tmp, KILL cr);
10632   predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
10633   ins_cost(DEFAULT_COST);
10634   size(4);
10635   format %{ "FLOGR   $dst,$src \t# count leading zeros (long)\n\t" %}
10636   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10637   ins_pipe(pipe_class_dummy);
10638 %}
10639 
10640 // trailing zeroes
10641 
10642 // We transform the trailing zeroes problem to a leading zeroes problem
10643 // such that we can use the FLOGR instruction to our advantage.
10644 
10645 // With
10646 //   tmp1 = src - 1
10647 // we flip all trailing zeroes to ones and the rightmost one to zero.
10648 // All other bits remain unchanged.
10649 // With the complement
10650 //   tmp2 = ~src
10651 // we get all ones in the trailing zeroes positions. Thus,
10652 //   tmp3 = tmp1 & tmp2
10653 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10654 // Now we can apply FLOGR and get 64-(trailing zeroes).
10655 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10656   match(Set dst (CountTrailingZerosI src));
10657   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10658   predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
10659   ins_cost(8 * DEFAULT_COST);
10660   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10661   format %{ "LLGFR   $dst,$src  \t# clear upper 32 bits (we are dealing with int)\n\t"
10662             "LCGFR   $tmp,$src  \t# load 2's complement (32->64 bit)\n\t"
10663             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10664             "AGHI    $tmp,-1    \t# tmp2 = -src-1 = ~src\n\t"
10665             "NGR     $dst,$tmp  \t# tmp3 = tmp1&tmp2\n\t"
10666             "FLOGR   $dst,$dst  \t# count trailing zeros (int)\n\t"
10667             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10668             "LCR     $dst,$dst  \t# res = -tmp4"
10669          %}
10670   ins_encode %{
10671     Register Rdst = $dst$$Register;
10672     Register Rsrc = $src$$Register;
10673     // Rtmp only needed for zero-argument shortcut. With kill effect in
10674     // match rule Rsrc = roddReg would be possible, saving one register.
10675     Register Rtmp = $tmp$$Register;
10676 
10677     assert_different_registers(Rdst, Rsrc, Rtmp);
10678 


10692     __ z_ahi(Rtmp,  -1);               // Subtract one to fill all trailing zero positions with ones.
10693                                        // Use 32bit op to prevent borrow propagation (case Rdst = 0x80000000)
10694                                        // into upper half of reg. Not relevant with sllg below.
10695     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10696     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10697                                        // Depends on CC set by ahi above.
10698                                        // Taken very infrequently, good prediction, no BHT entry.
10699                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10700                                        // after SLLG Rdst == 0(64bit)).
10701     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10702     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10703     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10704     __ bind(done);
10705   %}
10706   ins_pipe(pipe_class_dummy);
10707 %}
10708 
10709 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10710   match(Set dst (CountTrailingZerosL src));
10711   effect(TEMP_DEF dst, KILL tmp, KILL cr);
10712   predicate(UseCountLeadingZerosInstruction);  // See Matcher::match_rule_supported
10713   ins_cost(8 * DEFAULT_COST);
10714   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10715   format %{ "LCGR    $dst,$src  \t# preserve src\n\t"
10716             "NGR     $dst,$src  \t#"
10717             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10718             "FLOGR   $dst,$dst  \t# count trailing zeros (long), kill $tmp\n\t"
10719             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10720             "LCR     $dst,$dst  \t#"
10721          %}
10722   ins_encode %{
10723     Register Rdst = $dst$$Register;
10724     Register Rsrc = $src$$Register;
10725     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10726 
10727     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10728     __ z_lcgr(Rdst, Rsrc);
10729     __ z_ngr(Rdst, Rsrc);
10730     __ add2reg(Rdst,   -1);
10731     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10732     __ add2reg(Rdst,  -64);




1472   return offset;
1473 }
1474 
1475 //=============================================================================
1476 
1477 
1478 // Given a register encoding, produce an Integer Register object.
     // The encoding is handed to as_Register() unchanged. The assert is only a
     // spot check (on Z_R12) that Z_R12->encoding() agrees with Z_R12_enc; it
     // does not validate register_encoding itself.
1479 static Register reg_to_register_object(int register_encoding) {
1480   assert(Z_R12->encoding() == Z_R12_enc, "wrong coding");
1481   return as_Register(register_encoding);
1482 }
1483 
1484 const bool Matcher::match_rule_supported(int opcode) {
1485   if (!has_match_rule(opcode)) return false;
1486 
1487   switch (opcode) {
1488     case Op_CountLeadingZerosI:
1489     case Op_CountLeadingZerosL:
1490     case Op_CountTrailingZerosI:
1491     case Op_CountTrailingZerosL:
1492       // Implementation requires FLOGR instruction, which is available since z9.
1493       return true;
1494 
1495     case Op_ReverseBytesI:
1496     case Op_ReverseBytesL:
1497       return UseByteReverseInstruction;
1498 
1499     // PopCount supported by H/W from z/Architecture G5 (z196) on.
1500     case Op_PopCountI:
1501     case Op_PopCountL:
1502       return UsePopCountInstruction && VM_Version::has_PopCount();
1503 
1504     case Op_StrComp:
1505       return SpecialStringCompareTo;
1506     case Op_StrEquals:
1507       return SpecialStringEquals;
1508     case Op_StrIndexOf:
1509     case Op_StrIndexOfChar:
1510       return SpecialStringIndexOf;
1511 
1512     case Op_GetAndAddI:
1513     case Op_GetAndAddL:


9880   ins_pipe(pipe_class_dummy);
9881 %}
9882 
9883 instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
     // Matches StrComp nodes whose encoding is StrIntrinsicNode::UL (see predicate).
     // cnt1 and cnt2 are consumed (USE_KILL); oddReg/evenReg serve as temporaries.
9884   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
9885   effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
9886   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
9887   ins_cost(300);
9888   format %{ "String Compare char[],byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
9889   ins_encode %{
         // NOTE(review): str2/cnt2 are passed before str1/cnt1, i.e. in swapped
         // order relative to the match rule. The reason is not visible here --
         // confirm against the string_compare() helper.
9890     __ string_compare($str2$$Register, $str1$$Register,
9891                       $cnt2$$Register, $cnt1$$Register,
9892                       $oddReg$$Register, $evenReg$$Register,
9893                       $result$$Register, StrIntrinsicNode::UL);
9894   %}
9895   ins_pipe(pipe_class_dummy);
9896 %}
9897 
9898 // String IndexOfChar
     // Matches StrIndexOfChar on (haystack, haycnt) with the search character in
     // register ch. Because ch lives in a register, the immediate-character
     // argument of string_indexof_char() is passed as 0 (unused) and is_byte
     // is false.
9899 instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{

9900   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
9901   effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
9902   ins_cost(200);
9903   format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
9904   ins_encode %{
9905     __ string_indexof_char($result$$Register,
9906                            $haystack$$Register, $haycnt$$Register,
9907                            $ch$$Register, 0 /* unused, ch is in register */,
9908                            $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
9909   %}
9910   ins_pipe(pipe_class_dummy);
9911 %}
9912 
9913 instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
9914   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
9915   effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
9916   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
9917   ins_cost(200);
9918   format %{ "String IndexOf UL [0..$haycnt]($haystack), [0]($needle) -> $result" %}
9919   ins_encode %{


10572   ins_cost(DEFAULT_COST);
10573   // TODO: s390 port size(FIXED_SIZE);
10574   format %{ "LRVGR   $dst,$src\t# byte reverse long" %}
10575   opcode(LRVGR_ZOPC);
10576   ins_encode(z_rreform(dst, src));
10577   ins_pipe(pipe_class_dummy);
10578 %}
10579 
10580 // Leading zeroes
10581 
10582 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10583 // returns the bit position of the leftmost 1 in the 64bit source register.
10584 // As the bits are numbered from left to right (0..63), the returned
10585 // position index is equivalent to the number of leading zeroes.
10586 // If no 1-bit is found (i.e. the register contains zero), the instruction
10587 // returns position 64. That's exactly what we need.
10588 
10589 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
      // 32-bit count-leading-zeros built on the 64-bit FLOGR: src is shifted into
      // the upper word and a stop bit is inserted so that a zero src yields 32.
      // tmp is the odd register paired with dst and is declared KILL (the
      // trailing-zeros rules in this file note that FLOGR kills the odd register).
10590   match(Set dst (CountLeadingZerosI src));
10591   effect(KILL tmp, KILL cr);

10592   ins_cost(3 * DEFAULT_COST);
10593   size(14);
10594   format %{ "SLLG    $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
10595             "IILH    $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
10596             "FLOGR   $dst,$dst"
10597          %}
10598   ins_encode %{
10599     // Performance experiments indicate that "FLOGR" is using some kind of
10600     // iteration to find the leftmost "1" bit.
10601     //
10602     // The prior implementation zero-extended the 32-bit argument to 64 bit,
10603     // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10604     // We could gain measurable speedup in micro benchmark:
10605     //
10606     //               leading   trailing
10607     //   z10:   int     2.04       1.68
10608     //         long     1.00       1.02
10609     //   z196:  int     0.99       1.23
10610     //         long     1.00       1.11
10611     //
10612     // By shifting the argument into the high-word instead of zero-extending it.
10613     // The add'l branch on condition (taken for a zero argument, very infrequent,
10614     // good prediction) is well compensated for by the savings.
10615     //
10616     // We leave the previous implementation in for some time in the future when
10617     // the "FLOGR" instruction may become less iterative.
          //
          // NOTE(review): the sequence emitted below is branch-free (SLLG, IILH,
          // FLOGR); the remark about an additional branch appears to describe a
          // different variant -- confirm and update the text above if stale.
10618 
10619     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10620     __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10621     __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
10622     __ z_flogr($dst$$Register, $dst$$Register);
10623   %}
10624   ins_pipe(pipe_class_dummy);
10625 %}
10626 
10627 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
      // 64-bit count-leading-zeros: a single FLOGR suffices, since it yields the
      // bit position of the leftmost one (0..63) and 64 for a zero source.
      // tmp is the odd register paired with dst and is declared KILL.
10628   match(Set dst (CountLeadingZerosL src));
10629   effect(KILL tmp, KILL cr);

10630   ins_cost(DEFAULT_COST);
10631   size(4);
10632   format %{ "FLOGR   $dst,$src \t# count leading zeros (long)\n\t" %}
10633   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10634   ins_pipe(pipe_class_dummy);
10635 %}
10636 
10637 // trailing zeroes
10638 
10639 // We transform the trailing zeroes problem to a leading zeroes problem
10640 // such that we can use the FLOGR instruction to our advantage.
10641 
10642 // With
10643 //   tmp1 = src - 1
10644 // we flip all trailing zeroes to ones and the rightmost one to zero.
10645 // All other bits remain unchanged.
10646 // With the complement
10647 //   tmp2 = ~src
10648 // we get all ones in the trailing zeroes positions. Thus,
10649 //   tmp3 = tmp1 & tmp2
10650 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10651 // Now we can apply FLOGR and get 64-(trailing zeroes).
10652 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10653   match(Set dst (CountTrailingZerosI src));
10654   effect(TEMP_DEF dst, TEMP tmp, KILL cr);

10655   ins_cost(8 * DEFAULT_COST);
10656   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10657   format %{ "LLGFR   $dst,$src  \t# clear upper 32 bits (we are dealing with int)\n\t"
10658             "LCGFR   $tmp,$src  \t# load 2's complement (32->64 bit)\n\t"
10659             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10660             "AGHI    $tmp,-1    \t# tmp2 = -src-1 = ~src\n\t"
10661             "NGR     $dst,$tmp  \t# tmp3 = tmp1&tmp2\n\t"
10662             "FLOGR   $dst,$dst  \t# count trailing zeros (int)\n\t"
10663             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10664             "LCR     $dst,$dst  \t# res = -tmp4"
10665          %}
10666   ins_encode %{
10667     Register Rdst = $dst$$Register;
10668     Register Rsrc = $src$$Register;
10669     // Rtmp only needed for zero-argument shortcut. With kill effect in
10670     // match rule Rsrc = roddReg would be possible, saving one register.
10671     Register Rtmp = $tmp$$Register;
10672 
10673     assert_different_registers(Rdst, Rsrc, Rtmp);
10674 


10688     __ z_ahi(Rtmp,  -1);               // Subtract one to fill all trailing zero positions with ones.
10689                                        // Use 32bit op to prevent borrow propagation (case Rdst = 0x80000000)
10690                                        // into upper half of reg. Not relevant with sllg below.
10691     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10692     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10693                                        // Depends on CC set by ahi above.
10694                                        // Taken very infrequently, good prediction, no BHT entry.
10695                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10696                                        // after SLLG Rdst == 0(64bit)).
10697     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10698     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10699     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10700     __ bind(done);
10701   %}
10702   ins_pipe(pipe_class_dummy);
10703 %}
10704 
10705 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
10706   match(Set dst (CountTrailingZerosL src));
10707   effect(TEMP_DEF dst, KILL tmp, KILL cr);

10708   ins_cost(8 * DEFAULT_COST);
10709   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10710   format %{ "LCGR    $dst,$src  \t# preserve src\n\t"
10711             "NGR     $dst,$src  \t#"
10712             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10713             "FLOGR   $dst,$dst  \t# count trailing zeros (long), kill $tmp\n\t"
10714             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10715             "LCR     $dst,$dst  \t#"
10716          %}
10717   ins_encode %{
10718     Register Rdst = $dst$$Register;
10719     Register Rsrc = $src$$Register;
10720     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10721 
10722     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10723     __ z_lcgr(Rdst, Rsrc);
10724     __ z_ngr(Rdst, Rsrc);
10725     __ add2reg(Rdst,   -1);
10726     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10727     __ add2reg(Rdst,  -64);


< prev index next >