3416 //14:
3417 if (cntval & 2) {
3418 lwzx(R0, str1_reg, index_reg);
3419 lwzx(tmp2_reg, str2_reg, index_reg);
3420 cmpw(CCR0, R0, tmp2_reg);
3421 bne(CCR0, Ldone_false);
3422 if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
3423 }
3424 if (cntval & 1) {
3425 lhzx(R0, str1_reg, index_reg);
3426 lhzx(tmp2_reg, str2_reg, index_reg);
3427 cmpw(CCR0, R0, tmp2_reg);
3428 bne(CCR0, Ldone_false);
3429 }
3430 // fallthru: true
3431 }
3432 li(result_reg, 1);
3433 bind(Ldone_false);
3434 }
3435
3436
3437 void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
3438 #ifdef ASSERT
3439 Label ok;
3440 if (check_equal) {
3441 beq(CCR0, ok);
3442 } else {
3443 bne(CCR0, ok);
3444 }
3445 stop(msg, id);
3446 bind(ok);
3447 #endif
3448 }
3449
3450 void MacroAssembler::asm_assert_mems_zero(bool check_equal, int size, int mem_offset,
3451 Register mem_base, const char* msg, int id) {
3452 #ifdef ASSERT
3453 switch (size) {
3454 case 4:
3455 lwz(R0, mem_offset, mem_base);
|
3416 //14:
3417 if (cntval & 2) {
3418 lwzx(R0, str1_reg, index_reg);
3419 lwzx(tmp2_reg, str2_reg, index_reg);
3420 cmpw(CCR0, R0, tmp2_reg);
3421 bne(CCR0, Ldone_false);
3422 if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
3423 }
3424 if (cntval & 1) {
3425 lhzx(R0, str1_reg, index_reg);
3426 lhzx(tmp2_reg, str2_reg, index_reg);
3427 cmpw(CCR0, R0, tmp2_reg);
3428 bne(CCR0, Ldone_false);
3429 }
3430 // fallthru: true
3431 }
3432 li(result_reg, 1);
3433 bind(Ldone_false);
3434 }
3435
// Emit a 128-bit add of two 64-bit values into the (dest_hi, dest_lo)
// accumulator pair:
//   dest_lo += src1 + src2
//   dest_hi += carry1 + carry2   (the carries out of the two low adds)
// Clobbers R0 (used as a zero so adde only picks up the carry bit).
void MacroAssembler::add2_with_carry(Register dest_hi,
                                     Register dest_lo,
                                     Register src1, Register src2) {
  li(R0, 0);                     // R0 = 0; adde below adds only CA.
  addc(dest_lo, dest_lo, src1);  // dest_lo += src1, sets CA.
  adde(dest_hi, dest_hi, R0);    // dest_hi += carry of first add.
  addc(dest_lo, dest_lo, src2);  // dest_lo += src2, sets CA.
  adde(dest_hi, dest_hi, R0);    // dest_hi += carry of second add.
}
3447
// Multiply 64 bit by 64 bit first loop.
//
// Multiplies x[xstart] (two 32-bit digits loaded as one 64-bit value)
// by the digits of y, storing 64-bit partial results into z and
// propagating a 64-bit carry. On exit, 'carry' holds the final carry.
// Clobbers tmp and CCR0; decrements xstart, idx and kdx.
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
                                           Register x_xstart,
                                           Register y, Register y_idx,
                                           Register z,
                                           Register carry,
                                           Register product_high, Register product,
                                           Register idx, Register kdx,
                                           Register tmp) {
  // jlong carry, x[], y[], z[];
  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //   huge_128 product = y[idx] * x[xstart] + carry;
  //   z[kdx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }
  // z[xstart] = carry;

  Label L_first_loop, L_first_loop_exit;
  Label L_one_x, L_one_y, L_multiply;

  addic_(xstart, xstart, -1);
  blt(CCR0, L_one_x); // Special case: length of x is 1.

  // Load next two integers of x.
  sldi(tmp, xstart, LogBytesPerInt);
  ldx(x_xstart, x, tmp);
#ifdef VM_LITTLE_ENDIAN
  // Rotate by 32 to swap the two 32-bit halves into digit order.
  rldicl(x_xstart, x_xstart, 32, 0);
#endif

  align(32, 16);
  bind(L_first_loop);

  cmpdi(CCR0, idx, 1);
  blt(CCR0, L_first_loop_exit); // No digits of y left.
  addi(idx, idx, -2);
  beq(CCR0, L_one_y);           // idx was exactly 1: one digit left.

  // Load next two integers of y.
  sldi(tmp, idx, LogBytesPerInt);
  ldx(y_idx, y, tmp);
#ifdef VM_LITTLE_ENDIAN
  rldicl(y_idx, y_idx, 32, 0);  // Swap halves (see above).
#endif


  bind(L_multiply);
  // 128-bit product of the two 64-bit operands.
  multiply64(product_high, product, x_xstart, y_idx);

  li(tmp, 0);
  addc(product, product, carry); // Add carry to result.
  adde(product_high, product_high, tmp); // Add carry of the last addition.
  addi(kdx, kdx, -2);

  // Store result.
#ifdef VM_LITTLE_ENDIAN
  rldicl(product, product, 32, 0); // Back to memory digit order.
#endif
  sldi(tmp, kdx, LogBytesPerInt);
  stdx(product, z, tmp);
  mr_if_needed(carry, product_high); // Carry for the next iteration.
  b(L_first_loop);


  bind(L_one_y); // Load one 32 bit portion of y as (0,value).

  lwz(y_idx, 0, y);
  b(L_multiply);


  bind(L_one_x); // Load one 32 bit portion of x as (0,value).

  lwz(x_xstart, 0, x);
  b(L_first_loop);

  bind(L_first_loop_exit);
}
3525
// Multiply 64 bit by 64 bit and add 128 bit.
//
// One unrolled step of the inner multiply loop: computes
// x_xstart * y[idx..idx+1] + z[idx..idx+1] + carry and stores the low
// 64 bits back into z. The 128-bit sum is left in
// (product_high, product); callers take product_high as the new carry.
// 'offset' is an extra byte displacement applied to both the y load
// and the z load/store. Clobbers tmp, yz_idx and R0 (via
// add2_with_carry).
void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
                                            Register z, Register yz_idx,
                                            Register idx, Register carry,
                                            Register product_high, Register product,
                                            Register tmp, int offset) {

  // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
  // z[kdx] = (jlong)product;

  // Byte offset of y[idx], optionally displaced by 'offset'.
  sldi(tmp, idx, LogBytesPerInt);
  if ( offset ) {
    addi(tmp, tmp, offset);
  }
  ldx(yz_idx, y, tmp);
#ifdef VM_LITTLE_ENDIAN
  // Rotate by 32 to swap the two 32-bit digits into element order.
  rldicl(yz_idx, yz_idx, 32, 0);
#endif

  multiply64(product_high, product, x_xstart, yz_idx);
  ldx(yz_idx, z, tmp); // Reuse yz_idx for the z word at the same offset.
#ifdef VM_LITTLE_ENDIAN
  rldicl(yz_idx, yz_idx, 32, 0);
#endif

  // (product_high, product) += carry + z word.
  add2_with_carry(product_high, product, carry, yz_idx);

  // Recompute the store offset into z.
  sldi(tmp, idx, LogBytesPerInt);
  if ( offset ) {
    addi(tmp, tmp, offset);
  }
#ifdef VM_LITTLE_ENDIAN
  rldicl(product, product, 32, 0); // Back to memory digit order.
#endif
  stdx(product, z, tmp);
}
3562
// Multiply 128 bit by 128 bit. Unrolled inner loop.
//
// Processes four 32-bit digits of y per iteration (two calls to
// multiply_add_128_x_128), then handles up to three left-over digits.
// 'carry' is live across calls; uses CTR for the unrolled loop and
// clobbers R0 (as jdx), tmp, yz_idx, product/product_high, carry2.
void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
                                             Register y, Register z,
                                             Register yz_idx, Register idx, Register carry,
                                             Register product_high, Register product,
                                             Register carry2, Register tmp) {

  // jlong carry, x[], y[], z[];
  // int kdx = ystart+1;
  // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
  //   huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
  //   z[kdx+idx+1] = (jlong)product;
  //   jlong carry2 = (jlong)(product >>> 64);
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }
  // idx += 2;
  // if (idx > 0) {
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }

  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
  const Register jdx = R0;

  // Scale the index: jdx = number of 4-digit iterations.
  srdi_(jdx, idx, 2);
  beq(CCR0, L_third_loop_exit); // Fewer than 4 digits in total.
  mtctr(jdx);                   // Loop count in CTR.

  align(32, 16);
  bind(L_third_loop);

  addi(idx, idx, -4);

  // High 64-bit half first (offset 8), its carry feeds the low half.
  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product_high, product, tmp, 8);
  mr_if_needed(carry2, product_high);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product_high, product, tmp, 0);
  mr_if_needed(carry, product_high);
  bdnz(L_third_loop);

  bind(L_third_loop_exit); // Handle any left-over operand parts.

  andi_(idx, idx, 0x3); // 0..3 digits remain.
  beq(CCR0, L_post_third_loop_done);

  Label L_check_1;

  addic_(idx, idx, -2);
  blt(CCR0, L_check_1); // Fewer than 2 digits left.

  // Two more digits: one 64-bit multiply-add step.
  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product_high, product, tmp, 0);
  mr_if_needed(carry, product_high);

  bind(L_check_1);

  // Undo the -2 and test for a final single digit.
  addi(idx, idx, 0x2);
  andi_(idx, idx, 0x1);
  addic_(idx, idx, -1);
  blt(CCR0, L_post_third_loop_done);

  // Last 32-bit digit: multiply, add z word plus carry, store low word.
  sldi(tmp, idx, LogBytesPerInt);
  lwzx(yz_idx, y, tmp);
  multiply64(product_high, product, x_xstart, yz_idx);
  lwzx(yz_idx, z, tmp);

  add2_with_carry(product_high, product, yz_idx, carry);

  sldi(tmp, idx, LogBytesPerInt);
  stwx(product, z, tmp);
  srdi(product, product, 32);

  // carry = (product_high << 32) | (product >> 32).
  sldi(product_high, product_high, 32);
  orr(product, product, product_high);
  mr_if_needed(carry, product);

  bind(L_post_third_loop_done);
} // multiply_128_x_128_loop
3644
// Multiword (BigInteger-style) multiply: z = x * y, where x has xlen
// 32-bit digits, y has ylen digits, and z receives zlen digits
// (per the comment below, zlen = xlen + ylen; digit order follows the
// pseudo code, with the most significant digit at index 0 — confirm
// against the intrinsic caller). All thirteen temps are clobbered;
// x, xlen, ylen and z are saved and restored around the inner loop.
void MacroAssembler::multiply_to_len(Register x, Register xlen,
                                     Register y, Register ylen,
                                     Register z, Register zlen,
                                     Register tmp1, Register tmp2,
                                     Register tmp3, Register tmp4,
                                     Register tmp5, Register tmp6,
                                     Register tmp7, Register tmp8,
                                     Register tmp9, Register tmp10,
                                     Register tmp11, Register tmp12,
                                     Register tmp13) {

  ShortBranchVerifier sbv(this);

  assert_different_registers(x, xlen, y, ylen, z, zlen,
                             tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
  assert_different_registers(x, xlen, y, ylen, z, zlen,
                             tmp1, tmp2, tmp3, tmp4, tmp5, tmp7);
  assert_different_registers(x, xlen, y, ylen, z, zlen,
                             tmp1, tmp2, tmp3, tmp4, tmp5, tmp8);

  // Register roles for the loops below.
  const Register idx = tmp1;
  const Register kdx = tmp2;
  const Register xstart = tmp3;

  const Register y_idx = tmp4;
  const Register carry = tmp5;
  const Register product = tmp6;
  const Register product_high = tmp7;
  const Register x_xstart = tmp8;
  const Register tmp = tmp9;

  // First Loop.
  //
  // final static long LONG_MASK = 0xffffffffL;
  // int xstart = xlen - 1;
  // int ystart = ylen - 1;
  // long carry = 0;
  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //   long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
  //   z[kdx] = (int)product;
  //   carry = product >>> 32;
  // }
  // z[xstart] = (int)carry;

  mr_if_needed(idx, ylen); // idx = ylen
  mr_if_needed(kdx, zlen); // kdx = xlen + ylen
  li(carry, 0);            // carry = 0

  Label L_done;

  // xstart = xlen - 1; nothing to do for an empty x.
  addic_(xstart, xlen, -1);
  blt(CCR0, L_done);

  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z,
                        carry, product_high, product, idx, kdx, tmp);

  Label L_second_loop;

  cmpdi(CCR0, kdx, 0);
  beq(CCR0, L_second_loop); // No carry digits left to store.

  Label L_carry;

  addic_(kdx, kdx, -1);
  beq(CCR0, L_carry); // Only one slot: store upper half only.

  // Store lower 32 bits of carry.
  sldi(tmp, kdx, LogBytesPerInt);
  stwx(carry, z, tmp);
  srdi(carry, carry, 32);
  addi(kdx, kdx, -1);


  bind(L_carry);

  // Store upper 32 bits of carry.
  sldi(tmp, kdx, LogBytesPerInt);
  stwx(carry, z, tmp);

  // Second and third (nested) loops.
  //
  // for (int i = xstart-1; i >= 0; i--) { // Second loop
  //   carry = 0;
  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
  //                    (z[k] & LONG_MASK) + carry;
  //     z[k] = (int)product;
  //     carry = product >>> 32;
  //   }
  //   z[i] = (int)carry;
  // }
  //
  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx

  bind(L_second_loop);

  li(carry, 0); // carry = 0;

  addic_(xstart, xstart, -1); // i = xstart-1;
  blt(CCR0, L_done);

  Register zsave = tmp10;

  mr(zsave, z); // z is advanced below; keep the base for restore.


  Label L_last_x;

  sldi(tmp, xstart, LogBytesPerInt);
  add(z, z, tmp); // z = z + k - j
  addi(z, z, 4);
  addic_(xstart, xstart, -1); // i = xstart-1;
  blt(CCR0, L_last_x); // Only one 32-bit digit of x left.

  // Load next two integers of x.
  sldi(tmp, xstart, LogBytesPerInt);
  ldx(x_xstart, x, tmp);
#ifdef VM_LITTLE_ENDIAN
  // Rotate by 32 to swap the two 32-bit digits into element order.
  rldicl(x_xstart, x_xstart, 32, 0);
#endif


  Label L_third_loop_prologue;

  bind(L_third_loop_prologue);

  // The inner loop clobbers x (used as carry2), xstart and ylen:
  // save them here and restore after the call.
  Register xsave = tmp11;
  Register xlensave = tmp12;
  Register ylensave = tmp13;

  mr(xsave, x);
  mr(xlensave, xstart);
  mr(ylensave, ylen);


  multiply_128_x_128_loop(x_xstart, y, z, y_idx, ylen,
                          carry, product_high, product, x, tmp);

  mr(z, zsave);
  mr(x, xsave);
  mr(xlen, xlensave); // This is the decrement of the loop counter!
  mr(ylen, ylensave);

  // z[i] = (int)carry (low word), then the high word one slot lower.
  addi(tmp3, xlen, 1); // Note: tmp3 is xstart.
  sldi(tmp, tmp3, LogBytesPerInt);
  stwx(carry, z, tmp);
  addic_(tmp3, tmp3, -1);
  blt(CCR0, L_done);

  srdi(carry, carry, 32);
  sldi(tmp, tmp3, LogBytesPerInt);
  stwx(carry, z, tmp);
  b(L_second_loop);

  // Next infrequent code is moved outside loops.
  bind(L_last_x);

  // Load one 32-bit digit of x as (0, value).
  lwz(x_xstart, 0, x);
  b(L_third_loop_prologue);

  bind(L_done);
} // multiply_to_len
3806
// Debug-build assertion on the compare result currently in CCR0: the
// emitted code falls through to stop(msg, id) unless the expected
// relation (EQ when check_equal, NE otherwise) holds.
// Emits nothing in product builds.
void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
#ifdef ASSERT
  Label ok;
  // Branch past the stop when the asserted condition actually holds.
  if (check_equal) {
    beq(CCR0, ok);
  } else {
    bne(CCR0, ok);
  }
  stop(msg, id); // Assertion failed.
  bind(ok);
#endif
}
3819
3820 void MacroAssembler::asm_assert_mems_zero(bool check_equal, int size, int mem_offset,
3821 Register mem_base, const char* msg, int id) {
3822 #ifdef ASSERT
3823 switch (size) {
3824 case 4:
3825 lwz(R0, mem_offset, mem_base);
|