6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "code/debugInfoRec.hpp"
29 #include "code/icBuffer.hpp"
30 #include "code/vtableStubs.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "oops/compiledICHolder.hpp"
33 #include "prims/jvmtiRedefineClassesTrace.hpp"
34 #include "runtime/sharedRuntime.hpp"
35 #include "runtime/vframeArray.hpp"
36 #include "vmreg_x86.inline.hpp"
37 #ifdef COMPILER1
38 #include "c1/c1_Runtime1.hpp"
39 #endif
40 #ifdef COMPILER2
41 #include "opto/runtime.hpp"
42 #endif
43
44 #define __ masm->
45
3494
3495 RegisterSaver::restore_live_registers(masm);
3496
3497 // exception pending => remove activation and forward to exception handler
3498
3499 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3500
3501 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3502 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3503
3504 // -------------
3505 // make sure all code is generated
3506 masm->flush();
3507
3508 // return the blob
3509 // frame_size_words or bytes??
3510 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3511 }
3512
3513
3514 #ifdef COMPILER2
3515 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3516 //
3517 //------------------------------generate_exception_blob---------------------------
3518 // creates exception blob at the end
3519 // Using exception blob, this code is jumped from a compiled method.
3520 // (see emit_exception_handler in x86_64.ad file)
3521 //
3522 // Given an exception pc at a call we call into the runtime for the
3523 // handler in this method. This handler might merely restore state
3524 // (i.e. callee save registers) unwind the frame and jump to the
3525 // exception handler for the nmethod if there is no Java level handler
3526 // for the nmethod.
3527 //
3528 // This code is entered with a jmp.
3529 //
3530 // Arguments:
3531 // rax: exception oop
3532 // rdx: exception pc
3533 //
|
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "alloca.h"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "code/debugInfoRec.hpp"
30 #include "code/icBuffer.hpp"
31 #include "code/vtableStubs.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "oops/compiledICHolder.hpp"
34 #include "prims/jvmtiRedefineClassesTrace.hpp"
35 #include "runtime/sharedRuntime.hpp"
36 #include "runtime/vframeArray.hpp"
37 #include "vmreg_x86.inline.hpp"
38 #ifdef COMPILER1
39 #include "c1/c1_Runtime1.hpp"
40 #endif
41 #ifdef COMPILER2
42 #include "opto/runtime.hpp"
43 #endif
44
45 #define __ masm->
46
3495
3496 RegisterSaver::restore_live_registers(masm);
3497
3498 // exception pending => remove activation and forward to exception handler
3499
3500 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3501
3502 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3503 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3504
3505 // -------------
3506 // make sure all code is generated
3507 masm->flush();
3508
3509 // return the blob
3510 // frame_size_words or bytes??
3511 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3512 }
3513
3514
3515 //------------------------------Montgomery multiplication------------------------
3516 //
3517
3518 #define ASM_SUBTRACT
3519
3520 #ifdef ASM_SUBTRACT
3521 // Subtract 0:b from carry:a. Return carry.
3522 static unsigned long
3523 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3524 long i = 0, cnt = len;
3525 unsigned long tmp;
3526 asm volatile("clc; "
3527 "0: ; "
3528 "mov (%[b], %[i], 8), %[tmp]; "
3529 "sbb %[tmp], (%[a], %[i], 8); "
3530 "inc %[i]; dec %[cnt]; "
3531 "jne 0b; "
3532 "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
3533 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
3534 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
3535 : "memory");
3536 return tmp;
3537 }
3538 #else // ASM_SUBTRACT
3539 typedef int __attribute__((mode(TI))) int128;
3540
3541 // Subtract 0:b from carry:a. Return carry.
3542 static unsigned long
3543 sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
3544 int128 tmp = 0;
3545 int i;
3546 for (i = 0; i < len; i++) {
3547 tmp += a[i];
3548 tmp -= b[i];
3549 a[i] = tmp;
3550 tmp >>= 64;
3551 assert(-1 <= tmp && tmp <= 0, "invariant");
3552 }
3553 return tmp + carry;
3554 }
3555 #endif // ! ASM_SUBTRACT
3556
3557 // Multiply (unsigned) Long A by Long B, accumulating the double-
3558 // length result into the accumulator formed of T0, T1, and T2.
3559 #define MACC(A, B, T0, T1, T2) \
3560 do { \
3561 unsigned long hi, lo; \
3562 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
3563 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
3564 : "r"(A), "a"(B) : "cc"); \
3565 } while(0)
3566
3567 // As above, but add twice the double-length result into the
3568 // accumulator.
3569 #define MACC2(A, B, T0, T1, T2) \
3570 do { \
3571 unsigned long hi, lo; \
3572 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \
3573 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
3574 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
3575 : "r"(A), "a"(B) : "cc"); \
3576 } while(0)
3577
3578 // Fast Montgomery multiplication. The derivation of the algorithm is
3579 // in A Cryptographic Library for the Motorola DSP56000,
3580 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
3581
3582 static void __attribute__((noinline))
3583 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3584 unsigned long m[], unsigned long inv, int len) {
3585 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3586 int i;
3587
3588 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3589
3590 for (i = 0; i < len; i++) {
3591 int j;
3592 for (j = 0; j < i; j++) {
3593 MACC(a[j], b[i-j], t0, t1, t2);
3594 MACC(m[j], n[i-j], t0, t1, t2);
3595 }
3596 MACC(a[i], b[0], t0, t1, t2);
3597 m[i] = t0 * inv;
3598 MACC(m[i], n[0], t0, t1, t2);
3599
3600 assert(t0 == 0, "broken Montgomery multiply");
3601
3602 t0 = t1; t1 = t2; t2 = 0;
3603 }
3604
3605 for (i = len; i < 2*len; i++) {
3606 int j;
3607 for (j = i-len+1; j < len; j++) {
3608 MACC(a[j], b[i-j], t0, t1, t2);
3609 MACC(m[j], n[i-j], t0, t1, t2);
3610 }
3611 m[i-len] = t0;
3612 t0 = t1; t1 = t2; t2 = 0;
3613 }
3614
3615 while (t0)
3616 t0 = sub(m, n, t0, len);
3617 }
3618
3619 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3620 // multiplies so it should be up to 25% faster than Montgomery
3621 // multiplication. However, its loop control is more complex and it
3622 // may actually run slower on some machines.
3623
3624 static void __attribute__((noinline))
3625 montgomery_square(unsigned long a[], unsigned long n[],
3626 unsigned long m[], unsigned long inv, int len) {
3627 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3628 int i;
3629
3630 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3631
3632 for (i = 0; i < len; i++) {
3633 int j;
3634 int end = (i+1)/2;
3635 for (j = 0; j < end; j++) {
3636 MACC2(a[j], a[i-j], t0, t1, t2);
3637 MACC(m[j], n[i-j], t0, t1, t2);
3638 }
3639 if ((i & 1) == 0) {
3640 MACC(a[j], a[j], t0, t1, t2);
3641 }
3642 for (; j < i; j++) {
3643 MACC(m[j], n[i-j], t0, t1, t2);
3644 }
3645 m[i] = t0 * inv;
3646 MACC(m[i], n[0], t0, t1, t2);
3647
3648 assert(t0 == 0, "broken Montgomery square");
3649
3650 t0 = t1; t1 = t2; t2 = 0;
3651 }
3652
3653 for (i = len; i < 2*len; i++) {
3654 int start = i-len+1;
3655 int end = start + (len - start)/2;
3656 int j;
3657 for (j = start; j < end; j++) {
3658 MACC2(a[j], a[i-j], t0, t1, t2);
3659 MACC(m[j], n[i-j], t0, t1, t2);
3660 }
3661 if ((i & 1) == 0) {
3662 MACC(a[j], a[j], t0, t1, t2);
3663 }
3664 for (; j < len; j++) {
3665 MACC(m[j], n[i-j], t0, t1, t2);
3666 }
3667 m[i-len] = t0;
3668 t0 = t1; t1 = t2; t2 = 0;
3669 }
3670
3671 while (t0)
3672 t0 = sub(m, n, t0, len);
3673 }
3674
3675 // Swap words in a longword.
3676 static unsigned long swap(unsigned long x) {
3677 return (x << 32) | (x >> 32);
3678 }
3679
3680 // Copy len longwords from s to d, word-swapping as we go. The
3681 // destination array is reversed.
3682 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3683 d += len;
3684 while(len-- > 0) {
3685 d--;
3686 *d = swap(*s);
3687 s++;
3688 }
3689 }
3690
3691 // The threshold at which squaring is advantageous was determined
3692 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3693 #define MONTGOMERY_SQUARING_THRESHOLD 64
3694
3695 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3696 jint len, jlong inv,
3697 jint *m_ints) {
3698 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3699 int longwords = len/2;
3700
3701 // Make very sure we don't use so much space that the stack might
3702 // overflow. 512 jints corresponds to an 16384-bit integer and
3703 // will use here a total of 8k bytes of stack space.
3704 int total_allocation = longwords * sizeof (unsigned long) * 4;
3705 guarantee(total_allocation <= 8192, "must be");
3706 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3707
3708 // Local scratch arrays
3709 unsigned long
3710 *a = scratch + 0 * longwords,
3711 *b = scratch + 1 * longwords,
3712 *n = scratch + 2 * longwords,
3713 *m = scratch + 3 * longwords;
3714
3715 reverse_words((unsigned long *)a_ints, a, longwords);
3716 reverse_words((unsigned long *)b_ints, b, longwords);
3717 reverse_words((unsigned long *)n_ints, n, longwords);
3718
3719 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3720
3721 reverse_words(m, (unsigned long *)m_ints, longwords);
3722 }
3723
3724 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3725 jint len, jlong inv,
3726 jint *m_ints) {
3727 assert(len % 2 == 0, "array length in montgomery_square must be even");
3728 int longwords = len/2;
3729
3730 // Make very sure we don't use so much space that the stack might
3731 // overflow. 512 jints corresponds to an 16384-bit integer and
3732 // will use here a total of 6k bytes of stack space.
3733 int total_allocation = longwords * sizeof (unsigned long) * 3;
3734 guarantee(total_allocation <= 8192, "must be");
3735 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3736
3737 // Local scratch arrays
3738 unsigned long
3739 *a = scratch + 0 * longwords,
3740 *n = scratch + 1 * longwords,
3741 *m = scratch + 2 * longwords;
3742
3743 reverse_words((unsigned long *)a_ints, a, longwords);
3744 reverse_words((unsigned long *)n_ints, n, longwords);
3745
3746 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3747 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3748 } else {
3749 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3750 }
3751
3752 reverse_words(m, (unsigned long *)m_ints, longwords);
3753 }
3754
3755 #ifdef COMPILER2
3756 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3757 //
3758 //------------------------------generate_exception_blob---------------------------
3759 // creates exception blob at the end
3760 // Using exception blob, this code is jumped from a compiled method.
3761 // (see emit_exception_handler in x86_64.ad file)
3762 //
3763 // Given an exception pc at a call we call into the runtime for the
3764 // handler in this method. This handler might merely restore state
3765 // (i.e. callee save registers) unwind the frame and jump to the
3766 // exception handler for the nmethod if there is no Java level handler
3767 // for the nmethod.
3768 //
3769 // This code is entered with a jmp.
3770 //
3771 // Arguments:
3772 // rax: exception oop
3773 // rdx: exception pc
3774 //
|