6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "code/debugInfoRec.hpp"
29 #include "code/icBuffer.hpp"
30 #include "code/vtableStubs.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "oops/compiledICHolder.hpp"
33 #include "prims/jvmtiRedefineClassesTrace.hpp"
34 #include "runtime/sharedRuntime.hpp"
35 #include "runtime/vframeArray.hpp"
36 #include "vmreg_x86.inline.hpp"
37 #ifdef COMPILER1
38 #include "c1/c1_Runtime1.hpp"
39 #endif
40 #ifdef COMPILER2
41 #include "opto/runtime.hpp"
42 #endif
43
44 #define __ masm->
45
3494
3495 RegisterSaver::restore_live_registers(masm);
3496
3497 // exception pending => remove activation and forward to exception handler
3498
3499 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3500
3501 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3502 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3503
3504 // -------------
3505 // make sure all code is generated
3506 masm->flush();
3507
3508 // return the blob
3509 // frame_size_words or bytes??
3510 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3511 }
3512
3513
3514 #ifdef COMPILER2
3515 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3516 //
3517 //------------------------------generate_exception_blob---------------------------
3518 // creates exception blob at the end
3519 // Using exception blob, this code is jumped from a compiled method.
3520 // (see emit_exception_handler in x86_64.ad file)
3521 //
3522 // Given an exception pc at a call we call into the runtime for the
3523 // handler in this method. This handler might merely restore state
3524 // (i.e. callee save registers) unwind the frame and jump to the
3525 // exception handler for the nmethod if there is no Java level handler
3526 // for the nmethod.
3527 //
3528 // This code is entered with a jmp.
3529 //
3530 // Arguments:
3531 // rax: exception oop
3532 // rdx: exception pc
3533 //
|
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "alloca.h"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "code/debugInfoRec.hpp"
30 #include "code/icBuffer.hpp"
31 #include "code/vtableStubs.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "oops/compiledICHolder.hpp"
34 #include "prims/jvmtiRedefineClassesTrace.hpp"
35 #include "runtime/sharedRuntime.hpp"
36 #include "runtime/vframeArray.hpp"
37 #include "vmreg_x86.inline.hpp"
38 #ifdef COMPILER1
39 #include "c1/c1_Runtime1.hpp"
40 #endif
41 #ifdef COMPILER2
42 #include "opto/runtime.hpp"
43 #endif
44
45 #define __ masm->
46
3495
3496 RegisterSaver::restore_live_registers(masm);
3497
3498 // exception pending => remove activation and forward to exception handler
3499
3500 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3501
3502 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3503 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3504
3505 // -------------
3506 // make sure all code is generated
3507 masm->flush();
3508
3509 // return the blob
3510 // frame_size_words or bytes??
3511 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3512 }
3513
3514
3515 //------------------------------Montgomery multiplication------------------------
3516 //
3517
3518 #ifndef _WINDOWS
3519
3520 #define ASM_SUBTRACT
3521
3522 #ifdef ASM_SUBTRACT
3523 // Subtract 0:b from carry:a. Return carry.
3524 static unsigned long
3525 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3526 long i = 0, cnt = len;
3527 unsigned long tmp;
3528 asm volatile("clc; "
3529 "0: ; "
3530 "mov (%[b], %[i], 8), %[tmp]; "
3531 "sbb %[tmp], (%[a], %[i], 8); "
3532 "inc %[i]; dec %[cnt]; "
3533 "jne 0b; "
3534 "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
3535 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
3536 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
3537 : "memory");
3538 return tmp;
3539 }
3540 #else // ASM_SUBTRACT
3541 typedef int __attribute__((mode(TI))) int128;
3542
3543 // Subtract 0:b from carry:a. Return carry.
3544 static unsigned long
3545 sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
3546 int128 tmp = 0;
3547 int i;
3548 for (i = 0; i < len; i++) {
3549 tmp += a[i];
3550 tmp -= b[i];
3551 a[i] = tmp;
3552 tmp >>= 64;
3553 assert(-1 <= tmp && tmp <= 0, "invariant");
3554 }
3555 return tmp + carry;
3556 }
3557 #endif // ! ASM_SUBTRACT
3558
3559 // Multiply (unsigned) Long A by Long B, accumulating the double-
3560 // length result into the accumulator formed of T0, T1, and T2.
3561 #define MACC(A, B, T0, T1, T2) \
3562 do { \
3563 unsigned long hi, lo; \
3564 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
3565 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
3566 : "r"(A), "a"(B) : "cc"); \
3567 } while(0)
3568
3569 // As above, but add twice the double-length result into the
3570 // accumulator.
3571 #define MACC2(A, B, T0, T1, T2) \
3572 do { \
3573 unsigned long hi, lo; \
3574 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \
3575 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
3576 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
3577 : "r"(A), "a"(B) : "cc"); \
3578 } while(0)
3579
3580 // Fast Montgomery multiplication. The derivation of the algorithm is
3581 // in A Cryptographic Library for the Motorola DSP56000,
3582 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
3583
3584 static void __attribute__((noinline))
3585 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3586 unsigned long m[], unsigned long inv, int len) {
3587 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3588 int i;
3589
3590 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3591
3592 for (i = 0; i < len; i++) {
3593 int j;
3594 for (j = 0; j < i; j++) {
3595 MACC(a[j], b[i-j], t0, t1, t2);
3596 MACC(m[j], n[i-j], t0, t1, t2);
3597 }
3598 MACC(a[i], b[0], t0, t1, t2);
3599 m[i] = t0 * inv;
3600 MACC(m[i], n[0], t0, t1, t2);
3601
3602 assert(t0 == 0, "broken Montgomery multiply");
3603
3604 t0 = t1; t1 = t2; t2 = 0;
3605 }
3606
3607 for (i = len; i < 2*len; i++) {
3608 int j;
3609 for (j = i-len+1; j < len; j++) {
3610 MACC(a[j], b[i-j], t0, t1, t2);
3611 MACC(m[j], n[i-j], t0, t1, t2);
3612 }
3613 m[i-len] = t0;
3614 t0 = t1; t1 = t2; t2 = 0;
3615 }
3616
3617 while (t0)
3618 t0 = sub(m, n, t0, len);
3619 }
3620
3621 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3622 // multiplies so it should be up to 25% faster than Montgomery
3623 // multiplication. However, its loop control is more complex and it
3624 // may actually run slower on some machines.
3625
3626 static void __attribute__((noinline))
3627 montgomery_square(unsigned long a[], unsigned long n[],
3628 unsigned long m[], unsigned long inv, int len) {
3629 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3630 int i;
3631
3632 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3633
3634 for (i = 0; i < len; i++) {
3635 int j;
3636 int end = (i+1)/2;
3637 for (j = 0; j < end; j++) {
3638 MACC2(a[j], a[i-j], t0, t1, t2);
3639 MACC(m[j], n[i-j], t0, t1, t2);
3640 }
3641 if ((i & 1) == 0) {
3642 MACC(a[j], a[j], t0, t1, t2);
3643 }
3644 for (; j < i; j++) {
3645 MACC(m[j], n[i-j], t0, t1, t2);
3646 }
3647 m[i] = t0 * inv;
3648 MACC(m[i], n[0], t0, t1, t2);
3649
3650 assert(t0 == 0, "broken Montgomery square");
3651
3652 t0 = t1; t1 = t2; t2 = 0;
3653 }
3654
3655 for (i = len; i < 2*len; i++) {
3656 int start = i-len+1;
3657 int end = start + (len - start)/2;
3658 int j;
3659 for (j = start; j < end; j++) {
3660 MACC2(a[j], a[i-j], t0, t1, t2);
3661 MACC(m[j], n[i-j], t0, t1, t2);
3662 }
3663 if ((i & 1) == 0) {
3664 MACC(a[j], a[j], t0, t1, t2);
3665 }
3666 for (; j < len; j++) {
3667 MACC(m[j], n[i-j], t0, t1, t2);
3668 }
3669 m[i-len] = t0;
3670 t0 = t1; t1 = t2; t2 = 0;
3671 }
3672
3673 while (t0)
3674 t0 = sub(m, n, t0, len);
3675 }
3676
3677 // Swap words in a longword.
3678 static unsigned long swap(unsigned long x) {
3679 return (x << 32) | (x >> 32);
3680 }
3681
3682 // Copy len longwords from s to d, word-swapping as we go. The
3683 // destination array is reversed.
3684 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3685 d += len;
3686 while(len-- > 0) {
3687 d--;
3688 *d = swap(*s);
3689 s++;
3690 }
3691 }
3692
3693 // The threshold at which squaring is advantageous was determined
3694 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3695 #define MONTGOMERY_SQUARING_THRESHOLD 64
3696
3697 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3698 jint len, jlong inv,
3699 jint *m_ints) {
3700 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3701 int longwords = len/2;
3702
3703 // Make very sure we don't use so much space that the stack might
3704 // overflow. 512 jints corresponds to an 16384-bit integer and
3705 // will use here a total of 8k bytes of stack space.
3706 int total_allocation = longwords * sizeof (unsigned long) * 4;
3707 guarantee(total_allocation <= 8192, "must be");
3708 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3709
3710 // Local scratch arrays
3711 unsigned long
3712 *a = scratch + 0 * longwords,
3713 *b = scratch + 1 * longwords,
3714 *n = scratch + 2 * longwords,
3715 *m = scratch + 3 * longwords;
3716
3717 reverse_words((unsigned long *)a_ints, a, longwords);
3718 reverse_words((unsigned long *)b_ints, b, longwords);
3719 reverse_words((unsigned long *)n_ints, n, longwords);
3720
3721 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3722
3723 reverse_words(m, (unsigned long *)m_ints, longwords);
3724 }
3725
3726 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3727 jint len, jlong inv,
3728 jint *m_ints) {
3729 assert(len % 2 == 0, "array length in montgomery_square must be even");
3730 int longwords = len/2;
3731
3732 // Make very sure we don't use so much space that the stack might
3733 // overflow. 512 jints corresponds to an 16384-bit integer and
3734 // will use here a total of 6k bytes of stack space.
3735 int total_allocation = longwords * sizeof (unsigned long) * 3;
3736 guarantee(total_allocation <= 8192, "must be");
3737 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3738
3739 // Local scratch arrays
3740 unsigned long
3741 *a = scratch + 0 * longwords,
3742 *n = scratch + 1 * longwords,
3743 *m = scratch + 2 * longwords;
3744
3745 reverse_words((unsigned long *)a_ints, a, longwords);
3746 reverse_words((unsigned long *)n_ints, n, longwords);
3747
3748 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3749 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3750 } else {
3751 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3752 }
3753
3754 reverse_words(m, (unsigned long *)m_ints, longwords);
3755 }
3756
3757 #endif // WINDOWS
3758
3759 #ifdef COMPILER2
3760 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3761 //
3762 //------------------------------generate_exception_blob---------------------------
3763 // creates exception blob at the end
3764 // Using exception blob, this code is jumped from a compiled method.
3765 // (see emit_exception_handler in x86_64.ad file)
3766 //
3767 // Given an exception pc at a call we call into the runtime for the
3768 // handler in this method. This handler might merely restore state
3769 // (i.e. callee save registers) unwind the frame and jump to the
3770 // exception handler for the nmethod if there is no Java level handler
3771 // for the nmethod.
3772 //
3773 // This code is entered with a jmp.
3774 //
3775 // Arguments:
3776 // rax: exception oop
3777 // rdx: exception pc
3778 //
|