< prev index next >

src/cpu/x86/vm/sharedRuntime_x86_64.cpp

Print this page




   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"



  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "oops/compiledICHolder.hpp"
  33 #include "prims/jvmtiRedefineClassesTrace.hpp"
  34 #include "runtime/sharedRuntime.hpp"
  35 #include "runtime/vframeArray.hpp"
  36 #include "vmreg_x86.inline.hpp"
  37 #ifdef COMPILER1
  38 #include "c1/c1_Runtime1.hpp"
  39 #endif
  40 #ifdef COMPILER2
  41 #include "opto/runtime.hpp"
  42 #endif
  43 
  44 #define __ masm->
  45 


3948   __ bind(pending);
3949 
3950   RegisterSaver::restore_live_registers(masm);
3951 
3952   // exception pending => remove activation and forward to exception handler
3953 
3954   __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3955 
3956   __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3957   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3958 
3959   // -------------
3960   // make sure all code is generated
3961   masm->flush();
3962 
3963   // return the  blob
3964   // frame_size_words or bytes??
3965   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3966 }
3967 




















































































































































































































































3968 
3969 #ifdef COMPILER2
3970 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3971 //
3972 //------------------------------generate_exception_blob---------------------------
3973 // creates exception blob at the end
3974 // Using exception blob, this code is jumped from a compiled method.
3975 // (see emit_exception_handler in x86_64.ad file)
3976 //
3977 // Given an exception pc at a call we call into the runtime for the
3978 // handler in this method. This handler might merely restore state
3979 // (i.e. callee save registers) unwind the frame and jump to the
3980 // exception handler for the nmethod if there is no Java level handler
3981 // for the nmethod.
3982 //
3983 // This code is entered with a jmp.
3984 //
3985 // Arguments:
3986 //   rax: exception oop
3987 //   rdx: exception pc




   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #ifndef _WINDOWS
  27 #include "alloca.h"
  28 #endif
  29 #include "asm/macroAssembler.hpp"
  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "code/debugInfoRec.hpp"
  32 #include "code/icBuffer.hpp"
  33 #include "code/vtableStubs.hpp"
  34 #include "interpreter/interpreter.hpp"
  35 #include "oops/compiledICHolder.hpp"
  36 #include "prims/jvmtiRedefineClassesTrace.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/vframeArray.hpp"
  39 #include "vmreg_x86.inline.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_Runtime1.hpp"
  42 #endif
  43 #ifdef COMPILER2
  44 #include "opto/runtime.hpp"
  45 #endif
  46 
  47 #define __ masm->
  48 


3951   __ bind(pending);
3952 
3953   RegisterSaver::restore_live_registers(masm);
3954 
3955   // exception pending => remove activation and forward to exception handler
3956 
3957   __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3958 
3959   __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3960   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3961 
3962   // -------------
3963   // make sure all code is generated
3964   masm->flush();
3965 
3966   // return the  blob
3967   // frame_size_words or bytes??
3968   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3969 }
3970 
3971 
3972 //------------------------------Montgomery multiplication------------------------
3973 //
3974 
3975 #ifndef _WINDOWS
3976 
3977 #define ASM_SUBTRACT
3978 
3979 #ifdef ASM_SUBTRACT
// Subtract 0:b from carry:a.  Return carry.
//
// a and b are len-longword numbers indexed from the least significant
// word; carry is treated as one extra word above a[len-1].  The
// difference of the low len words is stored back into a, and the
// return value is carry minus the borrow out of those low words.
//
// The loop relies on INC and DEC leaving the carry flag untouched, so
// the borrow produced by each SBB is still live for the next one, and
// on MOV not changing flags, so the final SBB sees the last borrow.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  // The asm loop below tests its counter only after the first
  // iteration, so a non-positive length would wrap the counter and
  // run far past the end of both arrays.  Nothing to subtract here.
  if (len <= 0) {
    return carry;
  }

  long i = 0, cnt = len;
  unsigned long tmp;
  asm volatile("clc; "
               "0: ; "
               "mov (%[b], %[i], 8), %[tmp]; "
               "sbb %[tmp], (%[a], %[i], 8); "
               "inc %[i]; dec %[cnt]; "
               "jne 0b; "
               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
               : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
               : "memory", "cc");
  return tmp;
}
3997 #else // ASM_SUBTRACT
3998 typedef int __attribute__((mode(TI))) int128;
3999 
4000 // Subtract 0:b from carry:a.  Return carry.
4001 static unsigned long
4002 sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
4003   int128 tmp = 0;
4004   int i;
4005   for (i = 0; i < len; i++) {
4006     tmp += a[i];
4007     tmp -= b[i];
4008     a[i] = tmp;
4009     tmp >>= 64;
4010     assert(-1 <= tmp && tmp <= 0, "invariant");
4011   }
4012   return tmp + carry;
4013 }
4014 #endif // ! ASM_SUBTRACT
4015 
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
//
// T0 is the least significant accumulator word and T2 the most
// significant.  B is loaded into rax, MUL leaves the 128-bit product
// in rdx:rax, and the add/adc chain folds it into T2:T1:T0.  T0, T1
// and T2 must be lvalues; hi and lo exist only to tell the compiler
// that rdx and rax are overwritten.
//
// The last instruction carries an explicit 'q' suffix because T2 has
// a "+g" constraint and may therefore be a memory operand, in which
// case "adc $0, mem" has no register to imply the 64-bit operand size.
#define MACC(A, B, T0, T1, T2)                                  \
do {                                                            \
  unsigned long hi, lo;                                         \
  asm("mul %5; add %%rax, %2; adc %%rdx, %3; adcq $0, %4"       \
           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
           : "r"(A), "a"(B) : "cc");                            \
 } while(0)
4025 
// Multiply (unsigned) Long A by Long B and add twice the double-
// length result into the accumulator formed of T0, T1, and T2.
//
// T0 is the least significant accumulator word and T2 the most
// significant.  The product is computed once (rdx:rax) and the
// add/adc chain is simply issued twice.  T0, T1 and T2 must be
// lvalues; hi and lo exist only to tell the compiler that rdx and rax
// are overwritten.
//
// The carries into T2 use an explicit 'q' suffix because T2 has a
// "+g" constraint and may therefore be a memory operand, in which
// case "adc $0, mem" has no register to imply the 64-bit operand size.
#define MACC2(A, B, T0, T1, T2)                                 \
do {                                                            \
  unsigned long hi, lo;                                         \
  asm("mul %5; add %%rax, %2; adc %%rdx, %3; adcq $0, %4; "     \
           "add %%rax, %2; adc %%rdx, %3; adcq $0, %4"          \
           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
           : "r"(A), "a"(B) : "cc");                            \
 } while(0)
4036 
4037 // Fast Montgomery multiplication.  The derivation of the algorithm is
4038 // in  A Cryptographic Library for the Motorola DSP56000,
4039 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
4040 
4041 static void __attribute__((noinline))
4042 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
4043                     unsigned long m[], unsigned long inv, int len) {
4044   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
4045   int i;
4046 
4047   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
4048 
4049   for (i = 0; i < len; i++) {
4050     int j;
4051     for (j = 0; j < i; j++) {
4052       MACC(a[j], b[i-j], t0, t1, t2);
4053       MACC(m[j], n[i-j], t0, t1, t2);
4054     }
4055     MACC(a[i], b[0], t0, t1, t2);
4056     m[i] = t0 * inv;
4057     MACC(m[i], n[0], t0, t1, t2);
4058 
4059     assert(t0 == 0, "broken Montgomery multiply");
4060 
4061     t0 = t1; t1 = t2; t2 = 0;
4062   }
4063 
4064   for (i = len; i < 2*len; i++) {
4065     int j;
4066     for (j = i-len+1; j < len; j++) {
4067       MACC(a[j], b[i-j], t0, t1, t2);
4068       MACC(m[j], n[i-j], t0, t1, t2);
4069     }
4070     m[i-len] = t0;
4071     t0 = t1; t1 = t2; t2 = 0;
4072   }
4073 
4074   while (t0)
4075     t0 = sub(m, n, t0, len);
4076 }
4077 
4078 // Fast Montgomery squaring.  This uses asymptotically 25% fewer
4079 // multiplies so it should be up to 25% faster than Montgomery
4080 // multiplication.  However, its loop control is more complex and it
4081 // may actually run slower on some machines.
4082 
4083 static void __attribute__((noinline))
4084 montgomery_square(unsigned long a[], unsigned long n[],
4085                   unsigned long m[], unsigned long inv, int len) {
4086   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
4087   int i;
4088 
4089   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
4090 
4091   for (i = 0; i < len; i++) {
4092     int j;
4093     int end = (i+1)/2;
4094     for (j = 0; j < end; j++) {
4095       MACC2(a[j], a[i-j], t0, t1, t2);
4096       MACC(m[j], n[i-j], t0, t1, t2);
4097     }
4098     if ((i & 1) == 0) {
4099       MACC(a[j], a[j], t0, t1, t2);
4100     }
4101     for (; j < i; j++) {
4102       MACC(m[j], n[i-j], t0, t1, t2);
4103     }
4104     m[i] = t0 * inv;
4105     MACC(m[i], n[0], t0, t1, t2);
4106 
4107     assert(t0 == 0, "broken Montgomery square");
4108 
4109     t0 = t1; t1 = t2; t2 = 0;
4110   }
4111 
4112   for (i = len; i < 2*len; i++) {
4113     int start = i-len+1;
4114     int end = start + (len - start)/2;
4115     int j;
4116     for (j = start; j < end; j++) {
4117       MACC2(a[j], a[i-j], t0, t1, t2);
4118       MACC(m[j], n[i-j], t0, t1, t2);
4119     }
4120     if ((i & 1) == 0) {
4121       MACC(a[j], a[j], t0, t1, t2);
4122     }
4123     for (; j < len; j++) {
4124       MACC(m[j], n[i-j], t0, t1, t2);
4125     }
4126     m[i-len] = t0;
4127     t0 = t1; t1 = t2; t2 = 0;
4128   }
4129 
4130   while (t0)
4131     t0 = sub(m, n, t0, len);
4132 }
4133 
// Exchange the upper and lower 32-bit words of a 64-bit longword.
static unsigned long swap(unsigned long x) {
  const unsigned long upper_moved_down = x >> 32;
  const unsigned long lower_moved_up = x << 32;
  return lower_moved_up | upper_moved_down;
}
4138 
4139 // Copy len longwords from s to d, word-swapping as we go.  The
4140 // destination array is reversed.
4141 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
4142   d += len;
4143   while(len-- > 0) {
4144     d--;
4145     *d = swap(*s);
4146     s++;
4147   }
4148 }
4149 
4150 // The threshold at which squaring is advantageous was determined
4151 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
4152 #define MONTGOMERY_SQUARING_THRESHOLD 64
4153 
4154 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
4155                                         jint len, jlong inv,
4156                                         jint *m_ints) {
4157   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
4158   int longwords = len/2;
4159 
4160   // Make very sure we don't use so much space that the stack might
4161   // overflow.  512 jints corresponds to an 16384-bit integer and
4162   // will use here a total of 8k bytes of stack space.
4163   int total_allocation = longwords * sizeof (unsigned long) * 4;
4164   guarantee(total_allocation <= 8192, "must be");
4165   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
4166 
4167   // Local scratch arrays
4168   unsigned long
4169     *a = scratch + 0 * longwords,
4170     *b = scratch + 1 * longwords,
4171     *n = scratch + 2 * longwords,
4172     *m = scratch + 3 * longwords;
4173 
4174   reverse_words((unsigned long *)a_ints, a, longwords);
4175   reverse_words((unsigned long *)b_ints, b, longwords);
4176   reverse_words((unsigned long *)n_ints, n, longwords);
4177 
4178   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
4179 
4180   reverse_words(m, (unsigned long *)m_ints, longwords);
4181 }
4182 
4183 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
4184                                       jint len, jlong inv,
4185                                       jint *m_ints) {
4186   assert(len % 2 == 0, "array length in montgomery_square must be even");
4187   int longwords = len/2;
4188 
4189   // Make very sure we don't use so much space that the stack might
4190   // overflow.  512 jints corresponds to an 16384-bit integer and
4191   // will use here a total of 6k bytes of stack space.
4192   int total_allocation = longwords * sizeof (unsigned long) * 3;
4193   guarantee(total_allocation <= 8192, "must be");
4194   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
4195 
4196   // Local scratch arrays
4197   unsigned long
4198     *a = scratch + 0 * longwords,
4199     *n = scratch + 1 * longwords,
4200     *m = scratch + 2 * longwords;
4201 
4202   reverse_words((unsigned long *)a_ints, a, longwords);
4203   reverse_words((unsigned long *)n_ints, n, longwords);
4204 
4205   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
4206     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
4207   } else {
4208     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
4209   }
4210 
4211   reverse_words(m, (unsigned long *)m_ints, longwords);
4212 }
4213 
4214 #endif // WINDOWS
4215 
4216 #ifdef COMPILER2
4217 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
4218 //
4219 //------------------------------generate_exception_blob---------------------------
4220 // creates exception blob at the end
4221 // Using exception blob, this code is jumped from a compiled method.
4222 // (see emit_exception_handler in x86_64.ad file)
4223 //
4224 // Given an exception pc at a call we call into the runtime for the
4225 // handler in this method. This handler might merely restore state
4226 // (i.e. callee save registers) unwind the frame and jump to the
4227 // exception handler for the nmethod if there is no Java level handler
4228 // for the nmethod.
4229 //
4230 // This code is entered with a jmp.
4231 //
4232 // Arguments:
4233 //   rax: exception oop
4234 //   rdx: exception pc


< prev index next >