6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/macroAssembler.hpp" 27 #include "asm/macroAssembler.inline.hpp" 28 #include "code/debugInfoRec.hpp" 29 #include "code/icBuffer.hpp" 30 #include "code/vtableStubs.hpp" 31 #include "interpreter/interpreter.hpp" 32 #include "oops/compiledICHolder.hpp" 33 #include "prims/jvmtiRedefineClassesTrace.hpp" 34 #include "runtime/sharedRuntime.hpp" 35 #include "runtime/vframeArray.hpp" 36 #include "vmreg_x86.inline.hpp" 37 #ifdef COMPILER1 38 #include "c1/c1_Runtime1.hpp" 39 #endif 40 #ifdef COMPILER2 41 #include "opto/runtime.hpp" 42 #endif 43 44 #define __ masm-> 45 3948 __ bind(pending); 3949 3950 RegisterSaver::restore_live_registers(masm); 3951 3952 // exception pending => remove activation and forward to exception handler 3953 3954 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); 3955 3956 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3957 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3958 3959 // ------------- 3960 // make sure all code is generated 3961 masm->flush(); 3962 3963 // return the 
blob 3964 // frame_size_words or bytes?? 3965 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3966 } 3967 3968 3969 #ifdef COMPILER2 3970 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame 3971 // 3972 //------------------------------generate_exception_blob--------------------------- 3973 // creates exception blob at the end 3974 // Using exception blob, this code is jumped from a compiled method. 3975 // (see emit_exception_handler in x86_64.ad file) 3976 // 3977 // Given an exception pc at a call we call into the runtime for the 3978 // handler in this method. This handler might merely restore state 3979 // (i.e. callee save registers) unwind the frame and jump to the 3980 // exception handler for the nmethod if there is no Java level handler 3981 // for the nmethod. 3982 // 3983 // This code is entered with a jmp. 3984 // 3985 // Arguments: 3986 // rax: exception oop 3987 // rdx: exception pc | 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #ifndef _WINDOWS 27 #include "alloca.h" 28 #endif 29 #include "asm/macroAssembler.hpp" 30 #include "asm/macroAssembler.inline.hpp" 31 #include "code/debugInfoRec.hpp" 32 #include "code/icBuffer.hpp" 33 #include "code/vtableStubs.hpp" 34 #include "interpreter/interpreter.hpp" 35 #include "oops/compiledICHolder.hpp" 36 #include "prims/jvmtiRedefineClassesTrace.hpp" 37 #include "runtime/sharedRuntime.hpp" 38 #include "runtime/vframeArray.hpp" 39 #include "vmreg_x86.inline.hpp" 40 #ifdef COMPILER1 41 #include "c1/c1_Runtime1.hpp" 42 #endif 43 #ifdef COMPILER2 44 #include "opto/runtime.hpp" 45 #endif 46 47 #define __ masm-> 48 3951 __ bind(pending); 3952 3953 RegisterSaver::restore_live_registers(masm); 3954 3955 // exception pending => remove activation and forward to exception handler 3956 3957 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); 3958 3959 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3960 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3961 3962 // ------------- 3963 // make sure all code is generated 3964 masm->flush(); 3965 3966 // return the blob 3967 // frame_size_words or bytes?? 3968 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3969 } 3970 3971 3972 //------------------------------Montgomery multiplication------------------------ 3973 // 3974 3975 #ifndef _WINDOWS 3976 3977 #define ASM_SUBTRACT 3978 3979 #ifdef ASM_SUBTRACT 3980 // Subtract 0:b from carry:a. Return carry. 
// Inline-assembly variant of the multi-word subtract: a[0..len) -= b[0..len),
// word by word with borrow propagation, then the final borrow is taken
// out of `carry` and that result is returned.
// NOTE: the loop tests its counter only after the first iteration, so
// this variant requires len >= 1.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  long i = 0, cnt = len;
  unsigned long tmp;
  // The borrow lives in the CPU carry flag for the whole loop.  INC and
  // DEC are used for the index/counter because they leave the carry flag
  // untouched, so the borrow produced by one SBB reaches the next one.
  asm volatile("clc; "                              // start with no borrow
               "0: ; "                              // loop head
               "mov (%[b], %[i], 8), %[tmp]; "      // tmp = b[i]
               "sbb %[tmp], (%[a], %[i], 8); "      // a[i] -= tmp + borrow
               "inc %[i]; dec %[cnt]; "             // advance; sets ZF at the end
               "jne 0b; "                           // repeat while cnt != 0
               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "  // tmp = carry - borrow
               : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
               : "memory");                         // a[] is written via pointer
  return tmp;
}
#else // ASM_SUBTRACT
// Signed 128-bit integer type used as a borrow-carrying accumulator below.
typedef int __attribute__((mode(TI))) int128;

// Subtract 0:b from carry:a.  Return carry.
// Portable variant: after each word, the upper 64 bits of the accumulator
// (asserted to be 0 or -1) carry the borrow into the next word.
// NOTE(review): len is `int` here but `long` in the assembly variant above.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
  int128 tmp = 0;
  int i;
  for (i = 0; i < len; i++) {
    tmp += a[i];
    tmp -= b[i];
    a[i] = tmp;   // store the low 64 bits
    tmp >>= 64;   // keep only the borrow for the next word
    assert(-1 <= tmp && tmp <= 0, "invariant");
  }
  return tmp + carry;
}
#endif // ! ASM_SUBTRACT

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
// B is placed in rax and multiplied by A (operand %5); the 128-bit
// product in rdx:rax is added into T0 (low) and T1 (middle), and the
// resulting carry is added into T2 (high).  T0, T1 and T2 must be
// lvalues.  The macro declares locals named `hi` and `lo`, so callers
// must not pass variables with those names.
#define MACC(A, B, T0, T1, T2)                                  \
do {                                                            \
  unsigned long hi, lo;                                         \
  asm("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4"        \
      : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)       \
      : "r"(A), "a"(B) : "cc");                                 \
 } while(0)

// As above, but add twice the double-length result into the
// accumulator.
// The product is computed once and the add/adc/adc sequence is issued
// twice.
#define MACC2(A, B, T0, T1, T2)                                 \
do {                                                            \
  unsigned long hi, lo;                                         \
  asm("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; "      \
      "add %%rax, %2; adc %%rdx, %3; adc $0, %4"                \
      : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)       \
      : "r"(A), "a"(B) : "cc");                                 \
 } while(0)

// Fast Montgomery multiplication.  The derivation of the algorithm is
// in  A Cryptographic Library for the Motorola DSP56000,
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
4040 4041 static void __attribute__((noinline)) 4042 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], 4043 unsigned long m[], unsigned long inv, int len) { 4044 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 4045 int i; 4046 4047 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); 4048 4049 for (i = 0; i < len; i++) { 4050 int j; 4051 for (j = 0; j < i; j++) { 4052 MACC(a[j], b[i-j], t0, t1, t2); 4053 MACC(m[j], n[i-j], t0, t1, t2); 4054 } 4055 MACC(a[i], b[0], t0, t1, t2); 4056 m[i] = t0 * inv; 4057 MACC(m[i], n[0], t0, t1, t2); 4058 4059 assert(t0 == 0, "broken Montgomery multiply"); 4060 4061 t0 = t1; t1 = t2; t2 = 0; 4062 } 4063 4064 for (i = len; i < 2*len; i++) { 4065 int j; 4066 for (j = i-len+1; j < len; j++) { 4067 MACC(a[j], b[i-j], t0, t1, t2); 4068 MACC(m[j], n[i-j], t0, t1, t2); 4069 } 4070 m[i-len] = t0; 4071 t0 = t1; t1 = t2; t2 = 0; 4072 } 4073 4074 while (t0) 4075 t0 = sub(m, n, t0, len); 4076 } 4077 4078 // Fast Montgomery squaring. This uses asymptotically 25% fewer 4079 // multiplies so it should be up to 25% faster than Montgomery 4080 // multiplication. However, its loop control is more complex and it 4081 // may actually run slower on some machines. 
4082 4083 static void __attribute__((noinline)) 4084 montgomery_square(unsigned long a[], unsigned long n[], 4085 unsigned long m[], unsigned long inv, int len) { 4086 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 4087 int i; 4088 4089 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); 4090 4091 for (i = 0; i < len; i++) { 4092 int j; 4093 int end = (i+1)/2; 4094 for (j = 0; j < end; j++) { 4095 MACC2(a[j], a[i-j], t0, t1, t2); 4096 MACC(m[j], n[i-j], t0, t1, t2); 4097 } 4098 if ((i & 1) == 0) { 4099 MACC(a[j], a[j], t0, t1, t2); 4100 } 4101 for (; j < i; j++) { 4102 MACC(m[j], n[i-j], t0, t1, t2); 4103 } 4104 m[i] = t0 * inv; 4105 MACC(m[i], n[0], t0, t1, t2); 4106 4107 assert(t0 == 0, "broken Montgomery square"); 4108 4109 t0 = t1; t1 = t2; t2 = 0; 4110 } 4111 4112 for (i = len; i < 2*len; i++) { 4113 int start = i-len+1; 4114 int end = start + (len - start)/2; 4115 int j; 4116 for (j = start; j < end; j++) { 4117 MACC2(a[j], a[i-j], t0, t1, t2); 4118 MACC(m[j], n[i-j], t0, t1, t2); 4119 } 4120 if ((i & 1) == 0) { 4121 MACC(a[j], a[j], t0, t1, t2); 4122 } 4123 for (; j < len; j++) { 4124 MACC(m[j], n[i-j], t0, t1, t2); 4125 } 4126 m[i-len] = t0; 4127 t0 = t1; t1 = t2; t2 = 0; 4128 } 4129 4130 while (t0) 4131 t0 = sub(m, n, t0, len); 4132 } 4133 4134 // Swap words in a longword. 4135 static unsigned long swap(unsigned long x) { 4136 return (x << 32) | (x >> 32); 4137 } 4138 4139 // Copy len longwords from s to d, word-swapping as we go. The 4140 // destination array is reversed. 4141 static void reverse_words(unsigned long *s, unsigned long *d, int len) { 4142 d += len; 4143 while(len-- > 0) { 4144 d--; 4145 *d = swap(*s); 4146 s++; 4147 } 4148 } 4149 4150 // The threshold at which squaring is advantageous was determined 4151 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. 
4152 #define MONTGOMERY_SQUARING_THRESHOLD 64 4153 4154 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, 4155 jint len, jlong inv, 4156 jint *m_ints) { 4157 assert(len % 2 == 0, "array length in montgomery_multiply must be even"); 4158 int longwords = len/2; 4159 4160 // Make very sure we don't use so much space that the stack might 4161 // overflow. 512 jints corresponds to an 16384-bit integer and 4162 // will use here a total of 8k bytes of stack space. 4163 int total_allocation = longwords * sizeof (unsigned long) * 4; 4164 guarantee(total_allocation <= 8192, "must be"); 4165 unsigned long *scratch = (unsigned long *)alloca(total_allocation); 4166 4167 // Local scratch arrays 4168 unsigned long 4169 *a = scratch + 0 * longwords, 4170 *b = scratch + 1 * longwords, 4171 *n = scratch + 2 * longwords, 4172 *m = scratch + 3 * longwords; 4173 4174 reverse_words((unsigned long *)a_ints, a, longwords); 4175 reverse_words((unsigned long *)b_ints, b, longwords); 4176 reverse_words((unsigned long *)n_ints, n, longwords); 4177 4178 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); 4179 4180 reverse_words(m, (unsigned long *)m_ints, longwords); 4181 } 4182 4183 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, 4184 jint len, jlong inv, 4185 jint *m_ints) { 4186 assert(len % 2 == 0, "array length in montgomery_square must be even"); 4187 int longwords = len/2; 4188 4189 // Make very sure we don't use so much space that the stack might 4190 // overflow. 512 jints corresponds to an 16384-bit integer and 4191 // will use here a total of 6k bytes of stack space. 
4192 int total_allocation = longwords * sizeof (unsigned long) * 3; 4193 guarantee(total_allocation <= 8192, "must be"); 4194 unsigned long *scratch = (unsigned long *)alloca(total_allocation); 4195 4196 // Local scratch arrays 4197 unsigned long 4198 *a = scratch + 0 * longwords, 4199 *n = scratch + 1 * longwords, 4200 *m = scratch + 2 * longwords; 4201 4202 reverse_words((unsigned long *)a_ints, a, longwords); 4203 reverse_words((unsigned long *)n_ints, n, longwords); 4204 4205 if (len >= MONTGOMERY_SQUARING_THRESHOLD) { 4206 ::montgomery_square(a, n, m, (unsigned long)inv, longwords); 4207 } else { 4208 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); 4209 } 4210 4211 reverse_words(m, (unsigned long *)m_ints, longwords); 4212 } 4213 4214 #endif // WINDOWS 4215 4216 #ifdef COMPILER2 4217 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame 4218 // 4219 //------------------------------generate_exception_blob--------------------------- 4220 // creates exception blob at the end 4221 // Using exception blob, this code is jumped from a compiled method. 4222 // (see emit_exception_handler in x86_64.ad file) 4223 // 4224 // Given an exception pc at a call we call into the runtime for the 4225 // handler in this method. This handler might merely restore state 4226 // (i.e. callee save registers) unwind the frame and jump to the 4227 // exception handler for the nmethod if there is no Java level handler 4228 // for the nmethod. 4229 // 4230 // This code is entered with a jmp. 4231 // 4232 // Arguments: 4233 // rax: exception oop 4234 // rdx: exception pc |