3971 __ eor(tmp1, tmp1, tmp2); 3972 __ cbnz(tmp1, NOT_EQUAL); 3973 __ br(__ GT, SMALL_LOOP); 3974 __ bind(POST_LOOP); 3975 __ ldr(tmp1, Address(a1, cnt1)); 3976 __ ldr(tmp2, Address(a2, cnt1)); 3977 __ eor(tmp1, tmp1, tmp2); 3978 __ cbnz(tmp1, NOT_EQUAL); 3979 __ bind(EQUAL); 3980 __ mov(result, true); 3981 __ bind(NOT_EQUAL); 3982 if (!UseSIMDForArrayEquals) { 3983 __ pop(spilled_regs, sp); 3984 } 3985 __ bind(NOT_EQUAL_NO_POP); 3986 __ leave(); 3987 __ ret(lr); 3988 return entry; 3989 } 3990 3991 3992 /** 3993 * Arguments: 3994 * 3995 * Input: 3996 * c_rarg0 - current state address 3997 * c_rarg1 - H key address 3998 * c_rarg2 - data address 3999 * c_rarg3 - number of blocks 4000 * 4001 * Output: 4002 * Updated state at c_rarg0 4003 */ 4004 address generate_ghash_processBlocks() { 4005 // Bafflingly, GCM uses little-endian for the byte order, but 4006 // big-endian for the bit order. For example, the polynomial 1 is 4007 // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00. 4008 // 4009 // So, we must either reverse the bytes in each word and do 4010 // everything big-endian or reverse the bits in each byte and do 5058 CAST_FROM_FN_PTR(address, 5059 SharedRuntime:: 5060 throw_IncompatibleClassChangeError)); 5061 5062 StubRoutines::_throw_NullPointerException_at_call_entry = 5063 generate_throw_exception("NullPointerException at call throw_exception", 5064 CAST_FROM_FN_PTR(address, 5065 SharedRuntime:: 5066 throw_NullPointerException_at_call)); 5067 5068 // arraycopy stubs used by compilers 5069 generate_arraycopy_stubs(); 5070 5071 // has negatives stub for large arrays. 5072 StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long); 5073 5074 // array equals stub for large arrays. 
5075 if (!UseSimpleArrayEquals) { 5076 StubRoutines::aarch64::_large_array_equals = generate_large_array_equals(); 5077 } 5078 5079 if (UseMultiplyToLenIntrinsic) { 5080 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 5081 } 5082 5083 if (UseSquareToLenIntrinsic) { 5084 StubRoutines::_squareToLen = generate_squareToLen(); 5085 } 5086 5087 if (UseMulAddIntrinsic) { 5088 StubRoutines::_mulAdd = generate_mulAdd(); 5089 } 5090 5091 if (UseMontgomeryMultiplyIntrinsic) { 5092 StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); 5093 MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); 5094 StubRoutines::_montgomeryMultiply = g.generate_multiply(); 5095 } 5096 5097 if (UseMontgomerySquareIntrinsic) { | 3971 __ eor(tmp1, tmp1, tmp2); 3972 __ cbnz(tmp1, NOT_EQUAL); 3973 __ br(__ GT, SMALL_LOOP); 3974 __ bind(POST_LOOP); 3975 __ ldr(tmp1, Address(a1, cnt1)); 3976 __ ldr(tmp2, Address(a2, cnt1)); 3977 __ eor(tmp1, tmp1, tmp2); 3978 __ cbnz(tmp1, NOT_EQUAL); 3979 __ bind(EQUAL); 3980 __ mov(result, true); 3981 __ bind(NOT_EQUAL); 3982 if (!UseSIMDForArrayEquals) { 3983 __ pop(spilled_regs, sp); 3984 } 3985 __ bind(NOT_EQUAL_NO_POP); 3986 __ leave(); 3987 __ ret(lr); 3988 return entry; 3989 } 3990 3991 // R0 = result 3992 // R1 = str2 3993 // R2 = cnt1 3994 // R3 = str1 3995 // R4 = cnt2 3996 // This generic linear code use few additional ideas, which makes it faster: 3997 // 1) we can safely keep at least 1st register of pattern(since length >= 8) 3998 // in order to skip initial loading(help in systems with 1 ld pipeline) 3999 // 2) we can use "fast" algorithm of finding single character to search for 4000 // first symbol with less branches(1 branch per each loaded register instead 4001 // of branch for each symbol), so, this is where constants like 4002 // 0x0101...01, 0x00010001...0001, 0x7f7f...7f, 0x7fff7fff...7fff comes from 4003 // 3) after loading and analyzing 1st register of source string, it can be 4004 // used to search for every 1st 
//    character entry, saving few loads in
//    comparison with "simplier-but-slower" implementation
// 4) in order to avoid lots of push/pop operations, code below is heavily
//    re-using/re-initializing/compressing register values, which makes code
//    larger and a bit less readable, however, most of extra operations are
//    issued during loads or branches, so, penalty is minimal
//
// Generates a linear-scan String.indexOf stub for the given source/pattern
// encodings (L = Latin-1, one byte per char; U = UTF-16, two bytes per char)
// and returns the stub's entry address.
address generate_string_indexof_linear(bool str1_isL, bool str2_isL) {
  // Stub name encodes <pattern encoding><source encoding>.
  const char* stubName = str1_isL
      ? (str2_isL ? "indexof_linear_ll" : "indexof_linear_ul")
      : "indexof_linear_uu";
  StubCodeMark mark(this, "StubRoutines", stubName);
  __ align(CodeEntryAlignment);
  address entry = __ pc();

  // Per-string element sizes/shifts derived from the encodings.
  int str1_chr_size = str1_isL ? 1 : 2;
  int str2_chr_size = str2_isL ? 1 : 2;
  int str1_chr_shift = str1_isL ? 0 : 1;
  int str2_chr_shift = str2_isL ? 0 : 1;
  bool isL = str1_isL && str2_isL; // NOTE(review): appears unused within this block — confirm before removing
  // parameters (see register map comment above: R0=result, R1=str2, R2=cnt1, R3=str1, R4=cnt2)
  Register result = r0, str2 = r1, cnt1 = r2, str1 = r3, cnt2 = r4;
  // temporary registers (callee-saved, hence spilled below)
  Register tmp1 = r20, tmp2 = r21, tmp3 = r22, tmp4 = r23;
  RegSet spilled_regs = RegSet::range(tmp1, tmp4);
  // redefinitions
  Register ch1 = rscratch1, ch2 = rscratch2, first = tmp3;

  __ push(spilled_regs, sp);
  // NOTE(review): L_SMALL_MATCH_LOOP is declared but never bound in this block.
  Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, L_SMALL_MATCH_LOOP,
      L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
      L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
      L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
      L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
      L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
  // Read whole register from str1. It is safe, because length >=8 here
  __ ldr(ch1, Address(str1));
  // Read whole register from str2. It is safe, because length >=8 here
  __ ldr(ch2, Address(str2));
  // Isolate the first pattern character.
  __ andr(first, ch1, str1_isL ? 0xFF : 0xFFFF);
  if (str1_isL != str2_isL) {
    __ eor(v0, __ T16B, v0, v0); // UL case: zero v0 for the byte->halfword zip below
  }
  // Replicate the first pattern character into every element of tmp1's lane
  // pattern; the same constant is reused by the SWAR match test below.
  __ mov(tmp1, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
  __ mul(first, first, tmp1);
  // check if we have less than 1 register to check
  __ subs(cnt2, cnt2, wordSize/str2_chr_size - 1);
  if (str1_isL != str2_isL) {
    __ fmovd(v1, ch1); // stash pattern bytes for widening via zip1
  }
  __ br(__ LE, L_SMALL);
  // SWAR "find matching element": XOR makes matching elements zero, then the
  // classic (x - 0x01..01) & ~(x | 0x7f..7f) trick flags zero elements.
  __ eor(ch2, first, ch2);
  if (str1_isL != str2_isL) {
    __ zip1(v1, __ T16B, v1, v0); // widen Latin-1 pattern bytes to UTF-16
  }
  __ sub(tmp2, ch2, tmp1);
  __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
  __ bics(tmp2, tmp2, ch2); // sets flags: NE iff some element matched
  if (str1_isL != str2_isL) {
    __ fmovd(ch1, v1); // ch1 now holds the widened pattern
  }
  __ br(__ NE, L_HAS_ZERO);
  __ subs(cnt2, cnt2, wordSize/str2_chr_size);
  __ add(result, result, wordSize/str2_chr_size);
  __ add(str2, str2, wordSize);
  __ br(__ LT, L_POST_LOOP);
  // Main loop: scan str2 one 8-byte register at a time for the first
  // pattern character using the same SWAR test as above.
  __ BIND(L_LOOP);
    __ ldr(ch2, Address(str2));
    __ eor(ch2, first, ch2);
    __ sub(tmp2, ch2, tmp1);
    __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
    __ bics(tmp2, tmp2, ch2);
    __ br(__ NE, L_HAS_ZERO);
  __ BIND(L_LOOP_PROCEED);
    __ subs(cnt2, cnt2, wordSize/str2_chr_size);
    __ add(str2, str2, wordSize);
    __ add(result, result, wordSize/str2_chr_size);
    __ br(__ GE, L_LOOP);
  __ BIND(L_POST_LOOP);
    __ cmp(cnt2, -wordSize/str2_chr_size); // no extra characters to check
    __ br(__ LE, NOMATCH);
    // Tail: load the final (possibly partial) register and fall into the
    // small-case handling with a mask that hides the out-of-range elements.
    __ ldr(ch2, Address(str2));
    __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift); // bits to mask off
    __ eor(ch2, first, ch2);
    __ sub(tmp2, ch2, tmp1);
    __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
    __ mov(tmp4, -1); // all bits set
    __ b(L_SMALL_PROCEED);
  __ align(OptoLoopAlignment);
  // Small case: str2 fits (or its remainder fits) in one register.
  __ BIND(L_SMALL);
    __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
    __ eor(ch2, first, ch2);
    if (str1_isL != str2_isL) {
      __ zip1(v1, __ T16B, v1, v0);
    }
    __ sub(tmp2, ch2, tmp1);
    __ mov(tmp4, -1); // all bits set
    __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
    if (str1_isL != str2_isL) {
      __ fmovd(ch1, v1); // move converted 4 symbols
    }
  __ BIND(L_SMALL_PROCEED);
    __ lsrv(tmp4, tmp4, cnt2); // mask. zeroes on useless bits.
    __ bic(tmp2, tmp2, ch2);
    __ ands(tmp2, tmp2, tmp4); // clear useless bits and check
    __ rbit(tmp2, tmp2); // reverse so clz yields the FIRST match position
    __ br(__ EQ, NOMATCH);
  // Iterate over candidate first-character matches recorded in tmp2.
  __ BIND(L_SMALL_HAS_ZERO_LOOP);
    __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some cpu's
    __ cmp(cnt1, wordSize/str2_chr_size);
    __ br(__ LE, L_SMALL_CMP_LOOP_LAST_CMP2);
    if (str2_isL) { // LL
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
      __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte);
      __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
    } else {
      __ mov(ch2, 0xE); // all bits in byte set except last one
      __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
      __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4);
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      // str2 is advanced in two steps of (bit index >> 4), i.e. by the byte
      // offset of the matched UTF-16 char — presumably split to interleave
      // with the dependent ops above; TODO confirm against final upstream.
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
    }
    __ cmp(ch1, ch2); // compare first 8 bytes/4 chars of pattern at once
    __ mov(tmp4, wordSize/str2_chr_size);
    __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
  // Character-by-character verification of the remaining pattern tail.
  __ BIND(L_SMALL_CMP_LOOP);
    str1_isL ? __ ldrb(first, Address(str1, tmp4, Address::lsl(str1_chr_shift)))
             : __ ldrh(first, Address(str1, tmp4, Address::lsl(str1_chr_shift)));
    str2_isL ? __ ldrb(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)))
             : __ ldrh(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)));
    __ add(tmp4, tmp4, 1);
    __ cmp(tmp4, cnt1);
    __ br(__ GE, L_SMALL_CMP_LOOP_LAST_CMP);
    __ cmp(first, ch2);
    __ br(__ EQ, L_SMALL_CMP_LOOP);
  __ BIND(L_SMALL_CMP_LOOP_NOMATCH);
    __ cbz(tmp2, NOMATCH); // no more matches. exit
    __ clz(tmp4, tmp2);
    __ add(result, result, 1); // advance index
    __ add(str2, str2, str2_chr_size); // advance pointer
    __ b(L_SMALL_HAS_ZERO_LOOP);
  __ align(OptoLoopAlignment);
  __ BIND(L_SMALL_CMP_LOOP_LAST_CMP);
    __ cmp(first, ch2);
    __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
    __ b(DONE);
  __ align(OptoLoopAlignment);
  // Pattern fits entirely in one register: single wide compare suffices.
  __ BIND(L_SMALL_CMP_LOOP_LAST_CMP2);
    if (str2_isL) { // LL
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
      __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte);
      __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
    } else {
      __ mov(ch2, 0xE); // all bits in byte set except last one
      __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
      __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4);
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
    }
    __ cmp(ch1, ch2);
    __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
    __ b(DONE);
  __ align(OptoLoopAlignment);
  // Main-loop candidate found: verify the full pattern at each match bit.
  __ BIND(L_HAS_ZERO);
    __ rbit(tmp2, tmp2);
    __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some CPU's
    // Now, perform compression of counters(cnt2 and cnt1) into one register.
    // It's fine because both counters are 32bit and are not changed in this
    // loop. Just restore it on exit. So, cnt1 can be re-used in this loop.
    __ orr(cnt2, cnt2, cnt1, __ LSL, BitsPerByte * wordSize / 2);
    __ sub(result, result, 1);
  __ BIND(L_HAS_ZERO_LOOP);
    __ mov(cnt1, wordSize/str2_chr_size);
    __ cmp(cnt1, cnt2, __ LSR, BitsPerByte * wordSize / 2); // pattern length from high half
    __ br(__ GE, L_CMP_LOOP_LAST_CMP2); // case of 8 bytes only to compare
    if (str2_isL) {
      __ lsr(ch2, tmp4, LogBitsPerByte + str2_chr_shift); // char index
      __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4);
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ add(tmp4, tmp4, 1);
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ lsl(tmp2, tmp2, 1);
      __ mov(tmp4, wordSize/str2_chr_size);
    } else {
      __ mov(ch2, 0xE);
      __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
      __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4);
      __ add(tmp4, tmp4, 1);
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte);
      __ lsl(tmp2, tmp2, 1);
      __ mov(tmp4, wordSize/str2_chr_size);
      __ sub(str2, str2, str2_chr_size);
    }
    __ cmp(ch1, ch2);
    __ mov(tmp4, wordSize/str2_chr_size); // NOTE(review): also set in both branches above — confirm intent
    __ br(__ NE, L_CMP_LOOP_NOMATCH);
  // Per-character verification loop; cnt1 is temporarily reused as a
  // character holder here (its real value lives in cnt2's high half).
  __ BIND(L_CMP_LOOP);
    str1_isL ? __ ldrb(cnt1, Address(str1, tmp4, Address::lsl(str1_chr_shift)))
             : __ ldrh(cnt1, Address(str1, tmp4, Address::lsl(str1_chr_shift)));
    str2_isL ? __ ldrb(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)))
             : __ ldrh(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)));
    __ add(tmp4, tmp4, 1);
    __ cmp(tmp4, cnt2, __ LSR, BitsPerByte * wordSize / 2);
    __ br(__ GE, L_CMP_LOOP_LAST_CMP);
    __ cmp(cnt1, ch2);
    __ br(__ EQ, L_CMP_LOOP);
  __ BIND(L_CMP_LOOP_NOMATCH);
    // here we're not matched
    __ cbz(tmp2, L_HAS_ZERO_LOOP_NOMATCH); // no more matches. Proceed to main loop
    __ clz(tmp4, tmp2);
    __ add(str2, str2, str2_chr_size); // advance pointer
    __ b(L_HAS_ZERO_LOOP);
  __ align(OptoLoopAlignment);
  __ BIND(L_CMP_LOOP_LAST_CMP);
    __ cmp(cnt1, ch2);
    __ br(__ NE, L_CMP_LOOP_NOMATCH);
    __ b(DONE);
  __ align(OptoLoopAlignment);
  // Whole remaining pattern fits in one register: single wide compare.
  __ BIND(L_CMP_LOOP_LAST_CMP2);
    if (str2_isL) {
      __ lsr(ch2, tmp4, LogBitsPerByte + str2_chr_shift); // char index
      __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4);
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ add(tmp4, tmp4, 1);
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ lsl(tmp2, tmp2, 1);
    } else {
      __ mov(ch2, 0xE);
      __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
      __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
      __ lslv(tmp2, tmp2, tmp4);
      __ add(tmp4, tmp4, 1);
      __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
      __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte);
      __ lsl(tmp2, tmp2, 1);
      __ sub(str2, str2, str2_chr_size);
    }
    __ cmp(ch1, ch2);
    __ br(__ NE, L_CMP_LOOP_NOMATCH);
    __ b(DONE);
  __ align(OptoLoopAlignment);
  __ BIND(L_HAS_ZERO_LOOP_NOMATCH);
    // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until
    // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP,
    // so, result was increased at max by wordSize/str2_chr_size - 1, so,
    // respective high bit wasn't changed. L_LOOP_PROCEED will increase
    // result by analyzed characters value, so, we can just reset lower bits
    // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL
    // 2) restore cnt1 and cnt2 values from "compressed" cnt2
    // 3) advance str2 value to represent next str2 octet. result & 7/3 is
    // index of last analyzed substring inside current octet. So, str2 in at
    // respective start address. We need to advance it to next octet
    __ andr(tmp2, result, wordSize/str2_chr_size - 1); // symbols analyzed
    __ lsr(cnt1, cnt2, BitsPerByte * wordSize / 2); // restore cnt1 from high half
    __ bfm(result, zr, 0, 2 - str2_chr_shift); // clear low 3 (LL) or 2 (UU/UL) bits
    __ sub(str2, str2, tmp2, __ LSL, str2_chr_shift); // restore str2
    __ movw(cnt2, cnt2); // zero-extend: drop packed cnt1 from cnt2's high half
    __ b(L_LOOP_PROCEED);
  __ align(OptoLoopAlignment);
  __ BIND(NOMATCH);
    __ mov(result, -1); // not found
  __ BIND(DONE);
    __ pop(spilled_regs, sp);
    __ ret(lr);
  return entry;
}

// Generates the three encoding combinations of the indexOf stub above.
// NOTE(review): the LL stub is generated with (true, true), UU with
// (false, false), and UL with (true, false) — i.e. Latin-1 pattern in a
// UTF-16 source; matches the stubName selection inside the generator.
void generate_string_indexof_stubs() {
  StubRoutines::aarch64::_string_indexof_linear_ll = generate_string_indexof_linear(true, true);
  StubRoutines::aarch64::_string_indexof_linear_uu = generate_string_indexof_linear(false, false);
  StubRoutines::aarch64::_string_indexof_linear_ul = generate_string_indexof_linear(true, false);
}

/**
 *  Arguments:
 *
 *  Input:
 *  c_rarg0   - current state address
 *  c_rarg1   - H key address
 *  c_rarg2   - data address
 *  c_rarg3   - number of blocks
 *
 *  Output:
 *  Updated state at c_rarg0
 */
address generate_ghash_processBlocks() {
  // Bafflingly, GCM uses little-endian for the byte order, but
  // big-endian for the bit order.  For example, the polynomial 1 is
  // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
4306 // 4307 // So, we must either reverse the bytes in each word and do 4308 // everything big-endian or reverse the bits in each byte and do 5356 CAST_FROM_FN_PTR(address, 5357 SharedRuntime:: 5358 throw_IncompatibleClassChangeError)); 5359 5360 StubRoutines::_throw_NullPointerException_at_call_entry = 5361 generate_throw_exception("NullPointerException at call throw_exception", 5362 CAST_FROM_FN_PTR(address, 5363 SharedRuntime:: 5364 throw_NullPointerException_at_call)); 5365 5366 // arraycopy stubs used by compilers 5367 generate_arraycopy_stubs(); 5368 5369 // has negatives stub for large arrays. 5370 StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long); 5371 5372 // array equals stub for large arrays. 5373 if (!UseSimpleArrayEquals) { 5374 StubRoutines::aarch64::_large_array_equals = generate_large_array_equals(); 5375 } 5376 5377 generate_string_indexof_stubs(); 5378 5379 if (UseMultiplyToLenIntrinsic) { 5380 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 5381 } 5382 5383 if (UseSquareToLenIntrinsic) { 5384 StubRoutines::_squareToLen = generate_squareToLen(); 5385 } 5386 5387 if (UseMulAddIntrinsic) { 5388 StubRoutines::_mulAdd = generate_mulAdd(); 5389 } 5390 5391 if (UseMontgomeryMultiplyIntrinsic) { 5392 StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); 5393 MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); 5394 StubRoutines::_montgomeryMultiply = g.generate_multiply(); 5395 } 5396 5397 if (UseMontgomerySquareIntrinsic) { |