< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page




3971       __ eor(tmp1, tmp1, tmp2);
3972       __ cbnz(tmp1, NOT_EQUAL);
3973       __ br(__ GT, SMALL_LOOP);
3974     __ bind(POST_LOOP);
3975       __ ldr(tmp1, Address(a1, cnt1));
3976       __ ldr(tmp2, Address(a2, cnt1));
3977       __ eor(tmp1, tmp1, tmp2);
3978       __ cbnz(tmp1, NOT_EQUAL);
3979     __ bind(EQUAL);
3980       __ mov(result, true);
3981     __ bind(NOT_EQUAL);
3982       if (!UseSIMDForArrayEquals) {
3983         __ pop(spilled_regs, sp);
3984       }
3985     __ bind(NOT_EQUAL_NO_POP);
3986     __ leave();
3987     __ ret(lr);
3988     return entry;
3989   }
3990 










































































































































































































































































































3991 
3992   /**
3993    *  Arguments:
3994    *
3995    *  Input:
3996    *  c_rarg0   - current state address
3997    *  c_rarg1   - H key address
3998    *  c_rarg2   - data address
3999    *  c_rarg3   - number of blocks
4000    *
4001    *  Output:
4002    *  Updated state at c_rarg0
4003    */
4004   address generate_ghash_processBlocks() {
4005     // Bafflingly, GCM uses little-endian for the byte order, but
4006     // big-endian for the bit order.  For example, the polynomial 1 is
4007     // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
4008     //
4009     // So, we must either reverse the bytes in each word and do
4010     // everything big-endian or reverse the bits in each byte and do


5058                                CAST_FROM_FN_PTR(address,
5059                                                 SharedRuntime::
5060                                                 throw_IncompatibleClassChangeError));
5061 
5062     StubRoutines::_throw_NullPointerException_at_call_entry =
5063       generate_throw_exception("NullPointerException at call throw_exception",
5064                                CAST_FROM_FN_PTR(address,
5065                                                 SharedRuntime::
5066                                                 throw_NullPointerException_at_call));
5067 
5068     // arraycopy stubs used by compilers
5069     generate_arraycopy_stubs();
5070 
5071     // has negatives stub for large arrays.
5072     StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
5073 
5074     // array equals stub for large arrays.
5075     if (!UseSimpleArrayEquals) {
5076       StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
5077     }


5078 
5079     if (UseMultiplyToLenIntrinsic) {
5080       StubRoutines::_multiplyToLen = generate_multiplyToLen();
5081     }
5082 
5083     if (UseSquareToLenIntrinsic) {
5084       StubRoutines::_squareToLen = generate_squareToLen();
5085     }
5086 
5087     if (UseMulAddIntrinsic) {
5088       StubRoutines::_mulAdd = generate_mulAdd();
5089     }
5090 
5091     if (UseMontgomeryMultiplyIntrinsic) {
5092       StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
5093       MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
5094       StubRoutines::_montgomeryMultiply = g.generate_multiply();
5095     }
5096 
5097     if (UseMontgomerySquareIntrinsic) {




3971       __ eor(tmp1, tmp1, tmp2);
3972       __ cbnz(tmp1, NOT_EQUAL);
3973       __ br(__ GT, SMALL_LOOP);
3974     __ bind(POST_LOOP);
3975       __ ldr(tmp1, Address(a1, cnt1));
3976       __ ldr(tmp2, Address(a2, cnt1));
3977       __ eor(tmp1, tmp1, tmp2);
3978       __ cbnz(tmp1, NOT_EQUAL);
3979     __ bind(EQUAL);
3980       __ mov(result, true);
3981     __ bind(NOT_EQUAL);
3982       if (!UseSIMDForArrayEquals) {
3983         __ pop(spilled_regs, sp);
3984       }
3985     __ bind(NOT_EQUAL_NO_POP);
3986     __ leave();
3987     __ ret(lr);
3988     return entry;
3989   }
3990 
3991   // R0 = result
3992   // R1 = str2
3993   // R2 = cnt1
3994   // R3 = str1
3995   // R4 = cnt2
3996   // This generic linear code uses a few additional ideas which make it faster:
3997   // 1) we can safely keep at least the 1st register of the pattern (since length >= 8)
3998   // in order to skip initial loading (helps on systems with 1 load pipeline)
3999   // 2) we can use a "fast" algorithm for locating the single character to search
4000   // for the first symbol with fewer branches (1 branch per loaded register
4001   // instead of a branch per symbol); this is where constants like
4002   // 0x0101...01, 0x00010001...0001, 0x7f7f...7f, 0x7fff7fff...7fff come from
4003   // 3) after loading and analyzing the 1st register of the source string, it can be
4004   // used to search for every 1st-character entry, saving a few loads in
4005   // comparison with a "simpler-but-slower" implementation
4006   // 4) in order to avoid lots of push/pop operations, the code below heavily
4007   // re-uses/re-initializes/compresses register values, which makes the code
4008   // larger and a bit less readable; however, most of the extra operations are
4009   // issued during loads or branches, so the penalty is minimal
       // Generates a linear-scan String.indexOf stub for the given Latin1/UTF-16
       // encoding combination of pattern (str1) and source (str2); returns the
       // stub entry address.
4010   address generate_string_indexof_linear(bool str1_isL, bool str2_isL) {
4011     const char* stubName = str1_isL
4012         ? (str2_isL ? "indexof_linear_ll" : "indexof_linear_ul")
4013         : "indexof_linear_uu";
4014     StubCodeMark mark(this, "StubRoutines", stubName);
4015     __ align(CodeEntryAlignment);
4016     address entry = __ pc();
4017 
4018     int str1_chr_size = str1_isL ? 1 : 2;
4019     int str2_chr_size = str2_isL ? 1 : 2;
4020     int str1_chr_shift = str1_isL ? 0 : 1;
4021     int str2_chr_shift = str2_isL ? 0 : 1;
4023     // parameters
4024     Register result = r0, str2 = r1, cnt1 = r2, str1 = r3, cnt2 = r4;
4025     // temporary registers
4026     Register tmp1 = r20, tmp2 = r21, tmp3 = r22, tmp4 = r23;
4027     RegSet spilled_regs = RegSet::range(tmp1, tmp4);
4028     // redefinitions
4029     Register ch1 = rscratch1, ch2 = rscratch2, first = tmp3;
4030 
4031     __ push(spilled_regs, sp);
4032     Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
4033         L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
4034         L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
4035         L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
4036         L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
4037         L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
4038     // Read whole register from str1. It is safe, because length >=8 here
4039     __ ldr(ch1, Address(str1));
4040     // Read whole register from str2. It is safe, because length >=8 here
4041     __ ldr(ch2, Address(str2));
4042     __ andr(first, ch1, str1_isL ? 0xFF : 0xFFFF); // first = 1st pattern character
4043     if (str1_isL != str2_isL) {
4044       __ eor(v0, __ T16B, v0, v0); // v0 = 0: zero lanes for the zip1 widening below
4045     }
4046     __ mov(tmp1, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
4047     __ mul(first, first, tmp1); // broadcast 1st char into every char slot
4048     // check if we have less than 1 register to check
4049     __ subs(cnt2, cnt2, wordSize/str2_chr_size - 1);
4050     if (str1_isL != str2_isL) {
4051       __ fmovd(v1, ch1); // stage pattern register for Latin1->UTF-16 widening
4052     }
4053     __ br(__ LE, L_SMALL);
         // Zero-char detection trick: after ch2 ^= broadcast(first), matching
         // positions hold 0. Then (x - 0x01..01) & ~(x | 0x7f..7f) leaves the
         // high bit of each char slot set iff that slot was zero (a match).
4054     __ eor(ch2, first, ch2);
4055     if (str1_isL != str2_isL) {
4056       __ zip1(v1, __ T16B, v1, v0); // widen pattern bytes to halfwords
4057     }
4058     __ sub(tmp2, ch2, tmp1);
4059     __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4060     __ bics(tmp2, tmp2, ch2); // tmp2 != 0 iff some slot matched the 1st char
4061     if (str1_isL != str2_isL) {
4062       __ fmovd(ch1, v1);
4063     }
4064     __ br(__ NE, L_HAS_ZERO);
4065     __ subs(cnt2, cnt2, wordSize/str2_chr_size);
4066     __ add(result, result, wordSize/str2_chr_size);
4067     __ add(str2, str2, wordSize);
4068     __ br(__ LT, L_POST_LOOP);
4069     __ BIND(L_LOOP);
4070       __ ldr(ch2, Address(str2));
4071       __ eor(ch2, first, ch2);
4072       __ sub(tmp2, ch2, tmp1);
4073       __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4074       __ bics(tmp2, tmp2, ch2);
4075       __ br(__ NE, L_HAS_ZERO);
4076     __ BIND(L_LOOP_PROCEED);
4077       __ subs(cnt2, cnt2, wordSize/str2_chr_size);
4078       __ add(str2, str2, wordSize);
4079       __ add(result, result, wordSize/str2_chr_size);
4080       __ br(__ GE, L_LOOP);
4081     __ BIND(L_POST_LOOP);
4082       __ cmp(cnt2, -wordSize/str2_chr_size); // no extra characters to check
4083       __ br(__ LE, NOMATCH);
4084       __ ldr(ch2, Address(str2));
4085       __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift); // bits past end of string: shift amount for the tail mask
4086       __ eor(ch2, first, ch2);
4087       __ sub(tmp2, ch2, tmp1);
4088       __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4089       __ mov(tmp4, -1); // all bits set
4090       __ b(L_SMALL_PROCEED);
4091     __ align(OptoLoopAlignment);
4092     __ BIND(L_SMALL);
4093       __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
4094       __ eor(ch2, first, ch2);
4095       if (str1_isL != str2_isL) {
4096         __ zip1(v1, __ T16B, v1, v0);
4097       }
4098       __ sub(tmp2, ch2, tmp1);
4099       __ mov(tmp4, -1); // all bits set
4100       __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
4101       if (str1_isL != str2_isL) {
4102         __ fmovd(ch1, v1); // move converted 4 symbols
4103       }
4104     __ BIND(L_SMALL_PROCEED);
4105       __ lsrv(tmp4, tmp4, cnt2); // mask. zeroes on useless bits.
4106       __ bic(tmp2, tmp2, ch2);
4107       __ ands(tmp2, tmp2, tmp4); // clear useless bits and check
4108       __ rbit(tmp2, tmp2); // reverse bits so clz below finds the lowest match
4109       __ br(__ EQ, NOMATCH);
4110     __ BIND(L_SMALL_HAS_ZERO_LOOP);
4111       __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some cpu's
4112       __ cmp(cnt1, wordSize/str2_chr_size); // pattern fits in one register-width compare?
4113       __ br(__ LE, L_SMALL_CMP_LOOP_LAST_CMP2);
4114       if (str2_isL) { // LL
4115         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
4116         __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
4117         __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
4118         __ add(result, result, tmp4, __ LSR, LogBitsPerByte);
4119         __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
4120       } else {
4121         __ mov(ch2, 0xE); // all bits in byte set except last one
4122         __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
4123         __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
4124         __ lslv(tmp2, tmp2, tmp4);
4125         __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4126         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4127         __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
4128         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift); // 2nd add: chars are 2 bytes here, so byte offset = 2 * char index
4129       }
4130       __ cmp(ch1, ch2);
4131       __ mov(tmp4, wordSize/str2_chr_size);
4132       __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
4133     __ BIND(L_SMALL_CMP_LOOP);
4134       str1_isL ? __ ldrb(first, Address(str1, tmp4, Address::lsl(str1_chr_shift)))
4135                : __ ldrh(first, Address(str1, tmp4, Address::lsl(str1_chr_shift)));
4136       str2_isL ? __ ldrb(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)))
4137                : __ ldrh(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)));
4138       __ add(tmp4, tmp4, 1);
4139       __ cmp(tmp4, cnt1);
4140       __ br(__ GE, L_SMALL_CMP_LOOP_LAST_CMP);
4141       __ cmp(first, ch2);
4142       __ br(__ EQ, L_SMALL_CMP_LOOP);
4143     __ BIND(L_SMALL_CMP_LOOP_NOMATCH);
4144       __ cbz(tmp2, NOMATCH); // no more matches. exit
4145       __ clz(tmp4, tmp2);
4146       __ add(result, result, 1); // advance index
4147       __ add(str2, str2, str2_chr_size); // advance pointer
4148       __ b(L_SMALL_HAS_ZERO_LOOP);
4149     __ align(OptoLoopAlignment);
4150     __ BIND(L_SMALL_CMP_LOOP_LAST_CMP);
4151       __ cmp(first, ch2);
4152       __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
4153       __ b(DONE);
4154     __ align(OptoLoopAlignment);
4155     __ BIND(L_SMALL_CMP_LOOP_LAST_CMP2);
4156       if (str2_isL) { // LL
4157         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
4158         __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
4159         __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
4160         __ add(result, result, tmp4, __ LSR, LogBitsPerByte);
4161         __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
4162       } else {
4163         __ mov(ch2, 0xE); // all bits in byte set except last one
4164         __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
4165         __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
4166         __ lslv(tmp2, tmp2, tmp4);
4167         __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4168         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4169         __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
4170         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift); // 2nd add: byte offset = 2 * char index
4171       }
4172       __ cmp(ch1, ch2);
4173       __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
4174       __ b(DONE);
4175     __ align(OptoLoopAlignment);
4176     __ BIND(L_HAS_ZERO);
4177       __ rbit(tmp2, tmp2);
4178       __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some CPU's
4179       // Now, perform compression of counters(cnt2 and cnt1) into one register.
4180       // It's fine because both counters are 32bit and are not changed in this
4181       // loop. Just restore it on exit. So, cnt1 can be re-used in this loop.
4182       __ orr(cnt2, cnt2, cnt1, __ LSL, BitsPerByte * wordSize / 2); // pack cnt1 into the high half of cnt2
4183       __ sub(result, result, 1);
4184     __ BIND(L_HAS_ZERO_LOOP);
4185       __ mov(cnt1, wordSize/str2_chr_size);
4186       __ cmp(cnt1, cnt2, __ LSR, BitsPerByte * wordSize / 2); // compare against original cnt1 kept in the high half
4187       __ br(__ GE, L_CMP_LOOP_LAST_CMP2); // case of 8 bytes only to compare
4188       if (str2_isL) {
4189         __ lsr(ch2, tmp4, LogBitsPerByte + str2_chr_shift); // char index
4190         __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
4191         __ lslv(tmp2, tmp2, tmp4);
4192         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4193         __ add(tmp4, tmp4, 1);
4194         __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4195         __ lsl(tmp2, tmp2, 1);
4196         __ mov(tmp4, wordSize/str2_chr_size);
4197       } else {
4198         __ mov(ch2, 0xE);
4199         __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
4200         __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
4201         __ lslv(tmp2, tmp2, tmp4);
4202         __ add(tmp4, tmp4, 1);
4203         __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4204         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte);
4205         __ lsl(tmp2, tmp2, 1);
4206         __ mov(tmp4, wordSize/str2_chr_size);
4207         __ sub(str2, str2, str2_chr_size);
4208       }
4209       __ cmp(ch1, ch2);
4210       __ mov(tmp4, wordSize/str2_chr_size);
4211       __ br(__ NE, L_CMP_LOOP_NOMATCH);
4212     __ BIND(L_CMP_LOOP);
4213       str1_isL ? __ ldrb(cnt1, Address(str1, tmp4, Address::lsl(str1_chr_shift)))
4214                : __ ldrh(cnt1, Address(str1, tmp4, Address::lsl(str1_chr_shift)));
4215       str2_isL ? __ ldrb(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)))
4216                : __ ldrh(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)));
4217       __ add(tmp4, tmp4, 1);
4218       __ cmp(tmp4, cnt2, __ LSR, BitsPerByte * wordSize / 2);
4219       __ br(__ GE, L_CMP_LOOP_LAST_CMP);
4220       __ cmp(cnt1, ch2);
4221       __ br(__ EQ, L_CMP_LOOP);
4222     __ BIND(L_CMP_LOOP_NOMATCH);
4223       // here we're not matched
4224       __ cbz(tmp2, L_HAS_ZERO_LOOP_NOMATCH); // no more matches. Proceed to main loop
4225       __ clz(tmp4, tmp2);
4226       __ add(str2, str2, str2_chr_size); // advance pointer
4227       __ b(L_HAS_ZERO_LOOP);
4228     __ align(OptoLoopAlignment);
4229     __ BIND(L_CMP_LOOP_LAST_CMP);
4230       __ cmp(cnt1, ch2);
4231       __ br(__ NE, L_CMP_LOOP_NOMATCH);
4232       __ b(DONE);
4233     __ align(OptoLoopAlignment);
4234     __ BIND(L_CMP_LOOP_LAST_CMP2);
4235       if (str2_isL) {
4236         __ lsr(ch2, tmp4, LogBitsPerByte + str2_chr_shift); // char index
4237         __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
4238         __ lslv(tmp2, tmp2, tmp4);
4239         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4240         __ add(tmp4, tmp4, 1);
4241         __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4242         __ lsl(tmp2, tmp2, 1);
4243       } else {
4244         __ mov(ch2, 0xE);
4245         __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
4246         __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
4247         __ lslv(tmp2, tmp2, tmp4);
4248         __ add(tmp4, tmp4, 1);
4249         __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
4250         __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte);
4251         __ lsl(tmp2, tmp2, 1);
4252         __ sub(str2, str2, str2_chr_size);
4253       }
4254       __ cmp(ch1, ch2);
4255       __ br(__ NE, L_CMP_LOOP_NOMATCH);
4256       __ b(DONE);
4257     __ align(OptoLoopAlignment);
4258     __ BIND(L_HAS_ZERO_LOOP_NOMATCH);
4259       // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until
4260       // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP,
4261       // so, result was increased at max by wordSize/str2_chr_size - 1, so,
4262       // respective high bit wasn't changed. L_LOOP_PROCEED will increase
4263       // result by analyzed characters value, so, we can just reset lower bits
4264       // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL
4265       // 2) restore cnt1 and cnt2 values from "compressed" cnt2
4266       // 3) advance str2 value to represent next str2 octet. result & 7/3 is
4267       // index of last analyzed substring inside current octet. So, str2 in at
4268       // respective start address. We need to advance it to next octet
4269       __ andr(tmp2, result, wordSize/str2_chr_size - 1); // symbols analyzed
4270       __ lsr(cnt1, cnt2, BitsPerByte * wordSize / 2); // restore original cnt1 from the high half
4271       __ bfm(result, zr, 0, 2 - str2_chr_shift); // zero low bits (3 for LL, 2 for UL/UU)
4272       __ sub(str2, str2, tmp2, __ LSL, str2_chr_shift); // restore str2
4273       __ movw(cnt2, cnt2); // zero-extend: drop packed cnt1 from the high half
4274       __ b(L_LOOP_PROCEED);
4275     __ align(OptoLoopAlignment);
4276     __ BIND(NOMATCH);
4277       __ mov(result, -1);
4278     __ BIND(DONE);
4279       __ pop(spilled_regs, sp);
4280       __ ret(lr);
4281     return entry;
4282   }
4283 
// Generate the three encoding flavors (LL, UU, UL) of the linear
// string-indexof stub and publish their entry points in StubRoutines::aarch64.
4284   void generate_string_indexof_stubs() {
4285     StubRoutines::aarch64::_string_indexof_linear_ll = generate_string_indexof_linear(/*str1_isL=*/true,  /*str2_isL=*/true);
4286     StubRoutines::aarch64::_string_indexof_linear_uu = generate_string_indexof_linear(/*str1_isL=*/false, /*str2_isL=*/false);
4287     StubRoutines::aarch64::_string_indexof_linear_ul = generate_string_indexof_linear(/*str1_isL=*/true,  /*str2_isL=*/false);
4288   }
4289 
4290   /**
4291    *  Arguments:
4292    *
4293    *  Input:
4294    *  c_rarg0   - current state address
4295    *  c_rarg1   - H key address
4296    *  c_rarg2   - data address
4297    *  c_rarg3   - number of blocks
4298    *
4299    *  Output:
4300    *  Updated state at c_rarg0
4301    */
4302   address generate_ghash_processBlocks() {
4303     // Bafflingly, GCM uses little-endian for the byte order, but
4304     // big-endian for the bit order.  For example, the polynomial 1 is
4305     // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
4306     //
4307     // So, we must either reverse the bytes in each word and do
4308     // everything big-endian or reverse the bits in each byte and do


5356                                CAST_FROM_FN_PTR(address,
5357                                                 SharedRuntime::
5358                                                 throw_IncompatibleClassChangeError));
5359 
5360     StubRoutines::_throw_NullPointerException_at_call_entry =
5361       generate_throw_exception("NullPointerException at call throw_exception",
5362                                CAST_FROM_FN_PTR(address,
5363                                                 SharedRuntime::
5364                                                 throw_NullPointerException_at_call));
5365 
5366     // arraycopy stubs used by compilers
5367     generate_arraycopy_stubs();
5368 
5369     // has negatives stub for large arrays.
5370     StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
5371 
5372     // array equals stub for large arrays.
5373     if (!UseSimpleArrayEquals) {
5374       StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
5375     }
5376 
5377     generate_string_indexof_stubs();
5378 
5379     if (UseMultiplyToLenIntrinsic) {
5380       StubRoutines::_multiplyToLen = generate_multiplyToLen();
5381     }
5382 
5383     if (UseSquareToLenIntrinsic) {
5384       StubRoutines::_squareToLen = generate_squareToLen();
5385     }
5386 
5387     if (UseMulAddIntrinsic) {
5388       StubRoutines::_mulAdd = generate_mulAdd();
5389     }
5390 
5391     if (UseMontgomeryMultiplyIntrinsic) {
5392       StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
5393       MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
5394       StubRoutines::_montgomeryMultiply = g.generate_multiply();
5395     }
5396 
5397     if (UseMontgomerySquareIntrinsic) {


< prev index next >