jdk_jdk Sdiff src/hotspot/cpu/aarch64

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

3971       __ eor(tmp1, tmp1, tmp2);
3972       __ cbnz(tmp1, NOT_EQUAL);
3973       __ br(__ GT, SMALL_LOOP);
3974     __ bind(POST_LOOP);
3975       __ ldr(tmp1, Address(a1, cnt1));
3976       __ ldr(tmp2, Address(a2, cnt1));
3977       __ eor(tmp1, tmp1, tmp2);
3978       __ cbnz(tmp1, NOT_EQUAL);
3979     __ bind(EQUAL);
3980       __ mov(result, true);
3981     __ bind(NOT_EQUAL);
3982       if (!UseSIMDForArrayEquals) {
3983         __ pop(spilled_regs, sp);
3984       }
3985     __ bind(NOT_EQUAL_NO_POP);
3986     __ leave();
3987     __ ret(lr);
3988     return entry;
3989   }
3990 





























































3991 
3992   /**
3993    *  Arguments:
3994    *
3995    *  Input:
3996    *  c_rarg0   - current state address
3997    *  c_rarg1   - H key address
3998    *  c_rarg2   - data address
3999    *  c_rarg3   - number of blocks
4000    *
4001    *  Output:
4002    *  Updated state at c_rarg0
4003    */
4004   address generate_ghash_processBlocks() {
4005     // Bafflingly, GCM uses little-endian for the byte order, but
4006     // big-endian for the bit order.  For example, the polynomial 1 is
4007     // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
4008     //
4009     // So, we must either reverse the bytes in each word and do
4010     // everything big-endian or reverse the bits in each byte and do

5058                                CAST_FROM_FN_PTR(address,
5059                                                 SharedRuntime::
5060                                                 throw_IncompatibleClassChangeError));
5061 
5062     StubRoutines::_throw_NullPointerException_at_call_entry =
5063       generate_throw_exception("NullPointerException at call throw_exception",
5064                                CAST_FROM_FN_PTR(address,
5065                                                 SharedRuntime::
5066                                                 throw_NullPointerException_at_call));
5067 
5068     // arraycopy stubs used by compilers
5069     generate_arraycopy_stubs();
5070 
5071     // has negatives stub for large arrays.
5072     StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
5073 
5074     // array equals stub for large arrays.
5075     if (!UseSimpleArrayEquals) {
5076       StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
5077     }



5078 
5079     if (UseMultiplyToLenIntrinsic) {
5080       StubRoutines::_multiplyToLen = generate_multiplyToLen();
5081     }
5082 
5083     if (UseSquareToLenIntrinsic) {
5084       StubRoutines::_squareToLen = generate_squareToLen();
5085     }
5086 
5087     if (UseMulAddIntrinsic) {
5088       StubRoutines::_mulAdd = generate_mulAdd();
5089     }
5090 
5091     if (UseMontgomeryMultiplyIntrinsic) {
5092       StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
5093       MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
5094       StubRoutines::_montgomeryMultiply = g.generate_multiply();
5095     }
5096 
5097     if (UseMontgomerySquareIntrinsic) {

3971       __ eor(tmp1, tmp1, tmp2);
3972       __ cbnz(tmp1, NOT_EQUAL);
3973       __ br(__ GT, SMALL_LOOP);
3974     __ bind(POST_LOOP);
3975       __ ldr(tmp1, Address(a1, cnt1));
3976       __ ldr(tmp2, Address(a2, cnt1));
3977       __ eor(tmp1, tmp1, tmp2);
3978       __ cbnz(tmp1, NOT_EQUAL);
3979     __ bind(EQUAL);
3980       __ mov(result, true);
3981     __ bind(NOT_EQUAL);
3982       if (!UseSIMDForArrayEquals) {
3983         __ pop(spilled_regs, sp);
3984       }
3985     __ bind(NOT_EQUAL_NO_POP);
3986     __ leave();
3987     __ ret(lr);
3988     return entry;
3989   }
3990 
3991   void inflate_and_store_2_fp_registers(bool generatePrfm,
3992       FloatRegister src1, FloatRegister src2) {
         // Emits code that byte-interleaves src1 and src2 with v0 and stores the
         // resulting four 16-byte vectors (64 bytes) at the address in r1, then
         // advances r1 by 64 via post-increment.
         //
         // generatePrfm - when true, also emit a prefetch-for-store hint at
         //                r1 + SoftwarePrefetchHintDistance.
         // src1, src2   - vector registers holding the bytes to widen.
         //
         // Uses v1..v4 as temporaries (their previous contents are overwritten).
         // NOTE(review): v0 is read but never written here; the register comment on
         // generate_large_byte_array_inflate lists "V0 = 0", in which case each
         // source byte becomes a 16-bit element with a zero upper byte.
3993     Register dst = r1;
3994     __ zip1(v1, __ T16B, src1, v0);  // interleave lower half of src1 with v0
3995     __ zip2(v2, __ T16B, src1, v0);  // interleave upper half of src1 with v0
3996     if (generatePrfm) {
3997       __ prfm(Address(dst, SoftwarePrefetchHintDistance), PSTL1STRM);
3998     }
3999     __ zip1(v3, __ T16B, src2, v0);  // same for src2: lower half ...
4000     __ zip2(v4, __ T16B, src2, v0);  // ... and upper half
         // Single store of all four result vectors; dst (r1) += 64 afterwards.
4001     __ st1(v1, v2, v3, v4, __ T16B, Address(__ post(dst, 64)));
4002   }
4003 
4004   // R0 = src  (source address; advanced by post-increment as bytes are read)
4005   // R1 = dst  (destination address; advanced by post-increment as data is stored)
4006   // R2 = len  (bound to the local 'len' below, which the emitted code never references)
4007   // R3 = len >> 3  (octetCounter: count of 8-byte groups, decremented as groups are consumed)
4008   // V0 = 0  (interleaved with the source bytes by zip1/zip2)
4009   // v1 = loaded 8 bytes  (consumed below without being loaded in this stub, i.e. an input)
       //
       // Generates the "large_byte_array_inflate" stub and returns its entry address.
       // The stub widens bytes read from src into 16-bit elements written to dst:
       // first the 8 bytes already in v1 plus 8 more, then 64 source bytes per loop
       // iteration. Two loops are emitted: one that issues prefetch hints, used
       // while octetCounter is above large_loop_threshold, and one without.
       // NOTE(review): the stub unconditionally reads 8 + 64 bytes before testing
       // any count, and returns with octetCounter possibly non-zero (< 8);
       // presumably the caller guarantees a minimum length and handles the
       // remainder -- confirm against the call site.
4010   address generate_large_byte_array_inflate() {
4011     StubCodeMark mark(this, "StubRoutines", "large_byte_array_inflate");
4012     __ align(CodeEntryAlignment);
4013     address entry = __ pc();
4014     Label LOOP, LOOP_START, LOOP_PRFM, LOOP_PRFM_START, DONE;
4015     Register src = r0, dst = r1, len = r2, octetCounter = r3;
         // Threshold is expressed in 8-byte groups (hence the /8), like octetCounter.
4016     const int large_loop_threshold = MAX(64, SoftwarePrefetchHintDistance)/8 + 4;
4017 
4018     // Do one more 8-byte read so that the address is 16-byte aligned in most cases;
4019     // this also lets both 8-byte groups be written with a single store instruction.
4020     __ ldrd(v2, __ post(src, 8));
4021     __ sub(octetCounter, octetCounter, 2);  // two 8-byte groups (v1 and v2) are handled here
4022     __ zip1(v1, __ T16B, v1, v0);
4023     __ zip1(v2, __ T16B, v2, v0);
4024     __ st1(v1, v2, __ T16B, __ post(dst, 32));
         // Preload the first 64 source bytes for whichever loop is entered next.
4025     __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
4026     __ cmp(octetCounter, large_loop_threshold);
4027     __ br(__ LE, LOOP_START);   // at or below threshold: use the loop without prefetch
4028     __ b(LOOP_PRFM_START);      // otherwise enter the prefetching loop past its load
4029     __ bind(LOOP_PRFM);
4030       __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
4031     __ bind(LOOP_PRFM_START);
4032       __ prfm(Address(src, SoftwarePrefetchHintDistance));
4033       __ sub(octetCounter, octetCounter, 8);  // 64 bytes == 8 groups consumed per iteration
           // The br(GT) below tests the flags set by this cmp.
           // NOTE(review): relies on the zip/prfm/st1 emitted in between leaving flags untouched.
4034       __ cmp(octetCounter, large_loop_threshold);
4035       inflate_and_store_2_fp_registers(true, v3, v4);
4036       inflate_and_store_2_fp_registers(true, v5, v6);
4037       __ br(__ GT, LOOP_PRFM);
           // Prefetching loop finished; exit if fewer than 8 groups (64 bytes) remain.
4038       __ cmp(octetCounter, 8);
4039       __ br(__ LT, DONE);
4040     __ bind(LOOP);
4041       __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
           // Entry point when coming from the preamble: v3..v6 are already loaded.
4042       __ bind(LOOP_START);
4043       __ sub(octetCounter, octetCounter, 8);
           // As above, the br(GE) after the two helper calls tests this cmp's flags.
4044       __ cmp(octetCounter, 8);
4045       inflate_and_store_2_fp_registers(false, v3, v4);
4046       inflate_and_store_2_fp_registers(false, v5, v6);
4047       __ br(__ GE, LOOP);       // repeat while at least 8 groups remain
4048     __ bind(DONE);
4049       __ ret(lr);
4050     return entry;
4051   }
4052 
4053   /**
4054    *  Arguments:
4055    *
4056    *  Input:
4057    *  c_rarg0   - current state address
4058    *  c_rarg1   - H key address
4059    *  c_rarg2   - data address
4060    *  c_rarg3   - number of blocks
4061    *
4062    *  Output:
4063    *  Updated state at c_rarg0
4064    */
4065   address generate_ghash_processBlocks() {
4066     // Bafflingly, GCM uses little-endian for the byte order, but
4067     // big-endian for the bit order.  For example, the polynomial 1 is
4068     // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
4069     //
4070     // So, we must either reverse the bytes in each word and do
4071     // everything big-endian or reverse the bits in each byte and do

5119                                CAST_FROM_FN_PTR(address,
5120                                                 SharedRuntime::
5121                                                 throw_IncompatibleClassChangeError));
5122 
5123     StubRoutines::_throw_NullPointerException_at_call_entry =
5124       generate_throw_exception("NullPointerException at call throw_exception",
5125                                CAST_FROM_FN_PTR(address,
5126                                                 SharedRuntime::
5127                                                 throw_NullPointerException_at_call));
5128 
5129     // arraycopy stubs used by compilers
5130     generate_arraycopy_stubs();
5131 
5132     // has negatives stub for large arrays.
5133     StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
5134 
5135     // array equals stub for large arrays.
5136     if (!UseSimpleArrayEquals) {
5137       StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
5138     }
5139 
5140     // byte_array_inflate stub for large arrays.
5141     StubRoutines::aarch64::_large_byte_array_inflate = generate_large_byte_array_inflate();
5142 
5143     if (UseMultiplyToLenIntrinsic) {
5144       StubRoutines::_multiplyToLen = generate_multiplyToLen();
5145     }
5146 
5147     if (UseSquareToLenIntrinsic) {
5148       StubRoutines::_squareToLen = generate_squareToLen();
5149     }
5150 
5151     if (UseMulAddIntrinsic) {
5152       StubRoutines::_mulAdd = generate_mulAdd();
5153     }
5154 
5155     if (UseMontgomeryMultiplyIntrinsic) {
5156       StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
5157       MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
5158       StubRoutines::_montgomeryMultiply = g.generate_multiply();
5159     }
5160 
5161     if (UseMontgomerySquareIntrinsic) {

< prev index next >