< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page




2912  *   crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24]
2913  *
2914  */
2915 void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp,
2916         Register table0, Register table1, Register table2, Register table3,
2917         bool upper) {
2918   eor(v, crc, v, upper ? LSR:LSL, upper ? 32:0);
2919   uxtb(tmp, v);
2920   ldrw(crc, Address(table3, tmp, Address::lsl(2)));
2921   ubfx(tmp, v, 8, 8);
2922   ldrw(tmp, Address(table2, tmp, Address::lsl(2)));
2923   eor(crc, crc, tmp);
2924   ubfx(tmp, v, 16, 8);
2925   ldrw(tmp, Address(table1, tmp, Address::lsl(2)));
2926   eor(crc, crc, tmp);
2927   ubfx(tmp, v, 24, 8);
2928   ldrw(tmp, Address(table0, tmp, Address::lsl(2)));
2929   eor(crc, crc, tmp);
2930 }
2931 
2932 /**
2933  * @param crc   register containing existing CRC (32-bit)
2934  * @param buf   register pointing to input byte buffer (byte*)
2935  * @param len   register containing number of bytes
2936  * @param table register that will contain address of CRC table
2937  * @param tmp   scratch register
2938  */
2939 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
2940         Register table0, Register table1, Register table2, Register table3,
2941         Register tmp, Register tmp2, Register tmp3) {
2942   Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
2943   unsigned long offset;
2944 
2945     ornw(crc, zr, crc);
2946 
2947   if (UseCRC32) {
2948     Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop;
2949 
2950       subs(len, len, 64);
2951       br(Assembler::GE, CRC_by64_loop);
2952       adds(len, len, 64-4);

2953       br(Assembler::GE, CRC_by4_loop);
2954       adds(len, len, 4);
2955       br(Assembler::GT, CRC_by1_loop);
2956       b(L_exit);
2957 














2958     BIND(CRC_by4_loop);
2959       ldrw(tmp, Address(post(buf, 4)));
2960       subs(len, len, 4);
2961       crc32w(crc, crc, tmp);
2962       br(Assembler::GE, CRC_by4_loop);
2963       adds(len, len, 4);
2964       br(Assembler::LE, L_exit);
2965     BIND(CRC_by1_loop);
2966       ldrb(tmp, Address(post(buf, 1)));
2967       subs(len, len, 1);
2968       crc32b(crc, crc, tmp);
2969       br(Assembler::GT, CRC_by1_loop);
2970       b(L_exit);
2971 


















2972       align(CodeEntryAlignment);
2973     BIND(CRC_by64_loop);
2974       subs(len, len, 64);
2975       ldp(tmp, tmp3, Address(post(buf, 16)));
2976       crc32x(crc, crc, tmp);
2977       crc32x(crc, crc, tmp3);
2978       ldp(tmp, tmp3, Address(post(buf, 16)));
2979       crc32x(crc, crc, tmp);
2980       crc32x(crc, crc, tmp3);
2981       ldp(tmp, tmp3, Address(post(buf, 16)));
2982       crc32x(crc, crc, tmp);
2983       crc32x(crc, crc, tmp3);
2984       ldp(tmp, tmp3, Address(post(buf, 16)));
2985       crc32x(crc, crc, tmp);





2986       crc32x(crc, crc, tmp3);





2987       br(Assembler::GE, CRC_by64_loop);
2988       adds(len, len, 64-4);
2989       br(Assembler::GE, CRC_by4_loop);
2990       adds(len, len, 4);
2991       br(Assembler::GT, CRC_by1_loop);





2992     BIND(L_exit);
2993       ornw(crc, zr, crc);

















2994       return;
2995   }


2996 
2997     adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
2998     if (offset) add(table0, table0, offset);
2999     add(table1, table0, 1*256*sizeof(juint));
3000     add(table2, table0, 2*256*sizeof(juint));
3001     add(table3, table0, 3*256*sizeof(juint));
3002 
3003   if (UseNeon) {
3004       cmp(len, 64);
3005       br(Assembler::LT, L_by16);
3006       eor(v16, T16B, v16, v16);
3007 
3008     Label L_fold;
3009 
3010       add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants
3011 
3012       ld1(v0, v1, T2D, post(buf, 32));
3013       ld1r(v4, T2D, post(tmp, 8));
3014       ld1r(v5, T2D, post(tmp, 8));
3015       ld1r(v6, T2D, post(tmp, 8));




2912  *   crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24]
2913  *
2914  */
2915 void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp,
2916         Register table0, Register table1, Register table2, Register table3,
2917         bool upper) {
2918   eor(v, crc, v, upper ? LSR:LSL, upper ? 32:0);
2919   uxtb(tmp, v);
2920   ldrw(crc, Address(table3, tmp, Address::lsl(2)));
2921   ubfx(tmp, v, 8, 8);
2922   ldrw(tmp, Address(table2, tmp, Address::lsl(2)));
2923   eor(crc, crc, tmp);
2924   ubfx(tmp, v, 16, 8);
2925   ldrw(tmp, Address(table1, tmp, Address::lsl(2)));
2926   eor(crc, crc, tmp);
2927   ubfx(tmp, v, 24, 8);
2928   ldrw(tmp, Address(table0, tmp, Address::lsl(2)));
2929   eor(crc, crc, tmp);
2930 }
2931 
2932 void MacroAssembler::kernel_crc32_using_crc32(Register crc, Register buf,
2933         Register len, Register tmp0, Register tmp1, Register tmp2,
2934         Register tmp3) {
2935     Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit;
2936     assert_different_registers(crc, buf, len, tmp0, tmp1, tmp2, tmp3);







2937 
2938     ornw(crc, zr, crc);
2939 
2940     subs(len, len, 128);
2941     br(Assembler::GE, CRC_by64_pre);
2942   BIND(CRC_less64);
2943     adds(len, len, 128-32);
2944     br(Assembler::GE, CRC_by32_loop);
2945   BIND(CRC_less32)
2946     adds(len, len, 32-4);
2947     br(Assembler::GE, CRC_by4_loop);
2948     adds(len, len, 4);
2949     br(Assembler::GT, CRC_by1_loop);
2950     b(L_exit);
2951 
2952   BIND(CRC_by32_loop);
2953     ldp(tmp0, tmp1, Address(post(buf, 16)));
2954     subs(len, len, 32);
2955     crc32x(crc, crc, tmp0);
2956     ldr(tmp2, Address(post(buf, 8)));
2957     crc32x(crc, crc, tmp1);
2958     ldr(tmp3, Address(post(buf, 8)));
2959     crc32x(crc, crc, tmp2);
2960     crc32x(crc, crc, tmp3);
2961     br(Assembler::GE, CRC_by32_loop);
2962     cmn(len, 32);
2963     br(Assembler::NE, CRC_less32);
2964     b(L_exit);
2965 
2966   BIND(CRC_by4_loop);
2967     ldrw(tmp0, Address(post(buf, 4)));
2968     subs(len, len, 4);
2969     crc32w(crc, crc, tmp0);
2970     br(Assembler::GE, CRC_by4_loop);
2971     adds(len, len, 4);
2972     br(Assembler::LE, L_exit);
2973   BIND(CRC_by1_loop);
2974     ldrb(tmp0, Address(post(buf, 1)));
2975     subs(len, len, 1);
2976     crc32b(crc, crc, tmp0);
2977     br(Assembler::GT, CRC_by1_loop);
2978     b(L_exit);
2979 
2980   BIND(CRC_by64_pre);
2981     sub(buf, buf, 8);
2982     ldp(tmp0, tmp1, Address(buf, 8));
2983     crc32x(crc, crc, tmp0);
2984     ldr(tmp2, Address(buf, 24));
2985     crc32x(crc, crc, tmp1);
2986     ldr(tmp3, Address(buf, 32));
2987     crc32x(crc, crc, tmp2);
2988     ldr(tmp0, Address(buf, 40));
2989     crc32x(crc, crc, tmp3);
2990     ldr(tmp1, Address(buf, 48));
2991     crc32x(crc, crc, tmp0);
2992     ldr(tmp2, Address(buf, 56));
2993     crc32x(crc, crc, tmp1);
2994     ldr(tmp3, Address(pre(buf, 64)));
2995 
2996     b(CRC_by64_loop);
2997 
2998     align(CodeEntryAlignment);
2999   BIND(CRC_by64_loop);
3000     subs(len, len, 64);
3001     crc32x(crc, crc, tmp2);
3002     ldr(tmp0, Address(buf, 8));






3003     crc32x(crc, crc, tmp3);
3004     ldr(tmp1, Address(buf, 16));
3005     crc32x(crc, crc, tmp0);
3006     ldr(tmp2, Address(buf, 24));
3007     crc32x(crc, crc, tmp1);
3008     ldr(tmp3, Address(buf, 32));
3009     crc32x(crc, crc, tmp2);
3010     ldr(tmp0, Address(buf, 40));
3011     crc32x(crc, crc, tmp3);
3012     ldr(tmp1, Address(buf, 48));
3013     crc32x(crc, crc, tmp0);
3014     ldr(tmp2, Address(buf, 56));
3015     crc32x(crc, crc, tmp1);
3016     ldr(tmp3, Address(pre(buf, 64)));
3017     br(Assembler::GE, CRC_by64_loop);
3018 
3019     // post-loop
3020     crc32x(crc, crc, tmp2);
3021     crc32x(crc, crc, tmp3);
3022 
3023     sub(len, len, 64);
3024     add(buf, buf, 8);
3025     cmn(len, 128);
3026     br(Assembler::NE, CRC_less64);
3027   BIND(L_exit);
3028     ornw(crc, zr, crc);
3029 }
3030 
3031 /**
3032  * @param crc   register containing existing CRC (32-bit)
3033  * @param buf   register pointing to input byte buffer (byte*)
3034  * @param len   register containing number of bytes
3035  * @param table register that will contain address of CRC table
3036  * @param tmp   scratch register
3037  */
3038 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
3039         Register table0, Register table1, Register table2, Register table3,
3040         Register tmp, Register tmp2, Register tmp3) {
3041   Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
3042   unsigned long offset;
3043 
3044   if (UseCRC32) {
3045       kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, table3);
3046       return;
3047   }
3048 
3049     ornw(crc, zr, crc);
3050 
3051     adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
3052     if (offset) add(table0, table0, offset);
3053     add(table1, table0, 1*256*sizeof(juint));
3054     add(table2, table0, 2*256*sizeof(juint));
3055     add(table3, table0, 3*256*sizeof(juint));
3056 
3057   if (UseNeon) {
3058       cmp(len, 64);
3059       br(Assembler::LT, L_by16);
3060       eor(v16, T16B, v16, v16);
3061 
3062     Label L_fold;
3063 
3064       add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants
3065 
3066       ld1(v0, v1, T2D, post(buf, 32));
3067       ld1r(v4, T2D, post(tmp, 8));
3068       ld1r(v5, T2D, post(tmp, 8));
3069       ld1r(v6, T2D, post(tmp, 8));


< prev index next >