2912 * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24] 2913 * 2914 */ 2915 void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, 2916 Register table0, Register table1, Register table2, Register table3, 2917 bool upper) { 2918 eor(v, crc, v, upper ? LSR:LSL, upper ? 32:0); 2919 uxtb(tmp, v); 2920 ldrw(crc, Address(table3, tmp, Address::lsl(2))); 2921 ubfx(tmp, v, 8, 8); 2922 ldrw(tmp, Address(table2, tmp, Address::lsl(2))); 2923 eor(crc, crc, tmp); 2924 ubfx(tmp, v, 16, 8); 2925 ldrw(tmp, Address(table1, tmp, Address::lsl(2))); 2926 eor(crc, crc, tmp); 2927 ubfx(tmp, v, 24, 8); 2928 ldrw(tmp, Address(table0, tmp, Address::lsl(2))); 2929 eor(crc, crc, tmp); 2930 } 2931 2932 /** 2933 * @param crc register containing existing CRC (32-bit) 2934 * @param buf register pointing to input byte buffer (byte*) 2935 * @param len register containing number of bytes 2936 * @param table register that will contain address of CRC table 2937 * @param tmp scratch register 2938 */ 2939 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, 2940 Register table0, Register table1, Register table2, Register table3, 2941 Register tmp, Register tmp2, Register tmp3) { 2942 Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit; 2943 unsigned long offset; 2944 2945 ornw(crc, zr, crc); 2946 2947 if (UseCRC32) { 2948 Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop; 2949 2950 subs(len, len, 64); 2951 br(Assembler::GE, CRC_by64_loop); 2952 adds(len, len, 64-4); 2953 br(Assembler::GE, CRC_by4_loop); 2954 adds(len, len, 4); 2955 br(Assembler::GT, CRC_by1_loop); 2956 b(L_exit); 2957 2958 BIND(CRC_by4_loop); 2959 ldrw(tmp, Address(post(buf, 4))); 2960 subs(len, len, 4); 2961 crc32w(crc, crc, tmp); 2962 br(Assembler::GE, CRC_by4_loop); 2963 adds(len, len, 4); 2964 br(Assembler::LE, L_exit); 2965 BIND(CRC_by1_loop); 2966 ldrb(tmp, Address(post(buf, 1))); 2967 subs(len, len, 1); 2968 crc32b(crc, crc, tmp); 2969 
br(Assembler::GT, CRC_by1_loop); 2970 b(L_exit); 2971 2972 align(CodeEntryAlignment); 2973 BIND(CRC_by64_loop); 2974 subs(len, len, 64); 2975 ldp(tmp, tmp3, Address(post(buf, 16))); 2976 crc32x(crc, crc, tmp); 2977 crc32x(crc, crc, tmp3); 2978 ldp(tmp, tmp3, Address(post(buf, 16))); 2979 crc32x(crc, crc, tmp); 2980 crc32x(crc, crc, tmp3); 2981 ldp(tmp, tmp3, Address(post(buf, 16))); 2982 crc32x(crc, crc, tmp); 2983 crc32x(crc, crc, tmp3); 2984 ldp(tmp, tmp3, Address(post(buf, 16))); 2985 crc32x(crc, crc, tmp); 2986 crc32x(crc, crc, tmp3); 2987 br(Assembler::GE, CRC_by64_loop); 2988 adds(len, len, 64-4); 2989 br(Assembler::GE, CRC_by4_loop); 2990 adds(len, len, 4); 2991 br(Assembler::GT, CRC_by1_loop); 2992 BIND(L_exit); 2993 ornw(crc, zr, crc); 2994 return; 2995 } 2996 2997 adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset); 2998 if (offset) add(table0, table0, offset); 2999 add(table1, table0, 1*256*sizeof(juint)); 3000 add(table2, table0, 2*256*sizeof(juint)); 3001 add(table3, table0, 3*256*sizeof(juint)); 3002 3003 if (UseNeon) { 3004 cmp(len, 64); 3005 br(Assembler::LT, L_by16); 3006 eor(v16, T16B, v16, v16); 3007 3008 Label L_fold; 3009 3010 add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants 3011 3012 ld1(v0, v1, T2D, post(buf, 32)); 3013 ld1r(v4, T2D, post(tmp, 8)); 3014 ld1r(v5, T2D, post(tmp, 8)); 3015 ld1r(v6, T2D, post(tmp, 8)); | 2912 * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24] 2913 * 2914 */ 2915 void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, 2916 Register table0, Register table1, Register table2, Register table3, 2917 bool upper) { 2918 eor(v, crc, v, upper ? LSR:LSL, upper ? 
32:0); 2919 uxtb(tmp, v); 2920 ldrw(crc, Address(table3, tmp, Address::lsl(2))); 2921 ubfx(tmp, v, 8, 8); 2922 ldrw(tmp, Address(table2, tmp, Address::lsl(2))); 2923 eor(crc, crc, tmp); 2924 ubfx(tmp, v, 16, 8); 2925 ldrw(tmp, Address(table1, tmp, Address::lsl(2))); 2926 eor(crc, crc, tmp); 2927 ubfx(tmp, v, 24, 8); 2928 ldrw(tmp, Address(table0, tmp, Address::lsl(2))); 2929 eor(crc, crc, tmp); 2930 } 2931
/**
 * Emit a CRC-32 update over a byte buffer using the crc32x / crc32w / crc32b
 * instructions.
 *
 * The emitted code bitwise-inverts crc on entry and again on exit
 * (ornw with zr), and clobbers the condition flags (subs/adds/cmn).
 *
 * Input is consumed in progressively smaller strides:
 *   - 128 bytes or more: a 64-byte loop that keeps loads one block ahead of
 *     the crc32x instructions that use them,
 *   - 32 bytes or more:  a 32-byte loop,
 *   - 4 bytes or more:   a 4-byte loop,
 *   - otherwise:         a byte loop.
 * Rather than keeping the raw remaining count, len is kept biased by the
 * stride of the loop about to run, so that a single subs both counts down
 * and produces the loop condition.
 *
 * @param crc   register containing the existing CRC (32-bit); receives the result
 * @param buf   register pointing to the input byte buffer; modified as data is consumed
 * @param len   register containing the number of bytes; clobbered
 * @param tmp0  scratch register
 * @param tmp1  scratch register
 * @param tmp2  scratch register
 * @param tmp3  scratch register
 *
 * All seven registers must be distinct (asserted).
 */
void MacroAssembler::kernel_crc32_using_crc32(Register crc, Register buf,
        Register len, Register tmp0, Register tmp1, Register tmp2,
        Register tmp3) {
    Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit;
    assert_different_registers(crc, buf, len, tmp0, tmp1, tmp2, tmp3);

    ornw(crc, zr, crc);

    // The 64-byte path needs at least 128 bytes: its prologue loads one
    // 64-byte block and the first loop iteration unconditionally loads another.
    subs(len, len, 128);
    br(Assembler::GE, CRC_by64_pre);
  BIND(CRC_less64);
    // Here len == remaining - 128; re-bias to remaining - 32.
    adds(len, len, 128-32);
    br(Assembler::GE, CRC_by32_loop);
  BIND(CRC_less32);
    // Here len == remaining - 32; re-bias to remaining - 4.
    adds(len, len, 32-4);
    br(Assembler::GE, CRC_by4_loop);
    // Remove the bias: len == remaining (0..3).
    adds(len, len, 4);
    br(Assembler::GT, CRC_by1_loop);
    b(L_exit);

  BIND(CRC_by32_loop);
    // 32 bytes per iteration; loads are interleaved with the crc32x chain.
    ldp(tmp0, tmp1, Address(post(buf, 16)));
    subs(len, len, 32);
    crc32x(crc, crc, tmp0);
    ldr(tmp2, Address(post(buf, 8)));
    crc32x(crc, crc, tmp1);
    ldr(tmp3, Address(post(buf, 8)));
    crc32x(crc, crc, tmp2);
    crc32x(crc, crc, tmp3);
    br(Assembler::GE, CRC_by32_loop);
    // len == remaining - 32; anything left (1..31 bytes) goes to the tail code.
    cmn(len, 32);
    br(Assembler::NE, CRC_less32);
    b(L_exit);

  BIND(CRC_by4_loop);
    ldrw(tmp0, Address(post(buf, 4)));
    subs(len, len, 4);
    crc32w(crc, crc, tmp0);
    br(Assembler::GE, CRC_by4_loop);
    // Remove the bias: len == remaining (0..3).
    adds(len, len, 4);
    br(Assembler::LE, L_exit);
  BIND(CRC_by1_loop);
    ldrb(tmp0, Address(post(buf, 1)));
    subs(len, len, 1);
    crc32b(crc, crc, tmp0);
    br(Assembler::GT, CRC_by1_loop);
    b(L_exit);

  BIND(CRC_by64_pre);
    // Prologue of the pipelined loop. buf is biased by -8 so that the last
    // load of each block can be a pre-indexed load with a writeback of 64,
    // which both fetches the final doubleword and advances buf by one block.
    sub(buf, buf, 8);
    ldp(tmp0, tmp1, Address(buf, 8));
    crc32x(crc, crc, tmp0);
    ldr(tmp2, Address(buf, 24));
    crc32x(crc, crc, tmp1);
    ldr(tmp3, Address(buf, 32));
    crc32x(crc, crc, tmp2);
    ldr(tmp0, Address(buf, 40));
    crc32x(crc, crc, tmp3);
    ldr(tmp1, Address(buf, 48));
    crc32x(crc, crc, tmp0);
    ldr(tmp2, Address(buf, 56));
    crc32x(crc, crc, tmp1);
    ldr(tmp3, Address(pre(buf, 64)));
    // tmp2 and tmp3 now hold the last two doublewords of the block; they are
    // folded into crc at the top of the loop (or in the post-loop code).

    b(CRC_by64_loop);

    align(CodeEntryAlignment);
  BIND(CRC_by64_loop);
    subs(len, len, 64);
    // Fold the two doublewords still pending from the previous block ...
    crc32x(crc, crc, tmp2);
    ldr(tmp0, Address(buf, 8));
    crc32x(crc, crc, tmp3);
    ldr(tmp1, Address(buf, 16));
    // ... then the first six doublewords of this block while loading the rest.
    crc32x(crc, crc, tmp0);
    ldr(tmp2, Address(buf, 24));
    crc32x(crc, crc, tmp1);
    ldr(tmp3, Address(buf, 32));
    crc32x(crc, crc, tmp2);
    ldr(tmp0, Address(buf, 40));
    crc32x(crc, crc, tmp3);
    ldr(tmp1, Address(buf, 48));
    crc32x(crc, crc, tmp0);
    ldr(tmp2, Address(buf, 56));
    crc32x(crc, crc, tmp1);
    ldr(tmp3, Address(pre(buf, 64)));
    br(Assembler::GE, CRC_by64_loop);

    // post-loop: fold the two doublewords that were loaded but not yet used.
    crc32x(crc, crc, tmp2);
    crc32x(crc, crc, tmp3);

    // Account for the block the prologue loaded ahead (len == remaining - 128
    // afterwards) and undo the -8 bias on buf.
    sub(len, len, 64);
    add(buf, buf, 8);
    cmn(len, 128);
    br(Assembler::NE, CRC_less64);
  BIND(L_exit);
    ornw(crc, zr, crc);
}
3030 3031 /** 3032 * @param crc register containing existing CRC (32-bit) 3033 * @param buf register pointing to input byte buffer (byte*) 3034 * @param len register containing number of bytes 3035 * @param table register that will contain address of CRC table 3036 * @param tmp scratch register 3037 */ 3038 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, 3039 Register table0, Register table1, Register table2, Register table3, 3040 Register tmp, Register tmp2, Register tmp3) { 3041 Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit; 3042 unsigned long offset; 3043 3044 if (UseCRC32) { 3045 kernel_crc32_using_crc32(crc, buf, len, table0, table1, table2, 
table3); 3046 return; 3047 } 3048 3049 ornw(crc, zr, crc); 3050 3051 adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset); 3052 if (offset) add(table0, table0, offset); 3053 add(table1, table0, 1*256*sizeof(juint)); 3054 add(table2, table0, 2*256*sizeof(juint)); 3055 add(table3, table0, 3*256*sizeof(juint)); 3056 3057 if (UseNeon) { 3058 cmp(len, 64); 3059 br(Assembler::LT, L_by16); 3060 eor(v16, T16B, v16, v16); 3061 3062 Label L_fold; 3063 3064 add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants 3065 3066 ld1(v0, v1, T2D, post(buf, 32)); 3067 ld1r(v4, T2D, post(tmp, 8)); 3068 ld1r(v5, T2D, post(tmp, 8)); 3069 ld1r(v6, T2D, post(tmp, 8)); |