926 Register reg_h,
927 int iter);
// Helpers for the AVX2 SHA-256 implementation (entry point sha256_AVX2 below).
// 'start' is the round index of the first round in the group of four.
928 void sha256_AVX2_four_rounds_compute_first(int start);
929 void sha256_AVX2_four_rounds_compute_last(int start);
// Emits one SHA-256 round fused with one message-schedule step; the xmm/reg
// parameters follow the rotation scheme described per-parameter below and
// 'iter' is the round index.
930 void sha256_AVX2_one_round_and_sched(
931 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
932 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */
933 XMMRegister xmm_2, /* ymm6 */
934 XMMRegister xmm_3, /* ymm7 */
935 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */
936 Register reg_b, /* ebx */ /* full cycle is 8 iterations */
937 Register reg_c, /* edi */
938 Register reg_d, /* esi */
939 Register reg_e, /* r8d */
940 Register reg_f, /* r9d */
941 Register reg_g, /* r10d */
942 Register reg_h, /* r11d */
943 int iter);
944
// Add-with-memory helper used by the SHA-256 code (defined in the .cpp).
945 void addm(int disp, Register r1, Register r2);
946
947 public:
// Generates the AVX2 SHA-256 block-compression loop.  NOTE(review): parameter
// names suggest buf = message data, state = hash state, and that ofs/limit
// drive the loop when multi_block is true — confirm against the .cpp.
948 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
949 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
950 Register buf, Register state, Register ofs, Register limit, Register rsp,
951 bool multi_block, XMMRegister shuf_mask);
952 #endif
953
954 #ifdef _LP64
955 private:
// One round of the SHA-512 compression function; 'iteration' is the round
// index and the a..h registers rotate between calls ('old_h' rotates out).
956 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
957 Register e, Register f, Register g, Register h, int iteration);
958
// One SHA-512 round fused with one message-schedule step.
959 void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
960 Register a, Register b, Register c, Register d, Register e, Register f,
961 Register g, Register h, int iteration);
962
// Quadword (64-bit) variant of addm (defined in the .cpp).
963 void addmq(int disp, Register r1, Register r2);
964 public:
// Generates the AVX2 SHA-512 block-compression loop (LP64 only).
965 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
966 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
967 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
968 XMMRegister shuf_mask);
969 #endif
970
971 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
1481 }
// Extract the low 256 bits of src into dst (VEXTRACTF64X4 with imm8 == 0
// selects the lower half).
1482 void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
1483 Assembler::vextractf64x4(dst, src, 0);
1484 }
// Store the low 256 bits of src to memory (imm8 == 0 selects the lower half).
1485 void vextractf64x4_low(Address dst, XMMRegister src) {
1486 Assembler::vextractf64x4(dst, src, 0);
1487 }
// Insert 256 bits from memory into the lower half of dst; the upper half is
// preserved because dst is also passed as the first source (nds).
1488 void vinsertf64x4_low(XMMRegister dst, Address src) {
1489 Assembler::vinsertf64x4(dst, dst, src, 0);
1490 }
1491
1492 // Carry-Less Multiplication Quadword
// Low x low: imm8 0x00 selects bits [63:0] of both nds and src.
1493 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1494 // 0x00 - multiply lower 64 bits [0:63]
1495 Assembler::vpclmulqdq(dst, nds, src, 0x00);
1496 }
// High x high: imm8 0x11 selects bits [127:64] of both nds and src.
1497 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1498 // 0x11 - multiply upper 64 bits [64:127]
1499 Assembler::vpclmulqdq(dst, nds, src, 0x11);
1500 }
// EVEX-encoded low x low carry-less multiply; vector_len selects the
// 128/256/512-bit operation width.
1501 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1502 // 0x00 - multiply lower 64 bits [0:63]
1503 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
1504 }
// EVEX-encoded high x high carry-less multiply; vector_len selects the
// 128/256/512-bit operation width.
1505 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1506 // 0x11 - multiply upper 64 bits [64:127]
1507 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
1508 }
1509
1510 // Data
1511
// 32-bit conditional moves (defined in the .cpp); used by cmovptr on 32-bit.
1512 void cmov32( Condition cc, Register dst, Address src);
1513 void cmov32( Condition cc, Register dst, Register src);
1514
// Pointer-width conditional register move; delegates to cmovptr.
1515 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
1516
// Conditional move of a pointer-sized value: cmovq on LP64, cmov32 otherwise.
1517 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1518 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1519
// Materialize the jobject constant 'obj' in 'dst' (defined in the .cpp).
1520 void movoop(Register dst, jobject obj);
|
926 Register reg_h,
927 int iter);
// Helpers for the AVX2 SHA-256 implementation (entry point sha256_AVX2 below).
// 'start' is the round index of the first round in the group of four.
928 void sha256_AVX2_four_rounds_compute_first(int start);
929 void sha256_AVX2_four_rounds_compute_last(int start);
// Emits one SHA-256 round fused with one message-schedule step; the xmm/reg
// parameters follow the rotation scheme described per-parameter below and
// 'iter' is the round index.
930 void sha256_AVX2_one_round_and_sched(
931 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
932 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */
933 XMMRegister xmm_2, /* ymm6 */
934 XMMRegister xmm_3, /* ymm7 */
935 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */
936 Register reg_b, /* ebx */ /* full cycle is 8 iterations */
937 Register reg_c, /* edi */
938 Register reg_d, /* esi */
939 Register reg_e, /* r8d */
940 Register reg_f, /* r9d */
941 Register reg_g, /* r10d */
942 Register reg_h, /* r11d */
943 int iter);
944
// Add-with-memory helper used by the SHA-256 code (defined in the .cpp).
945 void addm(int disp, Register r1, Register r2);
// GHASH support for the AVX AES-GCM intrinsic.  NOTE(review): names suggest
// gfmul = Galois-field multiply, schoolbookAAD = schoolbook multiply step over
// additional authenticated data, generateHtbl_* = subkey-H power tables —
// confirm against the .cpp definitions.
946 void gfmul(XMMRegister tmp0, XMMRegister t);
947 void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
948 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
949 void generateHtbl_one_block(Register htbl);
950 void generateHtbl_eight_blocks(Register htbl);
951 public:
// Generates the AVX2 SHA-256 block-compression loop.  NOTE(review): parameter
// names suggest buf = message data, state = hash state, and that ofs/limit
// drive the loop when multi_block is true — confirm against the .cpp.
952 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
953 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
954 Register buf, Register state, Register ofs, Register limit, Register rsp,
955 bool multi_block, XMMRegister shuf_mask);
// Generates the AVX GHASH update over 'blocks' blocks of 'data' using the
// precomputed table at 'htbl', accumulating into 'state'.
956 void avx_ghash(Register state, Register htbl, Register data, Register blocks);
957 #endif
958
959 #ifdef _LP64
960 private:
// One round of the SHA-512 compression function; 'iteration' is the round
// index and the a..h registers rotate between calls ('old_h' rotates out).
961 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
962 Register e, Register f, Register g, Register h, int iteration);
963
// One SHA-512 round fused with one message-schedule step.
964 void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
965 Register a, Register b, Register c, Register d, Register e, Register f,
966 Register g, Register h, int iteration);
967
// Quadword (64-bit) variant of addm (defined in the .cpp).
968 void addmq(int disp, Register r1, Register r2);
969 public:
// Generates the AVX2 SHA-512 block-compression loop (LP64 only).
970 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
971 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
972 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
973 XMMRegister shuf_mask);
974 #endif
975
976 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
1486 }
// Extract the low 256 bits of src into dst (VEXTRACTF64X4 with imm8 == 0
// selects the lower half).
1487 void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
1488 Assembler::vextractf64x4(dst, src, 0);
1489 }
// Store the low 256 bits of src to memory (imm8 == 0 selects the lower half).
1490 void vextractf64x4_low(Address dst, XMMRegister src) {
1491 Assembler::vextractf64x4(dst, src, 0);
1492 }
// Insert 256 bits from memory into the lower half of dst; the upper half is
// preserved because dst is also passed as the first source (nds).
1493 void vinsertf64x4_low(XMMRegister dst, Address src) {
1494 Assembler::vinsertf64x4(dst, dst, src, 0);
1495 }
1496
1497 // Carry-Less Multiplication Quadword
// Low x low: imm8 0x00 selects bits [63:0] of both nds and src.
1498 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1499 // 0x00 - multiply lower 64 bits [0:63]
1500 Assembler::vpclmulqdq(dst, nds, src, 0x00);
1501 }
// High x high: imm8 0x11 selects bits [127:64] of both nds and src.
1502 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1503 // 0x11 - multiply upper 64 bits [64:127]
1504 Assembler::vpclmulqdq(dst, nds, src, 0x11);
1505 }
// Cross multiply, low x high: imm8 0x10 selects nds[63:0] and src[127:64].
1506 void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1507 // 0x10 - multiply nds[0:63] and src[64:127]
1508 Assembler::vpclmulqdq(dst, nds, src, 0x10);
1509 }
// Cross multiply, high x low: imm8 0x01 selects nds[127:64] and src[63:0].
1510 void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1511 //0x01 - multiply nds[64:127] and src[0:63]
1512 Assembler::vpclmulqdq(dst, nds, src, 0x01);
1513 }
1514
// EVEX-encoded low x low carry-less multiply; vector_len selects the
// 128/256/512-bit operation width.
1515 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1516 // 0x00 - multiply lower 64 bits [0:63]
1517 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
1518 }
// EVEX-encoded high x high carry-less multiply; vector_len selects the
// 128/256/512-bit operation width.
1519 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1520 // 0x11 - multiply upper 64 bits [64:127]
1521 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
1522 }
1523
1524 // Data
1525
// 32-bit conditional moves (defined in the .cpp); used by cmovptr on 32-bit.
1526 void cmov32( Condition cc, Register dst, Address src);
1527 void cmov32( Condition cc, Register dst, Register src);
1528
// Pointer-width conditional register move; delegates to cmovptr.
1529 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
1530
// Conditional move of a pointer-sized value: cmovq on LP64, cmov32 otherwise.
1531 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1532 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1533
// Materialize the jobject constant 'obj' in 'dst' (defined in the .cpp).
1534 void movoop(Register dst, jobject obj);
|