926 Register reg_h, 927 int iter); 928 void sha256_AVX2_four_rounds_compute_first(int start); 929 void sha256_AVX2_four_rounds_compute_last(int start); 930 void sha256_AVX2_one_round_and_sched( 931 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ 932 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ 933 XMMRegister xmm_2, /* ymm6 */ 934 XMMRegister xmm_3, /* ymm7 */ 935 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */ 936 Register reg_b, /* ebx */ /* full cycle is 8 iterations */ 937 Register reg_c, /* edi */ 938 Register reg_d, /* esi */ 939 Register reg_e, /* r8d */ 940 Register reg_f, /* r9d */ 941 Register reg_g, /* r10d */ 942 Register reg_h, /* r11d */ 943 int iter); 944 945 void addm(int disp, Register r1, Register r2); 946 947 public: 948 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 949 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 950 Register buf, Register state, Register ofs, Register limit, Register rsp, 951 bool multi_block, XMMRegister shuf_mask); 952 #endif 953 954 #ifdef _LP64 955 private: 956 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, 957 Register e, Register f, Register g, Register h, int iteration); 958 959 void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 960 Register a, Register b, Register c, Register d, Register e, Register f, 961 Register g, Register h, int iteration); 962 963 void addmq(int disp, Register r1, Register r2); 964 public: 965 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 966 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 967 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, 968 XMMRegister shuf_mask); 969 #endif 970 971 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, 1481 } 1482 void vextractf64x4_low(XMMRegister dst, XMMRegister src) { 1483 Assembler::vextractf64x4(dst, src, 0); 1484 } 1485 void vextractf64x4_low(Address dst, XMMRegister src) { 1486 Assembler::vextractf64x4(dst, src, 0); 1487 } 1488 void vinsertf64x4_low(XMMRegister dst, Address src) { 1489 Assembler::vinsertf64x4(dst, dst, src, 0); 1490 } 1491 1492 // Carry-Less Multiplication Quadword 1493 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 1494 // 0x00 - multiply lower 64 bits [0:63] 1495 Assembler::vpclmulqdq(dst, nds, src, 0x00); 1496 } 1497 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 1498 // 0x11 - multiply upper 64 bits [64:127] 1499 Assembler::vpclmulqdq(dst, nds, src, 0x11); 1500 } 1501 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 1502 // 0x00 - multiply lower 64 bits [0:63] 1503 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len); 1504 } 1505 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 1506 // 0x11 - multiply upper 64 bits [64:127] 1507 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len); 1508 } 1509 1510 // Data 1511 1512 void cmov32( Condition cc, Register dst, Address src); 1513 void cmov32( Condition cc, Register dst, Register src); 1514 1515 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } 1516 1517 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } 1518 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } 1519 1520 void movoop(Register dst, jobject obj); | 926 Register reg_h, 927 int iter); 928 void sha256_AVX2_four_rounds_compute_first(int start); 929 void sha256_AVX2_four_rounds_compute_last(int start); 930 void sha256_AVX2_one_round_and_sched( 931 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ 932 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ 933 XMMRegister xmm_2, /* ymm6 */ 934 XMMRegister xmm_3, /* ymm7 */ 935 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */ 936 Register reg_b, /* ebx */ /* full cycle is 8 iterations */ 937 Register reg_c, /* edi */ 938 Register reg_d, /* esi */ 939 Register reg_e, /* r8d */ 940 Register reg_f, /* r9d */ 941 Register reg_g, /* r10d */ 942 Register reg_h, /* r11d */ 943 int iter); 944 945 void addm(int disp, Register r1, Register r2); 946 void gfmul(XMMRegister tmp0, XMMRegister t); 947 void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0, 948 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3); 949 void generateHtbl(Register htbl); 950 951 public: 952 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 953 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 954 Register buf, Register state, Register ofs, Register limit, Register rsp, 955 bool multi_block, XMMRegister shuf_mask); 956 void avx_ghash(Register state, Register htbl, Register data, Register blocks); 957 #endif 958 959 #ifdef _LP64 960 private: 961 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, 962 Register e, Register f, Register g, Register h, int iteration); 963 964 void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, 965 Register a, Register b, Register c, Register d, Register e, Register f, 966 Register g, Register h, int iteration); 967 968 void addmq(int disp, Register r1, Register r2); 969 public: 970 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, 971 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, 972 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, 973 XMMRegister shuf_mask); 974 #endif 975 976 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, 1486 } 1487 void vextractf64x4_low(XMMRegister dst, XMMRegister src) { 1488 Assembler::vextractf64x4(dst, src, 0); 1489 } 1490 void vextractf64x4_low(Address dst, XMMRegister src) { 1491 Assembler::vextractf64x4(dst, src, 0); 1492 } 1493 void vinsertf64x4_low(XMMRegister dst, Address src) { 1494 Assembler::vinsertf64x4(dst, dst, src, 0); 1495 } 1496 1497 // Carry-Less Multiplication Quadword 1498 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 1499 // 0x00 - multiply lower 64 bits [0:63] 1500 Assembler::vpclmulqdq(dst, nds, src, 0x00); 1501 } 1502 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 1503 // 0x11 - multiply upper 64 bits [64:127] 1504 Assembler::vpclmulqdq(dst, nds, src, 0x11); 1505 } 1506 void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 1507 // 0x10 - multiply nds[0:63] and src[64:127] 1508 Assembler::vpclmulqdq(dst, nds, src, 0x10); 1509 } 1510 void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { 1511 //0x01 - multiply nds[64:127] and src[0:63] 1512 Assembler::vpclmulqdq(dst, nds, src, 0x01); 1513 } 1514 1515 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 1516 // 0x00 - multiply lower 64 bits [0:63] 1517 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len); 1518 } 1519 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 1520 // 0x11 - multiply upper 64 bits [64:127] 1521 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len); 1522 } 1523 1524 // Data 1525 1526 void cmov32( Condition cc, Register dst, Address src); 1527 void cmov32( Condition cc, Register dst, Register src); 1528 1529 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } 1530 1531 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } 1532 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } 1533 1534 void movoop(Register dst, jobject obj); |