< prev index next >

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Print this page
rev 51719 : [mq]: 8210676


1889   //             ignored
1890   //   is_oop  - true => oop array, so generate store check code
1891   //   name    - stub name string
1892   //
1893   // Inputs:
1894   //   c_rarg0   - source array address
1895   //   c_rarg1   - destination array address
1896   //   c_rarg2   - element count, treated as ssize_t, can be zero
1897   //
1898   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1899   // the hardware handle it.  The two dwords within qwords that span
1900   // cache line boundaries will still be loaded and stored atomically.
1901   //
1902   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1903                                          address *entry, const char *name,
1904                                          bool dest_uninitialized = false) {
1905     __ align(CodeEntryAlignment);
1906     StubCodeMark mark(this, "StubRoutines", name);
1907     address start = __ pc();
1908 
1909     Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1910     const Register from        = rdi;  // source array address
1911     const Register to          = rsi;  // destination array address
1912     const Register count       = rdx;  // elements count
1913     const Register dword_count = rcx;
1914     const Register qword_count = count;
1915 
1916     __ enter(); // required for proper stackwalking of RuntimeStub frame
1917     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1918 
1919     if (entry != NULL) {
1920       *entry = __ pc();
1921        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1922       BLOCK_COMMENT("Entry:");
1923     }
1924 
1925     array_overlap_test(nooverlap_target, Address::times_4);
1926     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1927                       // r9 and r10 may be used to save non-volatile registers
1928 
1929     DecoratorSet decorators = IN_HEAP | IS_ARRAY;


3831     const XMMRegister xmm_result2 = xmm7;
3832     const XMMRegister xmm_result3 = xmm8;
3833     const XMMRegister xmm_result4 = xmm9;
3834     const XMMRegister xmm_result5 = xmm10;
3835 
3836     const XMMRegister xmm_from0 = xmm11;
3837     const XMMRegister xmm_from1 = xmm12;
3838     const XMMRegister xmm_from2 = xmm13;
3839     const XMMRegister xmm_from3 = xmm14; //the last one is xmm14. we have to preserve it on WIN64.
3840     const XMMRegister xmm_from4 = xmm3; //reuse xmm3~4. Because xmm_key_tmp0~1 are useless when loading input text
3841     const XMMRegister xmm_from5 = xmm4;
3842 
3843     //for key_128, key_192, key_256
3844     const int rounds[3] = {10, 12, 14};
3845     Label L_exit_preLoop, L_preLoop_start;
3846     Label L_multiBlock_loopTop[3];
3847     Label L_singleBlockLoopTop[3];
3848     Label L__incCounter[3][6]; //for 6 blocks
3849     Label L__incCounter_single[3]; //for single block, key128, key192, key256
3850     Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
3851     Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3];
3852 
3853     Label L_exit;
3854 
3855     __ enter(); // required for proper stackwalking of RuntimeStub frame
3856 
3857     // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
3858     // context for the registers used, where all instructions below are using 128-bit mode
3859     // On EVEX without VL and BW, these instructions will all be AVX.
3860     if (VM_Version::supports_avx512vlbw()) {
3861         __ movl(rax, 0xffff);
3862         __ kmovql(k1, rax);
3863     }
3864 
3865 #ifdef _WIN64
3866     // allocate spill slots for r13, r14
3867     enum {
3868         saved_r13_offset,
3869         saved_r14_offset
3870     };
3871     __ subptr(rsp, 2 * wordSize);




1889   //             ignored
1890   //   is_oop  - true => oop array, so generate store check code
1891   //   name    - stub name string
1892   //
1893   // Inputs:
1894   //   c_rarg0   - source array address
1895   //   c_rarg1   - destination array address
1896   //   c_rarg2   - element count, treated as ssize_t, can be zero
1897   //
1898   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1899   // the hardware handle it.  The two dwords within qwords that span
1900   // cache line boundaries will still be loaded and stored atomically.
1901   //
1902   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1903                                          address *entry, const char *name,
1904                                          bool dest_uninitialized = false) {
1905     __ align(CodeEntryAlignment);
1906     StubCodeMark mark(this, "StubRoutines", name);
1907     address start = __ pc();
1908 
1909     Label L_copy_bytes, L_copy_8_bytes, L_exit;
1910     const Register from        = rdi;  // source array address
1911     const Register to          = rsi;  // destination array address
1912     const Register count       = rdx;  // elements count
1913     const Register dword_count = rcx;
1914     const Register qword_count = count;
1915 
1916     __ enter(); // required for proper stackwalking of RuntimeStub frame
1917     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1918 
1919     if (entry != NULL) {
1920       *entry = __ pc();
1921        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1922       BLOCK_COMMENT("Entry:");
1923     }
1924 
1925     array_overlap_test(nooverlap_target, Address::times_4);
1926     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1927                       // r9 and r10 may be used to save non-volatile registers
1928 
1929     DecoratorSet decorators = IN_HEAP | IS_ARRAY;


3831     const XMMRegister xmm_result2 = xmm7;
3832     const XMMRegister xmm_result3 = xmm8;
3833     const XMMRegister xmm_result4 = xmm9;
3834     const XMMRegister xmm_result5 = xmm10;
3835 
3836     const XMMRegister xmm_from0 = xmm11;
3837     const XMMRegister xmm_from1 = xmm12;
3838     const XMMRegister xmm_from2 = xmm13;
3839     const XMMRegister xmm_from3 = xmm14; //the last one is xmm14. we have to preserve it on WIN64.
3840     const XMMRegister xmm_from4 = xmm3; //reuse xmm3~4. Because xmm_key_tmp0~1 are useless when loading input text
3841     const XMMRegister xmm_from5 = xmm4;
3842 
3843     //for key_128, key_192, key_256
3844     const int rounds[3] = {10, 12, 14};
3845     Label L_exit_preLoop, L_preLoop_start;
3846     Label L_multiBlock_loopTop[3];
3847     Label L_singleBlockLoopTop[3];
3848     Label L__incCounter[3][6]; //for 6 blocks
3849     Label L__incCounter_single[3]; //for single block, key128, key192, key256
3850     Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
3851     Label L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3];
3852 
3853     Label L_exit;
3854 
3855     __ enter(); // required for proper stackwalking of RuntimeStub frame
3856 
3857     // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
3858     // context for the registers used, where all instructions below are using 128-bit mode
3859     // On EVEX without VL and BW, these instructions will all be AVX.
3860     if (VM_Version::supports_avx512vlbw()) {
3861         __ movl(rax, 0xffff);
3862         __ kmovql(k1, rax);
3863     }
3864 
3865 #ifdef _WIN64
3866     // allocate spill slots for r13, r14
3867     enum {
3868         saved_r13_offset,
3869         saved_r14_offset
3870     };
3871     __ subptr(rsp, 2 * wordSize);


< prev index next >