src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File g1-bulk-zeroing-reduction Sdiff src/cpu/x86/vm

src/cpu/x86/vm/stubGenerator_x86_64.cpp

Print this page




1752 
1753   // Arguments:
1754   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1755   //             ignored
1756   //   is_oop  - true => oop array, so generate store check code
1757   //   name    - stub name string
1758   //
1759   // Inputs:
1760   //   c_rarg0   - source array address
1761   //   c_rarg1   - destination array address
1762   //   c_rarg2   - element count, treated as ssize_t, can be zero
1763   //
1764   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1765   // the hardware handle it.  The two dwords within qwords that span
1766   // cache line boundaries will still be loaded and stored atomically.
1767   //
1768   // Side Effects:
1769   //   disjoint_int_copy_entry is set to the no-overlap entry point
1770   //   used by generate_conjoint_int_oop_copy().
1771   //
1772   address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name) {

1773     __ align(CodeEntryAlignment);
1774     StubCodeMark mark(this, "StubRoutines", name);
1775     address start = __ pc();
1776 
1777     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1778     const Register from        = rdi;  // source array address
1779     const Register to          = rsi;  // destination array address
1780     const Register count       = rdx;  // elements count
1781     const Register dword_count = rcx;
1782     const Register qword_count = count;
1783     const Register end_from    = from; // source array end address
1784     const Register end_to      = to;   // destination array end address
1785     const Register saved_to    = r11;  // saved destination array address
1786     // End pointers are inclusive, and if count is not zero they point
1787     // to the last unit copied:  end_to[0] := end_from[0]
1788 
1789     __ enter(); // required for proper stackwalking of RuntimeStub frame
1790     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1791 
1792     if (entry != NULL) {
1793       *entry = __ pc();
1794       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1795       BLOCK_COMMENT("Entry:");
1796     }
1797 
1798     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1799                       // r9 and r10 may be used to save non-volatile registers
1800     if (is_oop) {
1801       __ movq(saved_to, to);

1802       gen_write_ref_array_pre_barrier(to, count);
1803     }

1804 
1805     // 'from', 'to' and 'count' are now valid
1806     __ movptr(dword_count, count);
1807     __ shrptr(count, 1); // count => qword_count
1808 
1809     // Copy from low to high addresses.  Use 'to' as scratch.
1810     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1811     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1812     __ negptr(qword_count);
1813     __ jmp(L_copy_32_bytes);
1814 
1815     // Copy trailing qwords
1816   __ BIND(L_copy_8_bytes);
1817     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1818     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1819     __ increment(qword_count);
1820     __ jcc(Assembler::notZero, L_copy_8_bytes);
1821 
1822     // Check for and copy trailing dword
1823   __ BIND(L_copy_4_bytes);


1843 
1844     return start;
1845   }
1846 
1847   // Arguments:
1848   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1849   //             ignored
1850   //   is_oop  - true => oop array, so generate store check code
1851   //   name    - stub name string
1852   //
1853   // Inputs:
1854   //   c_rarg0   - source array address
1855   //   c_rarg1   - destination array address
1856   //   c_rarg2   - element count, treated as ssize_t, can be zero
1857   //
1858   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1859   // the hardware handle it.  The two dwords within qwords that span
1860   // cache line boundaries will still be loaded and stored atomically.
1861   //
1862   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1863                                          address *entry, const char *name) {

1864     __ align(CodeEntryAlignment);
1865     StubCodeMark mark(this, "StubRoutines", name);
1866     address start = __ pc();
1867 
1868     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1869     const Register from        = rdi;  // source array address
1870     const Register to          = rsi;  // destination array address
1871     const Register count       = rdx;  // elements count
1872     const Register dword_count = rcx;
1873     const Register qword_count = count;
1874 
1875     __ enter(); // required for proper stackwalking of RuntimeStub frame
1876     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1877 
1878     if (entry != NULL) {
1879       *entry = __ pc();
1880        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1881       BLOCK_COMMENT("Entry:");
1882     }
1883 
1884     array_overlap_test(nooverlap_target, Address::times_4);
1885     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1886                       // r9 and r10 may be used to save non-volatile registers
1887 
1888     if (is_oop) {
1889       // no registers are destroyed by this call
1890       gen_write_ref_array_pre_barrier(to, count);
1891     }
1892 
1893     assert_clean_int(count, rax); // Make sure 'count' is clean int.
1894     // 'from', 'to' and 'count' are now valid
1895     __ movptr(dword_count, count);
1896     __ shrptr(count, 1); // count => qword_count
1897 
1898     // Copy from high to low addresses.  Use 'to' as scratch.
1899 
1900     // Check for and copy trailing dword
1901     __ testl(dword_count, 1);
1902     __ jcc(Assembler::zero, L_copy_32_bytes);
1903     __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1904     __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1905     __ jmp(L_copy_32_bytes);
1906 
1907     // Copy trailing qwords
1908   __ BIND(L_copy_8_bytes);


1936     __ ret(0);
1937 
1938     return start;
1939   }
1940 
1941   // Arguments:
1942   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1943   //             ignored
1944   //   is_oop  - true => oop array, so generate store check code
1945   //   name    - stub name string
1946   //
1947   // Inputs:
1948   //   c_rarg0   - source array address
1949   //   c_rarg1   - destination array address
1950   //   c_rarg2   - element count, treated as ssize_t, can be zero
1951   //
1952   // Side Effects:
1953   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1954   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
1955   //
1956   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, const char *name) {

1957     __ align(CodeEntryAlignment);
1958     StubCodeMark mark(this, "StubRoutines", name);
1959     address start = __ pc();
1960 
1961     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1962     const Register from        = rdi;  // source array address
1963     const Register to          = rsi;  // destination array address
1964     const Register qword_count = rdx;  // elements count
1965     const Register end_from    = from; // source array end address
1966     const Register end_to      = rcx;  // destination array end address
1967     const Register saved_to    = to;
1968     // End pointers are inclusive, and if count is not zero they point
1969     // to the last unit copied:  end_to[0] := end_from[0]
1970 
1971     __ enter(); // required for proper stackwalking of RuntimeStub frame
1972     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
1973     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1974 
1975     if (entry != NULL) {
1976       *entry = __ pc();
1977       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1978       BLOCK_COMMENT("Entry:");
1979     }
1980 
1981     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1982                       // r9 and r10 may be used to save non-volatile registers
1983     // 'from', 'to' and 'qword_count' are now valid
1984     if (is_oop) {
1985       // no registers are destroyed by this call
1986       gen_write_ref_array_pre_barrier(to, qword_count);
1987     }
1988 
1989     // Copy from low to high addresses.  Use 'to' as scratch.
1990     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1991     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1992     __ negptr(qword_count);
1993     __ jmp(L_copy_32_bytes);
1994 
1995     // Copy trailing qwords
1996   __ BIND(L_copy_8_bytes);
1997     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1998     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1999     __ increment(qword_count);
2000     __ jcc(Assembler::notZero, L_copy_8_bytes);
2001 
2002     if (is_oop) {
2003       __ jmp(L_exit);
2004     } else {


2021     }
2022     restore_arg_regs();
2023     __ xorptr(rax, rax); // return 0
2024     __ leave(); // required for proper stackwalking of RuntimeStub frame
2025     __ ret(0);
2026 
2027     return start;
2028   }
2029 
2030   // Arguments:
2031   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2032   //             ignored
2033   //   is_oop  - true => oop array, so generate store check code
2034   //   name    - stub name string
2035   //
2036   // Inputs:
2037   //   c_rarg0   - source array address
2038   //   c_rarg1   - destination array address
2039   //   c_rarg2   - element count, treated as ssize_t, can be zero
2040   //
2041   address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
2042                                           address *entry, const char *name) {

2043     __ align(CodeEntryAlignment);
2044     StubCodeMark mark(this, "StubRoutines", name);
2045     address start = __ pc();
2046 
2047     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2048     const Register from        = rdi;  // source array address
2049     const Register to          = rsi;  // destination array address
2050     const Register qword_count = rdx;  // elements count
2051     const Register saved_count = rcx;
2052 
2053     __ enter(); // required for proper stackwalking of RuntimeStub frame
2054     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2055 
2056     if (entry != NULL) {
2057       *entry = __ pc();
2058       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2059       BLOCK_COMMENT("Entry:");
2060     }
2061 
2062     array_overlap_test(nooverlap_target, Address::times_8);
2063     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2064                       // r9 and r10 may be used to save non-volatile registers
2065     // 'from', 'to' and 'qword_count' are now valid
2066     if (is_oop) {
2067       // Save to and count for store barrier
2068       __ movptr(saved_count, qword_count);

2069       // No registers are destroyed by this call
2070       gen_write_ref_array_pre_barrier(to, saved_count);
2071     }

2072 
2073     __ jmp(L_copy_32_bytes);
2074 
2075     // Copy trailing qwords
2076   __ BIND(L_copy_8_bytes);
2077     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2078     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2079     __ decrement(qword_count);
2080     __ jcc(Assembler::notZero, L_copy_8_bytes);
2081 
2082     if (is_oop) {
2083       __ jmp(L_exit);
2084     } else {
2085       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2086       restore_arg_regs();
2087       __ xorptr(rax, rax); // return 0
2088       __ leave(); // required for proper stackwalking of RuntimeStub frame
2089       __ ret(0);
2090     }
2091 


2129     __ BIND(L_miss);
2130   }
2131 
2132   //
2133   //  Generate checkcasting array copy stub
2134   //
2135   //  Input:
2136   //    c_rarg0   - source array address
2137   //    c_rarg1   - destination array address
2138   //    c_rarg2   - element count, treated as ssize_t, can be zero
2139   //    c_rarg3   - size_t ckoff (super_check_offset)
2140   // not Win64
2141   //    c_rarg4   - oop ckval (super_klass)
2142   // Win64
2143   //    rsp+40    - oop ckval (super_klass)
2144   //
2145   //  Output:
2146   //    rax ==  0  -  success
2147   //    rax == -1^K - failure, where K is partial transfer count
2148   //
2149   address generate_checkcast_copy(const char *name, address *entry) {
2150 
2151     Label L_load_element, L_store_element, L_do_card_marks, L_done;
2152 
2153     // Input registers (after setup_arg_regs)
2154     const Register from        = rdi;   // source array address
2155     const Register to          = rsi;   // destination array address
2156     const Register length      = rdx;   // elements count
2157     const Register ckoff       = rcx;   // super_check_offset
2158     const Register ckval       = r8;    // super_klass
2159 
2160     // Registers used as temps (r13, r14 are save-on-entry)
2161     const Register end_from    = from;  // source array end address
2162     const Register end_to      = r13;   // destination array end address
2163     const Register count       = rdx;   // -(count_remaining)
2164     const Register r14_length  = r14;   // saved copy of length
2165     // End pointers are inclusive, and if length is not zero they point
2166     // to the last unit copied:  end_to[0] := end_from[0]
2167 
2168     const Register rax_oop    = rax;    // actual oop copied
2169     const Register r11_klass  = r11;    // oop._klass


2223     BLOCK_COMMENT("assert consistent ckoff/ckval");
2224     // The ckoff and ckval must be mutually consistent,
2225     // even though caller generates both.
2226     { Label L;
2227       int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2228                         Klass::super_check_offset_offset_in_bytes());
2229       __ cmpl(ckoff, Address(ckval, sco_offset));
2230       __ jcc(Assembler::equal, L);
2231       __ stop("super_check_offset inconsistent");
2232       __ bind(L);
2233     }
2234 #endif //ASSERT
2235 
2236     // Loop-invariant addresses.  They are exclusive end pointers.
2237     Address end_from_addr(from, length, TIMES_OOP, 0);
2238     Address   end_to_addr(to,   length, TIMES_OOP, 0);
2239     // Loop-variant addresses.  They assume post-incremented count < 0.
2240     Address from_element_addr(end_from, count, TIMES_OOP, 0);
2241     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
2242 

2243     gen_write_ref_array_pre_barrier(to, count);

2244 
2245     // Copy from low to high addresses, indexed from the end of each array.
2246     __ lea(end_from, end_from_addr);
2247     __ lea(end_to,   end_to_addr);
2248     __ movptr(r14_length, length);        // save a copy of the length
2249     assert(length == count, "");          // else fix next line:
2250     __ negptr(count);                     // negate and test the length
2251     __ jcc(Assembler::notZero, L_load_element);
2252 
2253     // Empty array:  Nothing to do.
2254     __ xorptr(rax, rax);                  // return 0 on (trivial) success
2255     __ jmp(L_done);
2256 
2257     // ======== begin loop ========
2258     // (Loop is rotated; its entry is L_load_element.)
2259     // Loop control:
2260     //   for (count = -count; count != 0; count++)
2261     // Base pointers src, dst are biased by 8*(count-1),to last element.
2262     __ align(OptoLoopAlignment);
2263 


2733                                                                             "jshort_disjoint_arraycopy");
2734     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
2735                                                                             "jshort_arraycopy");
2736 
2737     StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, &entry,
2738                                                                               "jint_disjoint_arraycopy");
2739     StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, entry,
2740                                                                               &entry_jint_arraycopy, "jint_arraycopy");
2741 
2742     StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, &entry,
2743                                                                                "jlong_disjoint_arraycopy");
2744     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
2745                                                                                &entry_jlong_arraycopy, "jlong_arraycopy");
2746 
2747 
2748     if (UseCompressedOops) {
2749       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
2750                                                                               "oop_disjoint_arraycopy");
2751       StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
2752                                                                               &entry_oop_arraycopy, "oop_arraycopy");




2753     } else {
2754       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
2755                                                                                "oop_disjoint_arraycopy");
2756       StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
2757                                                                                &entry_oop_arraycopy, "oop_arraycopy");




2758     }
2759 
2760     StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);


2761     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
2762                                                               entry_jbyte_arraycopy,
2763                                                               entry_jshort_arraycopy,
2764                                                               entry_jint_arraycopy,
2765                                                               entry_jlong_arraycopy);
2766     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
2767                                                                entry_jbyte_arraycopy,
2768                                                                entry_jshort_arraycopy,
2769                                                                entry_jint_arraycopy,
2770                                                                entry_oop_arraycopy,
2771                                                                entry_jlong_arraycopy,
2772                                                                entry_checkcast_arraycopy);
2773 
2774     StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
2775     StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
2776     StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
2777     StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
2778     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2779     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2780 
2781     // We don't generate specialized code for HeapWord-aligned source
2782     // arrays, so just use the code we've already generated
2783     StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = StubRoutines::_jbyte_disjoint_arraycopy;
2784     StubRoutines::_arrayof_jbyte_arraycopy           = StubRoutines::_jbyte_arraycopy;
2785 
2786     StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
2787     StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;
2788 
2789     StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
2790     StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;
2791 
2792     StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
2793     StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;
2794 
2795     StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
2796     StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;



2797   }
2798 
2799   void generate_math_stubs() {
2800     {
2801       StubCodeMark mark(this, "StubRoutines", "log");
2802       StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2803 
2804       __ subq(rsp, 8);
2805       __ movdbl(Address(rsp, 0), xmm0);
2806       __ fld_d(Address(rsp, 0));
2807       __ flog();
2808       __ fstp_d(Address(rsp, 0));
2809       __ movdbl(xmm0, Address(rsp, 0));
2810       __ addq(rsp, 8);
2811       __ ret(0);
2812     }
2813     {
2814       StubCodeMark mark(this, "StubRoutines", "log10");
2815       StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
2816 




1752 
1753   // Arguments:
1754   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1755   //             ignored
1756   //   is_oop  - true => oop array, so generate store check code
1757   //   name    - stub name string
1758   //
1759   // Inputs:
1760   //   c_rarg0   - source array address
1761   //   c_rarg1   - destination array address
1762   //   c_rarg2   - element count, treated as ssize_t, can be zero
1763   //
1764   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1765   // the hardware handle it.  The two dwords within qwords that span
1766   // cache line boundaries will still be loaded and stored atomically.
1767   //
1768   // Side Effects:
1769   //   disjoint_int_copy_entry is set to the no-overlap entry point
1770   //   used by generate_conjoint_int_oop_copy().
1771   //
1772   address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
1773                                          const char *name, bool need_pre_barrier = true) {
1774     __ align(CodeEntryAlignment);
1775     StubCodeMark mark(this, "StubRoutines", name);
1776     address start = __ pc();
1777 
1778     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1779     const Register from        = rdi;  // source array address
1780     const Register to          = rsi;  // destination array address
1781     const Register count       = rdx;  // elements count
1782     const Register dword_count = rcx;
1783     const Register qword_count = count;
1784     const Register end_from    = from; // source array end address
1785     const Register end_to      = to;   // destination array end address
1786     const Register saved_to    = r11;  // saved destination array address
1787     // End pointers are inclusive, and if count is not zero they point
1788     // to the last unit copied:  end_to[0] := end_from[0]
1789 
1790     __ enter(); // required for proper stackwalking of RuntimeStub frame
1791     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1792 
1793     if (entry != NULL) {
1794       *entry = __ pc();
1795       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1796       BLOCK_COMMENT("Entry:");
1797     }
1798 
1799     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1800                       // r9 and r10 may be used to save non-volatile registers
1801     if (is_oop) {
1802       __ movq(saved_to, to);
1803       if (need_pre_barrier) {
1804         gen_write_ref_array_pre_barrier(to, count);
1805       }
1806     }
1807 
1808     // 'from', 'to' and 'count' are now valid
1809     __ movptr(dword_count, count);
1810     __ shrptr(count, 1); // count => qword_count
1811 
1812     // Copy from low to high addresses.  Use 'to' as scratch.
1813     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1814     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1815     __ negptr(qword_count);
1816     __ jmp(L_copy_32_bytes);
1817 
1818     // Copy trailing qwords
1819   __ BIND(L_copy_8_bytes);
1820     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1821     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1822     __ increment(qword_count);
1823     __ jcc(Assembler::notZero, L_copy_8_bytes);
1824 
1825     // Check for and copy trailing dword
1826   __ BIND(L_copy_4_bytes);


1846 
1847     return start;
1848   }
1849 
1850   // Arguments:
1851   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1852   //             ignored
1853   //   is_oop  - true => oop array, so generate store check code
1854   //   name    - stub name string
1855   //
1856   // Inputs:
1857   //   c_rarg0   - source array address
1858   //   c_rarg1   - destination array address
1859   //   c_rarg2   - element count, treated as ssize_t, can be zero
1860   //
1861   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1862   // the hardware handle it.  The two dwords within qwords that span
1863   // cache line boundaries will still be loaded and stored atomically.
1864   //
1865   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1866                                          address *entry, const char *name,
1867                                          bool need_pre_barrier = true) {
1868     __ align(CodeEntryAlignment);
1869     StubCodeMark mark(this, "StubRoutines", name);
1870     address start = __ pc();
1871 
1872     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1873     const Register from        = rdi;  // source array address
1874     const Register to          = rsi;  // destination array address
1875     const Register count       = rdx;  // elements count
1876     const Register dword_count = rcx;
1877     const Register qword_count = count;
1878 
1879     __ enter(); // required for proper stackwalking of RuntimeStub frame
1880     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1881 
1882     if (entry != NULL) {
1883       *entry = __ pc();
1884        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1885       BLOCK_COMMENT("Entry:");
1886     }
1887 
1888     array_overlap_test(nooverlap_target, Address::times_4);
1889     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1890                       // r9 and r10 may be used to save non-volatile registers
1891 
1892     if (is_oop && need_pre_barrier) {
1893       // no registers are destroyed by this call
1894       gen_write_ref_array_pre_barrier(to, count);
1895     }
1896 
1897     assert_clean_int(count, rax); // Make sure 'count' is clean int.
1898     // 'from', 'to' and 'count' are now valid
1899     __ movptr(dword_count, count);
1900     __ shrptr(count, 1); // count => qword_count
1901 
1902     // Copy from high to low addresses.  Use 'to' as scratch.
1903 
1904     // Check for and copy trailing dword
1905     __ testl(dword_count, 1);
1906     __ jcc(Assembler::zero, L_copy_32_bytes);
1907     __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1908     __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1909     __ jmp(L_copy_32_bytes);
1910 
1911     // Copy trailing qwords
1912   __ BIND(L_copy_8_bytes);


1940     __ ret(0);
1941 
1942     return start;
1943   }
1944 
1945   // Arguments:
1946   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1947   //             ignored
1948   //   is_oop  - true => oop array, so generate store check code
1949   //   name    - stub name string
1950   //
1951   // Inputs:
1952   //   c_rarg0   - source array address
1953   //   c_rarg1   - destination array address
1954   //   c_rarg2   - element count, treated as ssize_t, can be zero
1955   //
1956   // Side Effects:
1957   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1958   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
1959   //
1960   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
1961                                           const char *name, bool need_pre_barrier = true) {
1962     __ align(CodeEntryAlignment);
1963     StubCodeMark mark(this, "StubRoutines", name);
1964     address start = __ pc();
1965 
1966     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1967     const Register from        = rdi;  // source array address
1968     const Register to          = rsi;  // destination array address
1969     const Register qword_count = rdx;  // elements count
1970     const Register end_from    = from; // source array end address
1971     const Register end_to      = rcx;  // destination array end address
1972     const Register saved_to    = to;
1973     // End pointers are inclusive, and if count is not zero they point
1974     // to the last unit copied:  end_to[0] := end_from[0]
1975 
1976     __ enter(); // required for proper stackwalking of RuntimeStub frame
1977     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
1978     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1979 
1980     if (entry != NULL) {
1981       *entry = __ pc();
1982       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1983       BLOCK_COMMENT("Entry:");
1984     }
1985 
1986     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1987                       // r9 and r10 may be used to save non-volatile registers
1988     // 'from', 'to' and 'qword_count' are now valid
1989     if (is_oop && need_pre_barrier) {
1990       // no registers are destroyed by this call
1991       gen_write_ref_array_pre_barrier(to, qword_count);
1992     }
1993 
1994     // Copy from low to high addresses.  Use 'to' as scratch.
1995     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1996     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1997     __ negptr(qword_count);
1998     __ jmp(L_copy_32_bytes);
1999 
2000     // Copy trailing qwords
2001   __ BIND(L_copy_8_bytes);
2002     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2003     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2004     __ increment(qword_count);
2005     __ jcc(Assembler::notZero, L_copy_8_bytes);
2006 
2007     if (is_oop) {
2008       __ jmp(L_exit);
2009     } else {


2026     }
2027     restore_arg_regs();
2028     __ xorptr(rax, rax); // return 0
2029     __ leave(); // required for proper stackwalking of RuntimeStub frame
2030     __ ret(0);
2031 
2032     return start;
2033   }
2034 
2035   // Arguments:
2036   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2037   //             ignored
2038   //   is_oop  - true => oop array, so generate store check code
2039   //   name    - stub name string
2040   //
2041   // Inputs:
2042   //   c_rarg0   - source array address
2043   //   c_rarg1   - destination array address
2044   //   c_rarg2   - element count, treated as ssize_t, can be zero
2045   //
2046   address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
2047                                           address nooverlap_target, address *entry,
2048                                           const char *name, bool need_pre_barrier = true) {
2049     __ align(CodeEntryAlignment);
2050     StubCodeMark mark(this, "StubRoutines", name);
2051     address start = __ pc();
2052 
2053     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2054     const Register from        = rdi;  // source array address
2055     const Register to          = rsi;  // destination array address
2056     const Register qword_count = rdx;  // elements count
2057     const Register saved_count = rcx;
2058 
2059     __ enter(); // required for proper stackwalking of RuntimeStub frame
2060     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2061 
2062     if (entry != NULL) {
2063       *entry = __ pc();
2064       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2065       BLOCK_COMMENT("Entry:");
2066     }
2067 
2068     array_overlap_test(nooverlap_target, Address::times_8);
2069     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2070                       // r9 and r10 may be used to save non-volatile registers
2071     // 'from', 'to' and 'qword_count' are now valid
2072     if (is_oop) {
2073       // Save to and count for store barrier
2074       __ movptr(saved_count, qword_count);
2075       if (need_pre_barrier) {
2076         // No registers are destroyed by this call
2077         gen_write_ref_array_pre_barrier(to, saved_count);
2078       }
2079     }
2080 
2081     __ jmp(L_copy_32_bytes);
2082 
2083     // Copy trailing qwords
2084   __ BIND(L_copy_8_bytes);
2085     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2086     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2087     __ decrement(qword_count);
2088     __ jcc(Assembler::notZero, L_copy_8_bytes);
2089 
2090     if (is_oop) {
2091       __ jmp(L_exit);
2092     } else {
2093       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2094       restore_arg_regs();
2095       __ xorptr(rax, rax); // return 0
2096       __ leave(); // required for proper stackwalking of RuntimeStub frame
2097       __ ret(0);
2098     }
2099 


2137     __ BIND(L_miss);
2138   }
2139 
2140   //
2141   //  Generate checkcasting array copy stub
2142   //
2143   //  Input:
2144   //    c_rarg0   - source array address
2145   //    c_rarg1   - destination array address
2146   //    c_rarg2   - element count, treated as ssize_t, can be zero
2147   //    c_rarg3   - size_t ckoff (super_check_offset)
2148   // not Win64
2149   //    c_rarg4   - oop ckval (super_klass)
2150   // Win64
2151   //    rsp+40    - oop ckval (super_klass)
2152   //
2153   //  Output:
2154   //    rax ==  0  -  success
2155   //    rax == -1^K - failure, where K is partial transfer count
2156   //
2157   address generate_checkcast_copy(const char *name, address *entry, bool need_pre_barrier = true) {
2158 
2159     Label L_load_element, L_store_element, L_do_card_marks, L_done;
2160 
2161     // Input registers (after setup_arg_regs)
2162     const Register from        = rdi;   // source array address
2163     const Register to          = rsi;   // destination array address
2164     const Register length      = rdx;   // elements count
2165     const Register ckoff       = rcx;   // super_check_offset
2166     const Register ckval       = r8;    // super_klass
2167 
2168     // Registers used as temps (r13, r14 are save-on-entry)
2169     const Register end_from    = from;  // source array end address
2170     const Register end_to      = r13;   // destination array end address
2171     const Register count       = rdx;   // -(count_remaining)
2172     const Register r14_length  = r14;   // saved copy of length
2173     // End pointers are inclusive, and if length is not zero they point
2174     // to the last unit copied:  end_to[0] := end_from[0]
2175 
2176     const Register rax_oop    = rax;    // actual oop copied
2177     const Register r11_klass  = r11;    // oop._klass


2231     BLOCK_COMMENT("assert consistent ckoff/ckval");
2232     // The ckoff and ckval must be mutually consistent,
2233     // even though caller generates both.
2234     { Label L;
2235       int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2236                         Klass::super_check_offset_offset_in_bytes());
2237       __ cmpl(ckoff, Address(ckval, sco_offset));
2238       __ jcc(Assembler::equal, L);
2239       __ stop("super_check_offset inconsistent");
2240       __ bind(L);
2241     }
2242 #endif //ASSERT
2243 
2244     // Loop-invariant addresses.  They are exclusive end pointers.
2245     Address end_from_addr(from, length, TIMES_OOP, 0);
2246     Address   end_to_addr(to,   length, TIMES_OOP, 0);
2247     // Loop-variant addresses.  They assume post-incremented count < 0.
2248     Address from_element_addr(end_from, count, TIMES_OOP, 0);
2249     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
2250 
2251     if (need_pre_barrier) {
2252       gen_write_ref_array_pre_barrier(to, count);
2253     }
2254 
2255     // Copy from low to high addresses, indexed from the end of each array.
2256     __ lea(end_from, end_from_addr);
2257     __ lea(end_to,   end_to_addr);
2258     __ movptr(r14_length, length);        // save a copy of the length
2259     assert(length == count, "");          // else fix next line:
2260     __ negptr(count);                     // negate and test the length
2261     __ jcc(Assembler::notZero, L_load_element);
2262 
2263     // Empty array:  Nothing to do.
2264     __ xorptr(rax, rax);                  // return 0 on (trivial) success
2265     __ jmp(L_done);
2266 
2267     // ======== begin loop ========
2268     // (Loop is rotated; its entry is L_load_element.)
2269     // Loop control:
2270     //   for (count = -count; count != 0; count++)
2271     // Base pointers src, dst are biased by 8*(count-1),to last element.
2272     __ align(OptoLoopAlignment);
2273 


2743                                                                             "jshort_disjoint_arraycopy");
2744     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
2745                                                                             "jshort_arraycopy");
2746 
2747     StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, &entry,
2748                                                                               "jint_disjoint_arraycopy");
2749     StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, entry,
2750                                                                               &entry_jint_arraycopy, "jint_arraycopy");
2751 
2752     StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, &entry,
2753                                                                                "jlong_disjoint_arraycopy");
2754     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
2755                                                                                &entry_jlong_arraycopy, "jlong_arraycopy");
2756 
2757 
2758     if (UseCompressedOops) {
2759       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
2760                                                                               "oop_disjoint_arraycopy");
2761       StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
2762                                                                               &entry_oop_arraycopy, "oop_arraycopy");
2763       StubRoutines::_oop_disjoint_arraycopy_no_pre  = generate_disjoint_int_oop_copy(false, true, &entry,
2764                                                                                      "oop_disjoint_arraycopy_no_pre", false);
2765       StubRoutines::_oop_arraycopy_no_pre           = generate_conjoint_int_oop_copy(false, true, entry,
2766                                                                                      NULL, "oop_arraycopy_no_pre", false);
2767     } else {
2768       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
2769                                                                                "oop_disjoint_arraycopy");
2770       StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
2771                                                                                &entry_oop_arraycopy, "oop_arraycopy");
2772       StubRoutines::_oop_disjoint_arraycopy_no_pre  = generate_disjoint_long_oop_copy(false, true, &entry,
2773                                                                                       "oop_disjoint_arraycopy_no_pre", false);
2774       StubRoutines::_oop_arraycopy_no_pre           = generate_conjoint_long_oop_copy(false, true, entry,
2775                                                                                       NULL, "oop_arraycopy_no_pre", false);
2776     }
2777 
2778     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
2779     StubRoutines::_checkcast_arraycopy_no_pre = generate_checkcast_copy("checkcast_arraycopy_no_pre", NULL, false);
2780 
2781     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
2782                                                               entry_jbyte_arraycopy,
2783                                                               entry_jshort_arraycopy,
2784                                                               entry_jint_arraycopy,
2785                                                               entry_jlong_arraycopy);
2786     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
2787                                                                entry_jbyte_arraycopy,
2788                                                                entry_jshort_arraycopy,
2789                                                                entry_jint_arraycopy,
2790                                                                entry_oop_arraycopy,
2791                                                                entry_jlong_arraycopy,
2792                                                                entry_checkcast_arraycopy);
2793 
2794     StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
2795     StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
2796     StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
2797     StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
2798     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2799     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2800 
2801     // We don't generate specialized code for HeapWord-aligned source
2802     // arrays, so just use the code we've already generated
2803     StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = StubRoutines::_jbyte_disjoint_arraycopy;
2804     StubRoutines::_arrayof_jbyte_arraycopy           = StubRoutines::_jbyte_arraycopy;
2805 
2806     StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
2807     StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;
2808 
2809     StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
2810     StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;
2811 
2812     StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
2813     StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;
2814 
2815     StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
2816     StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
2817 
2818     StubRoutines::_arrayof_oop_disjoint_arraycopy_no_pre    = StubRoutines::_oop_disjoint_arraycopy_no_pre;
2819     StubRoutines::_arrayof_oop_arraycopy_no_pre             = StubRoutines::_oop_arraycopy_no_pre;
2820   }
2821 
2822   void generate_math_stubs() {
2823     {
2824       StubCodeMark mark(this, "StubRoutines", "log");
2825       StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2826 
2827       __ subq(rsp, 8);
2828       __ movdbl(Address(rsp, 0), xmm0);
2829       __ fld_d(Address(rsp, 0));
2830       __ flog();
2831       __ fstp_d(Address(rsp, 0));
2832       __ movdbl(xmm0, Address(rsp, 0));
2833       __ addq(rsp, 8);
2834       __ ret(0);
2835     }
2836     {
2837       StubCodeMark mark(this, "StubRoutines", "log10");
2838       StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
2839 


src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File