1143 __ mov(rcx, rax); // c_rarg3 (via rax)
1144 #else
1145 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1146 "unexpected argument registers");
1147 #endif
1148 }
1149
// Undo the register shuffling done by setup_arg_regs().
// On Win64 rdi/rsi are callee-saved; setup_arg_regs presumably parked the
// caller's values in r9/r10 (see the "r9 and r10 may be used to save
// non-volatile registers" comments at call sites) -- restore them here.
// On System V ABIs rdi/rsi are ordinary argument registers, so this emits nothing.
1150 void restore_arg_regs() {
1151 const Register saved_rdi = r9;
1152 const Register saved_rsi = r10;
1153 #ifdef _WIN64
1154 __ movptr(rdi, saved_rdi);
1155 __ movptr(rsi, saved_rsi);
1156 #endif
1157 }
1158
1159 // Generate code for an array write pre barrier
1160 //
1161 // addr - starting address
1162 // count - element count
1163 //
1164 // Destroy no registers!
1165 //
// Emits the G1 SATB pre-write-barrier runtime call for a region of an oop
// array about to be overwritten; for card-table style barrier sets no code
// is emitted. The "destroy no registers" contract is honored by bracketing
// the leaf call with pusha/popa.
1166 void gen_write_ref_array_pre_barrier(Register addr, Register count) {
1167 BarrierSet* bs = Universe::heap()->barrier_set();
1168 switch (bs->kind()) {
1169 case BarrierSet::G1SATBCT:
1170 case BarrierSet::G1SATBCTLogging:
1171 {
1172 __ pusha(); // push registers
// Move (addr, count) into the first two C argument registers without
// clobbering either source value before it has been read.
1173 if (count == c_rarg0) {
1174 if (addr == c_rarg1) {
1175 // exactly backwards!!
1176 __ xchgptr(c_rarg1, c_rarg0);
1177 } else {
// count currently occupies c_rarg0, so move it out of the way first.
1178 __ movptr(c_rarg1, count);
1179 __ movptr(c_rarg0, addr);
1180 }
1181 
1182 } else {
// No aliasing with c_rarg0; plain order is safe.
1183 __ movptr(c_rarg0, addr);
1184 __ movptr(c_rarg1, count);
1185 }
1186 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1187 __ popa();
1188 }
1189 break;
// Card-table barriers need no pre-barrier work, only post-barrier card marks.
1190 case BarrierSet::CardTableModRef:
1191 case BarrierSet::CardTableExtension:
1192 case BarrierSet::ModRef:
1193 break;
1194 default:
1195 ShouldNotReachHere();
1196 
1197 }
1198 }
1199
1200 //
1201 // Generate code for an array write post barrier
1752
1753 // Arguments:
1754 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1755 // ignored
1756 // is_oop - true => oop array, so generate store check code
1757 // name - stub name string
1758 //
1759 // Inputs:
1760 // c_rarg0 - source array address
1761 // c_rarg1 - destination array address
1762 // c_rarg2 - element count, treated as ssize_t, can be zero
1763 //
1764 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1765 // the hardware handle it. The two dwords within qwords that span
1766 // cache line boundaries will still be loaded and stored atomically.
1767 //
1768 // Side Effects:
1769 // disjoint_int_copy_entry is set to the no-overlap entry point
1770 // used by generate_conjoint_int_oop_copy().
1771 //
// Generates a forward (low-to-high) int/compressed-oop array copy stub.
1772 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name) {
1773 __ align(CodeEntryAlignment);
1774 StubCodeMark mark(this, "StubRoutines", name);
1775 address start = __ pc();
1776 
1777 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1778 const Register from = rdi; // source array address
1779 const Register to = rsi; // destination array address
1780 const Register count = rdx; // elements count
1781 const Register dword_count = rcx;
1782 const Register qword_count = count;
1783 const Register end_from = from; // source array end address
1784 const Register end_to = to; // destination array end address
1785 const Register saved_to = r11; // saved destination array address
1786 // End pointers are inclusive, and if count is not zero they point
1787 // to the last unit copied: end_to[0] := end_from[0]
1788 
1789 __ enter(); // required for proper stackwalking of RuntimeStub frame
1790 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1791 
1792 if (entry != NULL) {
1793 *entry = __ pc();
1794 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1795 BLOCK_COMMENT("Entry:");
1796 }
1797 
1798 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1799 // r9 and r10 may be used to save non-volatile registers
1800 if (is_oop) {
// Preserve the destination for the post-barrier; 'to' is reused as end_to below.
1801 __ movq(saved_to, to);
1802 gen_write_ref_array_pre_barrier(to, count);
1803 }
1804 
1805 // 'from', 'to' and 'count' are now valid
1806 __ movptr(dword_count, count);
1807 __ shrptr(count, 1); // count => qword_count
1808 
1809 // Copy from low to high addresses. Use 'to' as scratch.
1810 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1811 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
// Negative index counting up to zero lets the loop end with a simple jnz.
1812 __ negptr(qword_count);
1813 __ jmp(L_copy_32_bytes);
1814 
1815 // Copy trailing qwords
1816 __ BIND(L_copy_8_bytes);
1817 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1818 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1819 __ increment(qword_count);
1820 __ jcc(Assembler::notZero, L_copy_8_bytes);
1821 
1822 // Check for and copy trailing dword
// NOTE(review): original lines 1823-1842 are absent from this listing
// (presumably the L_copy_32_bytes bulk loop, trailing-dword copy and the
// is_oop exit/post-barrier path) -- confirm against the complete source.
1843 
1844 return start;
1845 }
1846
1847 // Arguments:
1848 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1849 // ignored
1850 // is_oop - true => oop array, so generate store check code
1851 // name - stub name string
1852 //
1853 // Inputs:
1854 // c_rarg0 - source array address
1855 // c_rarg1 - destination array address
1856 // c_rarg2 - element count, treated as ssize_t, can be zero
1857 //
1858 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1859 // the hardware handle it. The two dwords within qwords that span
1860 // cache line boundaries will still be loaded and stored atomically.
1861 //
// Generates the overlap-safe (high-to-low) int/compressed-oop copy; if the
// ranges do not overlap it tail-jumps to nooverlap_target instead.
1862 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1863 address *entry, const char *name) {
1864 __ align(CodeEntryAlignment);
1865 StubCodeMark mark(this, "StubRoutines", name);
1866 address start = __ pc();
1867 
1868 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1869 const Register from = rdi; // source array address
1870 const Register to = rsi; // destination array address
1871 const Register count = rdx; // elements count
1872 const Register dword_count = rcx;
1873 const Register qword_count = count;
1874 
1875 __ enter(); // required for proper stackwalking of RuntimeStub frame
1876 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1877 
1878 if (entry != NULL) {
1879 *entry = __ pc();
1880 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1881 BLOCK_COMMENT("Entry:");
1882 }
1883 
// Dispatch to the forward-copy stub when source/destination do not overlap.
1884 array_overlap_test(nooverlap_target, Address::times_4);
1885 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1886 // r9 and r10 may be used to save non-volatile registers
1887 
1888 if (is_oop) {
1889 // no registers are destroyed by this call
1890 gen_write_ref_array_pre_barrier(to, count);
1891 }
1892 
1893 assert_clean_int(count, rax); // Make sure 'count' is clean int.
1894 // 'from', 'to' and 'count' are now valid
1895 __ movptr(dword_count, count);
1896 __ shrptr(count, 1); // count => qword_count
1897 
1898 // Copy from high to low addresses. Use 'to' as scratch.
1899 
// Odd element count: copy the topmost dword first so the rest is qword work.
1900 // Check for and copy trailing dword
1901 __ testl(dword_count, 1);
1902 __ jcc(Assembler::zero, L_copy_32_bytes);
1903 __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1904 __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1905 __ jmp(L_copy_32_bytes);
1906 
1907 // Copy trailing qwords
1908 __ BIND(L_copy_8_bytes);
1909 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1910 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
// NOTE(review): original lines 1911-1935 are absent from this listing
// (loop control, bulk copy loop and exit paths) -- confirm against the
// complete source; the __ ret(0) below lands mid-epilogue here.
1936 __ ret(0);
1937 
1938 return start;
1939 }
1940
1941 // Arguments:
1942 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1943 // ignored
1944 // is_oop - true => oop array, so generate store check code
1945 // name - stub name string
1946 //
1947 // Inputs:
1948 // c_rarg0 - source array address
1949 // c_rarg1 - destination array address
1950 // c_rarg2 - element count, treated as ssize_t, can be zero
1951 //
1952 // Side Effects:
1953 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1954 // no-overlap entry point used by generate_conjoint_long_oop_copy().
1955 //
// Generates a forward (low-to-high) jlong/uncompressed-oop array copy stub.
1956 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, const char *name) {
1957 __ align(CodeEntryAlignment);
1958 StubCodeMark mark(this, "StubRoutines", name);
1959 address start = __ pc();
1960 
1961 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1962 const Register from = rdi; // source array address
1963 const Register to = rsi; // destination array address
1964 const Register qword_count = rdx; // elements count
1965 const Register end_from = from; // source array end address
1966 const Register end_to = rcx; // destination array end address
1967 const Register saved_to = to;
1968 // End pointers are inclusive, and if count is not zero they point
1969 // to the last unit copied: end_to[0] := end_from[0]
1970 
1971 __ enter(); // required for proper stackwalking of RuntimeStub frame
1972 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
1973 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1974 
1975 if (entry != NULL) {
1976 *entry = __ pc();
1977 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1978 BLOCK_COMMENT("Entry:");
1979 }
1980 
1981 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1982 // r9 and r10 may be used to save non-volatile registers
1983 // 'from', 'to' and 'qword_count' are now valid
1984 if (is_oop) {
1985 // no registers are destroyed by this call
1986 gen_write_ref_array_pre_barrier(to, qword_count);
1987 }
1988 
1989 // Copy from low to high addresses. Use 'to' as scratch.
1990 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1991 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
// Negative index counting up to zero lets the loop end with a simple jnz.
1992 __ negptr(qword_count);
1993 __ jmp(L_copy_32_bytes);
1994 
1995 // Copy trailing qwords
1996 __ BIND(L_copy_8_bytes);
1997 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1998 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1999 __ increment(qword_count);
2000 __ jcc(Assembler::notZero, L_copy_8_bytes);
2001 
2002 if (is_oop) {
2003 __ jmp(L_exit);
2004 } else {
2005 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2006 restore_arg_regs();
// NOTE(review): original lines 2007-2020 are absent from this listing
// (presumably the non-oop epilogue, bulk loop and is_oop L_exit path) --
// confirm against the complete source.
2021 }
2022 restore_arg_regs();
2023 __ xorptr(rax, rax); // return 0
2024 __ leave(); // required for proper stackwalking of RuntimeStub frame
2025 __ ret(0);
2026 
2027 return start;
2028 }
2029
2030 // Arguments:
2031 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2032 // ignored
2033 // is_oop - true => oop array, so generate store check code
2034 // name - stub name string
2035 //
2036 // Inputs:
2037 // c_rarg0 - source array address
2038 // c_rarg1 - destination array address
2039 // c_rarg2 - element count, treated as ssize_t, can be zero
2040 //
// Generates the overlap-safe (high-to-low) jlong/uncompressed-oop copy;
// non-overlapping inputs tail-jump to nooverlap_target.
2041 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
2042 address *entry, const char *name) {
2043 __ align(CodeEntryAlignment);
2044 StubCodeMark mark(this, "StubRoutines", name);
2045 address start = __ pc();
2046 
2047 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2048 const Register from = rdi; // source array address
2049 const Register to = rsi; // destination array address
2050 const Register qword_count = rdx; // elements count
2051 const Register saved_count = rcx;
2052 
2053 __ enter(); // required for proper stackwalking of RuntimeStub frame
2054 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2055 
2056 if (entry != NULL) {
2057 *entry = __ pc();
2058 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2059 BLOCK_COMMENT("Entry:");
2060 }
2061 
2062 array_overlap_test(nooverlap_target, Address::times_8);
2063 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2064 // r9 and r10 may be used to save non-volatile registers
2065 // 'from', 'to' and 'qword_count' are now valid
2066 if (is_oop) {
2067 // Save to and count for store barrier
2068 __ movptr(saved_count, qword_count);
2069 // No registers are destroyed by this call
2070 gen_write_ref_array_pre_barrier(to, saved_count);
2071 }
2072 
2073 __ jmp(L_copy_32_bytes);
2074 
2075 // Copy trailing qwords
2076 __ BIND(L_copy_8_bytes);
2077 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2078 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2079 __ decrement(qword_count);
2080 __ jcc(Assembler::notZero, L_copy_8_bytes);
2081 
2082 if (is_oop) {
2083 __ jmp(L_exit);
2084 } else {
2085 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2086 restore_arg_regs();
2087 __ xorptr(rax, rax); // return 0
2088 __ leave(); // required for proper stackwalking of RuntimeStub frame
2089 __ ret(0);
2090 }
// NOTE(review): the remainder of this function (original lines 2091-2128:
// bulk loop, L_exit/post-barrier path and 'return start') is absent from
// this listing -- confirm against the complete source.
2129 __ BIND(L_miss);
2130 }
2131
2132 //
2133 // Generate checkcasting array copy stub
2134 //
2135 // Input:
2136 // c_rarg0 - source array address
2137 // c_rarg1 - destination array address
2138 // c_rarg2 - element count, treated as ssize_t, can be zero
2139 // c_rarg3 - size_t ckoff (super_check_offset)
2140 // not Win64
2141 // c_rarg4 - oop ckval (super_klass)
2142 // Win64
2143 // rsp+40 - oop ckval (super_klass)
2144 //
2145 // Output:
2146 // rax == 0 - success
2147 // rax == -1^K - failure, where K is partial transfer count
2148 //
// Copies oops one element at a time, type-checking each against ckval
// before storing, so a failing element stops the copy partway through.
2149 address generate_checkcast_copy(const char *name, address *entry) {
2150 
2151 Label L_load_element, L_store_element, L_do_card_marks, L_done;
2152 
2153 // Input registers (after setup_arg_regs)
2154 const Register from = rdi; // source array address
2155 const Register to = rsi; // destination array address
2156 const Register length = rdx; // elements count
2157 const Register ckoff = rcx; // super_check_offset
2158 const Register ckval = r8; // super_klass
2159 
2160 // Registers used as temps (r13, r14 are save-on-entry)
2161 const Register end_from = from; // source array end address
2162 const Register end_to = r13; // destination array end address
2163 const Register count = rdx; // -(count_remaining)
2164 const Register r14_length = r14; // saved copy of length
2165 // End pointers are inclusive, and if length is not zero they point
2166 // to the last unit copied: end_to[0] := end_from[0]
2167 
2168 const Register rax_oop = rax; // actual oop copied
2169 const Register r11_klass = r11; // oop._klass
// NOTE(review): original lines 2170-2222 are absent from this listing
// (presumably the prologue, setup_arg_regs/ckval load and the #ifdef ASSERT
// opening matching the #endif below) -- confirm against the complete source.
2223 BLOCK_COMMENT("assert consistent ckoff/ckval");
2224 // The ckoff and ckval must be mutually consistent,
2225 // even though caller generates both.
2226 { Label L;
2227 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2228 Klass::super_check_offset_offset_in_bytes());
2229 __ cmpl(ckoff, Address(ckval, sco_offset));
2230 __ jcc(Assembler::equal, L);
2231 __ stop("super_check_offset inconsistent");
2232 __ bind(L);
2233 }
2234 #endif //ASSERT
2235 
2236 // Loop-invariant addresses. They are exclusive end pointers.
2237 Address end_from_addr(from, length, TIMES_OOP, 0);
2238 Address end_to_addr(to, length, TIMES_OOP, 0);
2239 // Loop-variant addresses. They assume post-incremented count < 0.
2240 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2241 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2242 
2243 gen_write_ref_array_pre_barrier(to, count);
2244 
2245 // Copy from low to high addresses, indexed from the end of each array.
2246 __ lea(end_from, end_from_addr);
2247 __ lea(end_to, end_to_addr);
2248 __ movptr(r14_length, length); // save a copy of the length
2249 assert(length == count, ""); // else fix next line:
2250 __ negptr(count); // negate and test the length
2251 __ jcc(Assembler::notZero, L_load_element);
2252 
2253 // Empty array: Nothing to do.
2254 __ xorptr(rax, rax); // return 0 on (trivial) success
2255 __ jmp(L_done);
2256 
2257 // ======== begin loop ========
2258 // (Loop is rotated; its entry is L_load_element.)
2259 // Loop control:
2260 // for (count = -count; count != 0; count++)
2261 // Base pointers src, dst are biased by 8*(count-1),to last element.
2262 __ align(OptoLoopAlignment);
// NOTE(review): the loop body and epilogue that follow are absent from
// this listing -- confirm against the complete source.
2263 
2733 "jshort_disjoint_arraycopy");
2734 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
2735 "jshort_arraycopy");
2736
2737 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, &entry,
2738 "jint_disjoint_arraycopy");
2739 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, entry,
2740 &entry_jint_arraycopy, "jint_arraycopy");
2741
2742 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, &entry,
2743 "jlong_disjoint_arraycopy");
2744 StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, entry,
2745 &entry_jlong_arraycopy, "jlong_arraycopy");
2746
2747
2748 if (UseCompressedOops) {
2749 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry,
2750 "oop_disjoint_arraycopy");
2751 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry,
2752 &entry_oop_arraycopy, "oop_arraycopy");
2753 } else {
2754 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry,
2755 "oop_disjoint_arraycopy");
2756 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry,
2757 &entry_oop_arraycopy, "oop_arraycopy");
2758 }
2759
2760 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
2761 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
2762 entry_jbyte_arraycopy,
2763 entry_jshort_arraycopy,
2764 entry_jint_arraycopy,
2765 entry_jlong_arraycopy);
2766 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
2767 entry_jbyte_arraycopy,
2768 entry_jshort_arraycopy,
2769 entry_jint_arraycopy,
2770 entry_oop_arraycopy,
2771 entry_jlong_arraycopy,
2772 entry_checkcast_arraycopy);
2773
2774 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
2775 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
2776 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
2777 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
2778 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2779 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2780
2781 // We don't generate specialized code for HeapWord-aligned source
2782 // arrays, so just use the code we've already generated
2783 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
2784 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy;
2785
2786 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
2787 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy;
2788
2789 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
2790 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
2791
2792 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
2793 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
2794
2795 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
2796 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
2797 }
2798
2799 void generate_math_stubs() {
2800 {
2801 StubCodeMark mark(this, "StubRoutines", "log");
2802 StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2803
2804 __ subq(rsp, 8);
2805 __ movdbl(Address(rsp, 0), xmm0);
2806 __ fld_d(Address(rsp, 0));
2807 __ flog();
2808 __ fstp_d(Address(rsp, 0));
2809 __ movdbl(xmm0, Address(rsp, 0));
2810 __ addq(rsp, 8);
2811 __ ret(0);
2812 }
2813 {
2814 StubCodeMark mark(this, "StubRoutines", "log10");
2815 StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
2816
|
1143 __ mov(rcx, rax); // c_rarg3 (via rax)
1144 #else
1145 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1146 "unexpected argument registers");
1147 #endif
1148 }
1149
// Undo the register shuffling done by setup_arg_regs().
// On Win64 rdi/rsi are callee-saved; setup_arg_regs presumably parked the
// caller's values in r9/r10 (see the "r9 and r10 may be used to save
// non-volatile registers" comments at call sites) -- restore them here.
// On System V ABIs rdi/rsi are ordinary argument registers, so this emits nothing.
1150 void restore_arg_regs() {
1151 const Register saved_rdi = r9;
1152 const Register saved_rsi = r10;
1153 #ifdef _WIN64
1154 __ movptr(rdi, saved_rdi);
1155 __ movptr(rsi, saved_rsi);
1156 #endif
1157 }
1158
1159 // Generate code for an array write pre barrier
1160 //
1161 // addr - starting address
1162 // count - element count
1163 // dest_uninitialized - true => destination is statically known to hold no live oops, so the G1 pre-barrier call is elided
1164 //
1165 // Destroy no registers!
1166 //
// Emits the G1 SATB pre-write-barrier runtime call for a region of an oop
// array about to be overwritten; for card-table style barrier sets no code
// is emitted. The "destroy no registers" contract is honored by bracketing
// the leaf call with pusha/popa.
1167 void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
1168 BarrierSet* bs = Universe::heap()->barrier_set();
1169 switch (bs->kind()) {
1170 case BarrierSet::G1SATBCT:
1171 case BarrierSet::G1SATBCTLogging:
1172 // With G1, don't generate the call if we statically know that the target is uninitialized
1173 if (!dest_uninitialized) {
1174 __ pusha(); // push registers
// Move (addr, count) into the first two C argument registers without
// clobbering either source value before it has been read.
1175 if (count == c_rarg0) {
1176 if (addr == c_rarg1) {
1177 // exactly backwards!!
1178 __ xchgptr(c_rarg1, c_rarg0);
1179 } else {
// count currently occupies c_rarg0, so move it out of the way first.
1180 __ movptr(c_rarg1, count);
1181 __ movptr(c_rarg0, addr);
1182 }
1183 } else {
// No aliasing with c_rarg0; plain order is safe.
1184 __ movptr(c_rarg0, addr);
1185 __ movptr(c_rarg1, count);
1186 }
1187 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1188 __ popa();
1189 }
1190 break;
// Card-table barriers need no pre-barrier work, only post-barrier card marks.
1191 case BarrierSet::CardTableModRef:
1192 case BarrierSet::CardTableExtension:
1193 case BarrierSet::ModRef:
1194 break;
1195 default:
1196 ShouldNotReachHere();
1197 
1198 }
1199 }
1200
1201 //
1202 // Generate code for an array write post barrier
1753
1754 // Arguments:
1755 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1756 // ignored
1757 // is_oop - true => oop array, so generate store check code
1758 // name - stub name string
1759 //
1760 // Inputs:
1761 // c_rarg0 - source array address
1762 // c_rarg1 - destination array address
1763 // c_rarg2 - element count, treated as ssize_t, can be zero
1764 //
1765 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1766 // the hardware handle it. The two dwords within qwords that span
1767 // cache line boundaries will still be loaded and stored atomically.
1768 //
1769 // Side Effects:
1770 // disjoint_int_copy_entry is set to the no-overlap entry point
1771 // used by generate_conjoint_int_oop_copy().
1772 //
// Generates a forward (low-to-high) int/compressed-oop array copy stub.
// dest_uninitialized (default false) lets callers skip the G1 pre-barrier
// when the destination is freshly allocated.
1773 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
1774 const char *name, bool dest_uninitialized = false) {
1775 __ align(CodeEntryAlignment);
1776 StubCodeMark mark(this, "StubRoutines", name);
1777 address start = __ pc();
1778 
1779 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1780 const Register from = rdi; // source array address
1781 const Register to = rsi; // destination array address
1782 const Register count = rdx; // elements count
1783 const Register dword_count = rcx;
1784 const Register qword_count = count;
1785 const Register end_from = from; // source array end address
1786 const Register end_to = to; // destination array end address
1787 const Register saved_to = r11; // saved destination array address
1788 // End pointers are inclusive, and if count is not zero they point
1789 // to the last unit copied: end_to[0] := end_from[0]
1790 
1791 __ enter(); // required for proper stackwalking of RuntimeStub frame
1792 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1793 
1794 if (entry != NULL) {
1795 *entry = __ pc();
1796 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1797 BLOCK_COMMENT("Entry:");
1798 }
1799 
1800 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1801 // r9 and r10 may be used to save non-volatile registers
1802 if (is_oop) {
// Preserve the destination for the post-barrier; 'to' is reused as end_to below.
1803 __ movq(saved_to, to);
1804 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1805 }
1806 
1807 // 'from', 'to' and 'count' are now valid
1808 __ movptr(dword_count, count);
1809 __ shrptr(count, 1); // count => qword_count
1810 
1811 // Copy from low to high addresses. Use 'to' as scratch.
1812 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1813 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
// Negative index counting up to zero lets the loop end with a simple jnz.
1814 __ negptr(qword_count);
1815 __ jmp(L_copy_32_bytes);
1816 
1817 // Copy trailing qwords
1818 __ BIND(L_copy_8_bytes);
1819 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1820 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1821 __ increment(qword_count);
1822 __ jcc(Assembler::notZero, L_copy_8_bytes);
1823 
1824 // Check for and copy trailing dword
// NOTE(review): original lines 1825-1844 are absent from this listing
// (presumably the L_copy_32_bytes bulk loop, trailing-dword copy and the
// is_oop exit/post-barrier path) -- confirm against the complete source.
1845 
1846 return start;
1847 }
1848
1849 // Arguments:
1850 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1851 // ignored
1852 // is_oop - true => oop array, so generate store check code
1853 // name - stub name string
1854 //
1855 // Inputs:
1856 // c_rarg0 - source array address
1857 // c_rarg1 - destination array address
1858 // c_rarg2 - element count, treated as ssize_t, can be zero
1859 //
1860 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1861 // the hardware handle it. The two dwords within qwords that span
1862 // cache line boundaries will still be loaded and stored atomically.
1863 //
// Generates the overlap-safe (high-to-low) int/compressed-oop copy; if the
// ranges do not overlap it tail-jumps to nooverlap_target instead.
// dest_uninitialized (default false) lets callers skip the G1 pre-barrier.
1864 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1865 address *entry, const char *name,
1866 bool dest_uninitialized = false) {
1867 __ align(CodeEntryAlignment);
1868 StubCodeMark mark(this, "StubRoutines", name);
1869 address start = __ pc();
1870 
1871 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1872 const Register from = rdi; // source array address
1873 const Register to = rsi; // destination array address
1874 const Register count = rdx; // elements count
1875 const Register dword_count = rcx;
1876 const Register qword_count = count;
1877 
1878 __ enter(); // required for proper stackwalking of RuntimeStub frame
1879 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1880 
1881 if (entry != NULL) {
1882 *entry = __ pc();
1883 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1884 BLOCK_COMMENT("Entry:");
1885 }
1886 
// Dispatch to the forward-copy stub when source/destination do not overlap.
1887 array_overlap_test(nooverlap_target, Address::times_4);
1888 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1889 // r9 and r10 may be used to save non-volatile registers
1890 
1891 if (is_oop) {
1892 // no registers are destroyed by this call
1893 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1894 }
1895 
1896 assert_clean_int(count, rax); // Make sure 'count' is clean int.
1897 // 'from', 'to' and 'count' are now valid
1898 __ movptr(dword_count, count);
1899 __ shrptr(count, 1); // count => qword_count
1900 
1901 // Copy from high to low addresses. Use 'to' as scratch.
1902 
// Odd element count: copy the topmost dword first so the rest is qword work.
1903 // Check for and copy trailing dword
1904 __ testl(dword_count, 1);
1905 __ jcc(Assembler::zero, L_copy_32_bytes);
1906 __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1907 __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1908 __ jmp(L_copy_32_bytes);
1909 
1910 // Copy trailing qwords
1911 __ BIND(L_copy_8_bytes);
1912 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1913 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
// NOTE(review): original lines 1914-1938 are absent from this listing
// (loop control, bulk copy loop and exit paths) -- confirm against the
// complete source; the __ ret(0) below lands mid-epilogue here.
1939 __ ret(0);
1940 
1941 return start;
1942 }
1943
1944 // Arguments:
1945 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1946 // ignored
1947 // is_oop - true => oop array, so generate store check code
1948 // name - stub name string
1949 //
1950 // Inputs:
1951 // c_rarg0 - source array address
1952 // c_rarg1 - destination array address
1953 // c_rarg2 - element count, treated as ssize_t, can be zero
1954 //
1955 // Side Effects:
1956 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1957 // no-overlap entry point used by generate_conjoint_long_oop_copy().
1958 //
// Generates a forward (low-to-high) jlong/uncompressed-oop array copy stub.
// dest_uninitialized (default false) lets callers skip the G1 pre-barrier.
1959 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
1960 const char *name, bool dest_uninitialized = false) {
1961 __ align(CodeEntryAlignment);
1962 StubCodeMark mark(this, "StubRoutines", name);
1963 address start = __ pc();
1964 
1965 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1966 const Register from = rdi; // source array address
1967 const Register to = rsi; // destination array address
1968 const Register qword_count = rdx; // elements count
1969 const Register end_from = from; // source array end address
1970 const Register end_to = rcx; // destination array end address
1971 const Register saved_to = to;
1972 // End pointers are inclusive, and if count is not zero they point
1973 // to the last unit copied: end_to[0] := end_from[0]
1974 
1975 __ enter(); // required for proper stackwalking of RuntimeStub frame
1976 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
1977 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1978 
1979 if (entry != NULL) {
1980 *entry = __ pc();
1981 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1982 BLOCK_COMMENT("Entry:");
1983 }
1984 
1985 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1986 // r9 and r10 may be used to save non-volatile registers
1987 // 'from', 'to' and 'qword_count' are now valid
1988 if (is_oop) {
1989 // no registers are destroyed by this call
1990 gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
1991 }
1992 
1993 // Copy from low to high addresses. Use 'to' as scratch.
1994 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1995 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
// Negative index counting up to zero lets the loop end with a simple jnz.
1996 __ negptr(qword_count);
1997 __ jmp(L_copy_32_bytes);
1998 
1999 // Copy trailing qwords
2000 __ BIND(L_copy_8_bytes);
2001 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2002 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2003 __ increment(qword_count);
2004 __ jcc(Assembler::notZero, L_copy_8_bytes);
2005 
2006 if (is_oop) {
2007 __ jmp(L_exit);
2008 } else {
2009 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2010 restore_arg_regs();
// NOTE(review): original lines 2011-2024 are absent from this listing
// (presumably the non-oop epilogue, bulk loop and is_oop L_exit path) --
// confirm against the complete source.
2025 }
2026 restore_arg_regs();
2027 __ xorptr(rax, rax); // return 0
2028 __ leave(); // required for proper stackwalking of RuntimeStub frame
2029 __ ret(0);
2030 
2031 return start;
2032 }
2033
2034 // Arguments:
2035 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2036 // ignored
2037 // is_oop - true => oop array, so generate store check code
2038 // name - stub name string
2039 //
2040 // Inputs:
2041 // c_rarg0 - source array address
2042 // c_rarg1 - destination array address
2043 // c_rarg2 - element count, treated as ssize_t, can be zero
2044 //
// Generates the overlap-safe (high-to-low) jlong/uncompressed-oop copy;
// non-overlapping inputs tail-jump to nooverlap_target. dest_uninitialized
// (default false) lets callers skip the G1 pre-barrier.
2045 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
2046 address nooverlap_target, address *entry,
2047 const char *name, bool dest_uninitialized = false) {
2048 __ align(CodeEntryAlignment);
2049 StubCodeMark mark(this, "StubRoutines", name);
2050 address start = __ pc();
2051 
2052 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2053 const Register from = rdi; // source array address
2054 const Register to = rsi; // destination array address
2055 const Register qword_count = rdx; // elements count
2056 const Register saved_count = rcx;
2057 
2058 __ enter(); // required for proper stackwalking of RuntimeStub frame
2059 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2060 
2061 if (entry != NULL) {
2062 *entry = __ pc();
2063 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2064 BLOCK_COMMENT("Entry:");
2065 }
2066 
2067 array_overlap_test(nooverlap_target, Address::times_8);
2068 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2069 // r9 and r10 may be used to save non-volatile registers
2070 // 'from', 'to' and 'qword_count' are now valid
2071 if (is_oop) {
2072 // Save to and count for store barrier
2073 __ movptr(saved_count, qword_count);
2074 // No registers are destroyed by this call
2075 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
2076 }
2077 
2078 __ jmp(L_copy_32_bytes);
2079 
2080 // Copy trailing qwords
2081 __ BIND(L_copy_8_bytes);
2082 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2083 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2084 __ decrement(qword_count);
2085 __ jcc(Assembler::notZero, L_copy_8_bytes);
2086 
2087 if (is_oop) {
2088 __ jmp(L_exit);
2089 } else {
2090 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2091 restore_arg_regs();
2092 __ xorptr(rax, rax); // return 0
2093 __ leave(); // required for proper stackwalking of RuntimeStub frame
2094 __ ret(0);
2095 }
// NOTE(review): the remainder of this function (original lines 2096-2133:
// bulk loop, L_exit/post-barrier path and 'return start') is absent from
// this listing -- confirm against the complete source.
2134 __ BIND(L_miss);
2135 }
2136
2137 //
2138 // Generate checkcasting array copy stub
2139 //
2140 // Input:
2141 // c_rarg0 - source array address
2142 // c_rarg1 - destination array address
2143 // c_rarg2 - element count, treated as ssize_t, can be zero
2144 // c_rarg3 - size_t ckoff (super_check_offset)
 2145 //   not Win64:
 2146 //      c_rarg4   - oop ckval (super_klass)
 2147 //   Win64 (only four register arguments, so the fifth argument is passed on the stack):
 2148 //      rsp+40   - oop ckval (super_klass)
2149 //
2150 // Output:
2151 // rax == 0 - success
 2152 //   rax == -1^K - failure, where K is the partial transfer count (i.e. rax holds the bitwise complement of K)
2153 //
2154 address generate_checkcast_copy(const char *name, address *entry,
2155 bool dest_uninitialized = false) {
2156
2157 Label L_load_element, L_store_element, L_do_card_marks, L_done;
2158
2159 // Input registers (after setup_arg_regs)
2160 const Register from = rdi; // source array address
2161 const Register to = rsi; // destination array address
2162 const Register length = rdx; // elements count
2163 const Register ckoff = rcx; // super_check_offset
2164 const Register ckval = r8; // super_klass
2165
2166 // Registers used as temps (r13, r14 are save-on-entry)
2167 const Register end_from = from; // source array end address
2168 const Register end_to = r13; // destination array end address
2169 const Register count = rdx; // -(count_remaining)
2170 const Register r14_length = r14; // saved copy of length
2171 // End pointers are inclusive, and if length is not zero they point
2172 // to the last unit copied: end_to[0] := end_from[0]
2173
2174 const Register rax_oop = rax; // actual oop copied
2175 const Register r11_klass = r11; // oop._klass
2229 BLOCK_COMMENT("assert consistent ckoff/ckval");
2230 // The ckoff and ckval must be mutually consistent,
2231 // even though caller generates both.
2232 { Label L;
2233 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2234 Klass::super_check_offset_offset_in_bytes());
2235 __ cmpl(ckoff, Address(ckval, sco_offset));
2236 __ jcc(Assembler::equal, L);
2237 __ stop("super_check_offset inconsistent");
2238 __ bind(L);
2239 }
2240 #endif //ASSERT
2241
2242 // Loop-invariant addresses. They are exclusive end pointers.
2243 Address end_from_addr(from, length, TIMES_OOP, 0);
2244 Address end_to_addr(to, length, TIMES_OOP, 0);
2245 // Loop-variant addresses. They assume post-incremented count < 0.
2246 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2247 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2248
2249 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
2250
2251 // Copy from low to high addresses, indexed from the end of each array.
2252 __ lea(end_from, end_from_addr);
2253 __ lea(end_to, end_to_addr);
2254 __ movptr(r14_length, length); // save a copy of the length
2255 assert(length == count, ""); // else fix next line:
2256 __ negptr(count); // negate and test the length
2257 __ jcc(Assembler::notZero, L_load_element);
2258
2259 // Empty array: Nothing to do.
2260 __ xorptr(rax, rax); // return 0 on (trivial) success
2261 __ jmp(L_done);
2262
2263 // ======== begin loop ========
2264 // (Loop is rotated; its entry is L_load_element.)
2265 // Loop control:
2266 // for (count = -count; count != 0; count++)
2267 // Base pointers src, dst are biased by 8*(count-1),to last element.
2268 __ align(OptoLoopAlignment);
2269
2739 "jshort_disjoint_arraycopy");
2740 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
2741 "jshort_arraycopy");
2742
2743 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, &entry,
2744 "jint_disjoint_arraycopy");
2745 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, entry,
2746 &entry_jint_arraycopy, "jint_arraycopy");
2747
2748 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, &entry,
2749 "jlong_disjoint_arraycopy");
2750 StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, entry,
2751 &entry_jlong_arraycopy, "jlong_arraycopy");
2752
2753
2754 if (UseCompressedOops) {
2755 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry,
2756 "oop_disjoint_arraycopy");
2757 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry,
2758 &entry_oop_arraycopy, "oop_arraycopy");
2759 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry,
2760 "oop_disjoint_arraycopy_uninit",
2761 /*dest_uninitialized*/true);
2762 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, entry,
2763 NULL, "oop_arraycopy_uninit",
2764 /*dest_uninitialized*/true);
2765 } else {
2766 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry,
2767 "oop_disjoint_arraycopy");
2768 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry,
2769 &entry_oop_arraycopy, "oop_arraycopy");
2770 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry,
2771 "oop_disjoint_arraycopy_uninit",
2772 /*dest_uninitialized*/true);
2773 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, entry,
2774 NULL, "oop_arraycopy_uninit",
2775 /*dest_uninitialized*/true);
2776 }
2777
2778 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
2779 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
2780 /*dest_uninitialized*/true);
2781
2782 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
2783 entry_jbyte_arraycopy,
2784 entry_jshort_arraycopy,
2785 entry_jint_arraycopy,
2786 entry_jlong_arraycopy);
2787 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
2788 entry_jbyte_arraycopy,
2789 entry_jshort_arraycopy,
2790 entry_jint_arraycopy,
2791 entry_oop_arraycopy,
2792 entry_jlong_arraycopy,
2793 entry_checkcast_arraycopy);
2794
2795 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
2796 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
2797 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
2798 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
2799 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2800 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2801
2802 // We don't generate specialized code for HeapWord-aligned source
2803 // arrays, so just use the code we've already generated
2804 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
2805 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy;
2806
2807 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
2808 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy;
2809
2810 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
2811 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
2812
2813 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
2814 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
2815
2816 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
2817 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
2818
2819 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
2820 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
2821 }
2822
2823 void generate_math_stubs() {
2824 {
2825 StubCodeMark mark(this, "StubRoutines", "log");
2826 StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2827
2828 __ subq(rsp, 8);
2829 __ movdbl(Address(rsp, 0), xmm0);
2830 __ fld_d(Address(rsp, 0));
2831 __ flog();
2832 __ fstp_d(Address(rsp, 0));
2833 __ movdbl(xmm0, Address(rsp, 0));
2834 __ addq(rsp, 8);
2835 __ ret(0);
2836 }
2837 {
2838 StubCodeMark mark(this, "StubRoutines", "log10");
2839 StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
2840
|