src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File g1-bulk-zeroing-reduction Sdiff src/cpu/x86/vm

src/cpu/x86/vm/stubGenerator_x86_64.cpp

Print this page




1143       __ mov(rcx, rax); // c_rarg3 (via rax)
1144 #else
1145     assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1146            "unexpected argument registers");
1147 #endif
1148   }
1149 
1150   void restore_arg_regs() {
       // Undo the effect of setup_arg_regs().  On Win64 the incoming stub
       // arguments are moved into rdi/rsi (which are callee-saved in that
       // ABI), with the caller's rdi/rsi parked in r9/r10 — see the
       // "r9 and r10 may be used to save non-volatile registers" notes at
       // the call sites — so they must be restored here.  On the System V
       // ABI the arguments already arrive in rdi/rsi and nothing was
       // saved, so this is a no-op.
1151     const Register saved_rdi = r9;   // must stay in sync with setup_arg_regs()
1152     const Register saved_rsi = r10;  // must stay in sync with setup_arg_regs()
1153 #ifdef _WIN64
1154     __ movptr(rdi, saved_rdi);
1155     __ movptr(rsi, saved_rsi);
1156 #endif
1157   }
1158 
1159   // Generate code for an array write pre barrier
1160   //
1161   //     addr    -  starting address
1162   //     count   -  element count

1163   //
1164   //     Destroy no registers!
1165   //
1166   void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
       // Emit the GC pre-write barrier for an oop-array store of 'count'
       // elements starting at 'addr'.  Only the G1 SATB barrier sets need
       // work before the copy; card-table collectors use a post barrier
       // only.  Per the contract above, no caller-visible register may be
       // destroyed, hence the pusha/popa bracket around the runtime call.
1167     BarrierSet* bs = Universe::heap()->barrier_set();
1168     switch (bs->kind()) {
1169       case BarrierSet::G1SATBCT:
1170       case BarrierSet::G1SATBCTLogging:
1171         {

1172           __ pusha();                      // push registers
           // Marshal (addr, count) into c_rarg0/c_rarg1 for the C call.
           // The moves must be ordered carefully because 'addr' and/or
           // 'count' may already alias the argument registers.
1173           if (count == c_rarg0) {
1174             if (addr == c_rarg1) {
1175               // exactly backwards!!
1176               __ xchgptr(c_rarg1, c_rarg0);
1177             } else {
               // 'count' sits in c_rarg0: copy it out before writing
               // c_rarg0, or it would be clobbered.
1178               __ movptr(c_rarg1, count);
1179               __ movptr(c_rarg0, addr);
1180             }
1181 
1182           } else {
             // 'count' is not in c_rarg0, so c_rarg0 may be filled first
             // (if addr == c_rarg1 it is read before being overwritten).
1183             __ movptr(c_rarg0, addr);
1184             __ movptr(c_rarg1, count);
1185           }
1186           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1187           __ popa();
1188         }
1189         break;
1190       case BarrierSet::CardTableModRef:
1191       case BarrierSet::CardTableExtension:
1192       case BarrierSet::ModRef:
           // No pre-barrier required for these barrier sets.
1193         break;
1194       default:
1195         ShouldNotReachHere();
1196 
1197     }
1198   }
1199 
1200   //
1201   // Generate code for an array write post barrier


1752 
1753   // Arguments:
1754   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1755   //             ignored
1756   //   is_oop  - true => oop array, so generate store check code
1757   //   name    - stub name string
1758   //
1759   // Inputs:
1760   //   c_rarg0   - source array address
1761   //   c_rarg1   - destination array address
1762   //   c_rarg2   - element count, treated as ssize_t, can be zero
1763   //
1764   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1765   // the hardware handle it.  The two dwords within qwords that span
1766   // cache line boundaries will still be loaded and stored atomicly.
1767   //
1768   // Side Effects:
1769   //   disjoint_int_copy_entry is set to the no-overlap entry point
1770   //   used by generate_conjoint_int_oop_copy().
1771   //
1772   address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name) {

1773     __ align(CodeEntryAlignment);
1774     StubCodeMark mark(this, "StubRoutines", name);
1775     address start = __ pc();
1776 
1777     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1778     const Register from        = rdi;  // source array address
1779     const Register to          = rsi;  // destination array address
1780     const Register count       = rdx;  // elements count
1781     const Register dword_count = rcx;
1782     const Register qword_count = count;
1783     const Register end_from    = from; // source array end address
1784     const Register end_to      = to;   // destination array end address
1785     const Register saved_to    = r11;  // saved destination array address
1786     // End pointers are inclusive, and if count is not zero they point
1787     // to the last unit copied:  end_to[0] := end_from[0]
1788 
1789     __ enter(); // required for proper stackwalking of RuntimeStub frame
1790     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1791 
1792     if (entry != NULL) {
1793       *entry = __ pc();
1794       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1795       BLOCK_COMMENT("Entry:");
1796     }
1797 
1798     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1799                       // r9 and r10 may be used to save non-volatile registers
1800     if (is_oop) {
1801       __ movq(saved_to, to);
1802       gen_write_ref_array_pre_barrier(to, count);
1803     }
1804 
1805     // 'from', 'to' and 'count' are now valid
1806     __ movptr(dword_count, count);
1807     __ shrptr(count, 1); // count => qword_count
1808 
1809     // Copy from low to high addresses.  Use 'to' as scratch.
1810     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1811     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1812     __ negptr(qword_count);
1813     __ jmp(L_copy_32_bytes);
1814 
1815     // Copy trailing qwords
1816   __ BIND(L_copy_8_bytes);
1817     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1818     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1819     __ increment(qword_count);
1820     __ jcc(Assembler::notZero, L_copy_8_bytes);
1821 
1822     // Check for and copy trailing dword


1843 
1844     return start;
1845   }
1846 
1847   // Arguments:
1848   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1849   //             ignored
1850   //   is_oop  - true => oop array, so generate store check code
1851   //   name    - stub name string
1852   //
1853   // Inputs:
1854   //   c_rarg0   - source array address
1855   //   c_rarg1   - destination array address
1856   //   c_rarg2   - element count, treated as ssize_t, can be zero
1857   //
1858   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1859   // the hardware handle it.  The two dwords within qwords that span
1860   // cache line boundaries will still be loaded and stored atomicly.
1861   //
1862   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1863                                          address *entry, const char *name) {

1864     __ align(CodeEntryAlignment);
1865     StubCodeMark mark(this, "StubRoutines", name);
1866     address start = __ pc();
1867 
1868     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1869     const Register from        = rdi;  // source array address
1870     const Register to          = rsi;  // destination array address
1871     const Register count       = rdx;  // elements count
1872     const Register dword_count = rcx;
1873     const Register qword_count = count;
1874 
1875     __ enter(); // required for proper stackwalking of RuntimeStub frame
1876     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1877 
1878     if (entry != NULL) {
1879       *entry = __ pc();
1880        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1881       BLOCK_COMMENT("Entry:");
1882     }
1883 
1884     array_overlap_test(nooverlap_target, Address::times_4);
1885     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1886                       // r9 and r10 may be used to save non-volatile registers
1887 
1888     if (is_oop) {
1889       // no registers are destroyed by this call
1890       gen_write_ref_array_pre_barrier(to, count);
1891     }
1892 
1893     assert_clean_int(count, rax); // Make sure 'count' is clean int.
1894     // 'from', 'to' and 'count' are now valid
1895     __ movptr(dword_count, count);
1896     __ shrptr(count, 1); // count => qword_count
1897 
1898     // Copy from high to low addresses.  Use 'to' as scratch.
1899 
1900     // Check for and copy trailing dword
1901     __ testl(dword_count, 1);
1902     __ jcc(Assembler::zero, L_copy_32_bytes);
1903     __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1904     __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1905     __ jmp(L_copy_32_bytes);
1906 
1907     // Copy trailing qwords
1908   __ BIND(L_copy_8_bytes);
1909     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1910     __ movq(Address(to, qword_count, Address::times_8, -8), rax);


1936     __ ret(0);
1937 
1938     return start;
1939   }
1940 
1941   // Arguments:
1942   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1943   //             ignored
1944   //   is_oop  - true => oop array, so generate store check code
1945   //   name    - stub name string
1946   //
1947   // Inputs:
1948   //   c_rarg0   - source array address
1949   //   c_rarg1   - destination array address
1950   //   c_rarg2   - element count, treated as ssize_t, can be zero
1951   //
1952   // Side Effects:
1953   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1954   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
1955   //
1956   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, const char *name) {

1957     __ align(CodeEntryAlignment);
1958     StubCodeMark mark(this, "StubRoutines", name);
1959     address start = __ pc();
1960 
1961     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1962     const Register from        = rdi;  // source array address
1963     const Register to          = rsi;  // destination array address
1964     const Register qword_count = rdx;  // elements count
1965     const Register end_from    = from; // source array end address
1966     const Register end_to      = rcx;  // destination array end address
1967     const Register saved_to    = to;
1968     // End pointers are inclusive, and if count is not zero they point
1969     // to the last unit copied:  end_to[0] := end_from[0]
1970 
1971     __ enter(); // required for proper stackwalking of RuntimeStub frame
1972     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
1973     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1974 
1975     if (entry != NULL) {
1976       *entry = __ pc();
1977       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1978       BLOCK_COMMENT("Entry:");
1979     }
1980 
1981     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1982                       // r9 and r10 may be used to save non-volatile registers
1983     // 'from', 'to' and 'qword_count' are now valid
1984     if (is_oop) {
1985       // no registers are destroyed by this call
1986       gen_write_ref_array_pre_barrier(to, qword_count);
1987     }
1988 
1989     // Copy from low to high addresses.  Use 'to' as scratch.
1990     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1991     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1992     __ negptr(qword_count);
1993     __ jmp(L_copy_32_bytes);
1994 
1995     // Copy trailing qwords
1996   __ BIND(L_copy_8_bytes);
1997     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1998     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1999     __ increment(qword_count);
2000     __ jcc(Assembler::notZero, L_copy_8_bytes);
2001 
2002     if (is_oop) {
2003       __ jmp(L_exit);
2004     } else {
2005       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2006       restore_arg_regs();


2021     }
2022     restore_arg_regs();
2023     __ xorptr(rax, rax); // return 0
2024     __ leave(); // required for proper stackwalking of RuntimeStub frame
2025     __ ret(0);
2026 
2027     return start;
2028   }
2029 
2030   // Arguments:
2031   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2032   //             ignored
2033   //   is_oop  - true => oop array, so generate store check code
2034   //   name    - stub name string
2035   //
2036   // Inputs:
2037   //   c_rarg0   - source array address
2038   //   c_rarg1   - destination array address
2039   //   c_rarg2   - element count, treated as ssize_t, can be zero
2040   //
2041   address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
2042                                           address *entry, const char *name) {

2043     __ align(CodeEntryAlignment);
2044     StubCodeMark mark(this, "StubRoutines", name);
2045     address start = __ pc();
2046 
2047     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2048     const Register from        = rdi;  // source array address
2049     const Register to          = rsi;  // destination array address
2050     const Register qword_count = rdx;  // elements count
2051     const Register saved_count = rcx;
2052 
2053     __ enter(); // required for proper stackwalking of RuntimeStub frame
2054     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2055 
2056     if (entry != NULL) {
2057       *entry = __ pc();
2058       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2059       BLOCK_COMMENT("Entry:");
2060     }
2061 
2062     array_overlap_test(nooverlap_target, Address::times_8);
2063     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2064                       // r9 and r10 may be used to save non-volatile registers
2065     // 'from', 'to' and 'qword_count' are now valid
2066     if (is_oop) {
2067       // Save to and count for store barrier
2068       __ movptr(saved_count, qword_count);
2069       // No registers are destroyed by this call
2070       gen_write_ref_array_pre_barrier(to, saved_count);
2071     }
2072 
2073     __ jmp(L_copy_32_bytes);
2074 
2075     // Copy trailing qwords
2076   __ BIND(L_copy_8_bytes);
2077     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2078     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2079     __ decrement(qword_count);
2080     __ jcc(Assembler::notZero, L_copy_8_bytes);
2081 
2082     if (is_oop) {
2083       __ jmp(L_exit);
2084     } else {
2085       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2086       restore_arg_regs();
2087       __ xorptr(rax, rax); // return 0
2088       __ leave(); // required for proper stackwalking of RuntimeStub frame
2089       __ ret(0);
2090     }


2129     __ BIND(L_miss);
2130   }
2131 
2132   //
2133   //  Generate checkcasting array copy stub
2134   //
2135   //  Input:
2136   //    c_rarg0   - source array address
2137   //    c_rarg1   - destination array address
2138   //    c_rarg2   - element count, treated as ssize_t, can be zero
2139   //    c_rarg3   - size_t ckoff (super_check_offset)
2140   // not Win64
2141   //    c_rarg4   - oop ckval (super_klass)
2142   // Win64
2143   //    rsp+40    - oop ckval (super_klass)
2144   //
2145   //  Output:
2146   //    rax ==  0  -  success
2147   //    rax == -1^K - failure, where K is partial transfer count
2148   //
2149   address generate_checkcast_copy(const char *name, address *entry) {

2150 
2151     Label L_load_element, L_store_element, L_do_card_marks, L_done;
2152 
2153     // Input registers (after setup_arg_regs)
2154     const Register from        = rdi;   // source array address
2155     const Register to          = rsi;   // destination array address
2156     const Register length      = rdx;   // elements count
2157     const Register ckoff       = rcx;   // super_check_offset
2158     const Register ckval       = r8;    // super_klass
2159 
2160     // Registers used as temps (r13, r14 are save-on-entry)
2161     const Register end_from    = from;  // source array end address
2162     const Register end_to      = r13;   // destination array end address
2163     const Register count       = rdx;   // -(count_remaining)
2164     const Register r14_length  = r14;   // saved copy of length
2165     // End pointers are inclusive, and if length is not zero they point
2166     // to the last unit copied:  end_to[0] := end_from[0]
2167 
2168     const Register rax_oop    = rax;    // actual oop copied
2169     const Register r11_klass  = r11;    // oop._klass


2223     BLOCK_COMMENT("assert consistent ckoff/ckval");
2224     // The ckoff and ckval must be mutually consistent,
2225     // even though caller generates both.
2226     { Label L;
2227       int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2228                         Klass::super_check_offset_offset_in_bytes());
2229       __ cmpl(ckoff, Address(ckval, sco_offset));
2230       __ jcc(Assembler::equal, L);
2231       __ stop("super_check_offset inconsistent");
2232       __ bind(L);
2233     }
2234 #endif //ASSERT
2235 
2236     // Loop-invariant addresses.  They are exclusive end pointers.
2237     Address end_from_addr(from, length, TIMES_OOP, 0);
2238     Address   end_to_addr(to,   length, TIMES_OOP, 0);
2239     // Loop-variant addresses.  They assume post-incremented count < 0.
2240     Address from_element_addr(end_from, count, TIMES_OOP, 0);
2241     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
2242 
2243     gen_write_ref_array_pre_barrier(to, count);
2244 
2245     // Copy from low to high addresses, indexed from the end of each array.
2246     __ lea(end_from, end_from_addr);
2247     __ lea(end_to,   end_to_addr);
2248     __ movptr(r14_length, length);        // save a copy of the length
2249     assert(length == count, "");          // else fix next line:
2250     __ negptr(count);                     // negate and test the length
2251     __ jcc(Assembler::notZero, L_load_element);
2252 
2253     // Empty array:  Nothing to do.
2254     __ xorptr(rax, rax);                  // return 0 on (trivial) success
2255     __ jmp(L_done);
2256 
2257     // ======== begin loop ========
2258     // (Loop is rotated; its entry is L_load_element.)
2259     // Loop control:
2260     //   for (count = -count; count != 0; count++)
2261     // Base pointers src, dst are biased by 8*(count-1),to last element.
2262     __ align(OptoLoopAlignment);
2263 


2733                                                                             "jshort_disjoint_arraycopy");
2734     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
2735                                                                             "jshort_arraycopy");
2736 
2737     StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, &entry,
2738                                                                               "jint_disjoint_arraycopy");
2739     StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, entry,
2740                                                                               &entry_jint_arraycopy, "jint_arraycopy");
2741 
2742     StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, &entry,
2743                                                                                "jlong_disjoint_arraycopy");
2744     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
2745                                                                                &entry_jlong_arraycopy, "jlong_arraycopy");
2746 
2747 
2748     if (UseCompressedOops) {
2749       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
2750                                                                               "oop_disjoint_arraycopy");
2751       StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
2752                                                                               &entry_oop_arraycopy, "oop_arraycopy");






2753     } else {
2754       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
2755                                                                                "oop_disjoint_arraycopy");
2756       StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
2757                                                                                &entry_oop_arraycopy, "oop_arraycopy");






2758     }
2759 
2760     StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);



2761     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
2762                                                               entry_jbyte_arraycopy,
2763                                                               entry_jshort_arraycopy,
2764                                                               entry_jint_arraycopy,
2765                                                               entry_jlong_arraycopy);
2766     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
2767                                                                entry_jbyte_arraycopy,
2768                                                                entry_jshort_arraycopy,
2769                                                                entry_jint_arraycopy,
2770                                                                entry_oop_arraycopy,
2771                                                                entry_jlong_arraycopy,
2772                                                                entry_checkcast_arraycopy);
2773 
2774     StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
2775     StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
2776     StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
2777     StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
2778     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2779     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2780 
2781     // We don't generate specialized code for HeapWord-aligned source
2782     // arrays, so just use the code we've already generated
2783     StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = StubRoutines::_jbyte_disjoint_arraycopy;
2784     StubRoutines::_arrayof_jbyte_arraycopy           = StubRoutines::_jbyte_arraycopy;
2785 
2786     StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
2787     StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;
2788 
2789     StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
2790     StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;
2791 
2792     StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
2793     StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;
2794 
2795     StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
2796     StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;



2797   }
2798 
2799   void generate_math_stubs() {
2800     {
2801       StubCodeMark mark(this, "StubRoutines", "log");
2802       StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2803 
2804       __ subq(rsp, 8);
2805       __ movdbl(Address(rsp, 0), xmm0);
2806       __ fld_d(Address(rsp, 0));
2807       __ flog();
2808       __ fstp_d(Address(rsp, 0));
2809       __ movdbl(xmm0, Address(rsp, 0));
2810       __ addq(rsp, 8);
2811       __ ret(0);
2812     }
2813     {
2814       StubCodeMark mark(this, "StubRoutines", "log10");
2815       StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
2816 




1143       __ mov(rcx, rax); // c_rarg3 (via rax)
1144 #else
1145     assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1146            "unexpected argument registers");
1147 #endif
1148   }
1149 
1150   void restore_arg_regs() {
       // Undo the effect of setup_arg_regs().  On Win64 the incoming stub
       // arguments are moved into rdi/rsi (which are callee-saved in that
       // ABI), with the caller's rdi/rsi parked in r9/r10 — see the
       // "r9 and r10 may be used to save non-volatile registers" notes at
       // the call sites — so they must be restored here.  On the System V
       // ABI the arguments already arrive in rdi/rsi and nothing was
       // saved, so this is a no-op.
1151     const Register saved_rdi = r9;   // must stay in sync with setup_arg_regs()
1152     const Register saved_rsi = r10;  // must stay in sync with setup_arg_regs()
1153 #ifdef _WIN64
1154     __ movptr(rdi, saved_rdi);
1155     __ movptr(rsi, saved_rsi);
1156 #endif
1157   }
1158 
1159   // Generate code for an array write pre barrier
1160   //
1161   //     addr    -  starting address
1162   //     count   -  element count
1163   //     dest_uninitialized - true => destination is known to hold no live oops, so the G1 pre-barrier call may be skipped
1164   //
1165   //     Destroy no registers!
1166   //
1167   void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
       // Emit the GC pre-write barrier for an oop-array store of 'count'
       // elements starting at 'addr'.  Only the G1 SATB barrier sets need
       // work before the copy; card-table collectors use a post barrier
       // only.  When 'dest_uninitialized' is true the destination is known
       // to contain no live oops, so the barrier call is elided entirely.
       // Per the contract above, no caller-visible register may be
       // destroyed, hence the pusha/popa bracket around the runtime call.
1168     BarrierSet* bs = Universe::heap()->barrier_set();
1169     switch (bs->kind()) {
1170       case BarrierSet::G1SATBCT:
1171       case BarrierSet::G1SATBCTLogging:
1172         // With G1, don't generate the call if we statically know that the target is uninitialized
1173         if (!dest_uninitialized) {
1174            __ pusha();                      // push registers
           // Marshal (addr, count) into c_rarg0/c_rarg1 for the C call.
           // The moves must be ordered carefully because 'addr' and/or
           // 'count' may already alias the argument registers.
1175            if (count == c_rarg0) {
1176              if (addr == c_rarg1) {
1177                // exactly backwards!!
1178                __ xchgptr(c_rarg1, c_rarg0);
1179              } else {
                // 'count' sits in c_rarg0: copy it out before writing
                // c_rarg0, or it would be clobbered.
1180                __ movptr(c_rarg1, count);
1181                __ movptr(c_rarg0, addr);
1182              }

1183            } else {
              // 'count' is not in c_rarg0, so c_rarg0 may be filled first
              // (if addr == c_rarg1 it is read before being overwritten).
1184              __ movptr(c_rarg0, addr);
1185              __ movptr(c_rarg1, count);
1186            }
1187            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1188            __ popa();
1189         }
1190          break;
1191       case BarrierSet::CardTableModRef:
1192       case BarrierSet::CardTableExtension:
1193       case BarrierSet::ModRef:
           // No pre-barrier required for these barrier sets.
1194         break;
1195       default:
1196         ShouldNotReachHere();
1197 
1198     }
1199   }
1200 
1201   //
1202   // Generate code for an array write post barrier


1753 
1754   // Arguments:
1755   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1756   //             ignored
1757   //   is_oop  - true => oop array, so generate store check code
1758   //   name    - stub name string
1759   //
1760   // Inputs:
1761   //   c_rarg0   - source array address
1762   //   c_rarg1   - destination array address
1763   //   c_rarg2   - element count, treated as ssize_t, can be zero
1764   //
1765   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1766   // the hardware handle it.  The two dwords within qwords that span
1767   // cache line boundaries will still be loaded and stored atomicly.
1768   //
1769   // Side Effects:
1770   //   disjoint_int_copy_entry is set to the no-overlap entry point
1771   //   used by generate_conjoint_int_oop_copy().
1772   //
1773   address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
1774                                          const char *name, bool dest_uninitialized = false) {
1775     __ align(CodeEntryAlignment);
1776     StubCodeMark mark(this, "StubRoutines", name);
1777     address start = __ pc();
1778 
1779     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1780     const Register from        = rdi;  // source array address
1781     const Register to          = rsi;  // destination array address
1782     const Register count       = rdx;  // elements count
1783     const Register dword_count = rcx;
1784     const Register qword_count = count;
1785     const Register end_from    = from; // source array end address
1786     const Register end_to      = to;   // destination array end address
1787     const Register saved_to    = r11;  // saved destination array address
1788     // End pointers are inclusive, and if count is not zero they point
1789     // to the last unit copied:  end_to[0] := end_from[0]
1790 
1791     __ enter(); // required for proper stackwalking of RuntimeStub frame
1792     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1793 
1794     if (entry != NULL) {
1795       *entry = __ pc();
1796       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1797       BLOCK_COMMENT("Entry:");
1798     }
1799 
1800     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1801                       // r9 and r10 may be used to save non-volatile registers
1802     if (is_oop) {
1803       __ movq(saved_to, to);
1804       gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1805     }
1806 
1807     // 'from', 'to' and 'count' are now valid
1808     __ movptr(dword_count, count);
1809     __ shrptr(count, 1); // count => qword_count
1810 
1811     // Copy from low to high addresses.  Use 'to' as scratch.
1812     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1813     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1814     __ negptr(qword_count);
1815     __ jmp(L_copy_32_bytes);
1816 
1817     // Copy trailing qwords
1818   __ BIND(L_copy_8_bytes);
1819     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1820     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1821     __ increment(qword_count);
1822     __ jcc(Assembler::notZero, L_copy_8_bytes);
1823 
1824     // Check for and copy trailing dword


1845 
1846     return start;
1847   }
1848 
1849   // Arguments:
1850   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1851   //             ignored
1852   //   is_oop  - true => oop array, so generate store check code
1853   //   name    - stub name string
1854   //
1855   // Inputs:
1856   //   c_rarg0   - source array address
1857   //   c_rarg1   - destination array address
1858   //   c_rarg2   - element count, treated as ssize_t, can be zero
1859   //
1860   // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1861   // the hardware handle it.  The two dwords within qwords that span
1862   // cache line boundaries will still be loaded and stored atomicly.
1863   //
1864   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1865                                          address *entry, const char *name,
1866                                          bool dest_uninitialized = false) {
       // Generate a stub that copies 'count' 32-bit elements (jints, or
       // compressed oops when is_oop) between arrays that may overlap.
       // Copying proceeds from high to low addresses so a destination that
       // overlaps the source from above is still copied correctly.
       //   nooverlap_target   - stub jumped to when the ranges do not overlap
       //   *entry             - if non-NULL, receives this stub's entry point
       //   dest_uninitialized - forwarded to the write-ref pre-barrier
       //                        (destination holds no valid oops yet)
       // Returns the stub's start address.
1867     __ align(CodeEntryAlignment);
1868     StubCodeMark mark(this, "StubRoutines", name);
1869     address start = __ pc();
1870 
1871     Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1872     const Register from        = rdi;  // source array address
1873     const Register to          = rsi;  // destination array address
1874     const Register count       = rdx;  // elements count
1875     const Register dword_count = rcx;
1876     const Register qword_count = count;
1877 
1878     __ enter(); // required for proper stackwalking of RuntimeStub frame
1879     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1880 
1881     if (entry != NULL) {
1882       *entry = __ pc();
1883        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1884       BLOCK_COMMENT("Entry:");
1885     }
1886 
       // If src/dst do not actually overlap, tail-jump to the disjoint stub.
1887     array_overlap_test(nooverlap_target, Address::times_4);
1888     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1889                       // r9 and r10 may be used to save non-volatile registers
1890 
1891     if (is_oop) {
1892       // no registers are destroyed by this call
       // Pre-barrier over the whole destination range (G1 SATB — see
       // gen_write_ref_array_pre_barrier); dest_uninitialized lets it know
       // the destination has no prior oops.
1893       gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1894     }
1895 
1896     assert_clean_int(count, rax); // Make sure 'count' is clean int.
1897     // 'from', 'to' and 'count' are now valid
       // Copy in 8-byte (qword) units: keep the original element count in
       // dword_count, halve 'count' to get the qword count.
1898     __ movptr(dword_count, count);
1899     __ shrptr(count, 1); // count => qword_count
1900 
1901     // Copy from high to low addresses.  Use 'to' as scratch.
1902 
1903     // Check for and copy trailing dword
       // (an odd element count leaves one 4-byte element at the top end)
1904     __ testl(dword_count, 1);
1905     __ jcc(Assembler::zero, L_copy_32_bytes);
1906     __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1907     __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1908     __ jmp(L_copy_32_bytes);
1909 
1910     // Copy trailing qwords
1911   __ BIND(L_copy_8_bytes);
1912     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1913     __ movq(Address(to, qword_count, Address::times_8, -8), rax);


1939     __ ret(0);
1940 
1941     return start;
1942   }
1943 
1944   // Arguments:
1945   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1946   //             ignored
1947   //   is_oop  - true => oop array, so generate store check code
1948   //   name    - stub name string
1949   //
1950   // Inputs:
1951   //   c_rarg0   - source array address
1952   //   c_rarg1   - destination array address
1953   //   c_rarg2   - element count, treated as ssize_t, can be zero
1954   //
1955   // Side Effects:
1956   //   *entry, if non-NULL, is set to the no-overlap entry point used by
1957   //   generate_conjoint_long_oop_copy().
1958   //
1959   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
1960                                           const char *name, bool dest_uninitialized = false) {
       // Generate a stub that copies 'qword_count' 64-bit elements (jlongs,
       // or uncompressed oops when is_oop) between non-overlapping arrays,
       // copying from low to high addresses.
       //   *entry             - if non-NULL, receives this stub's entry point
       //                        (used as the no-overlap target by the
       //                        conjoint stub)
       //   dest_uninitialized - forwarded to the write-ref pre-barrier
       // Returns the stub's start address.
1961     __ align(CodeEntryAlignment);
1962     StubCodeMark mark(this, "StubRoutines", name);
1963     address start = __ pc();
1964 
1965     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1966     const Register from        = rdi;  // source array address
1967     const Register to          = rsi;  // destination array address
1968     const Register qword_count = rdx;  // elements count
1969     const Register end_from    = from; // source array end address
1970     const Register end_to      = rcx;  // destination array end address
1971     const Register saved_to    = to;
1972     // End pointers are inclusive, and if count is not zero they point
1973     // to the last unit copied:  end_to[0] := end_from[0]
1974 
1975     __ enter(); // required for proper stackwalking of RuntimeStub frame
1976     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
1977     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1978 
1979     if (entry != NULL) {
1980       *entry = __ pc();
1981       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1982       BLOCK_COMMENT("Entry:");
1983     }
1984 
1985     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1986                       // r9 and r10 may be used to save non-volatile registers
1987     // 'from', 'to' and 'qword_count' are now valid
1988     if (is_oop) {
1989       // no registers are destroyed by this call
1990       gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
1991     }
1992 
1993     // Copy from low to high addresses.  Use 'to' as scratch.
       // Bias the end pointers by -8 and negate the count so the copy loop
       // advances a negative index up toward zero.
1994     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1995     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1996     __ negptr(qword_count);
1997     __ jmp(L_copy_32_bytes);
1998 
1999     // Copy trailing qwords
2000   __ BIND(L_copy_8_bytes);
2001     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2002     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2003     __ increment(qword_count);
2004     __ jcc(Assembler::notZero, L_copy_8_bytes);
2005 
2006     if (is_oop) {
       // Oop copies take the shared exit path (L_exit) so the store-check
       // code for the destination range runs before returning.
2007       __ jmp(L_exit);
2008     } else {
2009       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2010       restore_arg_regs();


2025     }
2026     restore_arg_regs();
2027     __ xorptr(rax, rax); // return 0
2028     __ leave(); // required for proper stackwalking of RuntimeStub frame
2029     __ ret(0);
2030 
2031     return start;
2032   }
2033 
2034   // Arguments:
2035   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2036   //             ignored
2037   //   is_oop  - true => oop array, so generate store check code
2038   //   name    - stub name string
2039   //
2040   // Inputs:
2041   //   c_rarg0   - source array address
2042   //   c_rarg1   - destination array address
2043   //   c_rarg2   - element count, treated as ssize_t, can be zero
2044   //
2045   address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
2046                                           address nooverlap_target, address *entry,
2047                                           const char *name, bool dest_uninitialized = false) {
2048     __ align(CodeEntryAlignment);
2049     StubCodeMark mark(this, "StubRoutines", name);
2050     address start = __ pc();
2051 
2052     Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2053     const Register from        = rdi;  // source array address
2054     const Register to          = rsi;  // destination array address
2055     const Register qword_count = rdx;  // elements count
2056     const Register saved_count = rcx;
2057 
2058     __ enter(); // required for proper stackwalking of RuntimeStub frame
2059     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2060 
2061     if (entry != NULL) {
2062       *entry = __ pc();
2063       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2064       BLOCK_COMMENT("Entry:");
2065     }
2066 
2067     array_overlap_test(nooverlap_target, Address::times_8);
2068     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2069                       // r9 and r10 may be used to save non-volatile registers
2070     // 'from', 'to' and 'qword_count' are now valid
2071     if (is_oop) {
2072       // Save to and count for store barrier
2073       __ movptr(saved_count, qword_count);
2074       // No registers are destroyed by this call
2075       gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
2076     }
2077 
2078     __ jmp(L_copy_32_bytes);
2079 
2080     // Copy trailing qwords
2081   __ BIND(L_copy_8_bytes);
2082     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2083     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2084     __ decrement(qword_count);
2085     __ jcc(Assembler::notZero, L_copy_8_bytes);
2086 
2087     if (is_oop) {
2088       __ jmp(L_exit);
2089     } else {
2090       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
2091       restore_arg_regs();
2092       __ xorptr(rax, rax); // return 0
2093       __ leave(); // required for proper stackwalking of RuntimeStub frame
2094       __ ret(0);
2095     }


2134     __ BIND(L_miss);
2135   }
2136 
2137   //
2138   //  Generate checkcasting array copy stub
2139   //
2140   //  Input:
2141   //    c_rarg0   - source array address
2142   //    c_rarg1   - destination array address
2143   //    c_rarg2   - element count, treated as ssize_t, can be zero
2144   //    c_rarg3   - size_t ckoff (super_check_offset)
2145   // not Win64
2146   //    c_rarg4   - oop ckval (super_klass)
2147   // Win64
2148   //    rsp+40    - oop ckval (super_klass)
2149   //
2150   //  Output:
2151   //    rax ==  0  -  success
2152   //    rax == -1^K - failure, where K is partial transfer count
2153   //
2154   address generate_checkcast_copy(const char *name, address *entry,
2155                                   bool dest_uninitialized = false) {
2156 
2157     Label L_load_element, L_store_element, L_do_card_marks, L_done;
2158 
2159     // Input registers (after setup_arg_regs)
2160     const Register from        = rdi;   // source array address
2161     const Register to          = rsi;   // destination array address
2162     const Register length      = rdx;   // elements count
2163     const Register ckoff       = rcx;   // super_check_offset
2164     const Register ckval       = r8;    // super_klass
2165 
2166     // Registers used as temps (r13, r14 are save-on-entry)
2167     const Register end_from    = from;  // source array end address
2168     const Register end_to      = r13;   // destination array end address
2169     const Register count       = rdx;   // -(count_remaining)
2170     const Register r14_length  = r14;   // saved copy of length
2171     // End pointers are inclusive, and if length is not zero they point
2172     // to the last unit copied:  end_to[0] := end_from[0]
2173 
2174     const Register rax_oop    = rax;    // actual oop copied
2175     const Register r11_klass  = r11;    // oop._klass


2229     BLOCK_COMMENT("assert consistent ckoff/ckval");
2230     // The ckoff and ckval must be mutually consistent,
2231     // even though caller generates both.
2232     { Label L;
2233       int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2234                         Klass::super_check_offset_offset_in_bytes());
2235       __ cmpl(ckoff, Address(ckval, sco_offset));
2236       __ jcc(Assembler::equal, L);
2237       __ stop("super_check_offset inconsistent");
2238       __ bind(L);
2239     }
2240 #endif //ASSERT
2241 
2242     // Loop-invariant addresses.  They are exclusive end pointers.
2243     Address end_from_addr(from, length, TIMES_OOP, 0);
2244     Address   end_to_addr(to,   length, TIMES_OOP, 0);
2245     // Loop-variant addresses.  They assume post-incremented count < 0.
2246     Address from_element_addr(end_from, count, TIMES_OOP, 0);
2247     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
2248 
2249     gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
2250 
2251     // Copy from low to high addresses, indexed from the end of each array.
2252     __ lea(end_from, end_from_addr);
2253     __ lea(end_to,   end_to_addr);
2254     __ movptr(r14_length, length);        // save a copy of the length
2255     assert(length == count, "");          // else fix next line:
2256     __ negptr(count);                     // negate and test the length
2257     __ jcc(Assembler::notZero, L_load_element);
2258 
2259     // Empty array:  Nothing to do.
2260     __ xorptr(rax, rax);                  // return 0 on (trivial) success
2261     __ jmp(L_done);
2262 
2263     // ======== begin loop ========
2264     // (Loop is rotated; its entry is L_load_element.)
2265     // Loop control:
2266     //   for (count = -count; count != 0; count++)
2267     // Base pointers src, dst are biased by 8*(count-1),to last element.
2268     __ align(OptoLoopAlignment);
2269 


2739                                                                             "jshort_disjoint_arraycopy");
2740     StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
2741                                                                             "jshort_arraycopy");
2742 
2743     StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, &entry,
2744                                                                               "jint_disjoint_arraycopy");
2745     StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, entry,
2746                                                                               &entry_jint_arraycopy, "jint_arraycopy");
2747 
2748     StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, &entry,
2749                                                                                "jlong_disjoint_arraycopy");
2750     StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
2751                                                                                &entry_jlong_arraycopy, "jlong_arraycopy");
2752 
2753 
2754     if (UseCompressedOops) {
2755       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
2756                                                                               "oop_disjoint_arraycopy");
2757       StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
2758                                                                               &entry_oop_arraycopy, "oop_arraycopy");
2759       StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_int_oop_copy(false, true, &entry,
2760                                                                                      "oop_disjoint_arraycopy_uninit",
2761                                                                                      /*dest_uninitialized*/true);
2762       StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_int_oop_copy(false, true, entry,
2763                                                                                      NULL, "oop_arraycopy_uninit",
2764                                                                                      /*dest_uninitialized*/true);
2765     } else {
2766       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
2767                                                                                "oop_disjoint_arraycopy");
2768       StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
2769                                                                                &entry_oop_arraycopy, "oop_arraycopy");
2770       StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_long_oop_copy(false, true, &entry,
2771                                                                                       "oop_disjoint_arraycopy_uninit",
2772                                                                                       /*dest_uninitialized*/true);
2773       StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_long_oop_copy(false, true, entry,
2774                                                                                       NULL, "oop_arraycopy_uninit",
2775                                                                                       /*dest_uninitialized*/true);
2776     }
2777 
2778     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
2779     StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
2780                                                                         /*dest_uninitialized*/true);
2781 
2782     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
2783                                                               entry_jbyte_arraycopy,
2784                                                               entry_jshort_arraycopy,
2785                                                               entry_jint_arraycopy,
2786                                                               entry_jlong_arraycopy);
2787     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
2788                                                                entry_jbyte_arraycopy,
2789                                                                entry_jshort_arraycopy,
2790                                                                entry_jint_arraycopy,
2791                                                                entry_oop_arraycopy,
2792                                                                entry_jlong_arraycopy,
2793                                                                entry_checkcast_arraycopy);
2794 
2795     StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
2796     StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
2797     StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
2798     StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
2799     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2800     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2801 
2802     // We don't generate specialized code for HeapWord-aligned source
2803     // arrays, so just use the code we've already generated
2804     StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = StubRoutines::_jbyte_disjoint_arraycopy;
2805     StubRoutines::_arrayof_jbyte_arraycopy           = StubRoutines::_jbyte_arraycopy;
2806 
2807     StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
2808     StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;
2809 
2810     StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
2811     StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;
2812 
2813     StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
2814     StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;
2815 
2816     StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
2817     StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
2818 
2819     StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit    = StubRoutines::_oop_disjoint_arraycopy_uninit;
2820     StubRoutines::_arrayof_oop_arraycopy_uninit             = StubRoutines::_oop_arraycopy_uninit;
2821   }
2822 
2823   void generate_math_stubs() {
2824     {
2825       StubCodeMark mark(this, "StubRoutines", "log");
2826       StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2827 
2828       __ subq(rsp, 8);
2829       __ movdbl(Address(rsp, 0), xmm0);
2830       __ fld_d(Address(rsp, 0));
2831       __ flog();
2832       __ fstp_d(Address(rsp, 0));
2833       __ movdbl(xmm0, Address(rsp, 0));
2834       __ addq(rsp, 8);
2835       __ ret(0);
2836     }
2837     {
2838       StubCodeMark mark(this, "StubRoutines", "log10");
2839       StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
2840 


src/cpu/x86/vm/stubGenerator_x86_64.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File