src/cpu/x86/vm/stubGenerator_x86_64.cpp

Print this page
rev 4534 : 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
Summary: Changed gen_write_ref_array_post_barrier() code on x64 to pass start address and number of copied oop elements. In generate_checkcast_copy() skip post barrier code if no elements are copied.
Reviewed-by: roland


1228            }
1229            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1230            __ popa();
1231         }
1232          break;
1233       case BarrierSet::CardTableModRef:
1234       case BarrierSet::CardTableExtension:
1235       case BarrierSet::ModRef:
1236         break;
1237       default:
1238         ShouldNotReachHere();
1239 
1240     }
1241   }
1242 
1243   //
1244   // Generate code for an array write post barrier
1245   //
1246   //  Input:
1247   //     start    - register containing starting address of destination array
1248   //     end      - register containing ending address of destination array
1249   //     scratch  - scratch register
1250   //
1251   //  The input registers are overwritten.
1252   //  The ending address is inclusive.
       //
       //  NOTE(review): this is the pre-8010927 interface.  Because 'end' is an
       //  inclusive pointer, a copy of zero elements leaves 'end' one heap oop
       //  below 'start'; the changeset summary at the top of this page replaces
       //  this interface with an explicit element count for that reason.
1253   void  gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
1254     assert_different_registers(start, end, scratch);
1255     BarrierSet* bs = Universe::heap()->barrier_set();
1256     switch (bs->kind()) {
1257       case BarrierSet::G1SATBCT:
1258       case BarrierSet::G1SATBCTLogging:
1259 
1260         {
           // G1: call the shared runtime post-barrier with (start, element count).
           // The count must be recomputed here from the inclusive [start, end]
           // range because this platform's interface does not supply it.
1261           __ pusha();                      // push registers (overkill)
1262           // must compute element count unless barrier set interface is changed (other platforms supply count)
1263           assert_different_registers(start, end, scratch);
1264           __ lea(scratch, Address(end, BytesPerHeapOop));
1265           __ subptr(scratch, start);               // subtract start to get #bytes
1266           __ shrptr(scratch, LogBytesPerHeapOop);  // convert to element count
1267           __ mov(c_rarg0, start);
1268           __ mov(c_rarg1, scratch);




1269           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
1270           __ popa();
1271         }
1272         break;
1273       case BarrierSet::CardTableModRef:
1274       case BarrierSet::CardTableExtension:
1275         {
1276           CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1277           assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1278 
1279           Label L_loop;

1280 


           // Convert [start, end] (end made exclusive by adding one oop) into
           // card indexes, then take the difference as the loop counter.
1281            __ shrptr(start, CardTableModRefBS::card_shift);
1282            __ addptr(end, BytesPerHeapOop);
1283            __ shrptr(end, CardTableModRefBS::card_shift);
1284            __ subptr(end, start); // end --> card index delta, used as loop counter below
1285 
           // Materialize the card-table base; use a short absolute address form
           // when the base fits in a signed 32-bit displacement.
1286           intptr_t disp = (intptr_t) ct->byte_map_base;
1287           if (Assembler::is_simm32(disp)) {
1288             Address cardtable(noreg, noreg, Address::no_scale, disp);
1289             __ lea(scratch, cardtable);
1290           } else {
1291             ExternalAddress cardtable((address)disp);
1292             __ lea(scratch, cardtable);
1293           }
1294 
1295           const Register count = end; // 'end' register contains bytes count now
1296           __ addptr(start, scratch);
           // Dirty one card byte per iteration; the loop runs while count >= 0,
           // so 'count + 1' card bytes are written in total.
1297         __ BIND(L_loop);
1298           __ movb(Address(start, count, Address::times_1), 0);
1299           __ decrement(count);
1300           __ jcc(Assembler::greaterEqual, L_loop);
1301         }
1302         break;
1303       default:
1304         ShouldNotReachHere();
1305 
1306     }
1307   }
1308 
1309 
1310   // Copy big chunks forward
1311   //
1312   // Inputs:
1313   //   end_from     - source arrays end address
1314   //   end_to       - destination array end address
1315   //   qword_count  - 64-bits element count, negative


1927     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1928     __ negptr(qword_count);
1929     __ jmp(L_copy_bytes);
1930 
1931     // Copy trailing qwords
1932   __ BIND(L_copy_8_bytes);
1933     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1934     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1935     __ increment(qword_count);
1936     __ jcc(Assembler::notZero, L_copy_8_bytes);
1937 
1938     // Check for and copy trailing dword
1939   __ BIND(L_copy_4_bytes);
1940     __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1941     __ jccb(Assembler::zero, L_exit);
1942     __ movl(rax, Address(end_from, 8));
1943     __ movl(Address(end_to, 8), rax);
1944 
1945   __ BIND(L_exit);
1946     if (is_oop) {
1947       __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
1948       gen_write_ref_array_post_barrier(saved_to, end_to, rax);
1949     }
1950     restore_arg_regs();
1951     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1952     __ xorptr(rax, rax); // return 0
1953     __ leave(); // required for proper stackwalking of RuntimeStub frame
1954     __ ret(0);
1955 
1956     // Copy in multi-bytes chunks
1957     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1958     __ jmp(L_copy_4_bytes);
1959 
1960     return start;
1961   }
1962 
1963   // Arguments:
1964   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1965   //             ignored
1966   //   is_oop  - true => oop array, so generate store check code
1967   //   name    - stub name string
1968   //


2023 
2024     // Copy trailing qwords
2025   __ BIND(L_copy_8_bytes);
2026     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2027     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2028     __ decrement(qword_count);
2029     __ jcc(Assembler::notZero, L_copy_8_bytes);
2030 
2031     if (is_oop) {
2032       __ jmp(L_exit);
2033     }
2034     restore_arg_regs();
2035     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2036     __ xorptr(rax, rax); // return 0
2037     __ leave(); // required for proper stackwalking of RuntimeStub frame
2038     __ ret(0);
2039 
2040     // Copy in multi-bytes chunks
2041     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2042 
2043    __ bind(L_exit);
2044      if (is_oop) {
2045        Register end_to = rdx;
2046        __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
2047        gen_write_ref_array_post_barrier(to, end_to, rax);
2048      }
2049     restore_arg_regs();
2050     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2051     __ xorptr(rax, rax); // return 0
2052     __ leave(); // required for proper stackwalking of RuntimeStub frame
2053     __ ret(0);
2054 
2055     return start;
2056   }
2057 
2058   // Arguments:
2059   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2060   //             ignored
2061   //   is_oop  - true => oop array, so generate store check code
2062   //   name    - stub name string
2063   //
2064   // Inputs:
2065   //   c_rarg0   - source array address
2066   //   c_rarg1   - destination array address
2067   //   c_rarg2   - element count, treated as ssize_t, can be zero
2068   //
2069  // Side Effects:
2070   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
2071   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
2072   //
       // NOTE(review): pre-8010927 version — the post barrier below is passed an
       // inclusive end pointer rather than an element count.
2073   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
2074                                           const char *name, bool dest_uninitialized = false) {
2075     __ align(CodeEntryAlignment);
2076     StubCodeMark mark(this, "StubRoutines", name);
2077     address start = __ pc();
2078 
2079     Label L_copy_bytes, L_copy_8_bytes, L_exit;
2080     const Register from        = rdi;  // source array address
2081     const Register to          = rsi;  // destination array address
2082     const Register qword_count = rdx;  // elements count
2083     const Register end_from    = from; // source array end address
2084     const Register end_to      = rcx;  // destination array end address
2085     const Register saved_to    = to;

2086     // End pointers are inclusive, and if count is not zero they point
2087     // to the last unit copied:  end_to[0] := end_from[0]
2088 
2089     __ enter(); // required for proper stackwalking of RuntimeStub frame
2090     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2091     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2092 
2093     if (entry != NULL) {
2094       *entry = __ pc();
2095       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2096       BLOCK_COMMENT("Entry:");
2097     }
2098 
2099     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2100                       // r9 and r10 may be used to save non-volatile registers
2101     // 'from', 'to' and 'qword_count' are now valid
2102     if (is_oop) {


2103       // no registers are destroyed by this call
2104       gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
2105     }
2106 
2107     // Copy from low to high addresses.  Use 'to' as scratch.
2108     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2109     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
2110     __ negptr(qword_count);
2111     __ jmp(L_copy_bytes);
2112 
2113     // Copy trailing qwords
       // qword_count is negative here; it counts up toward zero as the tail
       // elements are copied one qword at a time.
2114   __ BIND(L_copy_8_bytes);
2115     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2116     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2117     __ increment(qword_count);
2118     __ jcc(Assembler::notZero, L_copy_8_bytes);
2119 
2120     if (is_oop) {
2121       __ jmp(L_exit);
2122     } else {
2123       restore_arg_regs();
2124       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2125       __ xorptr(rax, rax); // return 0
2126       __ leave(); // required for proper stackwalking of RuntimeStub frame
2127       __ ret(0);
2128     }
2129 
2130     // Copy in multi-bytes chunks
2131     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2132 
2133     if (is_oop) {
2134     __ BIND(L_exit);
       // Post barrier uses the inclusive end pointer computed above.
       // NOTE(review): the header says the count "can be zero"; in that case
       // end_to lies one qword below saved_to — confirm the barrier tolerates
       // an empty range (this interface was changed by 8010927).
2135       gen_write_ref_array_post_barrier(saved_to, end_to, rax);
2136     }
2137     restore_arg_regs();
2138     if (is_oop) {
2139       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2140     } else {
2141       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2142     }
2143     __ xorptr(rax, rax); // return 0
2144     __ leave(); // required for proper stackwalking of RuntimeStub frame
2145     __ ret(0);
2146 
2147     return start;
2148   }
2149 
2150   // Arguments:
2151   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2152   //             ignored
2153   //   is_oop  - true => oop array, so generate store check code
2154   //   name    - stub name string
2155   //


2198     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2199     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2200     __ decrement(qword_count);
2201     __ jcc(Assembler::notZero, L_copy_8_bytes);
2202 
2203     if (is_oop) {
2204       __ jmp(L_exit);
2205     } else {
2206       restore_arg_regs();
2207       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2208       __ xorptr(rax, rax); // return 0
2209       __ leave(); // required for proper stackwalking of RuntimeStub frame
2210       __ ret(0);
2211     }
2212 
2213     // Copy in multi-bytes chunks
2214     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2215 
2216     if (is_oop) {
2217     __ BIND(L_exit);
2218       __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
2219       gen_write_ref_array_post_barrier(to, rcx, rax);
2220     }
2221     restore_arg_regs();
2222     if (is_oop) {
2223       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2224     } else {
2225       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2226     }
2227     __ xorptr(rax, rax); // return 0
2228     __ leave(); // required for proper stackwalking of RuntimeStub frame
2229     __ ret(0);
2230 
2231     return start;
2232   }
2233 
2234 
2235   // Helper for generating a dynamic type check.
2236   // Smashes no registers.
2237   void generate_type_check(Register sub_klass,
2238                            Register super_check_offset,
2239                            Register super_klass,


2386 
2387     __ BIND(L_store_element);
2388     __ store_heap_oop(to_element_addr, rax_oop);  // store the oop
2389     __ increment(count);               // increment the count toward zero
2390     __ jcc(Assembler::zero, L_do_card_marks);
2391 
2392     // ======== loop entry is here ========
2393     __ BIND(L_load_element);
2394     __ load_heap_oop(rax_oop, from_element_addr); // load the oop
2395     __ testptr(rax_oop, rax_oop);
2396     __ jcc(Assembler::zero, L_store_element);
2397 
2398     __ load_klass(r11_klass, rax_oop);// query the object klass
2399     generate_type_check(r11_klass, ckoff, ckval, L_store_element);
2400     // ======== end loop ========
2401 
2402     // It was a real error; we must depend on the caller to finish the job.
2403     // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
2404     // Emit GC store barriers for the oops we have copied (r14 + rdx),
2405     // and report their number to the caller.
2406     assert_different_registers(rax, r14_length, count, to, end_to, rcx);
2407     __ lea(end_to, to_element_addr);
2408     __ addptr(end_to, -heapOopSize);      // make an inclusive end pointer
2409     gen_write_ref_array_post_barrier(to, end_to, rscratch1);
2410     __ movptr(rax, r14_length);           // original oops
2411     __ addptr(rax, count);                // K = (original - remaining) oops
2412     __ notptr(rax);                       // report (-1^K) to caller
2413     __ jmp(L_done);
2414 
2415     // Come here on success only.
2416     __ BIND(L_do_card_marks);
2417     __ addptr(end_to, -heapOopSize);         // make an inclusive end pointer
2418     gen_write_ref_array_post_barrier(to, end_to, rscratch1);
2419     __ xorptr(rax, rax);                  // return 0 on success
2420 



2421     // Common exit point (success or failure).
2422     __ BIND(L_done);
2423     __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2424     __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2425     restore_arg_regs();
2426     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2427     __ leave(); // required for proper stackwalking of RuntimeStub frame
2428     __ ret(0);
2429 
2430     return start;
2431   }
2432 
2433   //
2434   //  Generate 'unsafe' array copy stub
2435   //  Though just as safe as the other stubs, it takes an unscaled
2436   //  size_t argument instead of an element count.
2437   //
2438   //  Input:
2439   //    c_rarg0   - source array address
2440   //    c_rarg1   - destination array address




1228            }
1229            __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1230            __ popa();
1231         }
1232          break;
1233       case BarrierSet::CardTableModRef:
1234       case BarrierSet::CardTableExtension:
1235       case BarrierSet::ModRef:
1236         break;
1237       default:
1238         ShouldNotReachHere();
1239 
1240     }
1241   }
1242 
1243   //
1244   // Generate code for an array write post barrier
1245   //
1246   //  Input:
1247   //     start    - register containing starting address of destination array
1248   //     count    - elements count
1249   //     scratch  - scratch register
1250   //
1251   //  The input registers are overwritten.
1252   //
       //  NOTE(review): the card-table branch computes an inclusive end as
       //  start + count*oop_size - oop_size, which underflows for count == 0;
       //  per the changeset summary above, callers (e.g. checkcast copy) must
       //  skip this barrier when no elements were copied.
1253   void  gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
1254     assert_different_registers(start, count, scratch);
1255     BarrierSet* bs = Universe::heap()->barrier_set();
1256     switch (bs->kind()) {
1257       case BarrierSet::G1SATBCT:
1258       case BarrierSet::G1SATBCTLogging:

1259         {
           // G1: marshal (start, count) into the first two C argument
           // registers and call the shared runtime post barrier.
1260           __ pusha();             // push registers (overkill)
1261           if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
               // c_rarg0 aliases 'count', so move count out of the way first.
1262             assert_different_registers(c_rarg1, start);
1263             __ mov(c_rarg1, count);


1264             __ mov(c_rarg0, start);
1265           } else {
1266             assert_different_registers(c_rarg0, count);
1267             __ mov(c_rarg0, start);
1268             __ mov(c_rarg1, count);
1269           }
1270           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
1271           __ popa();
1272         }
1273         break;
1274       case BarrierSet::CardTableModRef:
1275       case BarrierSet::CardTableExtension:
1276         {
1277           CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1278           assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1279 
1280           Label L_loop;
1281           const Register end = count;
1282 
           // Derive the inclusive end address from (start, count), then
           // convert both bounds to card indexes.
1283           __ leaq(end, Address(start, count, TIMES_OOP, 0));  // end == start+count*oop_size
1284           __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
1285           __ shrptr(start, CardTableModRefBS::card_shift);

1286           __ shrptr(end,   CardTableModRefBS::card_shift);
1287           __ subptr(end, start); // end --> cards count
1288 
1289           int64_t disp = (int64_t) ct->byte_map_base;
1290           __ mov64(scratch, disp);








           // Dirty one card byte per iteration; the loop runs while
           // count >= 0, covering every card spanned by the stored oops.
1291           __ addptr(start, scratch);
1292         __ BIND(L_loop);
1293           __ movb(Address(start, count, Address::times_1), 0);
1294           __ decrement(count);
1295           __ jcc(Assembler::greaterEqual, L_loop);
1296         }
1297         break;
1298       default:
1299         ShouldNotReachHere();
1300 
1301     }
1302   }
1303 
1304 
1305   // Copy big chunks forward
1306   //
1307   // Inputs:
1308   //   end_from     - source arrays end address
1309   //   end_to       - destination array end address
1310   //   qword_count  - 64-bits element count, negative


1922     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1923     __ negptr(qword_count);
1924     __ jmp(L_copy_bytes);
1925 
1926     // Copy trailing qwords
1927   __ BIND(L_copy_8_bytes);
1928     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1929     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1930     __ increment(qword_count);
1931     __ jcc(Assembler::notZero, L_copy_8_bytes);
1932 
1933     // Check for and copy trailing dword
1934   __ BIND(L_copy_4_bytes);
1935     __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1936     __ jccb(Assembler::zero, L_exit);
1937     __ movl(rax, Address(end_from, 8));
1938     __ movl(Address(end_to, 8), rax);
1939 
1940   __ BIND(L_exit);
1941     if (is_oop) {
1942       gen_write_ref_array_post_barrier(saved_to, dword_count, rax);

1943     }
1944     restore_arg_regs();
1945     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1946     __ xorptr(rax, rax); // return 0
1947     __ leave(); // required for proper stackwalking of RuntimeStub frame
1948     __ ret(0);
1949 
1950     // Copy in multi-bytes chunks
1951     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1952     __ jmp(L_copy_4_bytes);
1953 
1954     return start;
1955   }
1956 
1957   // Arguments:
1958   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1959   //             ignored
1960   //   is_oop  - true => oop array, so generate store check code
1961   //   name    - stub name string
1962   //


2017 
2018     // Copy trailing qwords
2019   __ BIND(L_copy_8_bytes);
2020     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2021     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2022     __ decrement(qword_count);
2023     __ jcc(Assembler::notZero, L_copy_8_bytes);
2024 
2025     if (is_oop) {
2026       __ jmp(L_exit);
2027     }
2028     restore_arg_regs();
2029     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2030     __ xorptr(rax, rax); // return 0
2031     __ leave(); // required for proper stackwalking of RuntimeStub frame
2032     __ ret(0);
2033 
2034     // Copy in multi-bytes chunks
2035     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2036 
2037   __ BIND(L_exit);
2038     if (is_oop) {
2039       gen_write_ref_array_post_barrier(to, dword_count, rax);


2040     }
2041     restore_arg_regs();
2042     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2043     __ xorptr(rax, rax); // return 0
2044     __ leave(); // required for proper stackwalking of RuntimeStub frame
2045     __ ret(0);
2046 
2047     return start;
2048   }
2049 
2050   // Arguments:
2051   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2052   //             ignored
2053   //   is_oop  - true => oop array, so generate store check code
2054   //   name    - stub name string
2055   //
2056   // Inputs:
2057   //   c_rarg0   - source array address
2058   //   c_rarg1   - destination array address
2059   //   c_rarg2   - element count, treated as ssize_t, can be zero
2060   //
2061  // Side Effects:
2062   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
2063   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
2064   //
       // NOTE(review): post-8010927 version — the original element count is
       // preserved in saved_count and handed to the post barrier instead of an
       // inclusive end pointer.
2065   address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
2066                                           const char *name, bool dest_uninitialized = false) {
2067     __ align(CodeEntryAlignment);
2068     StubCodeMark mark(this, "StubRoutines", name);
2069     address start = __ pc();
2070 
2071     Label L_copy_bytes, L_copy_8_bytes, L_exit;
2072     const Register from        = rdi;  // source array address
2073     const Register to          = rsi;  // destination array address
2074     const Register qword_count = rdx;  // elements count
2075     const Register end_from    = from; // source array end address
2076     const Register end_to      = rcx;  // destination array end address
2077     const Register saved_to    = to;
2078     const Register saved_count = r11;
2079     // End pointers are inclusive, and if count is not zero they point
2080     // to the last unit copied:  end_to[0] := end_from[0]
2081 
2082     __ enter(); // required for proper stackwalking of RuntimeStub frame
2083     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2084     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2085 
2086     if (entry != NULL) {
2087       *entry = __ pc();
2088       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2089       BLOCK_COMMENT("Entry:");
2090     }
2091 
2092     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2093                       // r9 and r10 may be used to save non-volatile registers
2094     // 'from', 'to' and 'qword_count' are now valid
2095     if (is_oop) {
2096       // Save to and count for store barrier
       // qword_count is negated/consumed by the copy loops below, so capture
       // the original element count now for the post barrier.
2097       __ movptr(saved_count, qword_count);
2098       // no registers are destroyed by this call
2099       gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
2100     }
2101 
2102     // Copy from low to high addresses.  Use 'to' as scratch.
2103     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2104     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
2105     __ negptr(qword_count);
2106     __ jmp(L_copy_bytes);
2107 
2108     // Copy trailing qwords
       // qword_count is negative here; it counts up toward zero as the tail
       // elements are copied one qword at a time.
2109   __ BIND(L_copy_8_bytes);
2110     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2111     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2112     __ increment(qword_count);
2113     __ jcc(Assembler::notZero, L_copy_8_bytes);
2114 
2115     if (is_oop) {
2116       __ jmp(L_exit);
2117     } else {
2118       restore_arg_regs();
2119       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2120       __ xorptr(rax, rax); // return 0
2121       __ leave(); // required for proper stackwalking of RuntimeStub frame
2122       __ ret(0);
2123     }
2124 
2125     // Copy in multi-bytes chunks
2126     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2127 
2128     if (is_oop) {
2129     __ BIND(L_exit);
       // Post barrier now takes (destination start, element count).
2130       gen_write_ref_array_post_barrier(saved_to, saved_count, rax);
2131     }
2132     restore_arg_regs();
2133     if (is_oop) {
2134       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2135     } else {
2136       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2137     }
2138     __ xorptr(rax, rax); // return 0
2139     __ leave(); // required for proper stackwalking of RuntimeStub frame
2140     __ ret(0);
2141 
2142     return start;
2143   }
2144 
2145   // Arguments:
2146   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2147   //             ignored
2148   //   is_oop  - true => oop array, so generate store check code
2149   //   name    - stub name string
2150   //


2193     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2194     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2195     __ decrement(qword_count);
2196     __ jcc(Assembler::notZero, L_copy_8_bytes);
2197 
2198     if (is_oop) {
2199       __ jmp(L_exit);
2200     } else {
2201       restore_arg_regs();
2202       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2203       __ xorptr(rax, rax); // return 0
2204       __ leave(); // required for proper stackwalking of RuntimeStub frame
2205       __ ret(0);
2206     }
2207 
2208     // Copy in multi-bytes chunks
2209     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2210 
2211     if (is_oop) {
2212     __ BIND(L_exit);
2213       gen_write_ref_array_post_barrier(to, saved_count, rax);

2214     }
2215     restore_arg_regs();
2216     if (is_oop) {
2217       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2218     } else {
2219       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2220     }
2221     __ xorptr(rax, rax); // return 0
2222     __ leave(); // required for proper stackwalking of RuntimeStub frame
2223     __ ret(0);
2224 
2225     return start;
2226   }
2227 
2228 
2229   // Helper for generating a dynamic type check.
2230   // Smashes no registers.
2231   void generate_type_check(Register sub_klass,
2232                            Register super_check_offset,
2233                            Register super_klass,


2380 
2381     __ BIND(L_store_element);
2382     __ store_heap_oop(to_element_addr, rax_oop);  // store the oop
2383     __ increment(count);               // increment the count toward zero
2384     __ jcc(Assembler::zero, L_do_card_marks);
2385 
2386     // ======== loop entry is here ========
2387     __ BIND(L_load_element);
2388     __ load_heap_oop(rax_oop, from_element_addr); // load the oop
2389     __ testptr(rax_oop, rax_oop);
2390     __ jcc(Assembler::zero, L_store_element);
2391 
2392     __ load_klass(r11_klass, rax_oop);// query the object klass
2393     generate_type_check(r11_klass, ckoff, ckval, L_store_element);
2394     // ======== end loop ========
2395 
2396     // It was a real error; we must depend on the caller to finish the job.
2397     // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
2398     // Emit GC store barriers for the oops we have copied (r14 + rdx),
2399     // and report their number to the caller.
2400     assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2401     Label L_post_barrier;
2402     __ addptr(r14_length, count);     // K = (original - remaining) oops
2403     __ movptr(rax, r14_length);       // save the value
2404     __ notptr(rax);                   // report (-1^K) to caller (does not affect flags)
2405     __ jccb(Assembler::notZero, L_post_barrier);
2406     __ jmp(L_done); // K == 0, nothing was copied, skip post barrier

2407 
2408     // Come here on success only.
2409     __ BIND(L_do_card_marks);


2410     __ xorptr(rax, rax);              // return 0 on success
2411 
2412     __ BIND(L_post_barrier);
2413     gen_write_ref_array_post_barrier(to, r14_length, rscratch1);
2414 
2415     // Common exit point (success or failure).
2416     __ BIND(L_done);
2417     __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2418     __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2419     restore_arg_regs();
2420     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2421     __ leave(); // required for proper stackwalking of RuntimeStub frame
2422     __ ret(0);
2423 
2424     return start;
2425   }
2426 
2427   //
2428   //  Generate 'unsafe' array copy stub
2429   //  Though just as safe as the other stubs, it takes an unscaled
2430   //  size_t argument instead of an element count.
2431   //
2432   //  Input:
2433   //    c_rarg0   - source array address
2434   //    c_rarg1   - destination array address