      // Byte-granularity zero loop: store one zero byte per iteration,
      // walking 'count' down through 0 (greaterEqual => includes index 0).
      // NOTE(review): the enclosing definition (a switch over element type)
      // begins before this chunk -- only its tail is visible here.
    __ BIND(L_loop);
      __ movb(Address(start, count, Address::times_1), 0);
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_loop);
    }
    break;
    default:
      ShouldNotReachHere();

    }
  }


  // Copy big chunks forward
  //
  // Inputs:
  //   end_from     - source arrays end address
  //   end_to       - destination array end address
  //   qword_count  - 64-bits element count, negative
  //   to           - scratch
  //   L_copy_32_bytes - entry label
  //   L_copy_8_bytes  - exit label
  //
  void copy_32_bytes_forward(Register end_from, Register end_to,
                             Register qword_count, Register to,
                             Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
    // Callers must jump to the L_copy_32_bytes entry label; falling into the
    // loop head directly is a bug, trapped here in debug builds.
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
  __ BIND(L_loop);
    if(UseUnalignedLoadStores) {
      // Two unaligned 16-byte SSE moves per iteration (32 bytes total),
      // covering offsets [-24, +8) relative to the scaled index.
      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);

    } else {
      // Four 8-byte integer moves per iteration via the scratch register.
      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
    }
    // Entry point: qword_count is negative.  Advance by 4 qwords and keep
    // looping while the result is still <= 0 (i.e. at least 4 qwords were
    // left); afterwards undo the overshoot and branch out if 1-3 trailing
    // qwords remain.
  __ BIND(L_copy_32_bytes);
    __ addptr(qword_count, 4);
    __ jcc(Assembler::lessEqual, L_loop);
    __ subptr(qword_count, 4);
    __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
  }
  // Copy big chunks backward
  //
  // Inputs:
  //   from         - source arrays address
  //   dest         - destination array address
  //   qword_count  - 64-bits element count
  //   to           - scratch
  //   L_copy_32_bytes - entry label
  //   L_copy_8_bytes  - exit label
  //
  void copy_32_bytes_backward(Register from, Register dest,
                              Register qword_count, Register to,
                              Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
    // Callers must jump to the L_copy_32_bytes entry label; falling into the
    // loop head directly is a bug, trapped here in debug builds.
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
  __ BIND(L_loop);
    if(UseUnalignedLoadStores) {
      // Two unaligned 16-byte SSE moves per iteration (32 bytes total),
      // covering offsets [0, +32) relative to the scaled index.
      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
      __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
      __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);

    } else {
      // Four 8-byte integer moves per iteration via the scratch register.
      __ movq(to, Address(from, qword_count, Address::times_8, 24));
      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
      __ movq(to, Address(from, qword_count, Address::times_8, 16));
      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  8));
      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  0));
      __ movq(Address(dest, qword_count, Address::times_8,  0), to);
    }
    // Entry point: qword_count is positive.  Take 4 qwords off and keep
    // looping while the result is still >= 0 (at least 4 qwords were left);
    // afterwards undo the overshoot and branch out if 1-3 trailing qwords
    // remain.
  __ BIND(L_copy_32_bytes);
    __ subptr(qword_count, 4);
    __ jcc(Assembler::greaterEqual, L_loop);
    __ addptr(qword_count, 4);
    __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
  }


  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated
as ssize_t, can be zero 1373 // 1374 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, 1375 // we let the hardware handle it. The one to eight bytes within words, 1376 // dwords or qwords that span cache line boundaries will still be loaded 1377 // and stored atomically. 1378 // 1379 // Side Effects: 1380 // disjoint_byte_copy_entry is set to the no-overlap entry point 1381 // used by generate_conjoint_byte_copy(). 1382 // 1383 address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) { 1384 __ align(CodeEntryAlignment); 1385 StubCodeMark mark(this, "StubRoutines", name); 1386 address start = __ pc(); 1387 1388 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; 1389 Label L_copy_byte, L_exit; 1390 const Register from = rdi; // source array address 1391 const Register to = rsi; // destination array address 1392 const Register count = rdx; // elements count 1393 const Register byte_count = rcx; 1394 const Register qword_count = count; 1395 const Register end_from = from; // source array end address 1396 const Register end_to = to; // destination array end address 1397 // End pointers are inclusive, and if count is not zero they point 1398 // to the last unit copied: end_to[0] := end_from[0] 1399 1400 __ enter(); // required for proper stackwalking of RuntimeStub frame 1401 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1402 1403 if (entry != NULL) { 1404 *entry = __ pc(); 1405 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1406 BLOCK_COMMENT("Entry:"); 1407 } 1408 1409 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1410 // r9 and r10 may be used to save non-volatile registers 1411 1412 // 'from', 'to' and 'count' are now valid 1413 __ movptr(byte_count, count); 1414 __ shrptr(count, 3); // count => qword_count 1415 1416 // Copy from low to high addresses. Use 'to' as scratch. 
1417 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); 1418 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); 1419 __ negptr(qword_count); // make the count negative 1420 __ jmp(L_copy_32_bytes); 1421 1422 // Copy trailing qwords 1423 __ BIND(L_copy_8_bytes); 1424 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); 1425 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); 1426 __ increment(qword_count); 1427 __ jcc(Assembler::notZero, L_copy_8_bytes); 1428 1429 // Check for and copy trailing dword 1430 __ BIND(L_copy_4_bytes); 1431 __ testl(byte_count, 4); 1432 __ jccb(Assembler::zero, L_copy_2_bytes); 1433 __ movl(rax, Address(end_from, 8)); 1434 __ movl(Address(end_to, 8), rax); 1435 1436 __ addptr(end_from, 4); 1437 __ addptr(end_to, 4); 1438 1439 // Check for and copy trailing word 1440 __ BIND(L_copy_2_bytes); 1443 __ movw(rax, Address(end_from, 8)); 1444 __ movw(Address(end_to, 8), rax); 1445 1446 __ addptr(end_from, 2); 1447 __ addptr(end_to, 2); 1448 1449 // Check for and copy trailing byte 1450 __ BIND(L_copy_byte); 1451 __ testl(byte_count, 1); 1452 __ jccb(Assembler::zero, L_exit); 1453 __ movb(rax, Address(end_from, 8)); 1454 __ movb(Address(end_to, 8), rax); 1455 1456 __ BIND(L_exit); 1457 restore_arg_regs(); 1458 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free 1459 __ xorptr(rax, rax); // return 0 1460 __ leave(); // required for proper stackwalking of RuntimeStub frame 1461 __ ret(0); 1462 1463 // Copy in 32-bytes chunks 1464 copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); 1465 __ jmp(L_copy_4_bytes); 1466 1467 return start; 1468 } 1469 1470 // Arguments: 1471 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1472 // ignored 1473 // name - stub name string 1474 // 1475 // Inputs: 1476 // c_rarg0 - source array address 1477 // c_rarg1 - destination array address 1478 // 
c_rarg2 - element count, treated as ssize_t, can be zero 1479 // 1480 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, 1481 // we let the hardware handle it. The one to eight bytes within words, 1482 // dwords or qwords that span cache line boundaries will still be loaded 1483 // and stored atomically. 1484 // 1485 address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, 1486 address* entry, const char *name) { 1487 __ align(CodeEntryAlignment); 1488 StubCodeMark mark(this, "StubRoutines", name); 1489 address start = __ pc(); 1490 1491 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; 1492 const Register from = rdi; // source array address 1493 const Register to = rsi; // destination array address 1494 const Register count = rdx; // elements count 1495 const Register byte_count = rcx; 1496 const Register qword_count = count; 1497 1498 __ enter(); // required for proper stackwalking of RuntimeStub frame 1499 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1500 1501 if (entry != NULL) { 1502 *entry = __ pc(); 1503 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1504 BLOCK_COMMENT("Entry:"); 1505 } 1506 1507 array_overlap_test(nooverlap_target, Address::times_1); 1508 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1509 // r9 and r10 may be used to save non-volatile registers 1510 1511 // 'from', 'to' and 'count' are now valid 1514 1515 // Copy from high to low addresses. 
1516 1517 // Check for and copy trailing byte 1518 __ testl(byte_count, 1); 1519 __ jcc(Assembler::zero, L_copy_2_bytes); 1520 __ movb(rax, Address(from, byte_count, Address::times_1, -1)); 1521 __ movb(Address(to, byte_count, Address::times_1, -1), rax); 1522 __ decrement(byte_count); // Adjust for possible trailing word 1523 1524 // Check for and copy trailing word 1525 __ BIND(L_copy_2_bytes); 1526 __ testl(byte_count, 2); 1527 __ jcc(Assembler::zero, L_copy_4_bytes); 1528 __ movw(rax, Address(from, byte_count, Address::times_1, -2)); 1529 __ movw(Address(to, byte_count, Address::times_1, -2), rax); 1530 1531 // Check for and copy trailing dword 1532 __ BIND(L_copy_4_bytes); 1533 __ testl(byte_count, 4); 1534 __ jcc(Assembler::zero, L_copy_32_bytes); 1535 __ movl(rax, Address(from, qword_count, Address::times_8)); 1536 __ movl(Address(to, qword_count, Address::times_8), rax); 1537 __ jmp(L_copy_32_bytes); 1538 1539 // Copy trailing qwords 1540 __ BIND(L_copy_8_bytes); 1541 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 1542 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 1543 __ decrement(qword_count); 1544 __ jcc(Assembler::notZero, L_copy_8_bytes); 1545 1546 restore_arg_regs(); 1547 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free 1548 __ xorptr(rax, rax); // return 0 1549 __ leave(); // required for proper stackwalking of RuntimeStub frame 1550 __ ret(0); 1551 1552 // Copy in 32-bytes chunks 1553 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); 1554 1555 restore_arg_regs(); 1556 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free 1557 __ xorptr(rax, rax); // return 0 1558 __ leave(); // required for proper stackwalking of RuntimeStub frame 1559 __ ret(0); 1560 1561 return start; 1562 } 1563 1564 // Arguments: 1565 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1566 // 
  // ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  // let the hardware handle it. The two or four words within dwords
  // or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  // Side Effects:
  //   disjoint_short_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_short_copy().
  //
  address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register word_count  = rcx;
    const Register qword_count = count;
    const Register end_from    = from; // source array end address
    const Register end_to      = to;   // destination array end address
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied: end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    // 'from', 'to' and 'count' are now valid
    __ movptr(word_count, count);
    __ shrptr(count, 2); // count => qword_count (4 jshorts per qword)

    // Copy from low to high addresses.  Use 'to' as scratch.
    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
    __ negptr(qword_count);
    __ jmp(L_copy_32_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
    __ increment(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    // Original 'dest' is trashed, so we can't use it as a
    // base register for a possible trailing word copy

    // Check for and copy trailing dword
  __ BIND(L_copy_4_bytes);
    __ testl(word_count, 2);
    __ jccb(Assembler::zero, L_copy_2_bytes);
    __ movl(rax, Address(end_from, 8));
    __ movl(Address(end_to, 8), rax);

    __ addptr(end_from, 4);
    __ addptr(end_to, 4);

    // Check for and copy trailing word
  __ BIND(L_copy_2_bytes);
    __ testl(word_count, 1);
    __ jccb(Assembler::zero, L_exit);
    __ movw(rax, Address(end_from, 8));
    __ movw(Address(end_to, 8), rax);

  __ BIND(L_exit);
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy in 32-bytes chunks
    copy_32_bytes_forward(end_from, end_to,
qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); 1657 __ jmp(L_copy_4_bytes); 1658 1659 return start; 1660 } 1661 1662 address generate_fill(BasicType t, bool aligned, const char *name) { 1663 __ align(CodeEntryAlignment); 1664 StubCodeMark mark(this, "StubRoutines", name); 1665 address start = __ pc(); 1666 1667 BLOCK_COMMENT("Entry:"); 1668 1669 const Register to = c_rarg0; // source array address 1670 const Register value = c_rarg1; // value 1671 const Register count = c_rarg2; // elements count 1672 1673 __ enter(); // required for proper stackwalking of RuntimeStub frame 1674 1675 __ generate_fill(t, aligned, to, value, count, rax, xmm0); 1676 1683 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1684 // ignored 1685 // name - stub name string 1686 // 1687 // Inputs: 1688 // c_rarg0 - source array address 1689 // c_rarg1 - destination array address 1690 // c_rarg2 - element count, treated as ssize_t, can be zero 1691 // 1692 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we 1693 // let the hardware handle it. The two or four words within dwords 1694 // or qwords that span cache line boundaries will still be loaded 1695 // and stored atomically. 1696 // 1697 address generate_conjoint_short_copy(bool aligned, address nooverlap_target, 1698 address *entry, const char *name) { 1699 __ align(CodeEntryAlignment); 1700 StubCodeMark mark(this, "StubRoutines", name); 1701 address start = __ pc(); 1702 1703 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes; 1704 const Register from = rdi; // source array address 1705 const Register to = rsi; // destination array address 1706 const Register count = rdx; // elements count 1707 const Register word_count = rcx; 1708 const Register qword_count = count; 1709 1710 __ enter(); // required for proper stackwalking of RuntimeStub frame 1711 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 
    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_2);
    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    // 'from', 'to' and 'count' are now valid
    __ movptr(word_count, count);
    __ shrptr(count, 2); // count => qword_count (4 jshorts per qword)

    // Copy from high to low addresses.  Use 'to' as scratch.

    // Check for and copy trailing word
    __ testl(word_count, 1);
    __ jccb(Assembler::zero, L_copy_4_bytes);
    __ movw(rax, Address(from, word_count, Address::times_2, -2));
    __ movw(Address(to, word_count, Address::times_2, -2), rax);

    // Check for and copy trailing dword
  __ BIND(L_copy_4_bytes);
    __ testl(word_count, 2);
    __ jcc(Assembler::zero, L_copy_32_bytes);
    __ movl(rax, Address(from, qword_count, Address::times_8));
    __ movl(Address(to, qword_count, Address::times_8), rax);
    __ jmp(L_copy_32_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
    __ decrement(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy in 32-bytes chunks
    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);

    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it. The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomicly.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
                                         const char *name, bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register dword_count = rcx;
    const Register qword_count = count;
    const Register end_from    = from; // source array end address
    const Register end_to      = to;   // destination array end address
    const Register saved_to    = r11;  // saved destination array address
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied: end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers
    if (is_oop) {
      // 'to' is clobbered below (used as end_to), so preserve the original
      // destination for the post barrier.
      __ movq(saved_to, to);
      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
    }

    // 'from', 'to' and 'count' are now valid
    __ movptr(dword_count, count);
    __ shrptr(count, 1); // count => qword_count (2 dwords per qword)

    // Copy from low to high addresses.  Use 'to' as scratch.
    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
    __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
    __ negptr(qword_count);
    __ jmp(L_copy_32_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
    __ increment(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    // Check for and copy trailing dword
  __ BIND(L_copy_4_bytes);
    __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
    __ jccb(Assembler::zero, L_exit);
    __ movl(rax, Address(end_from, 8));
    __ movl(Address(end_to, 8), rax);

  __ BIND(L_exit);
    if (is_oop) {
      // end_to here is the address of the last dword written (inclusive).
      __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy 32-bytes chunks
    copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
    __ jmp(L_copy_4_bytes);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it. The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomicly.
  //
  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
                                         address *entry, const char *name,
                                         bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register dword_count = rcx;
    const Register qword_count = count;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_4);
    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    if (is_oop) {
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
    }

    assert_clean_int(count, rax); // Make sure 'count' is clean int.
    // 'from', 'to' and 'count' are now valid
    __ movptr(dword_count, count);
    __ shrptr(count, 1); // count => qword_count (2 dwords per qword)

    // Copy from high to low addresses.  Use 'to' as scratch.

    // Check for and copy trailing dword
    __ testl(dword_count, 1);
    __ jcc(Assembler::zero, L_copy_32_bytes);
    __ movl(rax, Address(from, dword_count, Address::times_4, -4));
    __ movl(Address(to, dword_count, Address::times_4, -4), rax);
    __ jmp(L_copy_32_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
    __ decrement(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    if (is_oop) {
      // oop arrays need the post barrier before returning
      __ jmp(L_exit);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy in 32-bytes chunks
    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);

    __ bind(L_exit);
    if (is_oop) {
      Register end_to = rdx;
      // end_to is the address of the last oop written (inclusive)
      __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
      gen_write_ref_array_post_barrier(to, end_to, rax);
    }
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1 -
  // destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // Side Effects:
  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  //
  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
                                          const char *name, bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register qword_count = rdx;  // elements count
    const Register end_from    = from; // source array end address
    const Register end_to      = rcx;  // destination array end address
    const Register saved_to    = to;
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied: end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    // Save no-overlap entry point for generate_conjoint_long_oop_copy()
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers
    // 'from', 'to' and 'qword_count' are now valid
    if (is_oop) {
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
    }

    // Copy from low to high addresses.  Use 'to' as scratch.
    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
    __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
    __ negptr(qword_count);
    __ jmp(L_copy_32_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
    __ increment(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    if (is_oop) {
      // oop arrays need the post barrier before returning
      __ jmp(L_exit);
    } else {
      restore_arg_regs();
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
      __ xorptr(rax, rax); // return 0
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
    }

    // Copy in 32-bytes chunks (comment previously said 64-byte; the loop
    // emitted by copy_32_bytes_forward moves 32 bytes per iteration)
    copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);

    if (is_oop) {
    __ BIND(L_exit);
      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
    }
    restore_arg_regs();
    if (is_oop) {
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
    } else {
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
    }
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
                                          address nooverlap_target, address *entry,
2063 const char *name, bool dest_uninitialized = false) { 2064 __ align(CodeEntryAlignment); 2065 StubCodeMark mark(this, "StubRoutines", name); 2066 address start = __ pc(); 2067 2068 Label L_copy_32_bytes, L_copy_8_bytes, L_exit; 2069 const Register from = rdi; // source array address 2070 const Register to = rsi; // destination array address 2071 const Register qword_count = rdx; // elements count 2072 const Register saved_count = rcx; 2073 2074 __ enter(); // required for proper stackwalking of RuntimeStub frame 2075 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 2076 2077 if (entry != NULL) { 2078 *entry = __ pc(); 2079 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2080 BLOCK_COMMENT("Entry:"); 2081 } 2082 2083 array_overlap_test(nooverlap_target, Address::times_8); 2084 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 2085 // r9 and r10 may be used to save non-volatile registers 2086 // 'from', 'to' and 'qword_count' are now valid 2087 if (is_oop) { 2088 // Save to and count for store barrier 2089 __ movptr(saved_count, qword_count); 2090 // No registers are destroyed by this call 2091 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized); 2092 } 2093 2094 __ jmp(L_copy_32_bytes); 2095 2096 // Copy trailing qwords 2097 __ BIND(L_copy_8_bytes); 2098 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 2099 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 2100 __ decrement(qword_count); 2101 __ jcc(Assembler::notZero, L_copy_8_bytes); 2102 2103 if (is_oop) { 2104 __ jmp(L_exit); 2105 } else { 2106 restore_arg_regs(); 2107 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free 2108 __ xorptr(rax, rax); // return 0 2109 __ leave(); // required for proper stackwalking of RuntimeStub frame 2110 __ ret(0); 2111 } 2112 2113 // Copy in 32-bytes chunks 2114 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, 
                       L_copy_8_bytes);

    if (is_oop) {
    __ BIND(L_exit);
      // Compute inclusive end pointer for the card-mark range from the
      // saved element count (qword_count was destroyed by the copy loop).
      __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
      gen_write_ref_array_post_barrier(to, rcx, rax);
    }
    restore_arg_regs();
    if (is_oop) {
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
    } else {
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
    }
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // NOTE(review): the text below this point comes from a second, newer copy of
  // this file spliced into the extraction; it begins with the tail of a
  // clear-memory routine whose head is not visible here.
  __ BIND(L_loop);
      __ movb(Address(start, count, Address::times_1), 0);
      __ decrement(count);
      __ jcc(Assembler::greaterEqual, L_loop);
      }
      break;
    default:
      ShouldNotReachHere();

    }
  }


  // Copy big chunks forward
  //
  // Inputs:
  //   end_from     - source arrays end address
  //   end_to       - destination array end address
  //   qword_count  - 64-bits element count, negative
  //   to           - scratch
  //   L_copy_bytes - entry label
  //   L_copy_8_bytes  - exit label
  //
  void copy_bytes_forward(Register end_from, Register end_to,
                          Register qword_count, Register to,
                          Label& L_copy_bytes, Label& L_copy_8_bytes) {
    // This helper must be entered at L_copy_bytes (the pre-test of the loop),
    // never at its first instruction.
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
    if (UseUnalignedLoadStores) {
      Label L_end;
      // Copy 64-bytes per iteration
    __ BIND(L_loop);
      if (UseAVX >= 2) {
        // 32-byte AVX loads/stores halve the instruction count
        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
        __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
      } else {
        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
        __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
        __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
      }
    __ BIND(L_copy_bytes);
      __ addptr(qword_count, 8);
      __ jcc(Assembler::lessEqual, L_loop);
      __ subptr(qword_count, 4);  // sub(8) and add(4)
      __ jccb(Assembler::greater, L_end);
      // Copy trailing 32 bytes
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
      } else {
        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
      }
      __ addptr(qword_count, 4);
    __ BIND(L_end);
    } else {
      // Copy 32-bytes per iteration
    __ BIND(L_loop);
      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);

    __ BIND(L_copy_bytes);
      __ addptr(qword_count, 4);
      __ jcc(Assembler::lessEqual, L_loop);
    }
    // Undo the last increment; fewer than 4 qwords remain.
    __ subptr(qword_count, 4);
    __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
  }


  // Copy big chunks backward
  //
  // Inputs:
  //   from         - source arrays address
  //   dest         - destination array address
  //   qword_count  - 64-bits element count
  //   to           - scratch
  //   L_copy_bytes - entry label
  //   L_copy_8_bytes  - exit label
  //
  void copy_bytes_backward(Register from, Register dest,
                           Register qword_count, Register to,
                           Label& L_copy_bytes, Label& L_copy_8_bytes) {
    // This helper must be entered at L_copy_bytes (the pre-test of the loop),
    // never at its first instruction.
    DEBUG_ONLY(__ stop("enter at entry label, not here"));
    Label L_loop;
    __ align(OptoLoopAlignment);
    if (UseUnalignedLoadStores) {
      Label L_end;
      // Copy 64-bytes per iteration
    __ BIND(L_loop);
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
        __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
        __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
      } else {
        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
        __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
        __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
        __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
        __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
        __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
      }
    __ BIND(L_copy_bytes);
      __ subptr(qword_count, 8);
      __ jcc(Assembler::greaterEqual, L_loop);

      __ addptr(qword_count, 4);  // add(8) and sub(4)
      __ jccb(Assembler::less, L_end);
      // Copy trailing 32 bytes
      if (UseAVX >= 2) {
        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
        __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
      } else {
        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
        __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
      }
      __ subptr(qword_count, 4);
    __ BIND(L_end);
    } else {
      // Copy 32-bytes per iteration
    __ BIND(L_loop);
      __ movq(to, Address(from, qword_count, Address::times_8, 24));
      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
      __ movq(to, Address(from, qword_count, Address::times_8, 16));
      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  8));
      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
      __ movq(to, Address(from, qword_count, Address::times_8,  0));
      __ movq(Address(dest, qword_count, Address::times_8,  0), to);

    __ BIND(L_copy_bytes);
      __ subptr(qword_count, 4);
      __ jcc(Assembler::greaterEqual, L_loop);
    }
    // Undo the last decrement; fewer than 4 qwords remain.
    __ addptr(qword_count, 4);
    __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
  }


  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
1442 // 1443 // Side Effects: 1444 // disjoint_byte_copy_entry is set to the no-overlap entry point 1445 // used by generate_conjoint_byte_copy(). 1446 // 1447 address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) { 1448 __ align(CodeEntryAlignment); 1449 StubCodeMark mark(this, "StubRoutines", name); 1450 address start = __ pc(); 1451 1452 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; 1453 Label L_copy_byte, L_exit; 1454 const Register from = rdi; // source array address 1455 const Register to = rsi; // destination array address 1456 const Register count = rdx; // elements count 1457 const Register byte_count = rcx; 1458 const Register qword_count = count; 1459 const Register end_from = from; // source array end address 1460 const Register end_to = to; // destination array end address 1461 // End pointers are inclusive, and if count is not zero they point 1462 // to the last unit copied: end_to[0] := end_from[0] 1463 1464 __ enter(); // required for proper stackwalking of RuntimeStub frame 1465 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1466 1467 if (entry != NULL) { 1468 *entry = __ pc(); 1469 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1470 BLOCK_COMMENT("Entry:"); 1471 } 1472 1473 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1474 // r9 and r10 may be used to save non-volatile registers 1475 1476 // 'from', 'to' and 'count' are now valid 1477 __ movptr(byte_count, count); 1478 __ shrptr(count, 3); // count => qword_count 1479 1480 // Copy from low to high addresses. Use 'to' as scratch. 
1481 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); 1482 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); 1483 __ negptr(qword_count); // make the count negative 1484 __ jmp(L_copy_bytes); 1485 1486 // Copy trailing qwords 1487 __ BIND(L_copy_8_bytes); 1488 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); 1489 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); 1490 __ increment(qword_count); 1491 __ jcc(Assembler::notZero, L_copy_8_bytes); 1492 1493 // Check for and copy trailing dword 1494 __ BIND(L_copy_4_bytes); 1495 __ testl(byte_count, 4); 1496 __ jccb(Assembler::zero, L_copy_2_bytes); 1497 __ movl(rax, Address(end_from, 8)); 1498 __ movl(Address(end_to, 8), rax); 1499 1500 __ addptr(end_from, 4); 1501 __ addptr(end_to, 4); 1502 1503 // Check for and copy trailing word 1504 __ BIND(L_copy_2_bytes); 1507 __ movw(rax, Address(end_from, 8)); 1508 __ movw(Address(end_to, 8), rax); 1509 1510 __ addptr(end_from, 2); 1511 __ addptr(end_to, 2); 1512 1513 // Check for and copy trailing byte 1514 __ BIND(L_copy_byte); 1515 __ testl(byte_count, 1); 1516 __ jccb(Assembler::zero, L_exit); 1517 __ movb(rax, Address(end_from, 8)); 1518 __ movb(Address(end_to, 8), rax); 1519 1520 __ BIND(L_exit); 1521 restore_arg_regs(); 1522 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free 1523 __ xorptr(rax, rax); // return 0 1524 __ leave(); // required for proper stackwalking of RuntimeStub frame 1525 __ ret(0); 1526 1527 // Copy in multi-bytes chunks 1528 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 1529 __ jmp(L_copy_4_bytes); 1530 1531 return start; 1532 } 1533 1534 // Arguments: 1535 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1536 // ignored 1537 // name - stub name string 1538 // 1539 // Inputs: 1540 // c_rarg0 - source array address 1541 // c_rarg1 - destination array address 1542 // c_rarg2 - 
element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
  // we let the hardware handle it.  The one to eight bytes within words,
  // dwords or qwords that span cache line boundaries will still be loaded
  // and stored atomically.
  //
  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address* entry, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register count       = rdx;  // elements count
    const Register byte_count  = rcx;
    const Register qword_count = count;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    array_overlap_test(nooverlap_target, Address::times_1);
    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers

    // 'from', 'to' and 'count' are now valid
    // FIX: these two instructions were missing, leaving byte_count
    // uninitialized and count un-shifted (a byte count, not a qword count),
    // so every trailing-element test and the bulk copy operated on garbage.
    __ movptr(byte_count, count);
    __ shrptr(count, 3);  // count => qword_count

    // Copy from high to low addresses.

    // Check for and copy trailing byte
    __ testl(byte_count, 1);
    __ jcc(Assembler::zero, L_copy_2_bytes);
    __ movb(rax, Address(from, byte_count, Address::times_1, -1));
    __ movb(Address(to, byte_count, Address::times_1, -1), rax);
    __ decrement(byte_count); // Adjust for possible trailing word

    // Check for and copy trailing word
  __ BIND(L_copy_2_bytes);
    __ testl(byte_count, 2);
    __ jcc(Assembler::zero, L_copy_4_bytes);
    __ movw(rax, Address(from, byte_count, Address::times_1, -2));
    __ movw(Address(to, byte_count, Address::times_1, -2), rax);

    // Check for and copy trailing dword
  __ BIND(L_copy_4_bytes);
    __ testl(byte_count, 4);
    __ jcc(Assembler::zero, L_copy_bytes);
    __ movl(rax, Address(from, qword_count, Address::times_8));
    __ movl(Address(to, qword_count, Address::times_8), rax);
    __ jmp(L_copy_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
    __ decrement(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    // Copy in multi-bytes chunks
    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);

    restore_arg_regs();
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
// name - stub name string 1632 // 1633 // Inputs: 1634 // c_rarg0 - source array address 1635 // c_rarg1 - destination array address 1636 // c_rarg2 - element count, treated as ssize_t, can be zero 1637 // 1638 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we 1639 // let the hardware handle it. The two or four words within dwords 1640 // or qwords that span cache line boundaries will still be loaded 1641 // and stored atomically. 1642 // 1643 // Side Effects: 1644 // disjoint_short_copy_entry is set to the no-overlap entry point 1645 // used by generate_conjoint_short_copy(). 1646 // 1647 address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) { 1648 __ align(CodeEntryAlignment); 1649 StubCodeMark mark(this, "StubRoutines", name); 1650 address start = __ pc(); 1651 1652 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit; 1653 const Register from = rdi; // source array address 1654 const Register to = rsi; // destination array address 1655 const Register count = rdx; // elements count 1656 const Register word_count = rcx; 1657 const Register qword_count = count; 1658 const Register end_from = from; // source array end address 1659 const Register end_to = to; // destination array end address 1660 // End pointers are inclusive, and if count is not zero they point 1661 // to the last unit copied: end_to[0] := end_from[0] 1662 1663 __ enter(); // required for proper stackwalking of RuntimeStub frame 1664 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 
1665 1666 if (entry != NULL) { 1667 *entry = __ pc(); 1668 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1669 BLOCK_COMMENT("Entry:"); 1670 } 1671 1672 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1673 // r9 and r10 may be used to save non-volatile registers 1674 1675 // 'from', 'to' and 'count' are now valid 1676 __ movptr(word_count, count); 1677 __ shrptr(count, 2); // count => qword_count 1678 1679 // Copy from low to high addresses. Use 'to' as scratch. 1680 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); 1681 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); 1682 __ negptr(qword_count); 1683 __ jmp(L_copy_bytes); 1684 1685 // Copy trailing qwords 1686 __ BIND(L_copy_8_bytes); 1687 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); 1688 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); 1689 __ increment(qword_count); 1690 __ jcc(Assembler::notZero, L_copy_8_bytes); 1691 1692 // Original 'dest' is trashed, so we can't use it as a 1693 // base register for a possible trailing word copy 1694 1695 // Check for and copy trailing dword 1696 __ BIND(L_copy_4_bytes); 1697 __ testl(word_count, 2); 1698 __ jccb(Assembler::zero, L_copy_2_bytes); 1699 __ movl(rax, Address(end_from, 8)); 1700 __ movl(Address(end_to, 8), rax); 1701 1702 __ addptr(end_from, 4); 1703 __ addptr(end_to, 4); 1704 1705 // Check for and copy trailing word 1706 __ BIND(L_copy_2_bytes); 1707 __ testl(word_count, 1); 1708 __ jccb(Assembler::zero, L_exit); 1709 __ movw(rax, Address(end_from, 8)); 1710 __ movw(Address(end_to, 8), rax); 1711 1712 __ BIND(L_exit); 1713 restore_arg_regs(); 1714 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free 1715 __ xorptr(rax, rax); // return 0 1716 __ leave(); // required for proper stackwalking of RuntimeStub frame 1717 __ ret(0); 1718 1719 // Copy in multi-bytes chunks 1720 copy_bytes_forward(end_from, end_to, 
qword_count, rax, L_copy_bytes, L_copy_8_bytes); 1721 __ jmp(L_copy_4_bytes); 1722 1723 return start; 1724 } 1725 1726 address generate_fill(BasicType t, bool aligned, const char *name) { 1727 __ align(CodeEntryAlignment); 1728 StubCodeMark mark(this, "StubRoutines", name); 1729 address start = __ pc(); 1730 1731 BLOCK_COMMENT("Entry:"); 1732 1733 const Register to = c_rarg0; // source array address 1734 const Register value = c_rarg1; // value 1735 const Register count = c_rarg2; // elements count 1736 1737 __ enter(); // required for proper stackwalking of RuntimeStub frame 1738 1739 __ generate_fill(t, aligned, to, value, count, rax, xmm0); 1740 1747 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1748 // ignored 1749 // name - stub name string 1750 // 1751 // Inputs: 1752 // c_rarg0 - source array address 1753 // c_rarg1 - destination array address 1754 // c_rarg2 - element count, treated as ssize_t, can be zero 1755 // 1756 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we 1757 // let the hardware handle it. The two or four words within dwords 1758 // or qwords that span cache line boundaries will still be loaded 1759 // and stored atomically. 1760 // 1761 address generate_conjoint_short_copy(bool aligned, address nooverlap_target, 1762 address *entry, const char *name) { 1763 __ align(CodeEntryAlignment); 1764 StubCodeMark mark(this, "StubRoutines", name); 1765 address start = __ pc(); 1766 1767 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes; 1768 const Register from = rdi; // source array address 1769 const Register to = rsi; // destination array address 1770 const Register count = rdx; // elements count 1771 const Register word_count = rcx; 1772 const Register qword_count = count; 1773 1774 __ enter(); // required for proper stackwalking of RuntimeStub frame 1775 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 
1776 1777 if (entry != NULL) { 1778 *entry = __ pc(); 1779 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1780 BLOCK_COMMENT("Entry:"); 1781 } 1782 1783 array_overlap_test(nooverlap_target, Address::times_2); 1784 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1785 // r9 and r10 may be used to save non-volatile registers 1786 1787 // 'from', 'to' and 'count' are now valid 1788 __ movptr(word_count, count); 1789 __ shrptr(count, 2); // count => qword_count 1790 1791 // Copy from high to low addresses. Use 'to' as scratch. 1792 1793 // Check for and copy trailing word 1794 __ testl(word_count, 1); 1795 __ jccb(Assembler::zero, L_copy_4_bytes); 1796 __ movw(rax, Address(from, word_count, Address::times_2, -2)); 1797 __ movw(Address(to, word_count, Address::times_2, -2), rax); 1798 1799 // Check for and copy trailing dword 1800 __ BIND(L_copy_4_bytes); 1801 __ testl(word_count, 2); 1802 __ jcc(Assembler::zero, L_copy_bytes); 1803 __ movl(rax, Address(from, qword_count, Address::times_8)); 1804 __ movl(Address(to, qword_count, Address::times_8), rax); 1805 __ jmp(L_copy_bytes); 1806 1807 // Copy trailing qwords 1808 __ BIND(L_copy_8_bytes); 1809 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 1810 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 1811 __ decrement(qword_count); 1812 __ jcc(Assembler::notZero, L_copy_8_bytes); 1813 1814 restore_arg_regs(); 1815 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free 1816 __ xorptr(rax, rax); // return 0 1817 __ leave(); // required for proper stackwalking of RuntimeStub frame 1818 __ ret(0); 1819 1820 // Copy in multi-bytes chunks 1821 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 1822 1823 restore_arg_regs(); 1824 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free 1825 __ xorptr(rax, rax); // return 0 1826 __ leave(); // required for proper 
stackwalking of RuntimeStub frame 1827 __ ret(0); 1828 1829 return start; 1830 } 1831 1832 // Arguments: 1833 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1834 // ignored 1835 // is_oop - true => oop array, so generate store check code 1836 // name - stub name string 1837 // 1838 // Inputs: 1839 // c_rarg0 - source array address 1840 // c_rarg1 - destination array address 1841 // c_rarg2 - element count, treated as ssize_t, can be zero 1842 // 1843 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1844 // the hardware handle it. The two dwords within qwords that span 1845 // cache line boundaries will still be loaded and stored atomicly. 1846 // 1847 // Side Effects: 1848 // disjoint_int_copy_entry is set to the no-overlap entry point 1849 // used by generate_conjoint_int_oop_copy(). 1850 // 1851 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, 1852 const char *name, bool dest_uninitialized = false) { 1853 __ align(CodeEntryAlignment); 1854 StubCodeMark mark(this, "StubRoutines", name); 1855 address start = __ pc(); 1856 1857 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; 1858 const Register from = rdi; // source array address 1859 const Register to = rsi; // destination array address 1860 const Register count = rdx; // elements count 1861 const Register dword_count = rcx; 1862 const Register qword_count = count; 1863 const Register end_from = from; // source array end address 1864 const Register end_to = to; // destination array end address 1865 const Register saved_to = r11; // saved destination array address 1866 // End pointers are inclusive, and if count is not zero they point 1867 // to the last unit copied: end_to[0] := end_from[0] 1868 1869 __ enter(); // required for proper stackwalking of RuntimeStub frame 1870 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 
1871 1872 if (entry != NULL) { 1873 *entry = __ pc(); 1874 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1875 BLOCK_COMMENT("Entry:"); 1876 } 1877 1878 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1879 // r9 and r10 may be used to save non-volatile registers 1880 if (is_oop) { 1881 __ movq(saved_to, to); 1882 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); 1883 } 1884 1885 // 'from', 'to' and 'count' are now valid 1886 __ movptr(dword_count, count); 1887 __ shrptr(count, 1); // count => qword_count 1888 1889 // Copy from low to high addresses. Use 'to' as scratch. 1890 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); 1891 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); 1892 __ negptr(qword_count); 1893 __ jmp(L_copy_bytes); 1894 1895 // Copy trailing qwords 1896 __ BIND(L_copy_8_bytes); 1897 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); 1898 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); 1899 __ increment(qword_count); 1900 __ jcc(Assembler::notZero, L_copy_8_bytes); 1901 1902 // Check for and copy trailing dword 1903 __ BIND(L_copy_4_bytes); 1904 __ testl(dword_count, 1); // Only byte test since the value is 0 or 1 1905 __ jccb(Assembler::zero, L_exit); 1906 __ movl(rax, Address(end_from, 8)); 1907 __ movl(Address(end_to, 8), rax); 1908 1909 __ BIND(L_exit); 1910 if (is_oop) { 1911 __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4)); 1912 gen_write_ref_array_post_barrier(saved_to, end_to, rax); 1913 } 1914 restore_arg_regs(); 1915 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free 1916 __ xorptr(rax, rax); // return 0 1917 __ leave(); // required for proper stackwalking of RuntimeStub frame 1918 __ ret(0); 1919 1920 // Copy in multi-bytes chunks 1921 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 1922 __ jmp(L_copy_4_bytes); 1923 1924 return 
start; 1925 } 1926 1927 // Arguments: 1928 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1929 // ignored 1930 // is_oop - true => oop array, so generate store check code 1931 // name - stub name string 1932 // 1933 // Inputs: 1934 // c_rarg0 - source array address 1935 // c_rarg1 - destination array address 1936 // c_rarg2 - element count, treated as ssize_t, can be zero 1937 // 1938 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1939 // the hardware handle it. The two dwords within qwords that span 1940 // cache line boundaries will still be loaded and stored atomicly. 1941 // 1942 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target, 1943 address *entry, const char *name, 1944 bool dest_uninitialized = false) { 1945 __ align(CodeEntryAlignment); 1946 StubCodeMark mark(this, "StubRoutines", name); 1947 address start = __ pc(); 1948 1949 Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; 1950 const Register from = rdi; // source array address 1951 const Register to = rsi; // destination array address 1952 const Register count = rdx; // elements count 1953 const Register dword_count = rcx; 1954 const Register qword_count = count; 1955 1956 __ enter(); // required for proper stackwalking of RuntimeStub frame 1957 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1958 1959 if (entry != NULL) { 1960 *entry = __ pc(); 1961 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1962 BLOCK_COMMENT("Entry:"); 1963 } 1964 1965 array_overlap_test(nooverlap_target, Address::times_4); 1966 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1967 // r9 and r10 may be used to save non-volatile registers 1968 1969 if (is_oop) { 1970 // no registers are destroyed by this call 1971 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); 1972 } 1973 1974 assert_clean_int(count, rax); // Make sure 'count' is clean int. 
1975 // 'from', 'to' and 'count' are now valid 1976 __ movptr(dword_count, count); 1977 __ shrptr(count, 1); // count => qword_count 1978 1979 // Copy from high to low addresses. Use 'to' as scratch. 1980 1981 // Check for and copy trailing dword 1982 __ testl(dword_count, 1); 1983 __ jcc(Assembler::zero, L_copy_bytes); 1984 __ movl(rax, Address(from, dword_count, Address::times_4, -4)); 1985 __ movl(Address(to, dword_count, Address::times_4, -4), rax); 1986 __ jmp(L_copy_bytes); 1987 1988 // Copy trailing qwords 1989 __ BIND(L_copy_8_bytes); 1990 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 1991 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 1992 __ decrement(qword_count); 1993 __ jcc(Assembler::notZero, L_copy_8_bytes); 1994 1995 if (is_oop) { 1996 __ jmp(L_exit); 1997 } 1998 restore_arg_regs(); 1999 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free 2000 __ xorptr(rax, rax); // return 0 2001 __ leave(); // required for proper stackwalking of RuntimeStub frame 2002 __ ret(0); 2003 2004 // Copy in multi-bytes chunks 2005 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 2006 2007 __ bind(L_exit); 2008 if (is_oop) { 2009 Register end_to = rdx; 2010 __ leaq(end_to, Address(to, dword_count, Address::times_4, -4)); 2011 gen_write_ref_array_post_barrier(to, end_to, rax); 2012 } 2013 restore_arg_regs(); 2014 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free 2015 __ xorptr(rax, rax); // return 0 2016 __ leave(); // required for proper stackwalking of RuntimeStub frame 2017 __ ret(0); 2018 2019 return start; 2020 } 2021 2022 // Arguments: 2023 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes 2024 // ignored 2025 // is_oop - true => oop array, so generate store check code 2026 // name - stub name string 2027 // 2028 // Inputs: 2029 // c_rarg0 - source array address 2030 // c_rarg1 - 
destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // Side Effects:
  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  //
  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
                                          const char *name, bool dest_uninitialized = false) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_copy_bytes, L_copy_8_bytes, L_exit;
    const Register from        = rdi;  // source array address
    const Register to          = rsi;  // destination array address
    const Register qword_count = rdx;  // elements count
    const Register end_from    = from; // source array end address
    const Register end_to      = rcx;  // destination array end address
    const Register saved_to    = to;
    // End pointers are inclusive, and if count is not zero they point
    // to the last unit copied:  end_to[0] := end_from[0]

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    // Save no-overlap entry point for generate_conjoint_long_oop_copy()
    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                      // r9 and r10 may be used to save non-volatile registers
    // 'from', 'to' and 'qword_count' are now valid
    if (is_oop) {
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
    }

    // Copy from low to high addresses.  Use 'to' as scratch.
    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
    __ negptr(qword_count);
    __ jmp(L_copy_bytes);

    // Copy trailing qwords
  __ BIND(L_copy_8_bytes);
    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
    __ increment(qword_count);
    __ jcc(Assembler::notZero, L_copy_8_bytes);

    if (is_oop) {
      __ jmp(L_exit);
    } else {
      restore_arg_regs();
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
      __ xorptr(rax, rax); // return 0
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
    }

    // Copy in multi-bytes chunks
    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);

    if (is_oop) {
    __ BIND(L_exit);
      // end_to still holds the inclusive end address computed above
      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
    }
    restore_arg_regs();
    if (is_oop) {
      inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
    } else {
      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
    }
    __ xorptr(rax, rax); // return 0
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
                                          address nooverlap_target, address *entry,
2127 const char *name, bool dest_uninitialized = false) { 2128 __ align(CodeEntryAlignment); 2129 StubCodeMark mark(this, "StubRoutines", name); 2130 address start = __ pc(); 2131 2132 Label L_copy_bytes, L_copy_8_bytes, L_exit; 2133 const Register from = rdi; // source array address 2134 const Register to = rsi; // destination array address 2135 const Register qword_count = rdx; // elements count 2136 const Register saved_count = rcx; 2137 2138 __ enter(); // required for proper stackwalking of RuntimeStub frame 2139 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 2140 2141 if (entry != NULL) { 2142 *entry = __ pc(); 2143 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 2144 BLOCK_COMMENT("Entry:"); 2145 } 2146 2147 array_overlap_test(nooverlap_target, Address::times_8); 2148 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 2149 // r9 and r10 may be used to save non-volatile registers 2150 // 'from', 'to' and 'qword_count' are now valid 2151 if (is_oop) { 2152 // Save to and count for store barrier 2153 __ movptr(saved_count, qword_count); 2154 // No registers are destroyed by this call 2155 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized); 2156 } 2157 2158 __ jmp(L_copy_bytes); 2159 2160 // Copy trailing qwords 2161 __ BIND(L_copy_8_bytes); 2162 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 2163 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 2164 __ decrement(qword_count); 2165 __ jcc(Assembler::notZero, L_copy_8_bytes); 2166 2167 if (is_oop) { 2168 __ jmp(L_exit); 2169 } else { 2170 restore_arg_regs(); 2171 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free 2172 __ xorptr(rax, rax); // return 0 2173 __ leave(); // required for proper stackwalking of RuntimeStub frame 2174 __ ret(0); 2175 } 2176 2177 // Copy in multi-bytes chunks 2178 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); 2179 
2180 if (is_oop) { 2181 __ BIND(L_exit); 2182 __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); 2183 gen_write_ref_array_post_barrier(to, rcx, rax); 2184 } 2185 restore_arg_regs(); 2186 if (is_oop) { 2187 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free 2188 } else { 2189 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free 2190 } 2191 __ xorptr(rax, rax); // return 0 2192 __ leave(); // required for proper stackwalking of RuntimeStub frame 2193 __ ret(0); 2194 2195 return start; 2196 } 2197 2198 |