
src/cpu/ppc/vm/stubGenerator_ppc.cpp

rev 9821 : 8146613: PPC64: C2 does no longer respect int to long conversion for stub calls
Reviewed-by: goetz
   1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright 2012, 2015 SAP AG. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *


1060       __ blr();
1061     }
1062 
1063     if (t == T_SHORT) {
1064       Label L_fill_2;
1065       __ bind(L_fill_elements);
1066       __ andi_(temp, count, 1);
1067       __ beq(CCR0, L_fill_2);
1068       __ sth(value, 0, to);
1069       __ addi(to, to, 2);
1070       __ bind(L_fill_2);
1071       __ andi_(temp, count, 2);
1072       __ beq(CCR0, L_exit);
1073       __ sth(value, 0, to);
1074       __ sth(value, 2, to);
1075       __ blr();
1076     }
1077     return start;
1078   }
1079 






1080 
1081   // Generate overlap test for array copy stubs.
1082   //
1083   // Input:
1084   //   R3_ARG1    -  from
1085   //   R4_ARG2    -  to
1086   //   R5_ARG3    -  element count
1087   //
1088   void array_overlap_test(address no_overlap_target, int log2_elem_size) {
1089     Register tmp1 = R6_ARG4;
1090     Register tmp2 = R7_ARG5;
1091 
1092 #ifdef ASSERT
1093     __ srdi_(tmp2, R5_ARG3, 31);
1094     __ asm_assert_eq("missing zero extend", 0xAFFE);
1095 #endif
1096 
1097     __ subf(tmp1, R3_ARG1, R4_ARG2); // distance in bytes
1098     __ sldi(tmp2, R5_ARG3, log2_elem_size); // size in bytes
1099     __ cmpld(CCR0, R3_ARG1, R4_ARG2); // Use unsigned comparison!
1100     __ cmpld(CCR1, tmp1, tmp2);
1101     __ crnand(CCR0, Assembler::less, CCR1, Assembler::less);
1102     // Overlaps if Src before dst and distance smaller than size.
1103     // Branch to forward copy routine otherwise (within range of 32kB).
1104     __ bc(Assembler::bcondCRbiIs1, Assembler::bi0(CCR0, Assembler::less), no_overlap_target);
1105 
1106     // need to copy backwards
1107   }
1108 
1109   // The guideline in the implementations of generate_disjoint_xxx_copy
1110   // (xxx=byte,short,int,long,oop) is to copy as many elements as possible with
1111   // single instructions, but to avoid alignment interrupts (see subsequent
1112   // comment). Furthermore, we try to minimize misaligned accesses, even
1113   // though they cause no alignment interrupt.
1114   //
1115   // In Big-Endian mode, the PowerPC architecture requires implementations to
1116   // automatically handle misaligned integer halfword and word accesses,
1117   // word-aligned integer doubleword accesses, and word-aligned floating-point
1118   // accesses. Other accesses may or may not generate an Alignment interrupt
1119   // depending on the implementation.
1120   // Alignment interrupt handling may require on the order of hundreds of cycles,
1121   // so every effort should be made to avoid misaligned memory accesses.
1122   //
1123   //
1124   // Generate stub for disjoint byte copy.  If "aligned" is true, the
1125   // "from" and "to" addresses are assumed to be heapword aligned.
1126   //
1127   // Arguments for generated stub:
1128   //      from:  R3_ARG1
1129   //      to:    R4_ARG2
1130   //      count: R5_ARG3 treated as signed
1131   //
1132   address generate_disjoint_byte_copy(bool aligned, const char * name) {
1133     StubCodeMark mark(this, "StubRoutines", name);
1134     address start = __ function_entry();

1135 
1136     Register tmp1 = R6_ARG4;
1137     Register tmp2 = R7_ARG5;
1138     Register tmp3 = R8_ARG6;
1139     Register tmp4 = R9_ARG7;
1140 
1141 
1142     Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;

1143     // Don't try anything fancy if arrays don't have many elements.
1144     __ li(tmp3, 0);
1145     __ cmpwi(CCR0, R5_ARG3, 17);
1146     __ ble(CCR0, l_6); // copy 4 at a time
1147 
1148     if (!aligned) {
1149       __ xorr(tmp1, R3_ARG1, R4_ARG2);
1150       __ andi_(tmp1, tmp1, 3);
1151       __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy.
1152 
1153       // Copy elements if necessary to align to 4 bytes.
1154       __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary.
1155       __ andi_(tmp1, tmp1, 3);
1156       __ beq(CCR0, l_2);
1157 
1158       __ subf(R5_ARG3, tmp1, R5_ARG3);
1159       __ bind(l_9);
1160       __ lbz(tmp2, 0, R3_ARG1);
1161       __ addic_(tmp1, tmp1, -1);
1162       __ stb(tmp2, 0, R4_ARG2);


1247     }
1248 
1249     __ bind(l_4);
1250     __ li(R3_RET, 0); // return 0
1251     __ blr();
1252 
1253     return start;
1254   }
1255 
1256   // Generate stub for conjoint byte copy.  If "aligned" is true, the
1257   // "from" and "to" addresses are assumed to be heapword aligned.
1258   //
1259   // Arguments for generated stub:
1260   //      from:  R3_ARG1
1261   //      to:    R4_ARG2
1262   //      count: R5_ARG3 treated as signed
1263   //
1264   address generate_conjoint_byte_copy(bool aligned, const char * name) {
1265     StubCodeMark mark(this, "StubRoutines", name);
1266     address start = __ function_entry();

1267 
1268     Register tmp1 = R6_ARG4;
1269     Register tmp2 = R7_ARG5;
1270     Register tmp3 = R8_ARG6;
1271 
1272     address nooverlap_target = aligned ?
1273       STUB_ENTRY(arrayof_jbyte_disjoint_arraycopy) :
1274       STUB_ENTRY(jbyte_disjoint_arraycopy);
1275 
1276     array_overlap_test(nooverlap_target, 0);
1277     // Do reverse copy. We assume the case of actual overlap is rare enough
1278     // that we don't have to optimize it.
1279     Label l_1, l_2;
1280 
1281     __ b(l_2);
1282     __ bind(l_1);
1283     __ stbx(tmp1, R4_ARG2, R5_ARG3);
1284     __ bind(l_2);
1285     __ addic_(R5_ARG3, R5_ARG3, -1);
1286     __ lbzx(tmp1, R3_ARG1, R5_ARG3);


1339   //  instructions in this stub. POWER allows such accesses.
1340   //
1341   //  According to the manuals (PowerISA_V2.06_PUBLIC, Book II,
1342   //  Chapter 2: Effect of Operand Placement on Performance) unaligned
1343   //  integer load/stores have good performance. Only unaligned
1344   //  floating point load/stores can have poor performance.
1345   //
1346   //  TODO:
1347   //
1348   //  1. check if aligning the backbranch target of loops is beneficial
1349   //
1350   address generate_disjoint_short_copy(bool aligned, const char * name) {
1351     StubCodeMark mark(this, "StubRoutines", name);
1352 
1353     Register tmp1 = R6_ARG4;
1354     Register tmp2 = R7_ARG5;
1355     Register tmp3 = R8_ARG6;
1356     Register tmp4 = R9_ARG7;
1357 
1358     address start = __ function_entry();

1359 
1360       Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;

1361     // don't try anything fancy if arrays don't have many elements
1362     __ li(tmp3, 0);
1363     __ cmpwi(CCR0, R5_ARG3, 9);
1364     __ ble(CCR0, l_6); // copy 2 at a time
1365 
1366     if (!aligned) {
1367       __ xorr(tmp1, R3_ARG1, R4_ARG2);
1368       __ andi_(tmp1, tmp1, 3);
1369       __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy
1370 
1371       // At this point it is guaranteed that both from and to have the same alignment mod 4.
1372 
1373       // Copy 1 element if necessary to align to 4 bytes.
1374       __ andi_(tmp1, R3_ARG1, 3);
1375       __ beq(CCR0, l_2);
1376 
1377       __ lhz(tmp2, 0, R3_ARG1);
1378       __ addi(R3_ARG1, R3_ARG1, 2);
1379       __ sth(tmp2, 0, R4_ARG2);
1380       __ addi(R4_ARG2, R4_ARG2, 2);


1469       __ bdnz(l_5);
1470     }
1471     __ bind(l_4);
1472     __ li(R3_RET, 0); // return 0
1473     __ blr();
1474 
1475     return start;
1476   }
1477 
1478   // Generate stub for conjoint short copy.  If "aligned" is true, the
1479   // "from" and "to" addresses are assumed to be heapword aligned.
1480   //
1481   // Arguments for generated stub:
1482   //      from:  R3_ARG1
1483   //      to:    R4_ARG2
1484   //      count: R5_ARG3 treated as signed
1485   //
1486   address generate_conjoint_short_copy(bool aligned, const char * name) {
1487     StubCodeMark mark(this, "StubRoutines", name);
1488     address start = __ function_entry();

1489 
1490     Register tmp1 = R6_ARG4;
1491     Register tmp2 = R7_ARG5;
1492     Register tmp3 = R8_ARG6;
1493 
1494     address nooverlap_target = aligned ?
1495       STUB_ENTRY(arrayof_jshort_disjoint_arraycopy) :
1496       STUB_ENTRY(jshort_disjoint_arraycopy);
1497 
1498     array_overlap_test(nooverlap_target, 1);
1499 
1500     Label l_1, l_2;
1501     __ sldi(tmp1, R5_ARG3, 1);
1502     __ b(l_2);
1503     __ bind(l_1);
1504     __ sthx(tmp2, R4_ARG2, tmp1);
1505     __ bind(l_2);
1506     __ addic_(tmp1, tmp1, -2);
1507     __ lhzx(tmp2, R3_ARG1, tmp1);
1508     __ bge(CCR0, l_1);


1511     __ blr();
1512 
1513     return start;
1514   }
1515 
1516   // Generate core code for disjoint int copy (and oop copy on 32-bit).  If "aligned"
1517   // is true, the "from" and "to" addresses are assumed to be heapword aligned.
1518   //
1519   // Arguments:
1520   //      from:  R3_ARG1
1521   //      to:    R4_ARG2
1522   //      count: R5_ARG3 treated as signed
1523   //
1524   void generate_disjoint_int_copy_core(bool aligned) {
1525     Register tmp1 = R6_ARG4;
1526     Register tmp2 = R7_ARG5;
1527     Register tmp3 = R8_ARG6;
1528     Register tmp4 = R0;
1529 
1530     Label l_1, l_2, l_3, l_4, l_5, l_6;

1531     // for short arrays, just do single element copy
1532     __ li(tmp3, 0);
1533     __ cmpwi(CCR0, R5_ARG3, 5);
1534     __ ble(CCR0, l_2);
1535 
1536     if (!aligned) {
1537         // check if arrays have same alignment mod 8.
1538         __ xorr(tmp1, R3_ARG1, R4_ARG2);
1539         __ andi_(R0, tmp1, 7);
1540         // Not the same alignment, but ld and std just need to be 4 byte aligned.
1541         __ bne(CCR0, l_4); // to OR from is 8 byte aligned -> copy 2 at a time
1542 
1543         // copy 1 element to align to and from on an 8 byte boundary
1544         __ andi_(R0, R3_ARG1, 7);
1545         __ beq(CCR0, l_4);
1546 
1547         __ lwzx(tmp2, R3_ARG1, tmp3);
1548         __ addi(R5_ARG3, R5_ARG3, -1);
1549         __ stwx(tmp2, R4_ARG2, tmp3);
1550         { // FasterArrayCopy


1593       __ lwzu(tmp2, 4, R3_ARG1);
1594       __ stwu(tmp2, 4, R4_ARG2);
1595       __ bdnz(l_3);
1596     }
1597 
1598     __ bind(l_1);
1599     return;
1600   }
1601 
1602   // Generate stub for disjoint int copy.  If "aligned" is true, the
1603   // "from" and "to" addresses are assumed to be heapword aligned.
1604   //
1605   // Arguments for generated stub:
1606   //      from:  R3_ARG1
1607   //      to:    R4_ARG2
1608   //      count: R5_ARG3 treated as signed
1609   //
1610   address generate_disjoint_int_copy(bool aligned, const char * name) {
1611     StubCodeMark mark(this, "StubRoutines", name);
1612     address start = __ function_entry();

1613     generate_disjoint_int_copy_core(aligned);
1614     __ li(R3_RET, 0); // return 0
1615     __ blr();
1616     return start;
1617   }
1618 
1619   // Generate core code for conjoint int copy (and oop copy on
1620   // 32-bit).  If "aligned" is true, the "from" and "to" addresses
1621   // are assumed to be heapword aligned.
1622   //
1623   // Arguments:
1624   //      from:  R3_ARG1
1625   //      to:    R4_ARG2
1626   //      count: R5_ARG3 treated as signed
1627   //
1628   void generate_conjoint_int_copy_core(bool aligned) {
1629     // Do reverse copy.  We assume the case of actual overlap is rare enough
1630     // that we don't have to optimize it.
1631 
1632     Label l_1, l_2, l_3, l_4, l_5, l_6;


1678       __ stw(R0, -4, R4_ARG2);
1679       __ addi(R3_ARG1, R3_ARG1, -4);
1680       __ addi(R4_ARG2, R4_ARG2, -4);
1681       __ bdnz(l_3);
1682 
1683       __ bind(l_6);
1684     }
1685   }
1686 
1687   // Generate stub for conjoint int copy.  If "aligned" is true, the
1688   // "from" and "to" addresses are assumed to be heapword aligned.
1689   //
1690   // Arguments for generated stub:
1691   //      from:  R3_ARG1
1692   //      to:    R4_ARG2
1693   //      count: R5_ARG3 treated as signed
1694   //
1695   address generate_conjoint_int_copy(bool aligned, const char * name) {
1696     StubCodeMark mark(this, "StubRoutines", name);
1697     address start = __ function_entry();
1698 
1699     address nooverlap_target = aligned ?
1700       STUB_ENTRY(arrayof_jint_disjoint_arraycopy) :
1701       STUB_ENTRY(jint_disjoint_arraycopy);
1702 
1703     array_overlap_test(nooverlap_target, 2);
1704 
1705     generate_conjoint_int_copy_core(aligned);
1706 
1707     __ li(R3_RET, 0); // return 0
1708     __ blr();
1709 
1710     return start;
1711   }
1712 
1713   // Generate core code for disjoint long copy (and oop copy on
1714   // 64-bit).  If "aligned" is true, the "from" and "to" addresses
1715   // are assumed to be heapword aligned.
1716   //
1717   // Arguments:
1718   //      from:  R3_ARG1


1765       __ bind(l_2);
1766       __ ldu(R0, 8, R3_ARG1);
1767       __ stdu(R0, 8, R4_ARG2);
1768       __ bdnz(l_2);
1769 
1770     }
1771     __ bind(l_1);
1772   }
1773 
1774   // Generate stub for disjoint long copy.  If "aligned" is true, the
1775   // "from" and "to" addresses are assumed to be heapword aligned.
1776   //
1777   // Arguments for generated stub:
1778   //      from:  R3_ARG1
1779   //      to:    R4_ARG2
1780   //      count: R5_ARG3 treated as signed
1781   //
1782   address generate_disjoint_long_copy(bool aligned, const char * name) {
1783     StubCodeMark mark(this, "StubRoutines", name);
1784     address start = __ function_entry();

1785     generate_disjoint_long_copy_core(aligned);
1786     __ li(R3_RET, 0); // return 0
1787     __ blr();
1788 
1789     return start;
1790   }
1791 
1792   // Generate core code for conjoint long copy (and oop copy on
1793   // 64-bit).  If "aligned" is true, the "from" and "to" addresses
1794   // are assumed to be heapword aligned.
1795   //
1796   // Arguments:
1797   //      from:  R3_ARG1
1798   //      to:    R4_ARG2
1799   //      count: R5_ARG3 treated as signed
1800   //
1801   void generate_conjoint_long_copy_core(bool aligned) {
1802     Register tmp1 = R6_ARG4;
1803     Register tmp2 = R7_ARG5;
1804     Register tmp3 = R8_ARG6;


1848       __ std(R0, -8, R4_ARG2);
1849       __ addi(R3_ARG1, R3_ARG1, -8);
1850       __ addi(R4_ARG2, R4_ARG2, -8);
1851       __ bdnz(l_3);
1852 
1853     }
1854     __ bind(l_1);
1855   }
1856 
1857   // Generate stub for conjoint long copy.  If "aligned" is true, the
1858   // "from" and "to" addresses are assumed to be heapword aligned.
1859   //
1860   // Arguments for generated stub:
1861   //      from:  R3_ARG1
1862   //      to:    R4_ARG2
1863   //      count: R5_ARG3 treated as signed
1864   //
1865   address generate_conjoint_long_copy(bool aligned, const char * name) {
1866     StubCodeMark mark(this, "StubRoutines", name);
1867     address start = __ function_entry();
1868 
1869     address nooverlap_target = aligned ?
1870       STUB_ENTRY(arrayof_jlong_disjoint_arraycopy) :
1871       STUB_ENTRY(jlong_disjoint_arraycopy);
1872 
1873     array_overlap_test(nooverlap_target, 3);
1874     generate_conjoint_long_copy_core(aligned);
1875 
1876     __ li(R3_RET, 0); // return 0
1877     __ blr();
1878 
1879     return start;
1880   }
1881 
1882   // Generate stub for conjoint oop copy.  If "aligned" is true, the
1883   // "from" and "to" addresses are assumed to be heapword aligned.
1884   //
1885   // Arguments for generated stub:
1886   //      from:  R3_ARG1
1887   //      to:    R4_ARG2
1888   //      count: R5_ARG3 treated as signed
1889   //      dest_uninitialized: G1 support
1890   //
1891   address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1892     StubCodeMark mark(this, "StubRoutines", name);
1893 
1894     address start = __ function_entry();
1895 
1896     address nooverlap_target = aligned ?
1897       STUB_ENTRY(arrayof_oop_disjoint_arraycopy) :
1898       STUB_ENTRY(oop_disjoint_arraycopy);
1899 
1900     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
1901 
1902     // Save arguments.
1903     __ mr(R9_ARG7, R4_ARG2);
1904     __ mr(R10_ARG8, R5_ARG3);
1905 
1906     if (UseCompressedOops) {
1907       array_overlap_test(nooverlap_target, 2);
1908       generate_conjoint_int_copy_core(aligned);
1909     } else {
1910       array_overlap_test(nooverlap_target, 3);
1911       generate_conjoint_long_copy_core(aligned);
1912     }
1913 
1914     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
1915     __ li(R3_RET, 0); // return 0
1916     __ blr();
1917     return start;
1918   }
1919 
1920   // Generate stub for disjoint oop copy.  If "aligned" is true, the
1921   // "from" and "to" addresses are assumed to be heapword aligned.
1922   //
1923   // Arguments for generated stub:
1924   //      from:  R3_ARG1
1925   //      to:    R4_ARG2
1926   //      count: R5_ARG3 treated as signed
1927   //      dest_uninitialized: G1 support
1928   //
1929   address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1930     StubCodeMark mark(this, "StubRoutines", name);
1931     address start = __ function_entry();
1932 
1933     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
1934 
1935     // save some arguments, disjoint_long_copy_core destroys them.
1936     // needed for post barrier
1937     __ mr(R9_ARG7, R4_ARG2);
1938     __ mr(R10_ARG8, R5_ARG3);
1939 
1940     if (UseCompressedOops) {
1941       generate_disjoint_int_copy_core(aligned);
1942     } else {
1943       generate_disjoint_long_copy_core(aligned);
1944     }
1945 
1946     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
1947     __ li(R3_RET, 0); // return 0
1948     __ blr();
1949 
1950     return start;
1951   }
1952 


1986   address generate_checkcast_copy(const char *name, bool dest_uninitialized) {
1987 
1988     const Register R3_from   = R3_ARG1;      // source array address
1989     const Register R4_to     = R4_ARG2;      // destination array address
1990     const Register R5_count  = R5_ARG3;      // elements count
1991     const Register R6_ckoff  = R6_ARG4;      // super_check_offset
1992     const Register R7_ckval  = R7_ARG5;      // super_klass
1993 
1994     const Register R8_offset = R8_ARG6;      // loop var, with stride wordSize
1995     const Register R9_remain = R9_ARG7;      // loop var, with stride -1
1996     const Register R10_oop   = R10_ARG8;     // actual oop copied
1997     const Register R11_klass = R11_scratch1; // oop._klass
1998     const Register R12_tmp   = R12_scratch2;
1999 
2000     const Register R2_minus1 = R2;
2001 
2002     //__ align(CodeEntryAlignment);
2003     StubCodeMark mark(this, "StubRoutines", name);
2004     address start = __ function_entry();
2005 
2006     // TODO: Assert that int is 64 bit sign extended and arrays are not conjoint.

















2007 
2008     gen_write_ref_array_pre_barrier(R3_from, R4_to, R5_count, dest_uninitialized, R12_tmp, /* preserve: */ R6_ckoff, R7_ckval);
2009 
2010     //inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R12_tmp, R3_RET);
2011 
2012     Label load_element, store_element, store_null, success, do_card_marks;
2013     __ or_(R9_remain, R5_count, R5_count); // Initialize loop index, and test it.
2014     __ li(R8_offset, 0);                   // Offset from start of arrays.
2015     __ li(R2_minus1, -1);
2016     __ bne(CCR0, load_element);
2017 
2018     // Empty array: Nothing to do.
2019     __ li(R3_RET, 0);           // Return 0 on (trivial) success.
2020     __ blr();
2021 
2022     // ======== begin loop ========
2023     // (Entry is load_element.)
2024     __ align(OptoLoopAlignment);
2025     __ bind(store_element);
2026     if (UseCompressedOops) {


2435 
2436     // special/generic versions
2437     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", false);
2438     StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true);
2439 
2440     StubRoutines::_unsafe_arraycopy  = generate_unsafe_copy("unsafe_arraycopy",
2441                                                             STUB_ENTRY(jbyte_arraycopy),
2442                                                             STUB_ENTRY(jshort_arraycopy),
2443                                                             STUB_ENTRY(jint_arraycopy),
2444                                                             STUB_ENTRY(jlong_arraycopy));
2445     StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
2446                                                              STUB_ENTRY(jbyte_arraycopy),
2447                                                              STUB_ENTRY(jshort_arraycopy),
2448                                                              STUB_ENTRY(jint_arraycopy),
2449                                                              STUB_ENTRY(oop_arraycopy),
2450                                                              STUB_ENTRY(oop_disjoint_arraycopy),
2451                                                              STUB_ENTRY(jlong_arraycopy),
2452                                                              STUB_ENTRY(checkcast_arraycopy));
2453 
2454     // fill routines

2455     StubRoutines::_jbyte_fill          = generate_fill(T_BYTE,  false, "jbyte_fill");
2456     StubRoutines::_jshort_fill         = generate_fill(T_SHORT, false, "jshort_fill");
2457     StubRoutines::_jint_fill           = generate_fill(T_INT,   false, "jint_fill");
2458     StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
2459     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2460     StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");
2461   }

2462 
2463   // Safefetch stubs.
2464   void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
2465     // safefetch signatures:
2466     //   int      SafeFetch32(int*      adr, int      errValue);
2467     //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
2468     //
2469     // arguments:
2470     //   R3_ARG1 = adr
2471     //   R4_ARG2 = errValue
2472     //
2473     // result:
2474     //   R3_RET  = *adr or errValue
2475 
2476     StubCodeMark mark(this, "StubRoutines", name);
2477 
2478     // Entry point, pc or function descriptor.
2479     *entry = __ function_entry();
2480 
2481     // Load *adr into R4_ARG2, may fault.


2525     const Register zlen  = R8;
2526 
2527     const Register tmp1  = R2; // TOC not used.
2528     const Register tmp2  = R9;
2529     const Register tmp3  = R10;
2530     const Register tmp4  = R11;
2531     const Register tmp5  = R12;
2532 
2533     // non-volatile regs
2534     const Register tmp6  = R31;
2535     const Register tmp7  = R30;
2536     const Register tmp8  = R29;
2537     const Register tmp9  = R28;
2538     const Register tmp10 = R27;
2539     const Register tmp11 = R26;
2540     const Register tmp12 = R25;
2541     const Register tmp13 = R24;
2542 
2543     BLOCK_COMMENT("Entry:");
2544 





2545     // Save non-volatile regs (frameless).
2546     int current_offs = 8;
2547     __ std(R24, -current_offs, R1_SP); current_offs += 8;
2548     __ std(R25, -current_offs, R1_SP); current_offs += 8;
2549     __ std(R26, -current_offs, R1_SP); current_offs += 8;
2550     __ std(R27, -current_offs, R1_SP); current_offs += 8;
2551     __ std(R28, -current_offs, R1_SP); current_offs += 8;
2552     __ std(R29, -current_offs, R1_SP); current_offs += 8;
2553     __ std(R30, -current_offs, R1_SP); current_offs += 8;
2554     __ std(R31, -current_offs, R1_SP);
2555 
2556     __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5,
2557                        tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13);
2558 
2559     // Restore non-volatile regs.
2560     current_offs = 8;
2561     __ ld(R24, -current_offs, R1_SP); current_offs += 8;
2562     __ ld(R25, -current_offs, R1_SP); current_offs += 8;
2563     __ ld(R26, -current_offs, R1_SP); current_offs += 8;
2564     __ ld(R27, -current_offs, R1_SP); current_offs += 8;


   1 /*
   2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright 2012, 2016 SAP AG. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *


1060       __ blr();
1061     }
1062 
1063     if (t == T_SHORT) {
1064       Label L_fill_2;
1065       __ bind(L_fill_elements);
1066       __ andi_(temp, count, 1);
1067       __ beq(CCR0, L_fill_2);
1068       __ sth(value, 0, to);
1069       __ addi(to, to, 2);
1070       __ bind(L_fill_2);
1071       __ andi_(temp, count, 2);
1072       __ beq(CCR0, L_exit);
1073       __ sth(value, 0, to);
1074       __ sth(value, 2, to);
1075       __ blr();
1076     }
1077     return start;
1078   }
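The T_SHORT tail above peels off the last few elements bit by bit. A scalar
sketch of the same logic (assuming at most three elements remain once
L_fill_elements is reached; fill_short_tail is a hypothetical helper, not part
of the stub):

    static void fill_short_tail(jshort* to, jshort value, int count) {
      if (count & 1) { *to++ = value; }                  // one odd element
      if (count & 2) { to[0] = value; to[1] = value; }   // remaining pair
    }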
1079 
1080   inline void assert_positive_int(Register count) {
1081 #ifdef ASSERT
1082     __ srdi_(R0, count, 31);
1083     __ asm_assert_eq("missing zero extend", 0xAFFE);
1084 #endif
1085   }
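The helper above guards against the problem named in the change synopsis: C2 may
pass an int stub argument without the expected zero extension. A scalar sketch of
the debug check (assert_positive_int_sketch is illustrative only): bits 63..31 of
the 64-bit register must be clear, i.e. the value is a zero-extended, non-negative
32-bit count.

    #include <cassert>
    #include <cstdint>

    static void assert_positive_int_sketch(uint64_t count) {
      // srdi_(R0, count, 31) followed by asm_assert_eq: the shifted value must be 0.
      assert((count >> 31) == 0 && "missing zero extend");
    }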
1086 
1087   // Generate overlap test for array copy stubs.
1088   //
1089   // Input:
1090   //   R3_ARG1    -  from
1091   //   R4_ARG2    -  to
1092   //   R5_ARG3    -  element count
1093   //
1094   void array_overlap_test(address no_overlap_target, int log2_elem_size) {
1095     Register tmp1 = R6_ARG4;
1096     Register tmp2 = R7_ARG5;
1097 
1098     assert_positive_int(R5_ARG3);



1099 
1100     __ subf(tmp1, R3_ARG1, R4_ARG2); // distance in bytes
1101     __ sldi(tmp2, R5_ARG3, log2_elem_size); // size in bytes
1102     __ cmpld(CCR0, R3_ARG1, R4_ARG2); // Use unsigned comparison!
1103     __ cmpld(CCR1, tmp1, tmp2);
1104     __ crnand(CCR0, Assembler::less, CCR1, Assembler::less);
1105     // Overlaps if Src before dst and distance smaller than size.
1106     // Branch to forward copy routine otherwise (within range of 32kB).
1107     __ bc(Assembler::bcondCRbiIs1, Assembler::bi0(CCR0, Assembler::less), no_overlap_target);
1108 
1109     // need to copy backwards
1110   }
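A scalar sketch of the condition the CR logic above computes (names are
illustrative; from and to are byte addresses, and count has already been checked
by assert_positive_int):

    static bool needs_backward_copy(const unsigned char* from, const unsigned char* to,
                                    size_t count, int log2_elem_size) {
      // Overlaps if src is before dst and the distance is smaller than the copy size.
      return (from < to) && ((size_t)(to - from) < (count << log2_elem_size));
    }

When this is false the stub branches to no_overlap_target (the forward copy);
otherwise it falls through and copies backwards.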
1111 
1112   // The guideline in the implementations of generate_disjoint_xxx_copy
1113   // (xxx=byte,short,int,long,oop) is to copy as many elements as possible with
1114   // single instructions, but to avoid alignment interrupts (see subsequent
1115   // comment). Furthermore, we try to minimize misaligned accesses, even
1116   // though they cause no alignment interrupt.
1117   //
1118   // In Big-Endian mode, the PowerPC architecture requires implementations to
1119   // automatically handle misaligned integer halfword and word accesses,
1120   // word-aligned integer doubleword accesses, and word-aligned floating-point
1121   // accesses. Other accesses may or may not generate an Alignment interrupt
1122   // depending on the implementation.
1123   // Alignment interrupt handling may require on the order of hundreds of cycles,
1124   // so every effort should be made to avoid misaligned memory accesses.
1125   //
1126   //
1127   // Generate stub for disjoint byte copy.  If "aligned" is true, the
1128   // "from" and "to" addresses are assumed to be heapword aligned.
1129   //
1130   // Arguments for generated stub:
1131   //      from:  R3_ARG1
1132   //      to:    R4_ARG2
1133   //      count: R5_ARG3 treated as signed
1134   //
1135   address generate_disjoint_byte_copy(bool aligned, const char * name) {
1136     StubCodeMark mark(this, "StubRoutines", name);
1137     address start = __ function_entry();
1138     assert_positive_int(R5_ARG3);
1139 
1140     Register tmp1 = R6_ARG4;
1141     Register tmp2 = R7_ARG5;
1142     Register tmp3 = R8_ARG6;
1143     Register tmp4 = R9_ARG7;
1144 

1145     Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;
1146 
1147     // Don't try anything fancy if arrays don't have many elements.
1148     __ li(tmp3, 0);
1149     __ cmpwi(CCR0, R5_ARG3, 17);
1150     __ ble(CCR0, l_6); // copy 4 at a time
1151 
1152     if (!aligned) {
1153       __ xorr(tmp1, R3_ARG1, R4_ARG2);
1154       __ andi_(tmp1, tmp1, 3);
1155       __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy.
1156 
1157       // Copy elements if necessary to align to 4 bytes.
1158       __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary.
1159       __ andi_(tmp1, tmp1, 3);
1160       __ beq(CCR0, l_2);
1161 
1162       __ subf(R5_ARG3, tmp1, R5_ARG3);
1163       __ bind(l_9);
1164       __ lbz(tmp2, 0, R3_ARG1);
1165       __ addic_(tmp1, tmp1, -1);
1166       __ stb(tmp2, 0, R4_ARG2);


1251     }
1252 
1253     __ bind(l_4);
1254     __ li(R3_RET, 0); // return 0
1255     __ blr();
1256 
1257     return start;
1258   }
1259 
1260   // Generate stub for conjoint byte copy.  If "aligned" is true, the
1261   // "from" and "to" addresses are assumed to be heapword aligned.
1262   //
1263   // Arguments for generated stub:
1264   //      from:  R3_ARG1
1265   //      to:    R4_ARG2
1266   //      count: R5_ARG3 treated as signed
1267   //
1268   address generate_conjoint_byte_copy(bool aligned, const char * name) {
1269     StubCodeMark mark(this, "StubRoutines", name);
1270     address start = __ function_entry();
1271     assert_positive_int(R5_ARG3);
1272 
1273     Register tmp1 = R6_ARG4;
1274     Register tmp2 = R7_ARG5;
1275     Register tmp3 = R8_ARG6;
1276 
1277     address nooverlap_target = aligned ?
1278       STUB_ENTRY(arrayof_jbyte_disjoint_arraycopy) :
1279       STUB_ENTRY(jbyte_disjoint_arraycopy);
1280 
1281     array_overlap_test(nooverlap_target, 0);
1282     // Do reverse copy. We assume the case of actual overlap is rare enough
1283     // that we don't have to optimize it.
1284     Label l_1, l_2;
1285 
1286     __ b(l_2);
1287     __ bind(l_1);
1288     __ stbx(tmp1, R4_ARG2, R5_ARG3);
1289     __ bind(l_2);
1290     __ addic_(R5_ARG3, R5_ARG3, -1);
1291     __ lbzx(tmp1, R3_ARG1, R5_ARG3);


1344   //  instructions in this stub. POWER allows such accesses.
1345   //
1346   //  According to the manuals (PowerISA_V2.06_PUBLIC, Book II,
1347   //  Chapter 2: Effect of Operand Placement on Performance) unaligned
1348   //  integer load/stores have good performance. Only unaligned
1349   //  floating point load/stores can have poor performance.
1350   //
1351   //  TODO:
1352   //
1353   //  1. check if aligning the backbranch target of loops is beneficial
1354   //
1355   address generate_disjoint_short_copy(bool aligned, const char * name) {
1356     StubCodeMark mark(this, "StubRoutines", name);
1357 
1358     Register tmp1 = R6_ARG4;
1359     Register tmp2 = R7_ARG5;
1360     Register tmp3 = R8_ARG6;
1361     Register tmp4 = R9_ARG7;
1362 
1363     address start = __ function_entry();
1364     assert_positive_int(R5_ARG3);
1365 
1366       Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
1367 
1368     // don't try anything fancy if arrays don't have many elements
1369     __ li(tmp3, 0);
1370     __ cmpwi(CCR0, R5_ARG3, 9);
1371     __ ble(CCR0, l_6); // copy 2 at a time
1372 
1373     if (!aligned) {
1374       __ xorr(tmp1, R3_ARG1, R4_ARG2);
1375       __ andi_(tmp1, tmp1, 3);
1376       __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy
1377 
1378       // At this point it is guaranteed that both from and to have the same alignment mod 4.
1379 
1380       // Copy 1 element if necessary to align to 4 bytes.
1381       __ andi_(tmp1, R3_ARG1, 3);
1382       __ beq(CCR0, l_2);
1383 
1384       __ lhz(tmp2, 0, R3_ARG1);
1385       __ addi(R3_ARG1, R3_ARG1, 2);
1386       __ sth(tmp2, 0, R4_ARG2);
1387       __ addi(R4_ARG2, R4_ARG2, 2);


1476       __ bdnz(l_5);
1477     }
1478     __ bind(l_4);
1479     __ li(R3_RET, 0); // return 0
1480     __ blr();
1481 
1482     return start;
1483   }
1484 
1485   // Generate stub for conjoint short copy.  If "aligned" is true, the
1486   // "from" and "to" addresses are assumed to be heapword aligned.
1487   //
1488   // Arguments for generated stub:
1489   //      from:  R3_ARG1
1490   //      to:    R4_ARG2
1491   //      count: R5_ARG3 treated as signed
1492   //
1493   address generate_conjoint_short_copy(bool aligned, const char * name) {
1494     StubCodeMark mark(this, "StubRoutines", name);
1495     address start = __ function_entry();
1496     assert_positive_int(R5_ARG3);
1497 
1498     Register tmp1 = R6_ARG4;
1499     Register tmp2 = R7_ARG5;
1500     Register tmp3 = R8_ARG6;
1501 
1502     address nooverlap_target = aligned ?
1503       STUB_ENTRY(arrayof_jshort_disjoint_arraycopy) :
1504       STUB_ENTRY(jshort_disjoint_arraycopy);
1505 
1506     array_overlap_test(nooverlap_target, 1);
1507 
1508     Label l_1, l_2;
1509     __ sldi(tmp1, R5_ARG3, 1);
1510     __ b(l_2);
1511     __ bind(l_1);
1512     __ sthx(tmp2, R4_ARG2, tmp1);
1513     __ bind(l_2);
1514     __ addic_(tmp1, tmp1, -2);
1515     __ lhzx(tmp2, R3_ARG1, tmp1);
1516     __ bge(CCR0, l_1);


1519     __ blr();
1520 
1521     return start;
1522   }
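The loop above walks a byte offset down from the end of the arrays. A scalar
sketch of the same reverse copy (conjoint_short_copy_sketch and its types are
illustrative only):

    static void conjoint_short_copy_sketch(const jbyte* from, jbyte* to, long count) {
      // sldi(tmp1, count, 1) sets the byte offset; addic_(tmp1, tmp1, -2) steps it down.
      for (long off = 2 * count - 2; off >= 0; off -= 2) {
        *(jshort*)(to + off) = *(const jshort*)(from + off);  // one halfword, back to front
      }
    }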
1523 
1524   // Generate core code for disjoint int copy (and oop copy on 32-bit).  If "aligned"
1525   // is true, the "from" and "to" addresses are assumed to be heapword aligned.
1526   //
1527   // Arguments:
1528   //      from:  R3_ARG1
1529   //      to:    R4_ARG2
1530   //      count: R5_ARG3 treated as signed
1531   //
1532   void generate_disjoint_int_copy_core(bool aligned) {
1533     Register tmp1 = R6_ARG4;
1534     Register tmp2 = R7_ARG5;
1535     Register tmp3 = R8_ARG6;
1536     Register tmp4 = R0;
1537 
1538     Label l_1, l_2, l_3, l_4, l_5, l_6;
1539 
1540     // for short arrays, just do single element copy
1541     __ li(tmp3, 0);
1542     __ cmpwi(CCR0, R5_ARG3, 5);
1543     __ ble(CCR0, l_2);
1544 
1545     if (!aligned) {
1546         // check if arrays have same alignment mod 8.
1547         __ xorr(tmp1, R3_ARG1, R4_ARG2);
1548         __ andi_(R0, tmp1, 7);
1549         // Not the same alignment, but ld and std just need to be 4 byte aligned.
1550         __ bne(CCR0, l_4); // to OR from is 8 byte aligned -> copy 2 at a time
1551 
1552         // copy 1 element to align to and from on an 8 byte boundary
1553         __ andi_(R0, R3_ARG1, 7);
1554         __ beq(CCR0, l_4);
1555 
1556         __ lwzx(tmp2, R3_ARG1, tmp3);
1557         __ addi(R5_ARG3, R5_ARG3, -1);
1558         __ stwx(tmp2, R4_ARG2, tmp3);
1559         { // FasterArrayCopy


1602       __ lwzu(tmp2, 4, R3_ARG1);
1603       __ stwu(tmp2, 4, R4_ARG2);
1604       __ bdnz(l_3);
1605     }
1606 
1607     __ bind(l_1);
1608     return;
1609   }
1610 
1611   // Generate stub for disjoint int copy.  If "aligned" is true, the
1612   // "from" and "to" addresses are assumed to be heapword aligned.
1613   //
1614   // Arguments for generated stub:
1615   //      from:  R3_ARG1
1616   //      to:    R4_ARG2
1617   //      count: R5_ARG3 treated as signed
1618   //
1619   address generate_disjoint_int_copy(bool aligned, const char * name) {
1620     StubCodeMark mark(this, "StubRoutines", name);
1621     address start = __ function_entry();
1622     assert_positive_int(R5_ARG3);
1623     generate_disjoint_int_copy_core(aligned);
1624     __ li(R3_RET, 0); // return 0
1625     __ blr();
1626     return start;
1627   }
1628 
1629   // Generate core code for conjoint int copy (and oop copy on
1630   // 32-bit).  If "aligned" is true, the "from" and "to" addresses
1631   // are assumed to be heapword aligned.
1632   //
1633   // Arguments:
1634   //      from:  R3_ARG1
1635   //      to:    R4_ARG2
1636   //      count: R5_ARG3 treated as signed
1637   //
1638   void generate_conjoint_int_copy_core(bool aligned) {
1639     // Do reverse copy.  We assume the case of actual overlap is rare enough
1640     // that we don't have to optimize it.
1641 
1642     Label l_1, l_2, l_3, l_4, l_5, l_6;


1688       __ stw(R0, -4, R4_ARG2);
1689       __ addi(R3_ARG1, R3_ARG1, -4);
1690       __ addi(R4_ARG2, R4_ARG2, -4);
1691       __ bdnz(l_3);
1692 
1693       __ bind(l_6);
1694     }
1695   }
1696 
1697   // Generate stub for conjoint int copy.  If "aligned" is true, the
1698   // "from" and "to" addresses are assumed to be heapword aligned.
1699   //
1700   // Arguments for generated stub:
1701   //      from:  R3_ARG1
1702   //      to:    R4_ARG2
1703   //      count: R5_ARG3 treated as signed
1704   //
1705   address generate_conjoint_int_copy(bool aligned, const char * name) {
1706     StubCodeMark mark(this, "StubRoutines", name);
1707     address start = __ function_entry();
1708     assert_positive_int(R5_ARG3);
1709     address nooverlap_target = aligned ?
1710       STUB_ENTRY(arrayof_jint_disjoint_arraycopy) :
1711       STUB_ENTRY(jint_disjoint_arraycopy);
1712 
1713     array_overlap_test(nooverlap_target, 2);
1714 
1715     generate_conjoint_int_copy_core(aligned);
1716 
1717     __ li(R3_RET, 0); // return 0
1718     __ blr();
1719 
1720     return start;
1721   }
1722 
1723   // Generate core code for disjoint long copy (and oop copy on
1724   // 64-bit).  If "aligned" is true, the "from" and "to" addresses
1725   // are assumed to be heapword aligned.
1726   //
1727   // Arguments:
1728   //      from:  R3_ARG1


1775       __ bind(l_2);
1776       __ ldu(R0, 8, R3_ARG1);
1777       __ stdu(R0, 8, R4_ARG2);
1778       __ bdnz(l_2);
1779 
1780     }
1781     __ bind(l_1);
1782   }
1783 
1784   // Generate stub for disjoint long copy.  If "aligned" is true, the
1785   // "from" and "to" addresses are assumed to be heapword aligned.
1786   //
1787   // Arguments for generated stub:
1788   //      from:  R3_ARG1
1789   //      to:    R4_ARG2
1790   //      count: R5_ARG3 treated as signed
1791   //
1792   address generate_disjoint_long_copy(bool aligned, const char * name) {
1793     StubCodeMark mark(this, "StubRoutines", name);
1794     address start = __ function_entry();
1795     assert_positive_int(R5_ARG3);
1796     generate_disjoint_long_copy_core(aligned);
1797     __ li(R3_RET, 0); // return 0
1798     __ blr();
1799 
1800     return start;
1801   }
1802 
1803   // Generate core code for conjoint long copy (and oop copy on
1804   // 64-bit).  If "aligned" is true, the "from" and "to" addresses
1805   // are assumed to be heapword aligned.
1806   //
1807   // Arguments:
1808   //      from:  R3_ARG1
1809   //      to:    R4_ARG2
1810   //      count: R5_ARG3 treated as signed
1811   //
1812   void generate_conjoint_long_copy_core(bool aligned) {
1813     Register tmp1 = R6_ARG4;
1814     Register tmp2 = R7_ARG5;
1815     Register tmp3 = R8_ARG6;


1859       __ std(R0, -8, R4_ARG2);
1860       __ addi(R3_ARG1, R3_ARG1, -8);
1861       __ addi(R4_ARG2, R4_ARG2, -8);
1862       __ bdnz(l_3);
1863 
1864     }
1865     __ bind(l_1);
1866   }
1867 
1868   // Generate stub for conjoint long copy.  If "aligned" is true, the
1869   // "from" and "to" addresses are assumed to be heapword aligned.
1870   //
1871   // Arguments for generated stub:
1872   //      from:  R3_ARG1
1873   //      to:    R4_ARG2
1874   //      count: R5_ARG3 treated as signed
1875   //
1876   address generate_conjoint_long_copy(bool aligned, const char * name) {
1877     StubCodeMark mark(this, "StubRoutines", name);
1878     address start = __ function_entry();
1879     assert_positive_int(R5_ARG3);
1880     address nooverlap_target = aligned ?
1881       STUB_ENTRY(arrayof_jlong_disjoint_arraycopy) :
1882       STUB_ENTRY(jlong_disjoint_arraycopy);
1883 
1884     array_overlap_test(nooverlap_target, 3);
1885     generate_conjoint_long_copy_core(aligned);
1886 
1887     __ li(R3_RET, 0); // return 0
1888     __ blr();
1889 
1890     return start;
1891   }
1892 
1893   // Generate stub for conjoint oop copy.  If "aligned" is true, the
1894   // "from" and "to" addresses are assumed to be heapword aligned.
1895   //
1896   // Arguments for generated stub:
1897   //      from:  R3_ARG1
1898   //      to:    R4_ARG2
1899   //      count: R5_ARG3 treated as signed
1900   //      dest_uninitialized: G1 support
1901   //
1902   address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1903     StubCodeMark mark(this, "StubRoutines", name);
1904 
1905     address start = __ function_entry();
1906     assert_positive_int(R5_ARG3);
1907     address nooverlap_target = aligned ?
1908       STUB_ENTRY(arrayof_oop_disjoint_arraycopy) :
1909       STUB_ENTRY(oop_disjoint_arraycopy);
1910 
1911     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
1912 
1913     // Save arguments.
1914     __ mr(R9_ARG7, R4_ARG2);
1915     __ mr(R10_ARG8, R5_ARG3);
1916 
1917     if (UseCompressedOops) {
1918       array_overlap_test(nooverlap_target, 2);
1919       generate_conjoint_int_copy_core(aligned);
1920     } else {
1921       array_overlap_test(nooverlap_target, 3);
1922       generate_conjoint_long_copy_core(aligned);
1923     }
1924 
1925     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
1926     __ li(R3_RET, 0); // return 0
1927     __ blr();
1928     return start;
1929   }
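Which element size the overlap test and copy core use follows the oop encoding;
as a sketch:

    int log2_elem_size = UseCompressedOops ? 2   // narrowOop, 4 bytes
                                           : 3;  // full oop, 8 bytes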
1930 
1931   // Generate stub for disjoint oop copy.  If "aligned" is true, the
1932   // "from" and "to" addresses are assumed to be heapword aligned.
1933   //
1934   // Arguments for generated stub:
1935   //      from:  R3_ARG1
1936   //      to:    R4_ARG2
1937   //      count: R5_ARG3 treated as signed
1938   //      dest_uninitialized: G1 support
1939   //
1940   address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
1941     StubCodeMark mark(this, "StubRoutines", name);
1942     address start = __ function_entry();
1943     assert_positive_int(R5_ARG3);
1944     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
1945 
1946     // save some arguments, disjoint_long_copy_core destroys them.
1947     // needed for post barrier
1948     __ mr(R9_ARG7, R4_ARG2);
1949     __ mr(R10_ARG8, R5_ARG3);
1950 
1951     if (UseCompressedOops) {
1952       generate_disjoint_int_copy_core(aligned);
1953     } else {
1954       generate_disjoint_long_copy_core(aligned);
1955     }
1956 
1957     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
1958     __ li(R3_RET, 0); // return 0
1959     __ blr();
1960 
1961     return start;
1962   }
1963 


1997   address generate_checkcast_copy(const char *name, bool dest_uninitialized) {
1998 
1999     const Register R3_from   = R3_ARG1;      // source array address
2000     const Register R4_to     = R4_ARG2;      // destination array address
2001     const Register R5_count  = R5_ARG3;      // elements count
2002     const Register R6_ckoff  = R6_ARG4;      // super_check_offset
2003     const Register R7_ckval  = R7_ARG5;      // super_klass
2004 
2005     const Register R8_offset = R8_ARG6;      // loop var, with stride wordSize
2006     const Register R9_remain = R9_ARG7;      // loop var, with stride -1
2007     const Register R10_oop   = R10_ARG8;     // actual oop copied
2008     const Register R11_klass = R11_scratch1; // oop._klass
2009     const Register R12_tmp   = R12_scratch2;
2010 
2011     const Register R2_minus1 = R2;
2012 
2013     //__ align(CodeEntryAlignment);
2014     StubCodeMark mark(this, "StubRoutines", name);
2015     address start = __ function_entry();
2016 
2017     // Assert that int is 64 bit sign extended and arrays are not conjoint.
2018 #ifdef ASSERT
2019     {
2020     assert_positive_int(R5_ARG3);
2021     const Register tmp1 = R11_scratch1, tmp2 = R12_scratch2;
2022     Label no_overlap;
2023     __ subf(tmp1, R3_ARG1, R4_ARG2); // distance in bytes
2024     __ sldi(tmp2, R5_ARG3, LogBytesPerHeapOop); // size in bytes
2025     __ cmpld(CCR0, R3_ARG1, R4_ARG2); // Use unsigned comparison!
2026     __ cmpld(CCR1, tmp1, tmp2);
2027     __ crnand(CCR0, Assembler::less, CCR1, Assembler::less);
2028     // Overlaps if Src before dst and distance smaller than size.
2029     // Branch to forward copy routine otherwise.
2030     __ blt(CCR0, no_overlap);
2031     __ stop("overlap in checkcast_copy", 0x9543);
2032     __ bind(no_overlap);
2033     }
2034 #endif
2035 
2036     gen_write_ref_array_pre_barrier(R3_from, R4_to, R5_count, dest_uninitialized, R12_tmp, /* preserve: */ R6_ckoff, R7_ckval);
2037 
2038     //inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R12_tmp, R3_RET);
2039 
2040     Label load_element, store_element, store_null, success, do_card_marks;
2041     __ or_(R9_remain, R5_count, R5_count); // Initialize loop index, and test it.
2042     __ li(R8_offset, 0);                   // Offset from start of arrays.
2043     __ li(R2_minus1, -1);
2044     __ bne(CCR0, load_element);
2045 
2046     // Empty array: Nothing to do.
2047     __ li(R3_RET, 0);           // Return 0 on (trivial) success.
2048     __ blr();
2049 
2050     // ======== begin loop ========
2051     // (Entry is load_element.)
2052     __ align(OptoLoopAlignment);
2053     __ bind(store_element);
2054     if (UseCompressedOops) {


2463 
2464     // special/generic versions
2465     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", false);
2466     StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true);
2467 
2468     StubRoutines::_unsafe_arraycopy  = generate_unsafe_copy("unsafe_arraycopy",
2469                                                             STUB_ENTRY(jbyte_arraycopy),
2470                                                             STUB_ENTRY(jshort_arraycopy),
2471                                                             STUB_ENTRY(jint_arraycopy),
2472                                                             STUB_ENTRY(jlong_arraycopy));
2473     StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
2474                                                              STUB_ENTRY(jbyte_arraycopy),
2475                                                              STUB_ENTRY(jshort_arraycopy),
2476                                                              STUB_ENTRY(jint_arraycopy),
2477                                                              STUB_ENTRY(oop_arraycopy),
2478                                                              STUB_ENTRY(oop_disjoint_arraycopy),
2479                                                              STUB_ENTRY(jlong_arraycopy),
2480                                                              STUB_ENTRY(checkcast_arraycopy));
2481 
2482     // fill routines
2483     if (OptimizeFill) {
2484       StubRoutines::_jbyte_fill          = generate_fill(T_BYTE,  false, "jbyte_fill");
2485       StubRoutines::_jshort_fill         = generate_fill(T_SHORT, false, "jshort_fill");
2486       StubRoutines::_jint_fill           = generate_fill(T_INT,   false, "jint_fill");
2487       StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
2488       StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
2489       StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");
2490     }
2491   }
2492 
2493   // Safefetch stubs.
2494   void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
2495     // safefetch signatures:
2496     //   int      SafeFetch32(int*      adr, int      errValue);
2497     //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
2498     //
2499     // arguments:
2500     //   R3_ARG1 = adr
2501     //   R4_ARG2 = errValue
2502     //
2503     // result:
2504     //   R3_RET  = *adr or errValue
2505 
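    // Caller-side sketch: int v = SafeFetch32(adr, errValue) returns *adr, or
    // errValue if the load faults and execution resumes at continuation_pc.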
2506     StubCodeMark mark(this, "StubRoutines", name);
2507 
2508     // Entry point, pc or function descriptor.
2509     *entry = __ function_entry();
2510 
2511     // Load *adr into R4_ARG2, may fault.


2555     const Register zlen  = R8;
2556 
2557     const Register tmp1  = R2; // TOC not used.
2558     const Register tmp2  = R9;
2559     const Register tmp3  = R10;
2560     const Register tmp4  = R11;
2561     const Register tmp5  = R12;
2562 
2563     // non-volatile regs
2564     const Register tmp6  = R31;
2565     const Register tmp7  = R30;
2566     const Register tmp8  = R29;
2567     const Register tmp9  = R28;
2568     const Register tmp10 = R27;
2569     const Register tmp11 = R26;
2570     const Register tmp12 = R25;
2571     const Register tmp13 = R24;
2572 
2573     BLOCK_COMMENT("Entry:");
2574 
2575     // C2 does not respect int to long conversion for stub calls.
2576     __ clrldi(xlen, xlen, 32);
2577     __ clrldi(ylen, ylen, 32);
2578     __ clrldi(zlen, zlen, 32);
2579 
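    // (Sketch: each clrldi(len, len, 32) above is equivalent to len &= 0xFFFFFFFF,
    //  clearing the upper 32 bits and restoring the zero extension that C2 may omit
    //  when passing int arguments to this stub.)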
2580     // Save non-volatile regs (frameless).
2581     int current_offs = 8;
2582     __ std(R24, -current_offs, R1_SP); current_offs += 8;
2583     __ std(R25, -current_offs, R1_SP); current_offs += 8;
2584     __ std(R26, -current_offs, R1_SP); current_offs += 8;
2585     __ std(R27, -current_offs, R1_SP); current_offs += 8;
2586     __ std(R28, -current_offs, R1_SP); current_offs += 8;
2587     __ std(R29, -current_offs, R1_SP); current_offs += 8;
2588     __ std(R30, -current_offs, R1_SP); current_offs += 8;
2589     __ std(R31, -current_offs, R1_SP);
2590 
2591     __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5,
2592                        tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13);
2593 
2594     // Restore non-volatile regs.
2595     current_offs = 8;
2596     __ ld(R24, -current_offs, R1_SP); current_offs += 8;
2597     __ ld(R25, -current_offs, R1_SP); current_offs += 8;
2598     __ ld(R26, -current_offs, R1_SP); current_offs += 8;
2599     __ ld(R27, -current_offs, R1_SP); current_offs += 8;

