Print this page


Split Close
Expand all
Collapse all
          --- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp
          +++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp
↓ open down ↓ 1761 lines elided ↑ open up ↑
1762 1762    //   c_rarg2   - element count, treated as ssize_t, can be zero
1763 1763    //
1764 1764    // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1765 1765    // the hardware handle it.  The two dwords within qwords that span
1766 1766    // cache line boundaries will still be loaded and stored atomically.
1767 1767    //
1768 1768    // Side Effects:
1769 1769    //   disjoint_int_copy_entry is set to the no-overlap entry point
1770 1770    //   used by generate_conjoint_int_oop_copy().
1771 1771    //
1772      -  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name) {
     1772 +  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
     1773 +                                         const char *name, bool need_pre_barrier = true) {
1773 1774      __ align(CodeEntryAlignment);
1774 1775      StubCodeMark mark(this, "StubRoutines", name);
1775 1776      address start = __ pc();
1776 1777  
1777 1778      Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1778 1779      const Register from        = rdi;  // source array address
1779 1780      const Register to          = rsi;  // destination array address
1780 1781      const Register count       = rdx;  // elements count
1781 1782      const Register dword_count = rcx;
1782 1783      const Register qword_count = count;
↓ open down ↓ 9 lines elided ↑ open up ↑
1792 1793      if (entry != NULL) {
1793 1794        *entry = __ pc();
1794 1795        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1795 1796        BLOCK_COMMENT("Entry:");
1796 1797      }
1797 1798  
1798 1799      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1799 1800                        // r9 and r10 may be used to save non-volatile registers
1800 1801      if (is_oop) {
1801 1802        __ movq(saved_to, to);
1802      -      gen_write_ref_array_pre_barrier(to, count);
     1803 +      if (need_pre_barrier) {
     1804 +        gen_write_ref_array_pre_barrier(to, count);
     1805 +      }
1803 1806      }
1804 1807  
1805 1808      // 'from', 'to' and 'count' are now valid
1806 1809      __ movptr(dword_count, count);
1807 1810      __ shrptr(count, 1); // count => qword_count
1808 1811  
1809 1812      // Copy from low to high addresses.  Use 'to' as scratch.
1810 1813      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1811 1814      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1812 1815      __ negptr(qword_count);
↓ open down ↓ 40 lines elided ↑ open up ↑
1853 1856    // Inputs:
1854 1857    //   c_rarg0   - source array address
1855 1858    //   c_rarg1   - destination array address
1856 1859    //   c_rarg2   - element count, treated as ssize_t, can be zero
1857 1860    //
1858 1861    // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1859 1862    // the hardware handle it.  The two dwords within qwords that span
1860 1863    // cache line boundaries will still be loaded and stored atomically.
1861 1864    //
1862 1865    address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1863      -                                         address *entry, const char *name) {
     1866 +                                         address *entry, const char *name,
     1867 +                                         bool need_pre_barrier = true) {
1864 1868      __ align(CodeEntryAlignment);
1865 1869      StubCodeMark mark(this, "StubRoutines", name);
1866 1870      address start = __ pc();
1867 1871  
1868 1872      Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1869 1873      const Register from        = rdi;  // source array address
1870 1874      const Register to          = rsi;  // destination array address
1871 1875      const Register count       = rdx;  // elements count
1872 1876      const Register dword_count = rcx;
1873 1877      const Register qword_count = count;
↓ open down ↓ 4 lines elided ↑ open up ↑
1878 1882      if (entry != NULL) {
1879 1883        *entry = __ pc();
1880 1884        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1881 1885        BLOCK_COMMENT("Entry:");
1882 1886      }
1883 1887  
1884 1888      array_overlap_test(nooverlap_target, Address::times_4);
1885 1889      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1886 1890                        // r9 and r10 may be used to save non-volatile registers
1887 1891  
1888      -    if (is_oop) {
     1892 +    if (is_oop && need_pre_barrier) {
1889 1893        // no registers are destroyed by this call
1890 1894        gen_write_ref_array_pre_barrier(to, count);
1891 1895      }
1892 1896  
1893 1897      assert_clean_int(count, rax); // Make sure 'count' is clean int.
1894 1898      // 'from', 'to' and 'count' are now valid
1895 1899      __ movptr(dword_count, count);
1896 1900      __ shrptr(count, 1); // count => qword_count
1897 1901  
1898 1902      // Copy from high to low addresses.  Use 'to' as scratch.
↓ open down ↓ 47 lines elided ↑ open up ↑
1946 1950    //
1947 1951    // Inputs:
1948 1952    //   c_rarg0   - source array address
1949 1953    //   c_rarg1   - destination array address
1950 1954    //   c_rarg2   - element count, treated as ssize_t, can be zero
1951 1955    //
1952 1956    // Side Effects:
1953 1957    //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1954 1958    //   no-overlap entry point used by generate_conjoint_long_oop_copy().
1955 1959    //
1956      -  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, const char *name) {
     1960 +  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
     1961 +                                          const char *name, bool need_pre_barrier = true) {
1957 1962      __ align(CodeEntryAlignment);
1958 1963      StubCodeMark mark(this, "StubRoutines", name);
1959 1964      address start = __ pc();
1960 1965  
1961 1966      Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1962 1967      const Register from        = rdi;  // source array address
1963 1968      const Register to          = rsi;  // destination array address
1964 1969      const Register qword_count = rdx;  // elements count
1965 1970      const Register end_from    = from; // source array end address
1966 1971      const Register end_to      = rcx;  // destination array end address
↓ open down ↓ 7 lines elided ↑ open up ↑
1974 1979  
1975 1980      if (entry != NULL) {
1976 1981        *entry = __ pc();
1977 1982        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1978 1983        BLOCK_COMMENT("Entry:");
1979 1984      }
1980 1985  
1981 1986      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1982 1987                        // r9 and r10 may be used to save non-volatile registers
1983 1988      // 'from', 'to' and 'qword_count' are now valid
1984      -    if (is_oop) {
     1989 +    if (is_oop && need_pre_barrier) {
1985 1990        // no registers are destroyed by this call
1986 1991        gen_write_ref_array_pre_barrier(to, qword_count);
1987 1992      }
1988 1993  
1989 1994      // Copy from low to high addresses.  Use 'to' as scratch.
1990 1995      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1991 1996      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1992 1997      __ negptr(qword_count);
1993 1998      __ jmp(L_copy_32_bytes);
1994 1999  
↓ open down ↓ 36 lines elided ↑ open up ↑
2031 2036    //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2032 2037    //             ignored
2033 2038    //   is_oop  - true => oop array, so generate store check code
2034 2039    //   name    - stub name string
2035 2040    //
2036 2041    // Inputs:
2037 2042    //   c_rarg0   - source array address
2038 2043    //   c_rarg1   - destination array address
2039 2044    //   c_rarg2   - element count, treated as ssize_t, can be zero
2040 2045    //
2041      -  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
2042      -                                          address *entry, const char *name) {
     2046 +  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
     2047 +                                          address nooverlap_target, address *entry,
     2048 +                                          const char *name, bool need_pre_barrier = true) {
2043 2049      __ align(CodeEntryAlignment);
2044 2050      StubCodeMark mark(this, "StubRoutines", name);
2045 2051      address start = __ pc();
2046 2052  
2047 2053      Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2048 2054      const Register from        = rdi;  // source array address
2049 2055      const Register to          = rsi;  // destination array address
2050 2056      const Register qword_count = rdx;  // elements count
2051 2057      const Register saved_count = rcx;
2052 2058  
↓ open down ↓ 6 lines elided ↑ open up ↑
2059 2065        BLOCK_COMMENT("Entry:");
2060 2066      }
2061 2067  
2062 2068      array_overlap_test(nooverlap_target, Address::times_8);
2063 2069      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2064 2070                        // r9 and r10 may be used to save non-volatile registers
2065 2071      // 'from', 'to' and 'qword_count' are now valid
2066 2072      if (is_oop) {
2067 2073        // Save to and count for store barrier
2068 2074        __ movptr(saved_count, qword_count);
2069      -      // No registers are destroyed by this call
2070      -      gen_write_ref_array_pre_barrier(to, saved_count);
     2075 +      if (need_pre_barrier) {
     2076 +        // No registers are destroyed by this call
     2077 +        gen_write_ref_array_pre_barrier(to, saved_count);
     2078 +      }
2071 2079      }
2072 2080  
2073 2081      __ jmp(L_copy_32_bytes);
2074 2082  
2075 2083      // Copy trailing qwords
2076 2084    __ BIND(L_copy_8_bytes);
2077 2085      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2078 2086      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2079 2087      __ decrement(qword_count);
2080 2088      __ jcc(Assembler::notZero, L_copy_8_bytes);
↓ open down ↓ 58 lines elided ↑ open up ↑
2139 2147    //    c_rarg3   - size_t ckoff (super_check_offset)
2140 2148    // not Win64
2141 2149    //    c_rarg4   - oop ckval (super_klass)
2142 2150    // Win64
2143 2151    //    rsp+40    - oop ckval (super_klass)
2144 2152    //
2145 2153    //  Output:
2146 2154    //    rax ==  0  -  success
2147 2155    //    rax == -1^K - failure, where K is partial transfer count
2148 2156    //
2149      -  address generate_checkcast_copy(const char *name, address *entry) {
     2157 +  address generate_checkcast_copy(const char *name, address *entry, bool need_pre_barrier = true) {
2150 2158  
2151 2159      Label L_load_element, L_store_element, L_do_card_marks, L_done;
2152 2160  
2153 2161      // Input registers (after setup_arg_regs)
2154 2162      const Register from        = rdi;   // source array address
2155 2163      const Register to          = rsi;   // destination array address
2156 2164      const Register length      = rdx;   // elements count
2157 2165      const Register ckoff       = rcx;   // super_check_offset
2158 2166      const Register ckval       = r8;    // super_klass
2159 2167  
↓ open down ↓ 73 lines elided ↑ open up ↑
2233 2241      }
2234 2242  #endif //ASSERT
2235 2243  
2236 2244      // Loop-invariant addresses.  They are exclusive end pointers.
2237 2245      Address end_from_addr(from, length, TIMES_OOP, 0);
2238 2246      Address   end_to_addr(to,   length, TIMES_OOP, 0);
2239 2247      // Loop-variant addresses.  They assume post-incremented count < 0.
2240 2248      Address from_element_addr(end_from, count, TIMES_OOP, 0);
2241 2249      Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
2242 2250  
2243      -    gen_write_ref_array_pre_barrier(to, count);
     2251 +    if (need_pre_barrier) {
     2252 +      gen_write_ref_array_pre_barrier(to, count);
     2253 +    }
2244 2254  
2245 2255      // Copy from low to high addresses, indexed from the end of each array.
2246 2256      __ lea(end_from, end_from_addr);
2247 2257      __ lea(end_to,   end_to_addr);
2248 2258      __ movptr(r14_length, length);        // save a copy of the length
2249 2259      assert(length == count, "");          // else fix next line:
2250 2260      __ negptr(count);                     // negate and test the length
2251 2261      __ jcc(Assembler::notZero, L_load_element);
2252 2262  
2253 2263      // Empty array:  Nothing to do.
↓ open down ↓ 489 lines elided ↑ open up ↑
2743 2753                                                                                 "jlong_disjoint_arraycopy");
2744 2754      StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
2745 2755                                                                                 &entry_jlong_arraycopy, "jlong_arraycopy");
2746 2756  
2747 2757  
2748 2758      if (UseCompressedOops) {
2749 2759        StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
2750 2760                                                                                "oop_disjoint_arraycopy");
2751 2761        StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
2752 2762                                                                                &entry_oop_arraycopy, "oop_arraycopy");
     2763 +      StubRoutines::_oop_disjoint_arraycopy_no_pre  = generate_disjoint_int_oop_copy(false, true, &entry,
     2764 +                                                                                     "oop_disjoint_arraycopy_no_pre", false);
     2765 +      StubRoutines::_oop_arraycopy_no_pre           = generate_conjoint_int_oop_copy(false, true, entry,
     2766 +                                                                                     NULL, "oop_arraycopy_no_pre", false);
2753 2767      } else {
2754 2768        StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
2755 2769                                                                                 "oop_disjoint_arraycopy");
2756 2770        StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
2757 2771                                                                                 &entry_oop_arraycopy, "oop_arraycopy");
     2772 +      StubRoutines::_oop_disjoint_arraycopy_no_pre  = generate_disjoint_long_oop_copy(false, true, &entry,
     2773 +                                                                                      "oop_disjoint_arraycopy_no_pre", false);
     2774 +      StubRoutines::_oop_arraycopy_no_pre           = generate_conjoint_long_oop_copy(false, true, entry,
     2775 +                                                                                      NULL, "oop_arraycopy_no_pre", false);
2758 2776      }
2759 2777  
2760      -    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
     2778 +    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
     2779 +    StubRoutines::_checkcast_arraycopy_no_pre = generate_checkcast_copy("checkcast_arraycopy_no_pre", NULL, false);
     2780 +
2761 2781      StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
2762 2782                                                                entry_jbyte_arraycopy,
2763 2783                                                                entry_jshort_arraycopy,
2764 2784                                                                entry_jint_arraycopy,
2765 2785                                                                entry_jlong_arraycopy);
2766 2786      StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
2767 2787                                                                 entry_jbyte_arraycopy,
2768 2788                                                                 entry_jshort_arraycopy,
2769 2789                                                                 entry_jint_arraycopy,
2770 2790                                                                 entry_oop_arraycopy,
↓ open down ↓ 16 lines elided ↑ open up ↑
2787 2807      StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;
2788 2808  
2789 2809      StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
2790 2810      StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;
2791 2811  
2792 2812      StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
2793 2813      StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;
2794 2814  
2795 2815      StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
2796 2816      StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
     2817 +
     2818 +    StubRoutines::_arrayof_oop_disjoint_arraycopy_no_pre    = StubRoutines::_oop_disjoint_arraycopy_no_pre;
     2819 +    StubRoutines::_arrayof_oop_arraycopy_no_pre             = StubRoutines::_oop_arraycopy_no_pre;
2797 2820    }
2798 2821  
2799 2822    void generate_math_stubs() {
2800 2823      {
2801 2824        StubCodeMark mark(this, "StubRoutines", "log");
2802 2825        StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2803 2826  
2804 2827        __ subq(rsp, 8);
2805 2828        __ movdbl(Address(rsp, 0), xmm0);
2806 2829        __ fld_d(Address(rsp, 0));
↓ open down ↓ 286 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX