Print this page


Split Close
Expand all
Collapse all
          --- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp
          +++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp
↓ open down ↓ 1151 lines elided ↑ open up ↑
1152 1152      const Register saved_rsi = r10;
1153 1153  #ifdef _WIN64
1154 1154      __ movptr(rdi, saved_rdi);
1155 1155      __ movptr(rsi, saved_rsi);
1156 1156  #endif
1157 1157    }
1158 1158  
1159 1159    // Generate code for an array write pre barrier
1160 1160    //
1161 1161    //     addr    -  starting address
1162      -  //     count    -  element count
     1162 +  //     count   -  element count
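     1163 +  //     dest_uninitialized - true if the destination is statically known to be uninitialized; the G1 pre-barrier call is then omitted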
1163 1164    //
1164 1165    //     Destroy no registers!
1165 1166    //
1166      -  void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
     1167 +  void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
1167 1168      BarrierSet* bs = Universe::heap()->barrier_set();
1168 1169      switch (bs->kind()) {
1169 1170        case BarrierSet::G1SATBCT:
1170 1171        case BarrierSet::G1SATBCTLogging:
1171      -        {
1172      -          __ pusha();                      // push registers
1173      -          if (count == c_rarg0) {
1174      -            if (addr == c_rarg1) {
1175      -              // exactly backwards!!
1176      -              __ xchgptr(c_rarg1, c_rarg0);
1177      -            } else {
1178      -              __ movptr(c_rarg1, count);
1179      -              __ movptr(c_rarg0, addr);
1180      -            }
1181      -
1182      -          } else {
1183      -            __ movptr(c_rarg0, addr);
1184      -            __ movptr(c_rarg1, count);
1185      -          }
1186      -          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1187      -          __ popa();
     1172 +        // With G1, don't generate the call if we statically know that the target is uninitialized
     1173 +        if (!dest_uninitialized) {
     1174 +           __ pusha();                      // push registers
     1175 +           if (count == c_rarg0) {
     1176 +             if (addr == c_rarg1) {
     1177 +               // exactly backwards!!
     1178 +               __ xchgptr(c_rarg1, c_rarg0);
     1179 +             } else {
     1180 +               __ movptr(c_rarg1, count);
     1181 +               __ movptr(c_rarg0, addr);
     1182 +             }
     1183 +           } else {
     1184 +             __ movptr(c_rarg0, addr);
     1185 +             __ movptr(c_rarg1, count);
     1186 +           }
     1187 +           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
     1188 +           __ popa();
1188 1189          }
1189      -        break;
     1190 +         break;
1190 1191        case BarrierSet::CardTableModRef:
1191 1192        case BarrierSet::CardTableExtension:
1192 1193        case BarrierSet::ModRef:
1193 1194          break;
1194 1195        default:
1195 1196          ShouldNotReachHere();
1196 1197  
1197 1198      }
1198 1199    }
1199 1200  
↓ open down ↓ 562 lines elided ↑ open up ↑
1762 1763    //   c_rarg2   - element count, treated as ssize_t, can be zero
1763 1764    //
1764 1765    // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1765 1766    // the hardware handle it.  The two dwords within qwords that span
1766 1767    // cache line boundaries will still be loaded and stored atomically.
1767 1768    //
1768 1769    // Side Effects:
1769 1770    //   disjoint_int_copy_entry is set to the no-overlap entry point
1770 1771    //   used by generate_conjoint_int_oop_copy().
1771 1772    //
1772      -  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name) {
     1773 +  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
     1774 +                                         const char *name, bool dest_uninitialized = false) {
1773 1775      __ align(CodeEntryAlignment);
1774 1776      StubCodeMark mark(this, "StubRoutines", name);
1775 1777      address start = __ pc();
1776 1778  
1777 1779      Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1778 1780      const Register from        = rdi;  // source array address
1779 1781      const Register to          = rsi;  // destination array address
1780 1782      const Register count       = rdx;  // elements count
1781 1783      const Register dword_count = rcx;
1782 1784      const Register qword_count = count;
↓ open down ↓ 9 lines elided ↑ open up ↑
1792 1794      if (entry != NULL) {
1793 1795        *entry = __ pc();
1794 1796        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1795 1797        BLOCK_COMMENT("Entry:");
1796 1798      }
1797 1799  
1798 1800      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1799 1801                        // r9 and r10 may be used to save non-volatile registers
1800 1802      if (is_oop) {
1801 1803        __ movq(saved_to, to);
1802      -      gen_write_ref_array_pre_barrier(to, count);
     1804 +      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1803 1805      }
1804 1806  
1805 1807      // 'from', 'to' and 'count' are now valid
1806 1808      __ movptr(dword_count, count);
1807 1809      __ shrptr(count, 1); // count => qword_count
1808 1810  
1809 1811      // Copy from low to high addresses.  Use 'to' as scratch.
1810 1812      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1811 1813      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1812 1814      __ negptr(qword_count);
↓ open down ↓ 40 lines elided ↑ open up ↑
1853 1855    // Inputs:
1854 1856    //   c_rarg0   - source array address
1855 1857    //   c_rarg1   - destination array address
1856 1858    //   c_rarg2   - element count, treated as ssize_t, can be zero
1857 1859    //
1858 1860    // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1859 1861    // the hardware handle it.  The two dwords within qwords that span
1860 1862    // cache line boundaries will still be loaded and stored atomically.
1861 1863    //
1862 1864    address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1863      -                                         address *entry, const char *name) {
     1865 +                                         address *entry, const char *name,
     1866 +                                         bool dest_uninitialized = false) {
1864 1867      __ align(CodeEntryAlignment);
1865 1868      StubCodeMark mark(this, "StubRoutines", name);
1866 1869      address start = __ pc();
1867 1870  
1868 1871      Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1869 1872      const Register from        = rdi;  // source array address
1870 1873      const Register to          = rsi;  // destination array address
1871 1874      const Register count       = rdx;  // elements count
1872 1875      const Register dword_count = rcx;
1873 1876      const Register qword_count = count;
↓ open down ↓ 6 lines elided ↑ open up ↑
1880 1883         // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1881 1884        BLOCK_COMMENT("Entry:");
1882 1885      }
1883 1886  
1884 1887      array_overlap_test(nooverlap_target, Address::times_4);
1885 1888      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1886 1889                        // r9 and r10 may be used to save non-volatile registers
1887 1890  
1888 1891      if (is_oop) {
1889 1892        // no registers are destroyed by this call
1890      -      gen_write_ref_array_pre_barrier(to, count);
     1893 +      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1891 1894      }
1892 1895  
1893 1896      assert_clean_int(count, rax); // Make sure 'count' is clean int.
1894 1897      // 'from', 'to' and 'count' are now valid
1895 1898      __ movptr(dword_count, count);
1896 1899      __ shrptr(count, 1); // count => qword_count
1897 1900  
1898 1901      // Copy from high to low addresses.  Use 'to' as scratch.
1899 1902  
1900 1903      // Check for and copy trailing dword
↓ open down ↓ 45 lines elided ↑ open up ↑
1946 1949    //
1947 1950    // Inputs:
1948 1951    //   c_rarg0   - source array address
1949 1952    //   c_rarg1   - destination array address
1950 1953    //   c_rarg2   - element count, treated as ssize_t, can be zero
1951 1954    //
1952 1955   // Side Effects:
1953 1956    //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1954 1957    //   no-overlap entry point used by generate_conjoint_long_oop_copy().
1955 1958    //
1956      -  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, const char *name) {
     1959 +  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
     1960 +                                          const char *name, bool dest_uninitialized = false) {
1957 1961      __ align(CodeEntryAlignment);
1958 1962      StubCodeMark mark(this, "StubRoutines", name);
1959 1963      address start = __ pc();
1960 1964  
1961 1965      Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1962 1966      const Register from        = rdi;  // source array address
1963 1967      const Register to          = rsi;  // destination array address
1964 1968      const Register qword_count = rdx;  // elements count
1965 1969      const Register end_from    = from; // source array end address
1966 1970      const Register end_to      = rcx;  // destination array end address
↓ open down ↓ 9 lines elided ↑ open up ↑
1976 1980        *entry = __ pc();
1977 1981        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1978 1982        BLOCK_COMMENT("Entry:");
1979 1983      }
1980 1984  
1981 1985      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1982 1986                        // r9 and r10 may be used to save non-volatile registers
1983 1987      // 'from', 'to' and 'qword_count' are now valid
1984 1988      if (is_oop) {
1985 1989        // no registers are destroyed by this call
1986      -      gen_write_ref_array_pre_barrier(to, qword_count);
     1990 +      gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
1987 1991      }
1988 1992  
1989 1993      // Copy from low to high addresses.  Use 'to' as scratch.
1990 1994      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1991 1995      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1992 1996      __ negptr(qword_count);
1993 1997      __ jmp(L_copy_32_bytes);
1994 1998  
1995 1999      // Copy trailing qwords
1996 2000    __ BIND(L_copy_8_bytes);
↓ open down ↓ 34 lines elided ↑ open up ↑
2031 2035    //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2032 2036    //             ignored
2033 2037    //   is_oop  - true => oop array, so generate store check code
2034 2038    //   name    - stub name string
2035 2039    //
2036 2040    // Inputs:
2037 2041    //   c_rarg0   - source array address
2038 2042    //   c_rarg1   - destination array address
2039 2043    //   c_rarg2   - element count, treated as ssize_t, can be zero
2040 2044    //
2041      -  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
2042      -                                          address *entry, const char *name) {
     2045 +  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
     2046 +                                          address nooverlap_target, address *entry,
     2047 +                                          const char *name, bool dest_uninitialized = false) {
2043 2048      __ align(CodeEntryAlignment);
2044 2049      StubCodeMark mark(this, "StubRoutines", name);
2045 2050      address start = __ pc();
2046 2051  
2047 2052      Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2048 2053      const Register from        = rdi;  // source array address
2049 2054      const Register to          = rsi;  // destination array address
2050 2055      const Register qword_count = rdx;  // elements count
2051 2056      const Register saved_count = rcx;
2052 2057  
↓ open down ↓ 7 lines elided ↑ open up ↑
2060 2065      }
2061 2066  
2062 2067      array_overlap_test(nooverlap_target, Address::times_8);
2063 2068      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2064 2069                        // r9 and r10 may be used to save non-volatile registers
2065 2070      // 'from', 'to' and 'qword_count' are now valid
2066 2071      if (is_oop) {
2067 2072        // Save to and count for store barrier
2068 2073        __ movptr(saved_count, qword_count);
2069 2074        // No registers are destroyed by this call
2070      -      gen_write_ref_array_pre_barrier(to, saved_count);
     2075 +      gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
2071 2076      }
2072 2077  
2073 2078      __ jmp(L_copy_32_bytes);
2074 2079  
2075 2080      // Copy trailing qwords
2076 2081    __ BIND(L_copy_8_bytes);
2077 2082      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2078 2083      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2079 2084      __ decrement(qword_count);
2080 2085      __ jcc(Assembler::notZero, L_copy_8_bytes);
↓ open down ↓ 58 lines elided ↑ open up ↑
2139 2144    //    c_rarg3   - size_t ckoff (super_check_offset)
2140 2145    // not Win64
2141 2146    //    c_rarg4   - oop ckval (super_klass)
2142 2147    // Win64
2143 2148    //    rsp+40    - oop ckval (super_klass)
2144 2149    //
2145 2150    //  Output:
2146 2151    //    rax ==  0  -  success
2147 2152    //    rax == -1^K - failure, where K is partial transfer count
2148 2153    //
2149      -  address generate_checkcast_copy(const char *name, address *entry) {
     2154 +  address generate_checkcast_copy(const char *name, address *entry,
     2155 +                                  bool dest_uninitialized = false) {
2150 2156  
2151 2157      Label L_load_element, L_store_element, L_do_card_marks, L_done;
2152 2158  
2153 2159      // Input registers (after setup_arg_regs)
2154 2160      const Register from        = rdi;   // source array address
2155 2161      const Register to          = rsi;   // destination array address
2156 2162      const Register length      = rdx;   // elements count
2157 2163      const Register ckoff       = rcx;   // super_check_offset
2158 2164      const Register ckval       = r8;    // super_klass
2159 2165  
↓ open down ↓ 73 lines elided ↑ open up ↑
2233 2239      }
2234 2240  #endif //ASSERT
2235 2241  
2236 2242      // Loop-invariant addresses.  They are exclusive end pointers.
2237 2243      Address end_from_addr(from, length, TIMES_OOP, 0);
2238 2244      Address   end_to_addr(to,   length, TIMES_OOP, 0);
2239 2245      // Loop-variant addresses.  They assume post-incremented count < 0.
2240 2246      Address from_element_addr(end_from, count, TIMES_OOP, 0);
2241 2247      Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
2242 2248  
2243      -    gen_write_ref_array_pre_barrier(to, count);
     2249 +    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
2244 2250  
2245 2251      // Copy from low to high addresses, indexed from the end of each array.
2246 2252      __ lea(end_from, end_from_addr);
2247 2253      __ lea(end_to,   end_to_addr);
2248 2254      __ movptr(r14_length, length);        // save a copy of the length
2249 2255      assert(length == count, "");          // else fix next line:
2250 2256      __ negptr(count);                     // negate and test the length
2251 2257      __ jcc(Assembler::notZero, L_load_element);
2252 2258  
2253 2259      // Empty array:  Nothing to do.
↓ open down ↓ 489 lines elided ↑ open up ↑
2743 2749                                                                                 "jlong_disjoint_arraycopy");
2744 2750      StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
2745 2751                                                                                 &entry_jlong_arraycopy, "jlong_arraycopy");
2746 2752  
2747 2753  
2748 2754      if (UseCompressedOops) {
2749 2755        StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
2750 2756                                                                                "oop_disjoint_arraycopy");
2751 2757        StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
2752 2758                                                                                &entry_oop_arraycopy, "oop_arraycopy");
     2759 +      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_int_oop_copy(false, true, &entry,
     2760 +                                                                                     "oop_disjoint_arraycopy_uninit",
     2761 +                                                                                     /*dest_uninitialized*/true);
     2762 +      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_int_oop_copy(false, true, entry,
     2763 +                                                                                     NULL, "oop_arraycopy_uninit",
     2764 +                                                                                     /*dest_uninitialized*/true);
2753 2765      } else {
2754 2766        StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
2755 2767                                                                                 "oop_disjoint_arraycopy");
2756 2768        StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
2757 2769                                                                                 &entry_oop_arraycopy, "oop_arraycopy");
     2770 +      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_long_oop_copy(false, true, &entry,
     2771 +                                                                                      "oop_disjoint_arraycopy_uninit",
     2772 +                                                                                      /*dest_uninitialized*/true);
     2773 +      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_long_oop_copy(false, true, entry,
     2774 +                                                                                      NULL, "oop_arraycopy_uninit",
     2775 +                                                                                      /*dest_uninitialized*/true);
2758 2776      }
2759 2777  
2760      -    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
     2778 +    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
     2779 +    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
     2780 +                                                                        /*dest_uninitialized*/true);
     2781 +
2761 2782      StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
2762 2783                                                                entry_jbyte_arraycopy,
2763 2784                                                                entry_jshort_arraycopy,
2764 2785                                                                entry_jint_arraycopy,
2765 2786                                                                entry_jlong_arraycopy);
2766 2787      StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
2767 2788                                                                 entry_jbyte_arraycopy,
2768 2789                                                                 entry_jshort_arraycopy,
2769 2790                                                                 entry_jint_arraycopy,
2770 2791                                                                 entry_oop_arraycopy,
↓ open down ↓ 16 lines elided ↑ open up ↑
2787 2808      StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;
2788 2809  
2789 2810      StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
2790 2811      StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;
2791 2812  
2792 2813      StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
2793 2814      StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;
2794 2815  
2795 2816      StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
2796 2817      StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
     2818 +
     2819 +    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit    = StubRoutines::_oop_disjoint_arraycopy_uninit;
     2820 +    StubRoutines::_arrayof_oop_arraycopy_uninit             = StubRoutines::_oop_arraycopy_uninit;
2797 2821    }
2798 2822  
2799 2823    void generate_math_stubs() {
2800 2824      {
2801 2825        StubCodeMark mark(this, "StubRoutines", "log");
2802 2826        StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
2803 2827  
2804 2828        __ subq(rsp, 8);
2805 2829        __ movdbl(Address(rsp, 0), xmm0);
2806 2830        __ fld_d(Address(rsp, 0));
↓ open down ↓ 286 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX