1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 27 #include "gc/shared/memset_with_concurrent_readers.hpp" 28 #include "runtime/prefetch.inline.hpp" 29 #include "utilities/debug.hpp" 30 #include "utilities/globalDefinitions.hpp" 31 #include "utilities/macros.hpp" 32 33 #if INCLUDE_ALL_GCS 34 35 // An implementation of memset, for use when there may be concurrent 36 // readers of the region being stored into. 37 // 38 // We can't use the standard library memset if it is implemented using 39 // block initializing stores. Doing so can result in concurrent readers 40 // seeing spurious zeros. 41 // 42 // We can't use the obvious C/C++ for-loop, because the compiler may 43 // recognize the idiomatic loop and optimize it into a call to the 44 // standard library memset; we've seen exactly this happen with, for 45 // example, Solaris Studio 12.3. Hence the use of inline assembly 46 // code, hiding loops from the compiler's optimizer. 47 // 48 // We don't attempt to use the standard library memset when it is safe 49 // to do so. We could conservatively do so by detecting the presence 50 // of block initializing stores (VM_Version::has_blk_init()), but the 51 // implementation provided here should be sufficient. 52 53 static void fill_subword(void* start, void* end, int value); 54 55 void memset_with_concurrent_readers(void* to, int value, size_t size) { 56 Prefetch::write(to, 0); 57 void* end = static_cast<char*>(to) + size; 58 if (size >= BytesPerWord) { 59 // Fill any partial word prefix. 60 uintx* aligned_to = static_cast<uintx*>(align_ptr_up(to, BytesPerWord)); 61 fill_subword(to, aligned_to, value); 62 63 // Compute fill word. 64 STATIC_ASSERT(BitsPerByte == 8); 65 STATIC_ASSERT(BitsPerWord == 64); 66 uintx xvalue = value & 0xff; 67 xvalue |= (xvalue << 8); 68 xvalue |= (xvalue << 16); 69 xvalue |= (xvalue << 32); 70 71 uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord)); 72 assert(aligned_to <= aligned_end, "invariant"); 73 74 // for ( ; aligned_to < aligned_end; ++aligned_to) { 75 // *aligned_to = xvalue; 76 // } 77 uintptr_t temp; 78 __asm__ volatile( 79 // Unroll loop x8. 80 " sub %[aend], %[ato], %[temp]\n\t" 81 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words 82 " ba %xcc, 2f\n\t" // goto TEST always 83 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words 84 // LOOP: 85 "1:\n\t" // unrolled x8 store loop top 86 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to 87 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented 88 " stx %[xvalue], [%[ato]-56]\n\t" 89 " stx %[xvalue], [%[ato]-48]\n\t" 90 " stx %[xvalue], [%[ato]-40]\n\t" 91 " stx %[xvalue], [%[ato]-32]\n\t" 92 " stx %[xvalue], [%[ato]-24]\n\t" 93 " stx %[xvalue], [%[ato]-16]\n\t" 94 " stx %[xvalue], [%[ato]-8]\n\t" 95 // TEST: 96 "2:\n\t" 97 " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining 98 " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration 99 // Fill remaining < 8 full words. 100 // Dispatch on (aligned_end - aligned_to). 101 // offset := (7 - (aligned_end - aligned_to)) + 3 102 // 3 instructions from rdpc to DISPATCH 103 " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end 104 " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4 105 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size 106 " rd %pc, %[temp]\n\t" // dispatch on scaled offset 107 " jmpl %[temp]+%[ato], %g0\n\t" 108 " nop\n\t" 109 // DISPATCH: no direct reference, but without it the store block may be elided. 110 "3:\n\t" 111 " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue 112 " stx %[xvalue], [%[aend]-48]\n\t" 113 " stx %[xvalue], [%[aend]-40]\n\t" 114 " stx %[xvalue], [%[aend]-32]\n\t" 115 " stx %[xvalue], [%[aend]-24]\n\t" 116 " stx %[xvalue], [%[aend]-16]\n\t" 117 " stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue 118 : /* no outputs */ 119 : [ato] "&+r" (aligned_to), 120 [aend] "r" (aligned_end), 121 [xvalue] "r" (xvalue), 122 [temp] "&=r" (temp) 123 : "cc", "memory"); 124 to = aligned_end; // setup for suffix 125 } 126 // Fill any partial word suffix. Also the prefix if size < BytesPerWord. 127 fill_subword(to, end, value); 128 } 129 130 static void fill_subword(void* start, void* end, int value) { 131 STATIC_ASSERT(BytesPerWord == 8); 132 assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition"); 133 // Dispatch on (end - start). 134 void* pc; 135 __asm__ volatile( 136 // offset := (7 - (end - start)) + 3 137 // 3 instructions from rdpc to DISPATCH 138 " sub %[offset], %[end], %[offset]\n\t" // offset := start - end 139 " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4 140 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size 141 " rd %pc, %[pc]\n\t" // dispatch on scaled offset 142 " jmpl %[pc]+%[offset], %g0\n\t" 143 " nop\n\t" 144 // DISPATCH: no direct reference, but without it the store block may be elided. 145 "1:\n\t" 146 " stb %[value], [%[end]-7]\n\t" // end[-7] = value 147 " stb %[value], [%[end]-6]\n\t" 148 " stb %[value], [%[end]-5]\n\t" 149 " stb %[value], [%[end]-4]\n\t" 150 " stb %[value], [%[end]-3]\n\t" 151 " stb %[value], [%[end]-2]\n\t" 152 " stb %[value], [%[end]-1]\n\t" // end[-1] = value 153 : /* no outputs */ 154 : [offset] "&+r" (start), 155 [end] "r" (end), 156 [value] "r" (value), 157 [pc] "&=r" (pc) 158 : "memory"); 159 } 160 161 #endif // INCLUDE_ALL_GCS