69 " stb %[value], [%[end]-7]\n\t" // end[-7] = value 70 " stb %[value], [%[end]-6]\n\t" 71 " stb %[value], [%[end]-5]\n\t" 72 " stb %[value], [%[end]-4]\n\t" 73 " stb %[value], [%[end]-3]\n\t" 74 " stb %[value], [%[end]-2]\n\t" 75 " stb %[value], [%[end]-1]\n\t" // end[-1] = value 76 : /* only temporaries/overwritten outputs */ 77 [pc] "=&r" (pc), // temp 78 [offset] "+&r" (start) 79 : [end] "r" (end), 80 [value] "r" (value) 81 : "memory"); 82 } 83 84 void memset_with_concurrent_readers(void* to, int value, size_t size) { 85 Prefetch::write(to, 0); 86 void* end = static_cast<char*>(to) + size; 87 if (size >= (size_t)BytesPerWord) { 88 // Fill any partial word prefix. 89 uintx* aligned_to = static_cast<uintx*>(align_ptr_up(to, BytesPerWord)); 90 fill_subword(to, aligned_to, value); 91 92 // Compute fill word. 93 STATIC_ASSERT(BitsPerByte == 8); 94 STATIC_ASSERT(BitsPerWord == 64); 95 uintx xvalue = value & 0xff; 96 xvalue |= (xvalue << 8); 97 xvalue |= (xvalue << 16); 98 xvalue |= (xvalue << 32); 99 100 uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord)); 101 assert(aligned_to <= aligned_end, "invariant"); 102 103 // for ( ; aligned_to < aligned_end; ++aligned_to) { 104 // *aligned_to = xvalue; 105 // } 106 uintptr_t temp; 107 __asm__ volatile( 108 // Unroll loop x8. 109 " sub %[aend], %[ato], %[temp]\n\t" 110 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words 111 " ba %%xcc, 2f\n\t" // goto TEST always 112 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words 113 // LOOP: 114 "1:\n\t" // unrolled x8 store loop top 115 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to 116 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented 117 " stx %[xvalue], [%[ato]-56]\n\t" 118 " stx %[xvalue], [%[ato]-48]\n\t" 119 " stx %[xvalue], [%[ato]-40]\n\t" 120 " stx %[xvalue], [%[ato]-32]\n\t" | 69 " stb %[value], [%[end]-7]\n\t" // end[-7] = value 70 " stb %[value], [%[end]-6]\n\t" 71 " stb %[value], [%[end]-5]\n\t" 72 " stb %[value], [%[end]-4]\n\t" 73 " stb %[value], [%[end]-3]\n\t" 74 " stb %[value], [%[end]-2]\n\t" 75 " stb %[value], [%[end]-1]\n\t" // end[-1] = value 76 : /* only temporaries/overwritten outputs */ 77 [pc] "=&r" (pc), // temp 78 [offset] "+&r" (start) 79 : [end] "r" (end), 80 [value] "r" (value) 81 : "memory"); 82 } 83 84 void memset_with_concurrent_readers(void* to, int value, size_t size) { 85 Prefetch::write(to, 0); 86 void* end = static_cast<char*>(to) + size; 87 if (size >= (size_t)BytesPerWord) { 88 // Fill any partial word prefix. 89 uintx* aligned_to = static_cast<uintx*>(align_up(to, BytesPerWord)); 90 fill_subword(to, aligned_to, value); 91 92 // Compute fill word. 93 STATIC_ASSERT(BitsPerByte == 8); 94 STATIC_ASSERT(BitsPerWord == 64); 95 uintx xvalue = value & 0xff; 96 xvalue |= (xvalue << 8); 97 xvalue |= (xvalue << 16); 98 xvalue |= (xvalue << 32); 99 100 uintx* aligned_end = static_cast<uintx*>(align_down(end, BytesPerWord)); 101 assert(aligned_to <= aligned_end, "invariant"); 102 103 // for ( ; aligned_to < aligned_end; ++aligned_to) { 104 // *aligned_to = xvalue; 105 // } 106 uintptr_t temp; 107 __asm__ volatile( 108 // Unroll loop x8. 109 " sub %[aend], %[ato], %[temp]\n\t" 110 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words 111 " ba %%xcc, 2f\n\t" // goto TEST always 112 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words 113 // LOOP: 114 "1:\n\t" // unrolled x8 store loop top 115 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to 116 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented 117 " stx %[xvalue], [%[ato]-56]\n\t" 118 " stx %[xvalue], [%[ato]-48]\n\t" 119 " stx %[xvalue], [%[ato]-40]\n\t" 120 " stx %[xvalue], [%[ato]-32]\n\t" |