< prev index next >
src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp
Print this page
rev 8798 : [mq]: inc1
@@ -77,36 +77,39 @@
uintptr_t temp;
__asm__ volatile(
// Unroll loop x8.
" sub %[aend], %[ato], %[temp]\n\t"
" cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
- " ba %xcc, 2f\n\t"
+ " ba %xcc, 2f\n\t" // goto TEST always
" sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
+ // LOOP:
"1:\n\t" // unrolled x8 store loop top
" cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to
" stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
" stx %[xvalue], [%[ato]-56]\n\t"
" stx %[xvalue], [%[ato]-48]\n\t"
" stx %[xvalue], [%[ato]-40]\n\t"
" stx %[xvalue], [%[ato]-32]\n\t"
" stx %[xvalue], [%[ato]-24]\n\t"
" stx %[xvalue], [%[ato]-16]\n\t"
" stx %[xvalue], [%[ato]-8]\n\t"
+ // TEST:
"2:\n\t"
- " bgu,a %xcc, 1b\n\t" // loop if more than 7 words remaining
+ " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
" add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
// Fill remaining < 8 full words.
// Dispatch on (aligned_end - aligned_to).
// offset := (7 - (aligned_end - aligned_to)) + 3
- // 3 instructions from rdpc to dispatch start
+ // 3 instructions from rdpc to DISPATCH
" sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
" srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
" add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
" rd %pc, %[temp]\n\t" // dispatch on scaled offset
" jmpl %[temp]+%[ato], %g0\n\t"
" nop\n\t"
- "3:\n\t" // dispatch start
+ // DISPATCH: no direct reference, but without it the store block may be elided.
+ "3:\n\t"
" stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
" stx %[xvalue], [%[aend]-48]\n\t"
" stx %[xvalue], [%[aend]-40]\n\t"
" stx %[xvalue], [%[aend]-32]\n\t"
" stx %[xvalue], [%[aend]-24]\n\t"
@@ -129,18 +132,19 @@
assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
// Dispatch on (end - start).
void* pc;
__asm__ volatile(
// offset := (7 - (end - start)) + 3
- // 3 instructions from rdpc to dispatch start
+ // 3 instructions from rdpc to DISPATCH
" sub %[offset], %[end], %[offset]\n\t" // offset := start - end
" sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
" add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
" rd %pc, %[pc]\n\t" // dispatch on scaled offset
" jmpl %[pc]+%[offset], %g0\n\t"
" nop\n\t"
- "1:\n\t" // dispatch start
+ // DISPATCH: no direct reference, but without it the store block may be elided.
+ "1:\n\t"
" stb %[value], [%[end]-7]\n\t" // end[-7] = value
" stb %[value], [%[end]-6]\n\t"
" stb %[value], [%[end]-5]\n\t"
" stb %[value], [%[end]-4]\n\t"
" stb %[value], [%[end]-3]\n\t"
< prev index next >