--- old/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp 2015-08-19 16:28:05.087585253 -0400 +++ new/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp 2015-08-19 16:28:05.003584836 -0400 @@ -79,8 +79,9 @@ // Unroll loop x8. " sub %[aend], %[ato], %[temp]\n\t" " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words - " ba %xcc, 2f\n\t" + " ba %xcc, 2f\n\t" // goto TEST always " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words + // LOOP: "1:\n\t" // unrolled x8 store loop top " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented @@ -91,20 +92,22 @@ " stx %[xvalue], [%[ato]-24]\n\t" " stx %[xvalue], [%[ato]-16]\n\t" " stx %[xvalue], [%[ato]-8]\n\t" + // TEST: "2:\n\t" - " bgu,a %xcc, 1b\n\t" // loop if more than 7 words remaining + " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration // Fill remaining < 8 full words. // Dispatch on (aligned_end - aligned_to). // offset := (7 - (aligned_end - aligned_to)) + 3 - // 3 instructions from rdpc to dispatch start + // 3 instructions from rdpc to DISPATCH " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size " rd %pc, %[temp]\n\t" // dispatch on scaled offset " jmpl %[temp]+%[ato], %g0\n\t" " nop\n\t" - "3:\n\t" // dispatch start + // DISPATCH: no direct reference, but without it the store block may be elided. + "3:\n\t" " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue " stx %[xvalue], [%[aend]-48]\n\t" " stx %[xvalue], [%[aend]-40]\n\t" @@ -131,14 +134,15 @@ void* pc; __asm__ volatile( // offset := (7 - (end - start)) + 3 - // 3 instructions from rdpc to dispatch start + // 3 instructions from rdpc to DISPATCH " sub %[offset], %[end], %[offset]\n\t" // offset := start - end " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size " rd %pc, %[pc]\n\t" // dispatch on scaled offset " jmpl %[pc]+%[offset], %g0\n\t" " nop\n\t" - "1:\n\t" // dispatch start + // DISPATCH: no direct reference, but without it the store block may be elided. + "1:\n\t" " stb %[value], [%[end]-7]\n\t" // end[-7] = value " stb %[value], [%[end]-6]\n\t" " stb %[value], [%[end]-5]\n\t"