< prev index next >

src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp

Print this page
rev 8798 : [mq]: inc1

@@ -77,36 +77,39 @@
     uintptr_t temp;
     __asm__ volatile(
       // Unroll loop x8.
       " sub %[aend], %[ato], %[temp]\n\t"
       " cmp %[temp], 56\n\t"           // cc := (aligned_end - aligned_to) > 7 words
-      " ba %xcc, 2f\n\t"
+      " ba %xcc, 2f\n\t"               // goto TEST always
       "  sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
+      // LOOP:
       "1:\n\t"                         // unrolled x8 store loop top
       " cmp %[temp], %[ato]\n\t"       // cc := limit > (next) aligned_to
       " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
       " stx %[xvalue], [%[ato]-56]\n\t"
       " stx %[xvalue], [%[ato]-48]\n\t"
       " stx %[xvalue], [%[ato]-40]\n\t"
       " stx %[xvalue], [%[ato]-32]\n\t"
       " stx %[xvalue], [%[ato]-24]\n\t"
       " stx %[xvalue], [%[ato]-16]\n\t"
       " stx %[xvalue], [%[ato]-8]\n\t"
+      // TEST:
       "2:\n\t"
-      " bgu,a %xcc, 1b\n\t"            // loop if more than 7 words remaining
+      " bgu,a %xcc, 1b\n\t"            // goto LOOP if more than 7 words remaining
       "  add %[ato], 64, %[ato]\n\t"   // aligned_to += 8, for next iteration
       // Fill remaining < 8 full words.
       // Dispatch on (aligned_end - aligned_to).
       // offset := (7 - (aligned_end - aligned_to)) + 3
-      //   3 instructions from rdpc to dispatch start
+      //   3 instructions from rdpc to DISPATCH
       " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
       " srax %[ato], 1, %[ato]\n\t"      // scale offset for instruction size of 4
       " add %[ato], 40, %[ato]\n\t"      // offset += 10 * instruction size
       " rd %pc, %[temp]\n\t"             // dispatch on scaled offset
       " jmpl %[temp]+%[ato], %g0\n\t"
       "  nop\n\t"
-      "3:\n\t"                           // dispatch start
+      // DISPATCH: no direct reference, but without it the store block may be elided.
+      "3:\n\t"
       " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
       " stx %[xvalue], [%[aend]-48]\n\t"
       " stx %[xvalue], [%[aend]-40]\n\t"
       " stx %[xvalue], [%[aend]-32]\n\t"
       " stx %[xvalue], [%[aend]-24]\n\t"

@@ -129,18 +132,19 @@
   assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
   // Dispatch on (end - start).
   void* pc;
   __asm__ volatile(
     // offset := (7 - (end - start)) + 3
-    //   3 instructions from rdpc to dispatch start
+    //   3 instructions from rdpc to DISPATCH
     " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
     " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
     " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
     " rd %pc, %[pc]\n\t"                // dispatch on scaled offset
     " jmpl %[pc]+%[offset], %g0\n\t"
     "  nop\n\t"
-    "1:\n\t"                        // dispatch start
+    // DISPATCH: no direct reference, but without it the store block may be elided.
+    "1:\n\t"
     " stb %[value], [%[end]-7]\n\t" // end[-7] = value
     " stb %[value], [%[end]-6]\n\t"
     " stb %[value], [%[end]-5]\n\t"
     " stb %[value], [%[end]-4]\n\t"
     " stb %[value], [%[end]-3]\n\t"
< prev index next >