< prev index next >

src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp

Print this page
rev 8798 : [mq]: inc1


  62 
  63     // Compute fill word.
  64     STATIC_ASSERT(BitsPerByte == 8);
  65     STATIC_ASSERT(BitsPerWord == 64);
  66     uintx xvalue = value & 0xff;
  67     xvalue |= (xvalue << 8);
  68     xvalue |= (xvalue << 16);
  69     xvalue |= (xvalue << 32);
  70 
  71     uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord));
  72     assert(aligned_to <= aligned_end, "invariant");
  73 
  74     // for ( ; aligned_to < aligned_end; ++aligned_to) {
  75     //   *aligned_to = xvalue;
  76     // }
  77     uintptr_t temp;
  78     __asm__ volatile(
  79       // Unroll loop x8.
  80       " sub %[aend], %[ato], %[temp]\n\t"
  81       " cmp %[temp], 56\n\t"           // cc := (aligned_end - aligned_to) > 7 words
  82       " ba %xcc, 2f\n\t"
  83       "  sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words

  84       "1:\n\t"                         // unrolled x8 store loop top
  85       " cmp %[temp], %[ato]\n\t"       // cc := limit > (next) aligned_to
  86       " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
  87       " stx %[xvalue], [%[ato]-56]\n\t"
  88       " stx %[xvalue], [%[ato]-48]\n\t"
  89       " stx %[xvalue], [%[ato]-40]\n\t"
  90       " stx %[xvalue], [%[ato]-32]\n\t"
  91       " stx %[xvalue], [%[ato]-24]\n\t"
  92       " stx %[xvalue], [%[ato]-16]\n\t"
  93       " stx %[xvalue], [%[ato]-8]\n\t"

  94       "2:\n\t"
  95       " bgu,a %xcc, 1b\n\t"            // loop if more than 7 words remaining
  96       "  add %[ato], 64, %[ato]\n\t"   // aligned_to += 8, for next iteration
  97       // Fill remaining < 8 full words.
  98       // Dispatch on (aligned_end - aligned_to).
  99       // offset := (7 - (aligned_end - aligned_to)) + 3
 100       //   3 instructions from rdpc to dispatch start
 101       " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
 102       " srax %[ato], 1, %[ato]\n\t"      // scale offset for instruction size of 4
 103       " add %[ato], 40, %[ato]\n\t"      // offset += 10 * instruction size
 104       " rd %pc, %[temp]\n\t"             // dispatch on scaled offset
 105       " jmpl %[temp]+%[ato], %g0\n\t"
 106       "  nop\n\t"
 107       "3:\n\t"                           // dispatch start

 108       " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
 109       " stx %[xvalue], [%[aend]-48]\n\t"
 110       " stx %[xvalue], [%[aend]-40]\n\t"
 111       " stx %[xvalue], [%[aend]-32]\n\t"
 112       " stx %[xvalue], [%[aend]-24]\n\t"
 113       " stx %[xvalue], [%[aend]-16]\n\t"
 114       " stx %[xvalue], [%[aend]-8]\n\t"  // aligned_end[-1] = xvalue
 115       : /* no outputs */
 116       : [ato] "&+r" (aligned_to),
 117         [aend] "r" (aligned_end),
 118         [xvalue] "r" (xvalue),
 119         [temp] "&=r" (temp)
 120       : "cc", "memory");
 121     to = aligned_end;           // setup for suffix
 122   }
 123   // Fill any partial word suffix.  Also the prefix if size < BytesPerWord.
 124   fill_subword(to, end, value);
 125 }
 126 
 127 static void fill_subword(void* start, void* end, int value) {
       // Fills the (end - start) bytes that end at `end` with `value`, one stb
       // per byte. The assert below requires that distance to be less than
       // BytesPerWord (8), so at most seven stores are ever needed.
       // Rather than looping, the asm computes a jump into a run of seven stb
       // instructions so that only the trailing (end - start) of them execute.
 128   STATIC_ASSERT(BytesPerWord == 8);
 129   assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
 130   // Dispatch on (end - start).
 131   void* pc;
 132   __asm__ volatile(
 133     // offset := (7 - (end - start)) + 3
 134     //   3 instructions from rdpc to dispatch start
         // In bytes the jump target is pc + 40 - 4 * (end - start), where pc is
         // the value read by rd %pc (assumed to be the address of the rd itself).
         // Counting instructions from the rd, the seven stores sit in slots 3..9,
         // so the target is the store to end[-(end - start)], or the instruction
         // after the last store when end == start.
 135     " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
 136     " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
 137     " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
 138     " rd %pc, %[pc]\n\t"                // dispatch on scaled offset
 139     " jmpl %[pc]+%[offset], %g0\n\t"
 140     "  nop\n\t"
 141     "1:\n\t"                        // dispatch start

 142     " stb %[value], [%[end]-7]\n\t" // end[-7] = value
 143     " stb %[value], [%[end]-6]\n\t"
 144     " stb %[value], [%[end]-5]\n\t"
 145     " stb %[value], [%[end]-4]\n\t"
 146     " stb %[value], [%[end]-3]\n\t"
 147     " stb %[value], [%[end]-2]\n\t"
 148     " stb %[value], [%[end]-1]\n\t" // end[-1] = value
         // NOTE(review): %[offset] and %[pc] are written by the asm, yet they are
         // listed in the second (input) operand section with '+' / '=' modifiers
         // while the output section is empty. GCC's extended-asm rules put written
         // operands in the output section with constraints that begin with '=' or
         // '+', and expect literal registers to be spelled %%pc / %%g0 when the
         // template has operands. Confirm the target compiler accepts this form.
 149     : /* no outputs */
 150     : [offset] "&+r" (start),
 151       [end] "r" (end),
 152       [value] "r" (value),
 153       [pc] "&=r" (pc)
 154     : "memory");
 155 }
 156 
 157 #endif // INCLUDE_ALL_GCS


  62 
  63     // Compute fill word.
  64     STATIC_ASSERT(BitsPerByte == 8);
  65     STATIC_ASSERT(BitsPerWord == 64);
  66     uintx xvalue = value & 0xff;
  67     xvalue |= (xvalue << 8);
  68     xvalue |= (xvalue << 16);
  69     xvalue |= (xvalue << 32);
  70 
  71     uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord));
  72     assert(aligned_to <= aligned_end, "invariant");
  73 
  74     // for ( ; aligned_to < aligned_end; ++aligned_to) {
  75     //   *aligned_to = xvalue;
  76     // }
  77     uintptr_t temp;
  78     __asm__ volatile(
  79       // Unroll loop x8.
  80       " sub %[aend], %[ato], %[temp]\n\t"
  81       " cmp %[temp], 56\n\t"           // cc := (aligned_end - aligned_to) > 7 words
  82       " ba %xcc, 2f\n\t"               // goto TEST always
  83       "  sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
  84       // LOOP:
  85       "1:\n\t"                         // unrolled x8 store loop top
  86       " cmp %[temp], %[ato]\n\t"       // cc := limit > (next) aligned_to
  87       " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
  88       " stx %[xvalue], [%[ato]-56]\n\t"
  89       " stx %[xvalue], [%[ato]-48]\n\t"
  90       " stx %[xvalue], [%[ato]-40]\n\t"
  91       " stx %[xvalue], [%[ato]-32]\n\t"
  92       " stx %[xvalue], [%[ato]-24]\n\t"
  93       " stx %[xvalue], [%[ato]-16]\n\t"
  94       " stx %[xvalue], [%[ato]-8]\n\t"
  95       // TEST:
  96       "2:\n\t"
  97       " bgu,a %xcc, 1b\n\t"            // goto LOOP if more than 7 words remaining
  98       "  add %[ato], 64, %[ato]\n\t"   // aligned_to += 8, for next iteration
  99       // Fill remaining < 8 full words.
 100       // Dispatch on (aligned_end - aligned_to).
 101       // offset := (7 - (aligned_end - aligned_to)) + 3
 102       //   3 instructions from rdpc to DISPATCH
 103       " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
 104       " srax %[ato], 1, %[ato]\n\t"      // scale offset for instruction size of 4
 105       " add %[ato], 40, %[ato]\n\t"      // offset += 10 * instruction size
 106       " rd %pc, %[temp]\n\t"             // dispatch on scaled offset
 107       " jmpl %[temp]+%[ato], %g0\n\t"
 108       "  nop\n\t"
 109       // DISPATCH: no direct reference, but without it the store block may be elided.
 110       "3:\n\t"
 111       " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
 112       " stx %[xvalue], [%[aend]-48]\n\t"
 113       " stx %[xvalue], [%[aend]-40]\n\t"
 114       " stx %[xvalue], [%[aend]-32]\n\t"
 115       " stx %[xvalue], [%[aend]-24]\n\t"
 116       " stx %[xvalue], [%[aend]-16]\n\t"
 117       " stx %[xvalue], [%[aend]-8]\n\t"  // aligned_end[-1] = xvalue
 118       : /* no outputs */
 119       : [ato] "&+r" (aligned_to),
 120         [aend] "r" (aligned_end),
 121         [xvalue] "r" (xvalue),
 122         [temp] "&=r" (temp)
 123       : "cc", "memory");
 124     to = aligned_end;           // setup for suffix
 125   }
 126   // Fill any partial word suffix.  Also the prefix if size < BytesPerWord.
 127   fill_subword(to, end, value);
 128 }
 129 
 130 static void fill_subword(void* start, void* end, int value) {
       // Fills the (end - start) bytes that end at `end` with `value`, one stb
       // per byte. The assert below requires that distance to be less than
       // BytesPerWord (8), so at most seven stores are ever needed.
       // Rather than looping, the asm computes a jump into a run of seven stb
       // instructions so that only the trailing (end - start) of them execute.
 131   STATIC_ASSERT(BytesPerWord == 8);
 132   assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
 133   // Dispatch on (end - start).
 134   void* pc;
 135   __asm__ volatile(
 136     // offset := (7 - (end - start)) + 3
 137     //   3 instructions from rdpc to DISPATCH
         // In bytes the jump target is pc + 40 - 4 * (end - start), where pc is
         // the value read by rd %pc (assumed to be the address of the rd itself).
         // Counting instructions from the rd, the seven stores sit in slots 3..9,
         // so the target is the store to end[-(end - start)], or the instruction
         // after the last store when end == start.
 138     " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
 139     " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
 140     " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
 141     " rd %pc, %[pc]\n\t"                // dispatch on scaled offset
 142     " jmpl %[pc]+%[offset], %g0\n\t"
 143     "  nop\n\t"
 144     // DISPATCH: no direct reference, but without it the store block may be elided.
 145     "1:\n\t"
 146     " stb %[value], [%[end]-7]\n\t" // end[-7] = value
 147     " stb %[value], [%[end]-6]\n\t"
 148     " stb %[value], [%[end]-5]\n\t"
 149     " stb %[value], [%[end]-4]\n\t"
 150     " stb %[value], [%[end]-3]\n\t"
 151     " stb %[value], [%[end]-2]\n\t"
 152     " stb %[value], [%[end]-1]\n\t" // end[-1] = value
         // NOTE(review): %[offset] and %[pc] are written by the asm, yet they are
         // listed in the second (input) operand section with '+' / '=' modifiers
         // while the output section is empty. GCC's extended-asm rules put written
         // operands in the output section with constraints that begin with '=' or
         // '+', and expect literal registers to be spelled %%pc / %%g0 when the
         // template has operands. Confirm the target compiler accepts this form.
 153     : /* no outputs */
 154     : [offset] "&+r" (start),
 155       [end] "r" (end),
 156       [value] "r" (value),
 157       [pc] "&=r" (pc)
 158     : "memory");
 159 }
 160 
 161 #endif // INCLUDE_ALL_GCS
< prev index next >