62
63 // Compute fill word.
64 STATIC_ASSERT(BitsPerByte == 8);
65 STATIC_ASSERT(BitsPerWord == 64);
66 uintx xvalue = value & 0xff;
67 xvalue |= (xvalue << 8);
68 xvalue |= (xvalue << 16);
69 xvalue |= (xvalue << 32);
70
71 uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord));
72 assert(aligned_to <= aligned_end, "invariant");
73
74 // for ( ; aligned_to < aligned_end; ++aligned_to) {
75 // *aligned_to = xvalue;
76 // }
77 uintptr_t temp;
78 __asm__ volatile(
79 // Unroll loop x8.
80 " sub %[aend], %[ato], %[temp]\n\t"
81 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
82 " ba %xcc, 2f\n\t"
83 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
84 "1:\n\t" // unrolled x8 store loop top
85 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to
86 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
87 " stx %[xvalue], [%[ato]-56]\n\t"
88 " stx %[xvalue], [%[ato]-48]\n\t"
89 " stx %[xvalue], [%[ato]-40]\n\t"
90 " stx %[xvalue], [%[ato]-32]\n\t"
91 " stx %[xvalue], [%[ato]-24]\n\t"
92 " stx %[xvalue], [%[ato]-16]\n\t"
93 " stx %[xvalue], [%[ato]-8]\n\t"
94 "2:\n\t"
95 " bgu,a %xcc, 1b\n\t" // loop if more than 7 words remaining
96 " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
97 // Fill remaining < 8 full words.
98 // Dispatch on (aligned_end - aligned_to).
99 // offset := (7 - (aligned_end - aligned_to)) + 3
100 // 3 instructions from rdpc to dispatch start
101 " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
102 " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
103 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
104 " rd %pc, %[temp]\n\t" // dispatch on scaled offset
105 " jmpl %[temp]+%[ato], %g0\n\t"
106 " nop\n\t"
107 "3:\n\t" // dispatch start
108 " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
109 " stx %[xvalue], [%[aend]-48]\n\t"
110 " stx %[xvalue], [%[aend]-40]\n\t"
111 " stx %[xvalue], [%[aend]-32]\n\t"
112 " stx %[xvalue], [%[aend]-24]\n\t"
113 " stx %[xvalue], [%[aend]-16]\n\t"
114 " stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue
115 : /* no outputs */
116 : [ato] "&+r" (aligned_to),
117 [aend] "r" (aligned_end),
118 [xvalue] "r" (xvalue),
119 [temp] "&=r" (temp)
120 : "cc", "memory");
121 to = aligned_end; // setup for suffix
122 }
123 // Fill any partial word suffix. Also the prefix if size < BytesPerWord.
124 fill_subword(to, end, value);
125 }
126
127 static void fill_subword(void* start, void* end, int value) {
128 STATIC_ASSERT(BytesPerWord == 8);
129 assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
130 // Dispatch on (end - start).
131 void* pc;
132 __asm__ volatile(
133 // offset := (7 - (end - start)) + 3
134 // 3 instructions from rdpc to dispatch start
135 " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
136 " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
137 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
138 " rd %pc, %[pc]\n\t" // dispatch on scaled offset
139 " jmpl %[pc]+%[offset], %g0\n\t"
140 " nop\n\t"
141 "1:\n\t" // dispatch start
142 " stb %[value], [%[end]-7]\n\t" // end[-7] = value
143 " stb %[value], [%[end]-6]\n\t"
144 " stb %[value], [%[end]-5]\n\t"
145 " stb %[value], [%[end]-4]\n\t"
146 " stb %[value], [%[end]-3]\n\t"
147 " stb %[value], [%[end]-2]\n\t"
148 " stb %[value], [%[end]-1]\n\t" // end[-1] = value
149 : /* no outputs */
150 : [offset] "&+r" (start),
151 [end] "r" (end),
152 [value] "r" (value),
153 [pc] "&=r" (pc)
154 : "memory");
155 }
156
157 #endif // INCLUDE_ALL_GCS
|
62
63 // Compute fill word.
64 STATIC_ASSERT(BitsPerByte == 8);
65 STATIC_ASSERT(BitsPerWord == 64);
66 uintx xvalue = value & 0xff;
67 xvalue |= (xvalue << 8);
68 xvalue |= (xvalue << 16);
69 xvalue |= (xvalue << 32);
70
71 uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord));
72 assert(aligned_to <= aligned_end, "invariant");
73
74 // for ( ; aligned_to < aligned_end; ++aligned_to) {
75 // *aligned_to = xvalue;
76 // }
77 uintptr_t temp;
78 __asm__ volatile(
79 // Unroll loop x8.
80 " sub %[aend], %[ato], %[temp]\n\t"
81 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
82 " ba %xcc, 2f\n\t" // goto TEST always
83 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
84 // LOOP:
85 "1:\n\t" // unrolled x8 store loop top
86 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to
87 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
88 " stx %[xvalue], [%[ato]-56]\n\t"
89 " stx %[xvalue], [%[ato]-48]\n\t"
90 " stx %[xvalue], [%[ato]-40]\n\t"
91 " stx %[xvalue], [%[ato]-32]\n\t"
92 " stx %[xvalue], [%[ato]-24]\n\t"
93 " stx %[xvalue], [%[ato]-16]\n\t"
94 " stx %[xvalue], [%[ato]-8]\n\t"
95 // TEST:
96 "2:\n\t"
97 " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
98 " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
99 // Fill remaining < 8 full words.
100 // Dispatch on (aligned_end - aligned_to).
101 // offset := (7 - (aligned_end - aligned_to)) + 3
102 // 3 instructions from rdpc to DISPATCH
103 " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
104 " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
105 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
106 " rd %pc, %[temp]\n\t" // dispatch on scaled offset
107 " jmpl %[temp]+%[ato], %g0\n\t"
108 " nop\n\t"
109 // DISPATCH: no direct reference, but without it the store block may be elided.
110 "3:\n\t"
111 " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
112 " stx %[xvalue], [%[aend]-48]\n\t"
113 " stx %[xvalue], [%[aend]-40]\n\t"
114 " stx %[xvalue], [%[aend]-32]\n\t"
115 " stx %[xvalue], [%[aend]-24]\n\t"
116 " stx %[xvalue], [%[aend]-16]\n\t"
117 " stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue
118 : /* no outputs */
119 : [ato] "&+r" (aligned_to),
120 [aend] "r" (aligned_end),
121 [xvalue] "r" (xvalue),
122 [temp] "&=r" (temp)
123 : "cc", "memory");
124 to = aligned_end; // setup for suffix
125 }
126 // Fill any partial word suffix. Also the prefix if size < BytesPerWord.
127 fill_subword(to, end, value);
128 }
129
130 static void fill_subword(void* start, void* end, int value) {
131 STATIC_ASSERT(BytesPerWord == 8);
132 assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
133 // Dispatch on (end - start).
134 void* pc;
135 __asm__ volatile(
136 // offset := (7 - (end - start)) + 3
137 // 3 instructions from rdpc to DISPATCH
138 " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
139 " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
140 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
141 " rd %pc, %[pc]\n\t" // dispatch on scaled offset
142 " jmpl %[pc]+%[offset], %g0\n\t"
143 " nop\n\t"
144 // DISPATCH: no direct reference, but without it the store block may be elided.
145 "1:\n\t"
146 " stb %[value], [%[end]-7]\n\t" // end[-7] = value
147 " stb %[value], [%[end]-6]\n\t"
148 " stb %[value], [%[end]-5]\n\t"
149 " stb %[value], [%[end]-4]\n\t"
150 " stb %[value], [%[end]-3]\n\t"
151 " stb %[value], [%[end]-2]\n\t"
152 " stb %[value], [%[end]-1]\n\t" // end[-1] = value
153 : /* no outputs */
154 : [offset] "&+r" (start),
155 [end] "r" (end),
156 [value] "r" (value),
157 [pc] "&=r" (pc)
158 : "memory");
159 }
160
161 #endif // INCLUDE_ALL_GCS
|