Print this page
rev 1839 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/sparc/vm/assembler_sparc.cpp
+++ new/src/cpu/sparc/vm/assembler_sparc.cpp
1 1 /*
2 2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "incls/_precompiled.incl"
26 26 #include "incls/_assembler_sparc.cpp.incl"
27 27
28 28 // Convert the raw encoding form into the form expected by the
29 29 // constructor for Address.
30 30 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
31 31 assert(scale == 0, "not supported");
32 32 RelocationHolder rspec;
33 33 if (disp_is_oop) {
34 34 rspec = Relocation::spec_simple(relocInfo::oop_type);
35 35 }
36 36
37 37 Register rindex = as_Register(index);
38 38 if (rindex != G0) {
39 39 Address madr(as_Register(base), rindex);
40 40 madr._rspec = rspec;
41 41 return madr;
42 42 } else {
43 43 Address madr(as_Register(base), disp);
44 44 madr._rspec = rspec;
45 45 return madr;
46 46 }
47 47 }
48 48
// Compute the stack-frame address backing this argument: in-args are
// addressed off FP, out-args off SP (both biased on LP64).
49 49 Address Argument::address_in_frame() const {
50 50   // Warning: In LP64 mode disp will occupy more than 10 bits, but
51 51   //          op codes such as ld or ldx, only access disp() to get
52 52   //          their simm13 argument.
53 53   int disp = ((_number - Argument::n_register_parameters + frame::memory_parameter_word_sp_offset) * BytesPerWord) + STACK_BIAS;
54 54   if (is_in())
55 55     return Address(FP, disp); // In argument.
56 56   else
57 57     return Address(SP, disp); // Out argument.
58 58 }
59 59
60 60 static const char* argumentNames[][2] = {
61 61 {"A0","P0"}, {"A1","P1"}, {"A2","P2"}, {"A3","P3"}, {"A4","P4"},
62 62 {"A5","P5"}, {"A6","P6"}, {"A7","P7"}, {"A8","P8"}, {"A9","P9"},
63 63 {"A(n>9)","P(n>9)"}
64 64 };
65 65
66 66 const char* Argument::name() const {
67 67 int nofArgs = sizeof argumentNames / sizeof argumentNames[0];
68 68 int num = number();
69 69 if (num >= nofArgs) num = nofArgs - 1;
70 70 return argumentNames[num][is_in() ? 1 : 0];
71 71 }
72 72
// Print the mnemonic of instruction word 'inst' to tty (no operands).
// Only calls and the branch family are decoded; anything else prints "????".
73 73 void Assembler::print_instruction(int inst) {
74 74   const char* s;
75 75   switch (inv_op(inst)) {
76 76   default: s = "????"; break;
77 77   case call_op: s = "call"; break;
78 78   case branch_op:
79 79     switch (inv_op2(inst)) {
80 80     case bpr_op2: s = "bpr"; break;
81 81     case fb_op2: s = "fb"; break;
82 82     case fbp_op2: s = "fbp"; break;
83 83     case br_op2: s = "br"; break;
84 84     case bp_op2: s = "bp"; break;
85 85     case cb_op2: s = "cb"; break;
86 86     default: s = "????"; break;
87 87     }
88 88   }
89 89   ::tty->print("%s", s);
90 90 }
91 91
92 92
93 93 // Patch instruction inst at offset inst_pos to refer to dest_pos
94 94 // and return the resulting instruction.
95 95 // We should have pcs, not offsets, but since all is relative, it will work out
96 96 // OK.
97 97 int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
98 98
99 99 int m; // mask for displacement field
100 100 int v; // new value for displacement field
101 101 const int word_aligned_ones = -4;
102 102 switch (inv_op(inst)) {
103 103 default: ShouldNotReachHere();
104 104 case call_op: m = wdisp(word_aligned_ones, 0, 30); v = wdisp(dest_pos, inst_pos, 30); break;
105 105 case branch_op:
106 106 switch (inv_op2(inst)) {
107 107 case bpr_op2: m = wdisp16(word_aligned_ones, 0); v = wdisp16(dest_pos, inst_pos); break;
108 108 case fbp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break;
109 109 case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break;
110 110 case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
111 111 case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
112 112 case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break;
113 113 default: ShouldNotReachHere();
114 114 }
115 115 }
116 116 return inst & ~m | v;
117 117 }
118 118
119 119 // Return the offset of the branch destination of instruction inst
120 120 // at offset pos.
121 121 // Should have pcs, but since all is relative, it works out.
// The displacement width (16/19/22/30 bits) depends on the branch flavor,
// mirroring the encodings used by patched_branch above.
122 122 int Assembler::branch_destination(int inst, int pos) {
123 123   int r;
124 124   switch (inv_op(inst)) {
125 125   default: ShouldNotReachHere();
126 126   case call_op: r = inv_wdisp(inst, pos, 30); break;
127 127   case branch_op:
128 128     switch (inv_op2(inst)) {
129 129     case bpr_op2: r = inv_wdisp16(inst, pos); break;
130 130     case fbp_op2: r = inv_wdisp( inst, pos, 19); break;
131 131     case bp_op2: r = inv_wdisp( inst, pos, 19); break;
132 132     case fb_op2: r = inv_wdisp( inst, pos, 22); break;
133 133     case br_op2: r = inv_wdisp( inst, pos, 22); break;
134 134     case cb_op2: r = inv_wdisp( inst, pos, 22); break;
135 135     default: ShouldNotReachHere();
136 136     }
137 137   }
138 138   return r;
139 139 }
140 140
141 141 int AbstractAssembler::code_fill_byte() {
142 142 return 0x00; // illegal instruction 0x00000000
143 143 }
144 144
145 145 Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
146 146 switch (in) {
147 147 case rc_z: return equal;
148 148 case rc_lez: return lessEqual;
149 149 case rc_lz: return less;
150 150 case rc_nz: return notEqual;
151 151 case rc_gz: return greater;
152 152 case rc_gez: return greaterEqual;
153 153 default:
154 154 ShouldNotReachHere();
155 155 }
156 156 return equal;
157 157 }
158 158
159 159 // Generate a bunch 'o stuff (including v9's
160 160 #ifndef PRODUCT
// Non-product self-test: emit (roughly) one of every V9 instruction form,
// exercising both register and immediate operand encodings, then disassemble
// the buffer.  Order matters: labels are bound relative to the branches that
// reference them, so statements must not be reordered.
161 161 void Assembler::test_v9() {
162 162 add( G0, G1, G2 );
163 163 add( G3, 0, G4 );
164 164
165 165 addcc( G5, G6, G7 );
166 166 addcc( I0, 1, I1 );
167 167 addc( I2, I3, I4 );
168 168 addc( I5, -1, I6 );
169 169 addccc( I7, L0, L1 );
170 170 addccc( L2, (1 << 12) - 2, L3 );
171 171
172 172 Label lbl1, lbl2, lbl3;
173 173
174 174 bind(lbl1);
175 175
176 176 bpr( rc_z, true, pn, L4, pc(), relocInfo::oop_type );
177 177 delayed()->nop();
178 178 bpr( rc_lez, false, pt, L5, lbl1);
179 179 delayed()->nop();
180 180
181 181 fb( f_never, true, pc() + 4, relocInfo::none);
182 182 delayed()->nop();
183 183 fb( f_notEqual, false, lbl2 );
184 184 delayed()->nop();
185 185
186 186 fbp( f_notZero, true, fcc0, pn, pc() - 4, relocInfo::none);
187 187 delayed()->nop();
188 188 fbp( f_lessOrGreater, false, fcc1, pt, lbl3 );
189 189 delayed()->nop();
190 190
191 191 br( equal, true, pc() + 1024, relocInfo::none);
192 192 delayed()->nop();
193 193 br( lessEqual, false, lbl1 );
194 194 delayed()->nop();
195 195 br( never, false, lbl1 );
196 196 delayed()->nop();
197 197
198 198 bp( less, true, icc, pn, pc(), relocInfo::none);
199 199 delayed()->nop();
200 200 bp( lessEqualUnsigned, false, xcc, pt, lbl2 );
201 201 delayed()->nop();
202 202
203 203 call( pc(), relocInfo::none);
204 204 delayed()->nop();
205 205 call( lbl3 );
206 206 delayed()->nop();
207 207
208 208
209 209 casa( L6, L7, O0 );
210 210 casxa( O1, O2, O3, 0 );
211 211
212 212 udiv( O4, O5, O7 );
213 213 udiv( G0, (1 << 12) - 1, G1 );
214 214 sdiv( G1, G2, G3 );
215 215 sdiv( G4, -((1 << 12) - 1), G5 );
216 216 udivcc( G6, G7, I0 );
217 217 udivcc( I1, -((1 << 12) - 2), I2 );
218 218 sdivcc( I3, I4, I5 );
219 219 sdivcc( I6, -((1 << 12) - 0), I7 );
220 220
221 221 done();
222 222 retry();
223 223
224 224 fadd( FloatRegisterImpl::S, F0, F1, F2 );
225 225 fsub( FloatRegisterImpl::D, F34, F0, F62 );
226 226
227 227 fcmp( FloatRegisterImpl::Q, fcc0, F0, F60);
228 228 fcmpe( FloatRegisterImpl::S, fcc1, F31, F30);
229 229
230 230 ftox( FloatRegisterImpl::D, F2, F4 );
231 231 ftoi( FloatRegisterImpl::Q, F4, F8 );
232 232
233 233 ftof( FloatRegisterImpl::S, FloatRegisterImpl::Q, F3, F12 );
234 234
235 235 fxtof( FloatRegisterImpl::S, F4, F5 );
236 236 fitof( FloatRegisterImpl::D, F6, F8 );
237 237
238 238 fmov( FloatRegisterImpl::Q, F16, F20 );
239 239 fneg( FloatRegisterImpl::S, F6, F7 );
240 240 fabs( FloatRegisterImpl::D, F10, F12 );
241 241
242 242 fmul( FloatRegisterImpl::Q, F24, F28, F32 );
243 243 fmul( FloatRegisterImpl::S, FloatRegisterImpl::D, F8, F9, F14 );
244 244 fdiv( FloatRegisterImpl::S, F10, F11, F12 );
245 245
246 246 fsqrt( FloatRegisterImpl::S, F13, F14 );
247 247
248 248 flush( L0, L1 );
249 249 flush( L2, -1 );
250 250
251 251 flushw();
252 252
253 253 illtrap( (1 << 22) - 2);
254 254
255 255 impdep1( 17, (1 << 19) - 1 );
256 256 impdep2( 3, 0 );
257 257
258 258 jmpl( L3, L4, L5 );
259 259 delayed()->nop();
260 260 jmpl( L6, -1, L7, Relocation::spec_simple(relocInfo::none));
261 261 delayed()->nop();
262 262
263 263
264 264 ldf( FloatRegisterImpl::S, O0, O1, F15 );
265 265 ldf( FloatRegisterImpl::D, O2, -1, F14 );
266 266
267 267
268 268 ldfsr( O3, O4 );
269 269 ldfsr( O5, -1 );
270 270 ldxfsr( O6, O7 );
271 271 ldxfsr( I0, -1 );
272 272
273 273 ldfa( FloatRegisterImpl::D, I1, I2, 1, F16 );
274 274 ldfa( FloatRegisterImpl::Q, I3, -1, F36 );
275 275
276 276 ldsb( I4, I5, I6 );
277 277 ldsb( I7, -1, G0 );
278 278 ldsh( G1, G3, G4 );
279 279 ldsh( G5, -1, G6 );
280 280 ldsw( G7, L0, L1 );
281 281 ldsw( L2, -1, L3 );
282 282 ldub( L4, L5, L6 );
283 283 ldub( L7, -1, O0 );
284 284 lduh( O1, O2, O3 );
285 285 lduh( O4, -1, O5 );
286 286 lduw( O6, O7, G0 );
287 287 lduw( G1, -1, G2 );
288 288 ldx( G3, G4, G5 );
289 289 ldx( G6, -1, G7 );
290 290 ldd( I0, I1, I2 );
291 291 ldd( I3, -1, I4 );
292 292
293 293 ldsba( I5, I6, 2, I7 );
294 294 ldsba( L0, -1, L1 );
295 295 ldsha( L2, L3, 3, L4 );
296 296 ldsha( L5, -1, L6 );
297 297 ldswa( L7, O0, (1 << 8) - 1, O1 );
298 298 ldswa( O2, -1, O3 );
299 299 lduba( O4, O5, 0, O6 );
300 300 lduba( O7, -1, I0 );
301 301 lduha( I1, I2, 1, I3 );
302 302 lduha( I4, -1, I5 );
303 303 lduwa( I6, I7, 2, L0 );
304 304 lduwa( L1, -1, L2 );
305 305 ldxa( L3, L4, 3, L5 );
306 306 ldxa( L6, -1, L7 );
307 307 ldda( G0, G1, 4, G2 );
308 308 ldda( G3, -1, G4 );
309 309
310 310 ldstub( G5, G6, G7 );
311 311 ldstub( O0, -1, O1 );
312 312
313 313 ldstuba( O2, O3, 5, O4 );
314 314 ldstuba( O5, -1, O6 );
315 315
316 316 and3( I0, L0, O0 );
317 317 and3( G7, -1, O7 );
318 318 andcc( L2, I2, G2 );
319 319 andcc( L4, -1, G4 );
320 320 andn( I5, I6, I7 );
321 321 andn( I6, -1, I7 );
322 322 andncc( I5, I6, I7 );
323 323 andncc( I7, -1, I6 );
324 324 or3( I5, I6, I7 );
325 325 or3( I7, -1, I6 );
326 326 orcc( I5, I6, I7 );
327 327 orcc( I7, -1, I6 );
328 328 orn( I5, I6, I7 );
329 329 orn( I7, -1, I6 );
330 330 orncc( I5, I6, I7 );
331 331 orncc( I7, -1, I6 );
332 332 xor3( I5, I6, I7 );
333 333 xor3( I7, -1, I6 );
334 334 xorcc( I5, I6, I7 );
335 335 xorcc( I7, -1, I6 );
336 336 xnor( I5, I6, I7 );
337 337 xnor( I7, -1, I6 );
338 338 xnorcc( I5, I6, I7 );
339 339 xnorcc( I7, -1, I6 );
340 340
// All membar mask combinations, then each individual mask bit.
341 341 membar( Membar_mask_bits(StoreStore | LoadStore | StoreLoad | LoadLoad | Sync | MemIssue | Lookaside ) );
342 342 membar( StoreStore );
343 343 membar( LoadStore );
344 344 membar( StoreLoad );
345 345 membar( LoadLoad );
346 346 membar( Sync );
347 347 membar( MemIssue );
348 348 membar( Lookaside );
349 349
350 350 fmov( FloatRegisterImpl::S, f_ordered, true, fcc2, F16, F17 );
351 351 fmov( FloatRegisterImpl::D, rc_lz, L5, F18, F20 );
352 352
353 353 movcc( overflowClear, false, icc, I6, L4 );
354 354 movcc( f_unorderedOrEqual, true, fcc2, (1 << 10) - 1, O0 );
355 355
356 356 movr( rc_nz, I5, I6, I7 );
357 357 movr( rc_gz, L1, -1, L2 );
358 358
359 359 mulx( I5, I6, I7 );
360 360 mulx( I7, -1, I6 );
361 361 sdivx( I5, I6, I7 );
362 362 sdivx( I7, -1, I6 );
363 363 udivx( I5, I6, I7 );
364 364 udivx( I7, -1, I6 );
365 365
366 366 umul( I5, I6, I7 );
367 367 umul( I7, -1, I6 );
368 368 smul( I5, I6, I7 );
369 369 smul( I7, -1, I6 );
370 370 umulcc( I5, I6, I7 );
371 371 umulcc( I7, -1, I6 );
372 372 smulcc( I5, I6, I7 );
373 373 smulcc( I7, -1, I6 );
374 374
375 375 mulscc( I5, I6, I7 );
376 376 mulscc( I7, -1, I6 );
377 377
378 378 nop();
379 379
380 380
381 381 popc( G0, G1);
382 382 popc( -1, G2);
383 383
384 384 prefetch( L1, L2, severalReads );
385 385 prefetch( L3, -1, oneRead );
386 386 prefetcha( O3, O2, 6, severalWritesAndPossiblyReads );
387 387 prefetcha( G2, -1, oneWrite );
388 388
389 389 rett( I7, I7);
390 390 delayed()->nop();
391 391 rett( G0, -1, relocInfo::none);
392 392 delayed()->nop();
393 393
394 394 save( I5, I6, I7 );
395 395 save( I7, -1, I6 );
396 396 restore( I5, I6, I7 );
397 397 restore( I7, -1, I6 );
398 398
399 399 saved();
400 400 restored();
401 401
402 402 sethi( 0xaaaaaaaa, I3, Relocation::spec_simple(relocInfo::none));
403 403
404 404 sll( I5, I6, I7 );
405 405 sll( I7, 31, I6 );
406 406 srl( I5, I6, I7 );
407 407 srl( I7, 0, I6 );
408 408 sra( I5, I6, I7 );
409 409 sra( I7, 30, I6 );
410 410 sllx( I5, I6, I7 );
411 411 sllx( I7, 63, I6 );
412 412 srlx( I5, I6, I7 );
413 413 srlx( I7, 0, I6 );
414 414 srax( I5, I6, I7 );
415 415 srax( I7, 62, I6 );
416 416
417 417 sir( -1 );
418 418
419 419 stbar();
420 420
421 421 stf( FloatRegisterImpl::Q, F40, G0, I7 );
422 422 stf( FloatRegisterImpl::S, F18, I3, -1 );
423 423
424 424 stfsr( L1, L2 );
425 425 stfsr( I7, -1 );
426 426 stxfsr( I6, I5 );
427 427 stxfsr( L4, -1 );
428 428
429 429 stfa( FloatRegisterImpl::D, F22, I6, I7, 7 );
430 430 stfa( FloatRegisterImpl::Q, F44, G0, -1 );
431 431
432 432 stb( L5, O2, I7 );
433 433 stb( I7, I6, -1 );
434 434 sth( L5, O2, I7 );
435 435 sth( I7, I6, -1 );
436 436 stw( L5, O2, I7 );
437 437 stw( I7, I6, -1 );
438 438 stx( L5, O2, I7 );
439 439 stx( I7, I6, -1 );
440 440 std( L5, O2, I7 );
441 441 std( I7, I6, -1 );
442 442
443 443 stba( L5, O2, I7, 8 );
444 444 stba( I7, I6, -1 );
445 445 stha( L5, O2, I7, 9 );
446 446 stha( I7, I6, -1 );
447 447 stwa( L5, O2, I7, 0 );
448 448 stwa( I7, I6, -1 );
449 449 stxa( L5, O2, I7, 11 );
450 450 stxa( I7, I6, -1 );
451 451 stda( L5, O2, I7, 12 );
452 452 stda( I7, I6, -1 );
453 453
454 454 sub( I5, I6, I7 );
455 455 sub( I7, -1, I6 );
456 456 subcc( I5, I6, I7 );
457 457 subcc( I7, -1, I6 );
458 458 subc( I5, I6, I7 );
459 459 subc( I7, -1, I6 );
460 460 subccc( I5, I6, I7 );
461 461 subccc( I7, -1, I6 );
462 462
463 463 swap( I5, I6, I7 );
464 464 swap( I7, -1, I6 );
465 465
466 466 swapa( G0, G1, 13, G2 );
467 467 swapa( I7, -1, I6 );
468 468
469 469 taddcc( I5, I6, I7 );
470 470 taddcc( I7, -1, I6 );
471 471 taddcctv( I5, I6, I7 );
472 472 taddcctv( I7, -1, I6 );
473 473
474 474 tsubcc( I5, I6, I7 );
475 475 tsubcc( I7, -1, I6 );
476 476 tsubcctv( I5, I6, I7 );
477 477 tsubcctv( I7, -1, I6 );
478 478
479 479 trap( overflowClear, xcc, G0, G1 );
480 480 trap( lessEqual, icc, I7, 17 );
481 481
// Bind the remaining forward-referenced labels before disassembling.
482 482 bind(lbl2);
483 483 bind(lbl3);
484 484
485 485 code()->decode();
486 486 }
487 487
488 488 // Generate a bunch 'o stuff unique to V8
// Non-product self-test for instructions that exist only on V8
// (coprocessor ops and the cb branch), then disassemble the buffer.
489 489 void Assembler::test_v8_onlys() {
490 490 Label lbl1;
491 491
492 492 cb( cp_0or1or2, false, pc() - 4, relocInfo::none);
493 493 delayed()->nop();
494 494 cb( cp_never, true, lbl1);
495 495 delayed()->nop();
496 496
497 497 cpop1(1, 2, 3, 4);
498 498 cpop2(5, 6, 7, 8);
499 499
500 500 ldc( I0, I1, 31);
501 501 ldc( I2, -1, 0);
502 502
503 503 lddc( I4, I4, 30);
504 504 lddc( I6, 0, 1 );
505 505
506 506 ldcsr( L0, L1, 0);
507 507 ldcsr( L1, (1 << 12) - 1, 17 );
508 508
509 509 stc( 31, L4, L5);
510 510 stc( 30, L6, -(1 << 12) );
511 511
512 512 stdc( 0, L7, G0);
513 513 stdc( 1, G1, 0 );
514 514
515 515 stcsr( 16, G2, G3);
516 516 stcsr( 17, G4, 1 );
517 517
518 518 stdcq( 4, G5, G6);
519 519 stdcq( 5, G7, -1 );
520 520
521 521 bind(lbl1);
522 522
523 523 code()->decode();
524 524 }
525 525 #endif
526 526
527 527 // Implementation of MacroAssembler
528 528
// Emit an explicit null check for reg only when the later access at
// M[reg + offset] would not itself trap for a NULL reg (i.e. when the
// offset may fall outside the OS's protected page at address zero).
529 529 void MacroAssembler::null_check(Register reg, int offset) {
530 530   if (needs_explicit_null_check((intptr_t)offset)) {
531 531     // provoke OS NULL exception if reg = NULL by
532 532     // accessing M[reg] w/o changing any registers
533 533     ld_ptr(reg, 0, G0);
534 534   }
535 535   else {
536 536     // nothing to do, (later) access of M[reg + offset]
537 537     // will provoke OS NULL exception if reg = NULL
538 538   }
539 539 }
540 540
541 541 // Ring buffer jumps
542 542
543 543 #ifndef PRODUCT
544 544 void MacroAssembler::ret( bool trace ) { if (trace) {
545 545 mov(I7, O7); // traceable register
546 546 JMP(O7, 2 * BytesPerInstWord);
547 547 } else {
548 548 jmpl( I7, 2 * BytesPerInstWord, G0 );
549 549 }
550 550 }
551 551
552 552 void MacroAssembler::retl( bool trace ) { if (trace) JMP(O7, 2 * BytesPerInstWord);
553 553 else jmpl( O7, 2 * BytesPerInstWord, G0 ); }
554 554 #endif /* PRODUCT */
555 555
556 556
// Jump to r1 + r2.  Under TraceJumps (non-product), first record
// (target, nearby pc, file, line) in the per-thread jump ring buffer
// so crashes after a bad jump can be diagnosed.
557 557 void MacroAssembler::jmp2(Register r1, Register r2, const char* file, int line ) {
558 558   assert_not_delayed();
559 559   // This can only be traceable if r1 & r2 are visible after a window save
560 560   if (TraceJumps) {
561 561 #ifndef PRODUCT
562 562     save_frame(0);
563 563     verify_thread();
564 564     ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
565 565     add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
566 566     sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
567 567     add(O2, O1, O1);
568 568
569 569     add(r1->after_save(), r2->after_save(), O2);
570 570     set((intptr_t)file, O3);
571 571     set(line, O4);
572 572     Label L;
573 573     // get nearby pc, store jmp target
574 574     call(L, relocInfo::none);  // No relocation for call to pc+0x8
575 575     delayed()->st(O2, O1, 0);
576 576     bind(L);
577 577
578 578     // store nearby pc
579 579     st(O7, O1, sizeof(intptr_t));
580 580     // store file
581 581     st(O3, O1, 2*sizeof(intptr_t));
582 582     // store line
583 583     st(O4, O1, 3*sizeof(intptr_t));
584 584     add(O0, 1, O0);
// Advance the ring index modulo the (power-of-two) buffer size.
585 585     and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
586 586     st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
587 587     restore();
588 588 #endif /* PRODUCT */
589 589   }
590 590   jmpl(r1, r2, G0);
591 591 }
// Jump to r1 + offset.  Same TraceJumps ring-buffer logging as jmp2 above,
// but with an immediate displacement instead of a second register.
592 592 void MacroAssembler::jmp(Register r1, int offset, const char* file, int line ) {
593 593   assert_not_delayed();
594 594   // This can only be traceable if r1 is visible after a window save
595 595   if (TraceJumps) {
596 596 #ifndef PRODUCT
597 597     save_frame(0);
598 598     verify_thread();
599 599     ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
600 600     add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
601 601     sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
602 602     add(O2, O1, O1);
603 603
604 604     add(r1->after_save(), offset, O2);
605 605     set((intptr_t)file, O3);
606 606     set(line, O4);
607 607     Label L;
608 608     // get nearby pc, store jmp target
609 609     call(L, relocInfo::none);  // No relocation for call to pc+0x8
610 610     delayed()->st(O2, O1, 0);
611 611     bind(L);
612 612
613 613     // store nearby pc
614 614     st(O7, O1, sizeof(intptr_t));
615 615     // store file
616 616     st(O3, O1, 2*sizeof(intptr_t));
617 617     // store line
618 618     st(O4, O1, 3*sizeof(intptr_t));
619 619     add(O0, 1, O0);
620 620     and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
621 621     st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
622 622     restore();
623 623 #endif /* PRODUCT */
624 624   }
625 625   jmp(r1, offset);
626 626 }
627 627
628 628 // This code sequence is relocatable to any address, even on LP64.
// Jump-and-link to an absolute address literal, materialized as a fixed-size
// sethi/add pair so patching code (NativeJump/NativeFarCall) can rewrite it.
// 'd' receives the return address; 'temp' is clobbered.
629 629 void MacroAssembler::jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line) {
630 630   assert_not_delayed();
631 631   // Force fixed length sethi because NativeJump and NativeFarCall don't handle
632 632   // variable length instruction streams.
633 633   patchable_sethi(addrlit, temp);
634 634   Address a(temp, addrlit.low10() + offset);  // Add the offset to the displacement.
635 635   if (TraceJumps) {
636 636 #ifndef PRODUCT
637 637     // Must do the add here so relocation can find the remainder of the
638 638     // value to be relocated.
639 639     add(a.base(), a.disp(), a.base(), addrlit.rspec(offset));
640 640     save_frame(0);
641 641     verify_thread();
642 642     ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
643 643     add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
644 644     sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
645 645     add(O2, O1, O1);
646 646
647 647     set((intptr_t)file, O3);
648 648     set(line, O4);
649 649     Label L;
650 650
651 651     // get nearby pc, store jmp target
652 652     call(L, relocInfo::none);  // No relocation for call to pc+0x8
653 653     delayed()->st(a.base()->after_save(), O1, 0);
654 654     bind(L);
655 655
656 656     // store nearby pc
657 657     st(O7, O1, sizeof(intptr_t));
658 658     // store file
659 659     st(O3, O1, 2*sizeof(intptr_t));
660 660     // store line
661 661     st(O4, O1, 3*sizeof(intptr_t));
662 662     add(O0, 1, O0);
663 663     and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
664 664     st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
665 665     restore();
// The displacement was already folded into the base register above.
666 666     jmpl(a.base(), G0, d);
667 667 #else
668 668     jmpl(a.base(), a.disp(), d);
669 669 #endif /* PRODUCT */
670 670   } else {
671 671     jmpl(a.base(), a.disp(), d);
672 672   }
673 673 }
674 674
// Jump (no link) to an absolute address literal: jumpl with G0 as the
// destination register, discarding the return address.
675 675 void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) {
676 676   jumpl(addrlit, temp, G0, offset, file, line);
677 677 }
678 678
679 679
680 680 // Convert to C varargs format
// Spills all register-resident arguments starting at inArg to their
// caller-preallocated stack slots, then leaves the address of the first
// slot in d (the varargs pointer).
681 681 void MacroAssembler::set_varargs( Argument inArg, Register d ) {
682 682   // spill register-resident args to their memory slots
683 683   // (SPARC calling convention requires callers to have already preallocated these)
684 684   // Note that the inArg might in fact be an outgoing argument,
685 685   // if a leaf routine or stub does some tricky argument shuffling.
686 686   // This routine must work even though one of the saved arguments
687 687   // is in the d register (e.g., set_varargs(Argument(0, false), O0)).
688 688   for (Argument savePtr = inArg;
689 689        savePtr.is_register();
690 690        savePtr = savePtr.successor()) {
691 691     st_ptr(savePtr.as_register(), savePtr.address_in_frame());
692 692   }
693 693   // return the address of the first memory slot
694 694   Address a = inArg.address_in_frame();
695 695   add(a.base(), a.disp(), d);
696 696 }
697 697
698 698 // Conditional breakpoint (for assertion checks in assembly code)
// Traps only when condition c holds on the given condition-code register.
699 699 void MacroAssembler::breakpoint_trap(Condition c, CC cc) {
700 700   trap(c, cc, G0, ST_RESERVED_FOR_USER_0);
701 701 }
702 702
703 703 // We want to use ST_BREAKPOINT here, but the debugger is confused by it.
// Unconditional breakpoint trap.
704 704 void MacroAssembler::breakpoint_trap() {
705 705   trap(ST_RESERVED_FOR_USER_0);
706 706 }
707 707
708 708 // flush windows (except current) using flushw instruction if avail.
// Falls back to the flush-windows software trap on pre-V9 hardware.
709 709 void MacroAssembler::flush_windows() {
710 710   if (VM_Version::v9_instructions_work())  flushw();
711 711   else                                     flush_windows_trap();
712 712 }
713 713
714 714 // Write serialization page so VM thread can do a pseudo remote membar
715 715 // We use the current thread pointer to calculate a thread specific
716 716 // offset to write to within the page. This minimizes bus traffic
717 717 // due to cache line collision.
// tmp1 and tmp2 are scratch; the store of G0 to the per-thread slot in the
// serialization page is what the VM thread observes.
718 718 void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
719 719   srl(thread, os::get_serialize_page_shift_count(), tmp2);
720 720   if (Assembler::is_simm13(os::vm_page_size())) {
// Page-size mask fits in a simm13: AND with the immediate directly.
721 721     and3(tmp2, (os::vm_page_size() - sizeof(int)), tmp2);
722 722   }
723 723   else {
724 724     set((os::vm_page_size() - sizeof(int)), tmp1);
725 725     and3(tmp2, tmp1, tmp2);
726 726   }
727 727   set(os::get_memory_serialize_page(), tmp1);
728 728   st(G0, tmp1, tmp2);
729 729 }
730 730
731 731
732 732
// Not implemented on SPARC.
733 733 void MacroAssembler::enter() {
734 734   Unimplemented();
735 735 }
736 736
// Not implemented on SPARC.
737 737 void MacroAssembler::leave() {
738 738   Unimplemented();
739 739 }
740 740
741 741 void MacroAssembler::mult(Register s1, Register s2, Register d) {
742 742 if(VM_Version::v9_instructions_work()) {
743 743 mulx (s1, s2, d);
744 744 } else {
745 745 smul (s1, s2, d);
746 746 }
747 747 }
748 748
749 749 void MacroAssembler::mult(Register s1, int simm13a, Register d) {
750 750 if(VM_Version::v9_instructions_work()) {
751 751 mulx (s1, simm13a, d);
752 752 } else {
753 753 smul (s1, simm13a, d);
754 754 }
755 755 }
756 756
757 757
758 758 #ifdef ASSERT
// ASSERT only: cross-check a V9 rdccr result (in ccr_save) against the
// condition codes obtained the V8 way (via trap).  Clobbers G3/G4 scratch.
759 759 void MacroAssembler::read_ccr_v8_assert(Register ccr_save) {
760 760   const Register s1 = G3_scratch;
761 761   const Register s2 = G4_scratch;
762 762   Label get_psr_test;
763 763   // Get the condition codes the V8 way.
764 764   read_ccr_trap(s1);
765 765   mov(ccr_save, s2);
766 766   // This is a test of V8 which has icc but not xcc
767 767   // so mask off the xcc bits
768 768   and3(s2, 0xf, s2);
769 769   // Compare condition codes from the V8 and V9 ways.
770 770   subcc(s2, s1, G0);
// Annulled branch: the breakpoint in the delay slot executes only when the
// branch is taken, i.e. only when the two values disagree.
771 771   br(Assembler::notEqual, true, Assembler::pt, get_psr_test);
772 772   delayed()->breakpoint_trap();
773 773   bind(get_psr_test);
774 774 }
775 775
// ASSERT only: write ccr_save the V8 way (via trap), read it back with the
// V9 rdccr, and verify the icc bits round-tripped.  Clobbers G3/G4 scratch.
776 776 void MacroAssembler::write_ccr_v8_assert(Register ccr_save) {
777 777   const Register s1 = G3_scratch;
778 778   const Register s2 = G4_scratch;
779 779   Label set_psr_test;
780 780   // Write out the saved condition codes the V8 way
781 781   write_ccr_trap(ccr_save, s1, s2);
782 782   // Read back the condition codes using the V9 instruction
783 783   rdccr(s1);
784 784   mov(ccr_save, s2);
785 785   // This is a test of V8 which has icc but not xcc
786 786   // so mask off the xcc bits
787 787   and3(s2, 0xf, s2);
788 788   and3(s1, 0xf, s1);
789 789   // Compare the V8 way with the V9 way.
790 790   subcc(s2, s1, G0);
// Annulled branch: breakpoint fires (from the delay slot) only on mismatch.
791 791   br(Assembler::notEqual, true, Assembler::pt, set_psr_test);
792 792   delayed()->breakpoint_trap();
793 793   bind(set_psr_test);
794 794 }
795 795 #else
796 796 #define read_ccr_v8_assert(x)
797 797 #define write_ccr_v8_assert(x)
798 798 #endif // ASSERT
799 799
// Read the condition-code register into ccr_save, using rdccr on V9 and the
// software trap on V8.  In ASSERT builds the two paths are cross-checked.
800 800 void MacroAssembler::read_ccr(Register ccr_save) {
801 801   if (VM_Version::v9_instructions_work()) {
802 802     rdccr(ccr_save);
803 803     // Test code sequence used on V8. Do not move above rdccr.
804 804     read_ccr_v8_assert(ccr_save);
805 805   } else {
806 806     read_ccr_trap(ccr_save);
807 807   }
808 808 }
809 809
// Write ccr_save into the condition-code register, using wrccr on V9 and
// the software trap on V8.  ASSERT builds cross-check the two paths first.
810 810 void MacroAssembler::write_ccr(Register ccr_save) {
811 811   if (VM_Version::v9_instructions_work()) {
812 812     // Test code sequence used on V8. Do not move below wrccr.
813 813     write_ccr_v8_assert(ccr_save);
814 814     wrccr(ccr_save);
815 815   } else {
816 816     const Register temp_reg1 = G3_scratch;
817 817     const Register temp_reg2 = G4_scratch;
818 818     write_ccr_trap(ccr_save, temp_reg1, temp_reg2);
819 819   }
820 820 }
821 821
822 822
823 823 // Calls to C land
824 824
825 825 #ifdef ASSERT
826 826 // a hook for debugging
// Debug-build indirection so a breakpoint can be set on thread refetches;
// product builds alias this directly to ThreadLocalStorage::thread (see #else).
827 827 static Thread* reinitialize_thread() {
828 828   return ThreadLocalStorage::thread();
829 829 }
830 830 #else
831 831 #define reinitialize_thread ThreadLocalStorage::thread
832 832 #endif
833 833
834 834 #ifdef ASSERT
835 835 address last_get_thread = NULL;
836 836 #endif
837 837
838 838 // call this when G2_thread is not known to be valid
// Reload G2_thread from thread-local storage.  Call this when G2_thread is
// not known to be valid.  All live G registers are preserved across the
// runtime call by parking them in locals of a temporary frame.
839 839 void MacroAssembler::get_thread() {
840 840   save_frame(0);                // to avoid clobbering O0
841 841   mov(G1, L0);                  // avoid clobbering G1
842 842   mov(G5_method, L1);           // avoid clobbering G5
843 843   mov(G3, L2);                  // avoid clobbering G3 also
844 844   mov(G4, L5);                  // avoid clobbering G4
845 845 #ifdef ASSERT
// Record the pc of this get_thread call for debugging.
846 846   AddressLiteral last_get_thread_addrlit(&last_get_thread);
847 847   set(last_get_thread_addrlit, L3);
848 848   inc(L4, get_pc(L4) + 2 * BytesPerInstWord); // skip getpc() code + inc + st_ptr to point L4 at call
849 849   st_ptr(L4, L3, 0);
850 850 #endif
851 851   call(CAST_FROM_FN_PTR(address, reinitialize_thread), relocInfo::runtime_call_type);
852 852   delayed()->nop();
853 853   mov(L0, G1);
854 854   mov(L1, G5_method);
855 855   mov(L2, G3);
856 856   mov(L5, G4);
// The call's result (O0) becomes G2_thread as the frame is popped.
857 857   restore(O0, 0, G2_thread);
858 858 }
859 859
// Runtime helper for verify_thread(): checks that the value the generated
// code was carrying in G2_thread matches the real current thread.
860 860 static Thread* verify_thread_subroutine(Thread* gthread_value) {
861 861   Thread* correct_value = ThreadLocalStorage::thread();
862 862   guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
863 863   return correct_value;
864 864 }
865 865
// Emit a check (under -XX:+VerifyThread) that G2_thread really holds the
// current thread, preserving all G registers around the runtime call.
866 866 void MacroAssembler::verify_thread() {
867 867   if (VerifyThread) {
868 868     // NOTE: this chops off the heads of the 64-bit O registers.
869 869 #ifdef CC_INTERP
870 870     save_frame(0);
871 871 #else
872 872     // make sure G2_thread contains the right value
873 873     save_frame_and_mov(0, Lmethod, Lmethod);   // to avoid clobbering O0 (and propagate Lmethod for -Xprof)
874 874     mov(G1, L1);                // avoid clobbering G1
875 875     // G2 saved below
876 876     mov(G3, L3);                // avoid clobbering G3
877 877     mov(G4, L4);                // avoid clobbering G4
878 878     mov(G5_method, L5);         // avoid clobbering G5_method
879 879 #endif /* CC_INTERP */
880 880 #if defined(COMPILER2) && !defined(_LP64)
881 881     // Save & restore possible 64-bit Long arguments in G-regs
882 882     srlx(G1,32,L0);
883 883     srlx(G4,32,L6);
884 884 #endif
885 885     call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type);
// Pass the suspect G2_thread value as the argument, in the delay slot.
886 886     delayed()->mov(G2_thread, O0);
887 887
888 888     mov(L1, G1);                // Restore G1
889 889     // G2 restored below
890 890     mov(L3, G3);                // restore G3
891 891     mov(L4, G4);                // restore G4
892 892     mov(L5, G5_method);         // restore G5_method
893 893 #if defined(COMPILER2) && !defined(_LP64)
894 894     // Save & restore possible 64-bit Long arguments in G-regs
895 895     sllx(L0,32,G2);             // Move old high G1 bits high in G2
896 896     sllx(G1, 0,G1);             // Clear current high G1 bits
897 897     or3 (G1,G2,G1);             // Recover 64-bit G1
898 898     sllx(L6,32,G2);             // Move old high G4 bits high in G2
899 899     sllx(G4, 0,G4);             // Clear current high G4 bits
900 900     or3 (G4,G2,G4);             // Recover 64-bit G4
901 901 #endif
// The verified thread value (O0) flows back into G2_thread on frame pop.
902 902     restore(O0, 0, G2_thread);
903 903   }
904 904 }
905 905
906 906
// Cache G2_thread into thread_cache (must be a window-preserved register)
// before a call that may clobber G2.  Under VerifyThread, G2 is deliberately
// poisoned so stale uses are caught.
907 907 void MacroAssembler::save_thread(const Register thread_cache) {
908 908   verify_thread();
909 909   if (thread_cache->is_valid()) {
910 910     assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
911 911     mov(G2_thread, thread_cache);
912 912   }
913 913   if (VerifyThread) {
914 914     // smash G2_thread, as if the VM were about to anyway
915 915     set(0x67676767, G2_thread);
916 916   }
917 917 }
918 918
919 919
// Restore G2_thread from the cache register saved by save_thread(), or
// refetch it from thread-local storage if no cache register was used.
920 920 void MacroAssembler::restore_thread(const Register thread_cache) {
921 921   if (thread_cache->is_valid()) {
922 922     assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
923 923     mov(thread_cache, G2_thread);
924 924     verify_thread();
925 925   } else {
926 926     // do it the slow way
927 927     get_thread();
928 928   }
929 929 }
930 930
931 931
932 932 // %%% maybe get rid of [re]set_last_Java_frame
// Publish the current Java frame (sp and optionally pc) into the thread's
// frame anchor so the VM can walk the stack.  Ordering is deliberate:
// last_Java_pc is stored before last_Java_sp, because sp becoming non-null
// is what makes has_last_Java_frame true.
933 933 void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) {
934 934   assert_not_delayed();
935 935   Address flags(G2_thread, JavaThread::frame_anchor_offset() +
936 936                            JavaFrameAnchor::flags_offset());
937 937   Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset());
938 938
939 939   // Always set last_Java_pc and flags first because once last_Java_sp is visible
940 940   // has_last_Java_frame is true and users will look at the rest of the fields.
941 941   // (Note: flags should always be zero before we get here so doesn't need to be set.)
942 942
943 943 #ifdef ASSERT
944 944   // Verify that flags was zeroed on return to Java
945 945   Label PcOk;
946 946   save_frame(0);                // to avoid clobbering O0
947 947   ld_ptr(pc_addr, L0);
948 948   tst(L0);
949 949 #ifdef _LP64
950 950   brx(Assembler::zero, false, Assembler::pt, PcOk);
951 951 #else
952 952   br(Assembler::zero, false, Assembler::pt, PcOk);
953 953 #endif // _LP64
954 954   delayed() -> nop();
955 955   stop("last_Java_pc not zeroed before leaving Java");
956 956   bind(PcOk);
957 957
958 958   // Verify that flags was zeroed on return to Java
959 959   Label FlagsOk;
960 960   ld(flags, L0);
961 961   tst(L0);
962 962   br(Assembler::zero, false, Assembler::pt, FlagsOk);
// Pop the assert frame in the delay slot on the success path.
963 963   delayed() -> restore();
964 964   stop("flags not zeroed before leaving Java");
965 965   bind(FlagsOk);
966 966 #endif /* ASSERT */
967 967   //
968 968   // When returning from calling out from Java mode the frame anchor's last_Java_pc
969 969   // will always be set to NULL. It is set here so that if we are doing a call to
970 970   // native (not VM) that we capture the known pc and don't have to rely on the
971 971   // native call having a standard frame linkage where we can find the pc.
972 972
973 973   if (last_Java_pc->is_valid()) {
974 974     st_ptr(last_Java_pc, pc_addr);
975 975   }
976 976
977 977 #ifdef _LP64
978 978 #ifdef ASSERT
979 979   // Make sure that we have an odd stack
// On LP64 the sp handed in must not yet carry the stack bias (bias added below).
980 980   Label StackOk;
981 981   andcc(last_java_sp, 0x01, G0);
982 982   br(Assembler::notZero, false, Assembler::pt, StackOk);
983 983   delayed() -> nop();
984 984   stop("Stack Not Biased in set_last_Java_frame");
985 985   bind(StackOk);
986 986 #endif // ASSERT
987 987   assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
988 988   add( last_java_sp, STACK_BIAS, G4_scratch );
989 989   st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset());
990 990 #else
991 991   st_ptr(last_java_sp, G2_thread, JavaThread::last_Java_sp_offset());
992 992 #endif // _LP64
993 993 }
994 994
// Clear the thread's frame anchor (sp, pc, flags) on re-entry into Java.
// Asserts (debug only) that an anchor was actually set beforehand.
void MacroAssembler::reset_last_Java_frame(void) {
  assert_not_delayed();

  Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset());
  Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
  Address flags  (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());

#ifdef ASSERT
  // check that it WAS previously set
#ifdef CC_INTERP
  save_frame(0);
#else
  save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod to helper frame for -Xprof
#endif /* CC_INTERP */
  ld_ptr(sp_addr, L0);
  tst(L0);
  breakpoint_trap(Assembler::zero, Assembler::ptr_cc);  // trap if it was never set
  restore();
#endif // ASSERT

  st_ptr(G0, sp_addr);
  // Always return last_Java_pc to zero
  st_ptr(G0, pc_addr);
  // Always null flags after return to Java
  st(G0, flags);
}
1021 1021
1022 1022
// Common code for all call_VM variants: publish the frame anchor, call the
// runtime entry point with G2_thread as the implicit first argument, tear
// the anchor down again, then optionally forward pending exceptions and
// fetch the oop result out of the thread.
void MacroAssembler::call_VM_base(
  Register        oop_result,          // noreg if no oop result expected
  Register        thread_cache,        // noreg, or local/in reg to preserve G2 in
  Register        last_java_sp,        // noreg means "use SP"
  address         entry_point,
  int             number_of_arguments, // already marshalled into O1..On by caller
  bool            check_exceptions)
{
  assert_not_delayed();

  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");

  // 64-bit last_java_sp is biased!
  set_last_Java_frame(last_java_sp, noreg);
  if (VerifyThread)  mov(G2_thread, O0); // about to be smashed; pass early
  save_thread(thread_cache);
  // do the call
  call(entry_point, relocInfo::runtime_call_type);
  if (!VerifyThread)
    delayed()->mov(G2_thread, O0);  // pass thread as first argument
  else
    delayed()->nop();             // (thread already passed)
  restore_thread(thread_cache);
  reset_last_Java_frame();

  // check for pending exceptions. use Gtemp as scratch register.
  if (check_exceptions) {
    check_and_forward_exception(Gtemp);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result);
  }
}
1063 1063
// If the thread has a pending exception, jump to the shared
// forward_exception stub; otherwise fall through. Clobbers scratch_reg.
void MacroAssembler::check_and_forward_exception(Register scratch_reg)
{
  Label L;

  check_and_handle_popframe(scratch_reg);
  check_and_handle_earlyret(scratch_reg);

  Address exception_addr(G2_thread, Thread::pending_exception_offset());
  ld_ptr(exception_addr, scratch_reg);
  br_null(scratch_reg,false,pt,L);    // no pending exception: skip
  delayed()->nop();
  // we use O7 linkage so that forward_exception_entry has the issuing PC
  call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  delayed()->nop();
  bind(L);
}
1080 1080
1081 1081
// No-op hook; interpreter-specific subclasses/ports override the popframe check.
void MacroAssembler::check_and_handle_popframe(Register scratch_reg) {
}
1084 1084
1085 1085
// No-op hook; interpreter-specific subclasses/ports override the early-return check.
void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
}
1088 1088
1089 1089
// call_VM with no explicit arguments (thread is passed implicitly in O0).
void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}
1093 1093
1094 1094
// call_VM with one argument, marshalled into O1.
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  // O0 is reserved for the thread
  mov(arg_1, O1);
  call_VM(oop_result, entry_point, 1, check_exceptions);
}
1100 1100
1101 1101
// call_VM with two arguments (O1, O2); asserts guard against an earlier
// mov having clobbered a later source register.
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // O0 is reserved for the thread
  mov(arg_1, O1);
  mov(arg_2, O2); assert(arg_2 != O1, "smashed argument");
  call_VM(oop_result, entry_point, 2, check_exceptions);
}
1108 1108
1109 1109
// call_VM with three arguments (O1..O3).
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // O0 is reserved for the thread
  mov(arg_1, O1);
  mov(arg_2, O2); assert(arg_2 != O1,                "smashed argument");
  mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument");
  call_VM(oop_result, entry_point, 3, check_exceptions);
}
1117 1117
1118 1118
1119 1119
1120 1120 // Note: The following call_VM overloadings are useful when a "save"
1121 1121 // has already been performed by a stub, and the last Java frame is
1122 1122 // the previous one. In that case, last_java_sp must be passed as FP
1123 1123 // instead of SP.
1124 1124
1125 1125
// call_VM variant taking an explicit last_java_sp (pass FP when a stub has
// already done a 'save' and the last Java frame is the previous one).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}
1129 1129
1130 1130
// Explicit-last_java_sp call_VM with one argument (O1).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // O0 is reserved for the thread
  mov(arg_1, O1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
1136 1136
1137 1137
// Explicit-last_java_sp call_VM with two arguments (O1, O2).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // O0 is reserved for the thread
  mov(arg_1, O1);
  mov(arg_2, O2); assert(arg_2 != O1, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
1144 1144
1145 1145
// Explicit-last_java_sp call_VM with three arguments (O1..O3).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // O0 is reserved for the thread
  mov(arg_1, O1);
  mov(arg_2, O2); assert(arg_2 != O1,                "smashed argument");
  mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument");
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
1153 1153
1154 1154
1155 1155
// Leaf runtime call: no frame anchor is published and no exception check
// is done; only the thread register is cached/restored around the call.
void MacroAssembler::call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments) {
  assert_not_delayed();
  save_thread(thread_cache);
  // do the call
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  restore_thread(thread_cache);
}
1164 1164
1165 1165
// Leaf call with no explicit arguments.
void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments) {
  call_VM_leaf_base(thread_cache, entry_point, number_of_arguments);
}
1169 1169
1170 1170
// Leaf call with one argument; leaf calls use O0 (no implicit thread arg).
void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1) {
  mov(arg_1, O0);
  call_VM_leaf(thread_cache, entry_point, 1);
}
1175 1175
1176 1176
// Leaf call with two arguments (O0, O1).
void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2) {
  mov(arg_1, O0);
  mov(arg_2, O1); assert(arg_2 != O0, "smashed argument");
  call_VM_leaf(thread_cache, entry_point, 2);
}
1182 1182
1183 1183
// Leaf call with three arguments (O0..O2).
void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  mov(arg_1, O0);
  mov(arg_2, O1); assert(arg_2 != O0,                "smashed argument");
  mov(arg_3, O2); assert(arg_3 != O0 && arg_3 != O1, "smashed argument");
  call_VM_leaf(thread_cache, entry_point, 3);
}
1190 1190
1191 1191
// Fetch the oop result the VM left in the thread, then clear the slot so
// the GC never sees a stale oop there.
void MacroAssembler::get_vm_result(Register oop_result) {
  verify_thread();
  Address vm_result_addr(G2_thread, JavaThread::vm_result_offset());
  ld_ptr(    vm_result_addr, oop_result);
  st_ptr(G0, vm_result_addr);
  verify_oop(oop_result);
}
1199 1199
1200 1200
// Same as get_vm_result, but for the secondary vm_result_2 slot.
void MacroAssembler::get_vm_result_2(Register oop_result) {
  verify_thread();
  Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset());
  ld_ptr(vm_result_addr_2, oop_result);
  st_ptr(G0, vm_result_addr_2);
  verify_oop(oop_result);
}
1208 1208
1209 1209
1210 1210 // We require that C code which does not return a value in vm_result will
1211 1211 // leave it undisturbed.
// Store an oop result into the thread's vm_result slot. In debug builds,
// trap if the slot is already occupied (would silently drop an oop).
void MacroAssembler::set_vm_result(Register oop_result) {
  verify_thread();
  Address vm_result_addr(G2_thread, JavaThread::vm_result_offset());
  verify_oop(oop_result);

# ifdef ASSERT
  // Check that we are not overwriting any other oop.
#ifdef CC_INTERP
  save_frame(0);
#else
  save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod for -Xprof
#endif /* CC_INTERP */
  ld_ptr(vm_result_addr, L0);
  tst(L0);
  restore();
  breakpoint_trap(notZero, Assembler::ptr_cc);
  // }
# endif

  st_ptr(oop_result, vm_result_addr);
}
1233 1233
1234 1234
// Dirty the card-table entry covering obj: shift the address down to a
// card index and store a zero byte at byte_map_base + index.
// Clobbers obj (holds the card index afterwards); tmp holds the map base.
void MacroAssembler::card_table_write(jbyte* byte_map_base,
                                      Register tmp, Register obj) {
#ifdef _LP64
  srlx(obj, CardTableModRefBS::card_shift, obj);
#else
  srl(obj, CardTableModRefBS::card_shift, obj);
#endif
  assert(tmp != obj, "need separate temp reg");
  set((address) byte_map_base, tmp);
  stb(G0, tmp, obj);   // store zero byte == dirty card
}
1246 1246
1247 1247
// Emit the (LP64: up to 7-instruction) sequence that builds the upper bits
// of addrlit in d, leaving the low 10 bits zero for a following 'add'.
// When the literal is relocatable (or ForceRelocatable), the sequence is
// nop-padded to the full worst-case length so it can be patched in place
// later without changing the code size.
void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
  address save_pc;
  int shiftcnt;
#ifdef _LP64
# ifdef CHECK_DELAY
  assert_not_delayed((char*) "cannot put two instructions in delay slot");
# endif
  v9_dep();
  save_pc = pc();

  int msb32 = (int) (addrlit.value() >> 32);
  int lsb32 = (int) (addrlit.value());

  if (msb32 == 0 && lsb32 >= 0) {
    // Positive 32-bit value: a single sethi suffices.
    Assembler::sethi(lsb32, d, addrlit.rspec());
  }
  else if (msb32 == -1) {
    // Sign-extended negative 32-bit value: sethi of complement + xor.
    Assembler::sethi(~lsb32, d, addrlit.rspec());
    xor3(d, ~low10(~0), d);
  }
  else {
    // Full 64-bit case: build msb32 first, then shift in lsb32 pieces.
    Assembler::sethi(msb32, d, addrlit.rspec());  // msb 22-bits
    if (msb32 & 0x3ff)                            // Any bits?
      or3(d, msb32 & 0x3ff, d);                   // msb 32-bits are now in lsb 32
    if (lsb32 & 0xFFFFFC00) {                     // done?
      if ((lsb32 >> 20) & 0xfff) {                // Any bits set?
        sllx(d, 12, d);                           // Make room for next 12 bits
        or3(d, (lsb32 >> 20) & 0xfff, d);         // Or in next 12
        shiftcnt = 0;                             // We already shifted
      }
      else
        shiftcnt = 12;
      if ((lsb32 >> 10) & 0x3ff) {
        sllx(d, shiftcnt + 10, d);                // Make room for last 10 bits
        or3(d, (lsb32 >> 10) & 0x3ff, d);         // Or in next 10
        shiftcnt = 0;
      }
      else
        shiftcnt = 10;
      sllx(d, shiftcnt + 10, d);                  // Shift leaving disp field 0'd
    }
    else
      sllx(d, 32, d);
  }
  // Pad out the instruction sequence so it can be patched later.
  if (ForceRelocatable || (addrlit.rtype() != relocInfo::none &&
                           addrlit.rtype() != relocInfo::runtime_call_type)) {
    while (pc() < (save_pc + (7 * BytesPerInstWord)))
      nop();
  }
#else
  Assembler::sethi(addrlit.value(), d, addrlit.rspec());
#endif
}
1302 1302
1303 1303
// Non-patchable sethi: emits the shortest sequence for this literal.
void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) {
  internal_sethi(addrlit, d, false);
}
1307 1307
1308 1308
// Patchable sethi: always emits the worst-case-length (nop-padded) sequence.
void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) {
  internal_sethi(addrlit, d, true);
}
1312 1312
1313 1313
// Size of the sethi sequence internal_sethi would emit for address 'a'.
// NOTE(review): the units look inconsistent — the worst_case path returns 7
// (an instruction count, matching worst_case_size_of_set() below), while the
// other paths return BytesPerInstWord * count, i.e. bytes. Callers appear to
// rely on the current values, so this is flagged rather than changed; confirm
// against call sites before touching it.
int MacroAssembler::size_of_sethi(address a, bool worst_case) {
#ifdef _LP64
  if (worst_case) return 7;
  intptr_t iaddr = (intptr_t)a;
  int hi32 = (int)(iaddr >> 32);
  int lo32 = (int)(iaddr);
  int inst_count;
  if (hi32 == 0 && lo32 >= 0)
    inst_count = 1;
  else if (hi32 == -1)
    inst_count = 2;
  else {
    inst_count = 2;
    if ( hi32 & 0x3ff )
      inst_count++;
    if ( lo32 & 0xFFFFFC00 ) {
      if( (lo32 >> 20) & 0xfff ) inst_count += 2;
      if( (lo32 >> 10) & 0x3ff ) inst_count += 2;
    }
  }
  return BytesPerInstWord * inst_count;
#else
  return BytesPerInstWord;
#endif
}
1339 1339
// Worst-case length of a 'set': worst-case sethi plus the trailing add.
// NOTE(review): this is an instruction count (7 + 1), not bytes — see the
// unit note on size_of_sethi().
int MacroAssembler::worst_case_size_of_set() {
  return size_of_sethi(NULL, true) + 1;
}
1343 1343
1344 1344
// Materialize an arbitrary literal in d: try the short simm13 / sethi-only
// forms when the value is not relocatable, otherwise emit the full
// sethi + add sequence (padded when ForceRelocatable).
void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
  intptr_t value = addrlit.value();

  if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) {
    // can optimize
    if (-4096 <= value && value <= 4095) {
      or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended)
      return;
    }
    if (inv_hi22(hi22(value)) == value) {
      // Value is expressible by sethi alone (low 10 bits are zero).
      sethi(addrlit, d);
      return;
    }
  }
  assert_not_delayed((char*) "cannot put two instructions in delay slot");
  internal_sethi(addrlit, d, ForceRelocatable);
  if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) {
    add(d, addrlit.low10(), d, addrlit.rspec());
  }
}
1365 1365
// set of an AddressLiteral, shortest form.
void MacroAssembler::set(const AddressLiteral& al, Register d) {
  internal_set(al, d, false);
}
1369 1369
// set of a plain integer literal (no relocation), shortest form.
void MacroAssembler::set(intptr_t value, Register d) {
  AddressLiteral al(value);
  internal_set(al, d, false);
}
1374 1374
// set of an address with an explicit relocation spec, shortest form.
void MacroAssembler::set(address addr, Register d, RelocationHolder const& rspec) {
  AddressLiteral al(addr, rspec);
  internal_set(al, d, false);
}
1379 1379
// Patchable set: always full-length so it can be re-targeted in place.
void MacroAssembler::patchable_set(const AddressLiteral& al, Register d) {
  internal_set(al, d, true);
}
1383 1383
// Patchable set of a plain integer literal.
void MacroAssembler::patchable_set(intptr_t value, Register d) {
  AddressLiteral al(value);
  internal_set(al, d, true);
}
1388 1388
1389 1389
// Materialize a 64-bit constant in d, using tmp for the high half when both
// halves are non-trivial. Chooses among short forms: simm13, 32-bit
// sethi/or, sign-extended, hi-only-shifted, or the full two-register build.
// size_of_set64() below must describe exactly the sequences emitted here.
void MacroAssembler::set64(jlong value, Register d, Register tmp) {
  assert_not_delayed();
  v9_dep();

  int hi = (int)(value >> 32);
  int lo = (int)(value & ~0);
  // (Matcher::isSimpleConstant64 knows about the following optimizations.)
  if (Assembler::is_simm13(lo) && value == lo) {
    or3(G0, lo, d);                       // whole value fits in a simm13
  } else if (hi == 0) {
    Assembler::sethi(lo, d);   // hardware version zero-extends to upper 32
    if (low10(lo) != 0)
      or3(d, low10(lo), d);
  }
  else if (hi == -1) {
    Assembler::sethi(~lo, d);  // hardware version zero-extends to upper 32
    xor3(d, low10(lo) ^ ~low10(~0), d);   // flip to the sign-extended value
  }
  else if (lo == 0) {
    if (Assembler::is_simm13(hi)) {
      or3(G0, hi, d);
    } else {
      Assembler::sethi(hi, d); // hardware version zero-extends to upper 32
      if (low10(hi) != 0)
        or3(d, low10(hi), d);
    }
    sllx(d, 32, d);                       // move the value into the high half
  }
  else {
    // General case: build both halves and merge.
    Assembler::sethi(hi, tmp);
    Assembler::sethi(lo, d);   // macro assembler version sign-extends
    if (low10(hi) != 0)
      or3 (tmp, low10(hi), tmp);
    if (low10(lo) != 0)
      or3 (  d, low10(lo),   d);
    sllx(tmp, 32, tmp);
    or3 (d, tmp, d);
  }
}
1429 1429
// Instruction count of the sequence set64() would emit for 'value'.
// The branch structure deliberately mirrors set64() above, case for case —
// keep the two functions in sync when either changes.
int MacroAssembler::size_of_set64(jlong value) {
  v9_dep();

  int hi = (int)(value >> 32);
  int lo = (int)(value & ~0);
  int count = 0;

  // (Matcher::isSimpleConstant64 knows about the following optimizations.)
  if (Assembler::is_simm13(lo) && value == lo) {
    count++;                              // or3
  } else if (hi == 0) {
    count++;                              // sethi
    if (low10(lo) != 0)
      count++;                            // or3
  }
  else if (hi == -1) {
    count += 2;                           // sethi + xor3
  }
  else if (lo == 0) {
    if (Assembler::is_simm13(hi)) {
      count++;                            // or3
    } else {
      count++;                            // sethi
      if (low10(hi) != 0)
        count++;                          // or3
    }
    count++;                              // sllx
  }
  else {
    count += 2;                           // two sethis
    if (low10(hi) != 0)
      count++;                            // or3 (hi)
    if (low10(lo) != 0)
      count++;                            // or3 (lo)
    count += 2;                           // sllx + or3 merge
  }
  return count;
}
1468 +
1430 1469 // compute size in bytes of sparc frame, given
1431 1470 // number of extraWords
1432 1471 int MacroAssembler::total_frame_size_in_bytes(int extraWords) {
1433 1472
1434 1473 int nWords = frame::memory_parameter_word_sp_offset;
1435 1474
1436 1475 nWords += extraWords;
1437 1476
1438 1477 if (nWords & 1) ++nWords; // round up to double-word
1439 1478
1440 1479 return nWords * BytesPerWord;
1441 1480 }
1442 1481
1443 1482
1444 1483 // save_frame: given number of "extra" words in frame,
1445 1484 // issue approp. save instruction (p 200, v8 manual)
1446 1485
// Emit a 'save' opening a new register window with room for extraWords
// extra words (see p 200, v8 manual). Uses G3_scratch when the frame
// delta does not fit in a simm13 immediate.
void MacroAssembler::save_frame(int extraWords = 0) {
  int delta = - total_frame_size_in_bytes(extraWords);
  if (is_simm13(delta)) {
    save(SP, delta, SP);
  } else {
    set(delta, G3_scratch);
    save(SP, G3_scratch, SP);
  }
}
1456 1495
1457 1496
1458 1497 void MacroAssembler::save_frame_c1(int size_in_bytes) {
1459 1498 if (is_simm13(-size_in_bytes)) {
1460 1499 save(SP, -size_in_bytes, SP);
1461 1500 } else {
1462 1501 set(-size_in_bytes, G3_scratch);
1463 1502 save(SP, G3_scratch, SP);
1464 1503 }
1465 1504 }
1466 1505
1467 1506
// save_frame that also carries up to two register values (s1->d1, s2->d2)
// across the window switch. Local/in source registers are spilled to their
// trap-handler save slot in the old window and reloaded after the save.
void MacroAssembler::save_frame_and_mov(int extraWords,
                                        Register s1, Register d1,
                                        Register s2, Register d2) {
  assert_not_delayed();

  // The trick here is to use precisely the same memory word
  // that trap handlers also use to save the register.
  // This word cannot be used for any other purpose, but
  // it works fine to save the register's value, whether or not
  // an interrupt flushes register windows at any given moment!
  Address s1_addr;
  if (s1->is_valid() && (s1->is_in() || s1->is_local())) {
    s1_addr = s1->address_in_saved_window();
    st_ptr(s1, s1_addr);
  }

  Address s2_addr;
  if (s2->is_valid() && (s2->is_in() || s2->is_local())) {
    s2_addr = s2->address_in_saved_window();
    st_ptr(s2, s2_addr);
  }

  save_frame(extraWords);

  // Reload (or rename, for out registers) the sources into the new window.
  if (s1_addr.base() == SP) {
    ld_ptr(s1_addr.after_save(), d1);
  } else if (s1->is_valid()) {
    mov(s1->after_save(), d1);
  }

  if (s2_addr.base() == SP) {
    ld_ptr(s2_addr.after_save(), d2);
  } else if (s2->is_valid()) {
    mov(s2->after_save(), d2);
  }
}
1504 1543
1505 1544
// Build an oop AddressLiteral with a freshly allocated OopRecorder index
// (for an oop not guaranteed to be found again later).
AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->allocate_index(obj);
  return AddressLiteral(obj, oop_Relocation::spec(oop_index));
}
1511 1550
1512 1551
// Build an oop AddressLiteral reusing (or creating) the OopRecorder index
// for an oop that can be looked up by value.
AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  return AddressLiteral(obj, oop_Relocation::spec(oop_index));
}
1518 1557
// Emit a patchable sethi/add pair that materializes a compressed (narrow)
// oop in d. The placeholder bit patterns (0x3fffff / 0x3ff) are rewritten
// by the oop relocation machinery.
void  MacroAssembler::set_narrow_oop(jobject obj, Register d) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);

  assert_not_delayed();
  // Relocation with special format (see relocInfo_sparc.hpp).
  relocate(rspec, 1);
  // Assembler::sethi(0x3fffff, d);
  emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) );
  // Don't add relocation for 'add'. Do patching during 'sethi' processing.
  add(d, 0x3ff, d);

}
1533 1572
1534 1573
// Pad with nops until the code offset is a multiple of modulus.
void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) nop();
}
1538 1577
1539 1578
// Record a safepoint breakpoint relocation at the current pc.
void MacroAssembler::safepoint() {
  relocate(breakpoint_Relocation::spec(breakpoint_Relocation::safepoint));
}
1543 1582
1544 1583
1545 1584 void RegistersForDebugging::print(outputStream* s) {
1546 1585 int j;
1547 1586 for ( j = 0; j < 8; ++j )
1548 1587 if ( j != 6 ) s->print_cr("i%d = 0x%.16lx", j, i[j]);
1549 1588 else s->print_cr( "fp = 0x%.16lx", i[j]);
1550 1589 s->cr();
1551 1590
1552 1591 for ( j = 0; j < 8; ++j )
1553 1592 s->print_cr("l%d = 0x%.16lx", j, l[j]);
1554 1593 s->cr();
1555 1594
1556 1595 for ( j = 0; j < 8; ++j )
1557 1596 if ( j != 6 ) s->print_cr("o%d = 0x%.16lx", j, o[j]);
1558 1597 else s->print_cr( "sp = 0x%.16lx", o[j]);
1559 1598 s->cr();
1560 1599
1561 1600 for ( j = 0; j < 8; ++j )
1562 1601 s->print_cr("g%d = 0x%.16lx", j, g[j]);
1563 1602 s->cr();
1564 1603
1565 1604 // print out floats with compression
1566 1605 for (j = 0; j < 32; ) {
1567 1606 jfloat val = f[j];
1568 1607 int last = j;
1569 1608 for ( ; last+1 < 32; ++last ) {
1570 1609 char b1[1024], b2[1024];
1571 1610 sprintf(b1, "%f", val);
1572 1611 sprintf(b2, "%f", f[last+1]);
1573 1612 if (strcmp(b1, b2))
1574 1613 break;
1575 1614 }
1576 1615 s->print("f%d", j);
1577 1616 if ( j != last ) s->print(" - f%d", last);
1578 1617 s->print(" = %f", val);
1579 1618 s->fill_to(25);
1580 1619 s->print_cr(" (0x%x)", val);
1581 1620 j = last + 1;
1582 1621 }
1583 1622 s->cr();
1584 1623
1585 1624 // and doubles (evens only)
1586 1625 for (j = 0; j < 32; ) {
1587 1626 jdouble val = d[j];
1588 1627 int last = j;
1589 1628 for ( ; last+1 < 32; ++last ) {
1590 1629 char b1[1024], b2[1024];
1591 1630 sprintf(b1, "%f", val);
1592 1631 sprintf(b2, "%f", d[last+1]);
1593 1632 if (strcmp(b1, b2))
1594 1633 break;
1595 1634 }
1596 1635 s->print("d%d", 2 * j);
1597 1636 if ( j != last ) s->print(" - d%d", last);
1598 1637 s->print(" = %f", val);
1599 1638 s->fill_to(30);
1600 1639 s->print("(0x%x)", *(int*)&val);
1601 1640 s->fill_to(42);
1602 1641 s->print_cr("(0x%x)", *(1 + (int*)&val));
1603 1642 j = last + 1;
1604 1643 }
1605 1644 s->cr();
1606 1645 }
1607 1646
// Emit code that spills the full register state into a RegistersForDebugging
// record placed just below FP. Window registers (i/l) are read from their
// flushed save slots; o/g registers are read directly after the save.
void RegistersForDebugging::save_registers(MacroAssembler* a) {
  a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0);
  a->flush_windows();          // force i/l regs out to their stack slots
  int i;
  for (i = 0; i < 8; ++i) {
    a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, i_offset(i));
    a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, l_offset(i));
    a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i));
    a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i));
  }
  for (i = 0;  i < 32; ++i) {
    a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i));
  }
  // Upper 32 double registers exist only where v9 instructions work.
  for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
    a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i));
  }
}
1625 1664
// Emit code that reloads the g and float/double registers from the record
// at r. Starts at g1 (g0 is hard-wired zero); i/l/o regs are restored by
// the window mechanism, not here.
void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) {
  for (int i = 1; i < 8;  ++i) {
    a->ld_ptr(r, g_offset(i), as_gRegister(i));
  }
  for (int j = 0; j < 32; ++j) {
    a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j));
  }
  for (int k = 0; k < (VM_Version::v9_instructions_work() ? 64 : 32); k += 2) {
    a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k));
  }
}
1637 1676
1638 1677
1639 1678 // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
// Unimplemented stub: would push the double FPU top-of-stack onto the CPU stack.
void MacroAssembler::push_fTOS() {
  // %%%%%% need to implement this
}
1643 1682
1644 1683 // pops double TOS element from CPU stack and pushes on FPU stack
// Unimplemented stub: would pop a double from the CPU stack onto the FPU stack.
void MacroAssembler::pop_fTOS() {
  // %%%%%% need to implement this
}
1648 1687
// Unimplemented stub: would discard all FPU stack contents.
void MacroAssembler::empty_FPU_stack() {
  // %%%%%% need to implement this
}
1652 1691
// Emit a +VerifyOops plausibility check of the oop in reg: saves the 64-bit
// O-regs it clobbers above the window (a 'save' would truncate 64-bit longs
// in the 32-bit build), then calls the shared verify_oop subroutine with the
// oop in O0 and a heap-allocated diagnostic message in O1.
void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) {
  // plausibility check for oops
  if (!VerifyOops) return;

  if (reg == G0)  return;       // always NULL, which is always an oop

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif

  // Build the message string; 'buffer' is reused for each fragment while
  // 'len' accumulates the final allocation size.
  int len = strlen(file) + strlen(msg) + 1 + 4;
  sprintf(buffer, "%d", line);
  len += strlen(buffer);
  sprintf(buffer, " at offset %d ", offset());
  len += strlen(buffer);
  char * real_msg = new char[len];
  sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);

  // Call indirectly to solve generation ordering problem
  AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address());

  // Make some space on stack above the current register window.
  // Enough to hold 8 64-bit registers.
  add(SP,-8*8,SP);

  // Save some 64-bit registers; a normal 'save' chops the heads off
  // of 64-bit longs in the 32-bit build.
  stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8);
  stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8);
  mov(reg,O0); // Move arg into O0; arg might be in O7 which is about to be crushed
  stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);

  set((intptr_t)real_msg, O1);
  // Load address to call to into O7
  load_ptr_contents(a, O7);
  // Register call to verify_oop_subroutine
  callr(O7, G0);
  delayed()->nop();
  // recover frame size
  add(SP, 8*8,SP);
}
1698 1737
// Like _verify_oop, but the oop is loaded from a stack address instead of a
// register. The load offset is adjusted by 8*8 because the scratch area has
// already been pushed below the addressed slot.
void MacroAssembler::_verify_oop_addr(Address addr, const char* msg, const char * file, int line) {
  // plausibility check for oops
  if (!VerifyOops) return;

  char buffer[64];
  sprintf(buffer, "%d", line);
  int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
  sprintf(buffer, " at SP+%d ", addr.disp());
  len += strlen(buffer);
  char * real_msg = new char[len];
  sprintf(real_msg, "%s at SP+%d (%s:%d)", msg, addr.disp(), file, line);

  // Call indirectly to solve generation ordering problem
  AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address());

  // Make some space on stack above the current register window.
  // Enough to hold 8 64-bit registers.
  add(SP,-8*8,SP);

  // Save some 64-bit registers; a normal 'save' chops the heads off
  // of 64-bit longs in the 32-bit build.
  stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8);
  stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8);
  ld_ptr(addr.base(), addr.disp() + 8*8, O0); // Load arg into O0; arg might be in O7 which is about to be crushed
  stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);

  set((intptr_t)real_msg, O1);
  // Load address to call to into O7
  load_ptr_contents(a, O7);
  // Register call to verify_oop_subroutine
  callr(O7, G0);
  delayed()->nop();
  // recover frame size
  add(SP, 8*8,SP);
}
1734 1773
// side-door communication with signalHandler in os_solaris.cpp
// [0]/[1] bracket the faulting pc range; [2] is the resume point (filled in
// by verify_oop_subroutine below).
address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL };
1737 1776
// This macro is expanded just once; it creates shared code.  Contract:
// receives an oop in O0.  Must restore O0 & O7 from TLS.  Must not smash ANY
// registers, including flags.  May not use a register 'save', as this blows
// the high bits of the O-regs if they contain Long values.  Acts as a 'leaf'
// call.
void MacroAssembler::verify_oop_subroutine() {
  assert( VM_Version::v9_instructions_work(), "VerifyOops not supported for V8" );

  // Leaf call; no frame.
  Label succeed, fail, null_or_fail;

  // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home).
  // O0 is now the oop to be checked.  O7 is the return address.
  Register O0_obj = O0;

  // Save some more registers for temps.
  // (The caller already made room above its window; see verify_oop.)
  stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8);
  stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8);
  stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8);
  stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8);

  // Save flags, restored on every exit path (contract: no register smashed)
  Register O5_save_flags = O5;
  rdccr( O5_save_flags );

  { // count number of verifies
    Register O2_adr   = O2;
    Register O3_accum = O3;
    inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum);
  }

  Register O2_mask = O2;
  Register O3_bits = O3;
  Register O4_temp = O4;

  // mark lower end of faulting range
  assert(_verify_oop_implicit_branch[0] == NULL, "set once");
  _verify_oop_implicit_branch[0] = pc();

  // We can't check the mark oop because it could be in the process of
  // locking or unlocking while this is running.
  set(Universe::verify_oop_mask (), O2_mask);
  set(Universe::verify_oop_bits (), O3_bits);

  // assert((obj & oop_mask) == oop_bits);
  and3(O0_obj, O2_mask, O4_temp);
  cmp(O4_temp, O3_bits);
  brx(notEqual, false, pn, null_or_fail);
  delayed()->nop();

  if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
    // the null_or_fail case is useless; must test for null separately
    br_null(O0_obj, false, pn, succeed);
    delayed()->nop();
  }

  // Check the klassOop of this object for being in the right area of memory.
  // Cannot do the load in the delay above slot in case O0 is null
  load_klass(O0_obj, O0_obj);
  // assert((klass & klass_mask) == klass_bits);
  if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
    set(Universe::verify_klass_mask(), O2_mask);
  if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
    set(Universe::verify_klass_bits(), O3_bits);
  and3(O0_obj, O2_mask, O4_temp);
  cmp(O4_temp, O3_bits);
  brx(notEqual, false, pn, fail);
  delayed()->nop();
  // Check the klass's klass
  load_klass(O0_obj, O0_obj);
  and3(O0_obj, O2_mask, O4_temp);
  cmp(O4_temp, O3_bits);
  brx(notEqual, false, pn, fail);
  delayed()->wrccr( O5_save_flags ); // Restore CCR's

  // mark upper end of faulting range
  _verify_oop_implicit_branch[1] = pc();

  //-----------------------
  // all tests pass
  bind(succeed);

  // Restore prior 64-bit registers
  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0);
  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1);
  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2);
  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3);
  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4);
  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+5*8,O5);

  retl();  // Leaf return; restore prior O7 in delay slot
  delayed()->ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+7*8,O7);

  //-----------------------
  bind(null_or_fail);  // nulls are less common but OK
  br_null(O0_obj, false, pt, succeed);
  delayed()->wrccr( O5_save_flags ); // Restore CCR's

  //-----------------------
  // report failure:
  bind(fail);
  _verify_oop_implicit_branch[2] = pc();

  wrccr( O5_save_flags ); // Restore CCR's

  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));

  // stop_subroutine expects message pointer in I1.
  // (The message arrived in the caller's O1, visible here as I1 of the
  // frame just saved; pass it on via O1.)
  mov(I1, O1);

  // Restore prior 64-bit registers
  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+0*8,I0);
  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+1*8,I1);
  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+2*8,I2);
  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+3*8,I3);
  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+4*8,I4);
  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+5*8,I5);

  // factor long stop-sequence into subroutine to save space
  assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet");

  // call indirectly to solve generation ordering problem
  AddressLiteral al(StubRoutines::Sparc::stop_subroutine_entry_address());
  load_ptr_contents(al, O5);
  jmpl(O5, 0, O7);
  delayed()->nop();
}
1865 1904
1866 1905
// Emit code that halts execution with the given message.  The message
// pointer is placed in O1 of the frame saved here (so it is I1 inside the
// shared stop_subroutine stub, which is entered without another save).
void MacroAssembler::stop(const char* msg) {
  // save frame first to get O7 for return address
  // add one word to size in case struct is odd number of words long
  // It must be doubleword-aligned for storing doubles into it.

  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));

  // stop_subroutine expects message pointer in I1.
  set((intptr_t)msg, O1);

  // factor long stop-sequence into subroutine to save space
  assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet");

  // call indirectly to solve generation ordering problem
  AddressLiteral a(StubRoutines::Sparc::stop_subroutine_entry_address());
  load_ptr_contents(a, O5);
  jmpl(O5, 0, O7);
  delayed()->nop();

  breakpoint_trap();   // make stop actually stop rather than writing
                       // unnoticeable results in the output files.

  // restore(); done in callee to save space!
}
1891 1930
1892 1931
// Emit code that prints a warning via the C function warning() and then
// continues.  All register state is snapshotted and restored around the
// call so the surrounding generated code is undisturbed.
void MacroAssembler::warn(const char* msg) {
  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
  RegistersForDebugging::save_registers(this);
  mov(O0, L0);  // stash O0 in L0; restore_registers below reads it from L0
                // (presumably the reg-save-area pointer -- confirm)
  set((intptr_t)msg, O0);
  call( CAST_FROM_FN_PTR(address, warning) );
  delayed()->nop();
//  ret();
//  delayed()->restore();
  RegistersForDebugging::restore_registers(this, L0);
  restore();
}
1905 1944
1906 1945
1907 1946 void MacroAssembler::untested(const char* what) {
1908 1947 // We must be able to turn interactive prompting off
1909 1948 // in order to run automated test scripts on the VM
1910 1949 // Use the flag ShowMessageBoxOnError
1911 1950
1912 1951 char* b = new char[1024];
1913 1952 sprintf(b, "untested: %s", what);
1914 1953
1915 1954 if ( ShowMessageBoxOnError ) stop(b);
1916 1955 else warn(b);
1917 1956 }
1918 1957
1919 1958
// Shared tail of the stop machinery (generated once as a stub; see stop()).
// Entered via jmpl with the message pointer in I1 of the caller-saved frame
// and a RegistersForDebugging area in that frame.  Snapshots all registers,
// calls MacroAssembler::debug() with the message and the snapshot, then
// hits breakpoint().
void MacroAssembler::stop_subroutine() {
  RegistersForDebugging::save_registers(this);

  // for the sake of the debugger, stick a PC on the current frame
  // (this assumes that the caller has performed an extra "save")
  mov(I7, L7);
  add(O7, -7 * BytesPerInt, I7);

  save_frame(); // one more save to free up another O7 register
  mov(I0, O1); // addr of reg save area

  // We expect pointer to message in I1.  Caller must set it up in O1
  mov(I1, O0); // get msg
  call (CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();

  restore();

  RegistersForDebugging::restore_registers(this, O0);

  save_frame(0);
  call(CAST_FROM_FN_PTR(address,breakpoint));
  delayed()->nop();
  restore();

  mov(L7, I7);
  retl();
  delayed()->restore(); // see stop above
}
1949 1988
1950 1989
// Runtime (C++) half of the stop machinery: called from generated code with
// the stop message and a pointer to the saved-register snapshot.  Either
// pops an interactive message box offering to dump registers
// (ShowMessageBoxOnError) or just prints the message; always asserts.
void MacroAssembler::debug(char* msg, RegistersForDebugging* regs) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks work, we need to fake a in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        ::tty->print_cr("Interpreter::bytecode_counter = %d", BytecodeCounter::counter_value());
      }
      if (os::message_box(msg, "Execution stopped, print registers?"))
        regs->print(::tty);
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  assert(false, "error");
}
1971 2010
1972 2011
#ifndef PRODUCT
// Debug-build self-test: assembles the V9 and V8-only instruction test
// sequences into a scratch buffer with all CPU features enabled, then runs
// the generated stop-routine smoke test.
void MacroAssembler::test() {
  ResourceMark rm;

  CodeBuffer cb("test", 10000, 10000);
  MacroAssembler* a = new MacroAssembler(&cb);
  VM_Version::allow_all();   // temporarily enable every instruction form
  a->test_v9();
  a->test_v8_onlys();
  VM_Version::revert();

  StubRoutines::Sparc::test_stop_entry()();
}
#endif
1987 2026
1988 2027
// Rresult = max(Rparam_words - n_register_parameters, 0): the number of
// outgoing parameter words that do not fit in registers and need stack
// slots.
void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) {
  subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words?
  Label no_extras;
  br( negative, true, pt, no_extras ); // if neg, clear reg
  delayed()->set(0, Rresult); // annuled, so only if taken
  bind( no_extras );
}
1996 2035
1997 2036
// Compute the byte size of a frame holding Rextra_words extra memory
// parameter words, rounded to an even number of words so the resulting SP
// stays doubleword-aligned.
void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) {
#ifdef _LP64
  add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult);
#else
  // +1 so that clearing the low bit below rounds up, not down
  add(Rextra_words, frame::memory_parameter_word_sp_offset + 1, Rresult);
#endif
  bclr(1, Rresult);   // force an even word count
  sll(Rresult, LogBytesPerWord, Rresult); // Rresult has total frame bytes
}
2007 2046
2008 2047
// Compute the frame size for Rextra_words extra words (see calc_frame_size)
// and immediately perform the register-window 'save' that allocates it.
void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) {
  calc_frame_size(Rextra_words, Rresult);
  neg(Rresult);             // save() adds its operand to SP, so negate the size
  save(SP, Rresult, SP);
}
2014 2053
2015 2054
2016 2055 // ---------------------------------------------------------
2017 2056 Assembler::RCondition cond2rcond(Assembler::Condition c) {
2018 2057 switch (c) {
2019 2058 /*case zero: */
2020 2059 case Assembler::equal: return Assembler::rc_z;
2021 2060 case Assembler::lessEqual: return Assembler::rc_lez;
2022 2061 case Assembler::less: return Assembler::rc_lz;
2023 2062 /*case notZero:*/
2024 2063 case Assembler::notEqual: return Assembler::rc_nz;
2025 2064 case Assembler::greater: return Assembler::rc_gz;
2026 2065 case Assembler::greaterEqual: return Assembler::rc_gez;
2027 2066 }
2028 2067 ShouldNotReachHere();
2029 2068 return Assembler::rc_z;
2030 2069 }
2031 2070
// compares register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
// (tst/br test via the 32-bit icc condition codes only).
void MacroAssembler::br_zero( Condition c, bool a, Predict p, Register s1, Label& L) {
  tst(s1);
  br (c, a, p, L);
}
2037 2076
2038 2077
// Compares a pointer register with zero and branches on null.
// Does a test & branch on 32-bit systems and a register-branch on 64-bit.
void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
  assert_not_delayed();
#ifdef _LP64
  bpr( rc_z, a, p, s1, L );   // branch on the full 64-bit register value
#else
  tst(s1);
  br ( zero, a, p, L );
#endif
}
2050 2089
// Compares a pointer register with zero and branches on not-null;
// counterpart of br_null above (test & branch on 32-bit, register-branch
// on 64-bit).
void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
  assert_not_delayed();
#ifdef _LP64
  bpr( rc_nz, a, p, s1, L );  // branch on the full 64-bit register value
#else
  tst(s1);
  br ( notZero, a, p, L );
#endif
}
2060 2099
// Branch on a register condition to an absolute address: uses the V9 bpr
// instruction when available, otherwise falls back to tst plus the
// equivalent icc conditional branch.
void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
                                     Register s1, address d,
                                     relocInfo::relocType rt ) {
  if (VM_Version::v9_instructions_work()) {
    bpr(rc, a, p, s1, d, rt);
  } else {
    tst(s1);
    br(reg_cond_to_cc_cond(rc), a, p, d, rt);
  }
}
2071 2110
// Label-target variant of br_on_reg_cond above: V9 bpr when available,
// else tst plus the equivalent icc conditional branch.
void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
                                     Register s1, Label& L ) {
  if (VM_Version::v9_instructions_work()) {
    bpr(rc, a, p, s1, L);
  } else {
    tst(s1);
    br(reg_cond_to_cc_cond(rc), a, p, L);
  }
}
2081 2120
2082 2121
2083 2122 // instruction sequences factored across compiler & interpreter
2084 2123
2085 2124
// Three-way compare of two 64-bit values held as hi/low register pairs:
// Rresult = -1, 0, or 1 as Ra <, ==, > Rb.  High halves compare signed;
// equal high halves fall through to an unsigned compare of the low halves.
void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
                           Register Rb_hi, Register Rb_low,
                           Register Rresult) {

  Label check_low_parts, done;

  cmp(Ra_hi, Rb_hi );  // compare hi parts
  br(equal, true, pt, check_low_parts);
  delayed()->cmp(Ra_low, Rb_low); // test low parts

  // And, with an unsigned comparison, it does not matter if the numbers
  // are negative or not.
  // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
  // The second one is bigger (unsignedly).

  // Other notes: The first move in each triplet can be unconditional
  // (and therefore probably prefetchable).
  // And the equals case for the high part does not need testing,
  // since that triplet is reached only after finding the high halves differ.

  if (VM_Version::v9_instructions_work()) {

    mov( -1, Rresult);
    ba( false, done );  delayed()-> movcc(greater, false, icc, 1, Rresult);
  }
  else {
    br(less,    true, pt, done); delayed()-> set(-1, Rresult);
    br(greater, true, pt, done); delayed()-> set( 1, Rresult);
  }

  bind( check_low_parts );

  if (VM_Version::v9_instructions_work()) {
    mov( -1, Rresult);
    movcc(equal,           false, icc, 0, Rresult);
    movcc(greaterUnsigned, false, icc, 1, Rresult);
  }
  else {
    set(-1, Rresult);
    br(equal,           true, pt, done); delayed()->set( 0, Rresult);
    br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
  }
  bind( done );
}
2130 2169
// Two's-complement negate of a 64-bit value held as a hi/low register
// pair: subtract the low half from zero (setting borrow), then the high
// half with borrow.
void MacroAssembler::lneg( Register Rhi, Register Rlow ) {
  subcc( G0, Rlow, Rlow );
  subc ( G0, Rhi,  Rhi  );
}
2135 2174
// 64-bit left shift of a hi/low register pair by Rcount (taken mod 64),
// producing Rout_high:Rout_low.  Built from 32-bit shifts so it also works
// pre-V9; Rtemp doubles as the alternate count and the transfer bits.
void MacroAssembler::lshl( Register Rin_high,  Register Rin_low,
                           Register Rcount,
                           Register Rout_high, Register Rout_low,
                           Register Rtemp ) {


  Register Ralt_count = Rtemp;
  Register Rxfer_bits = Rtemp;

  assert( Ralt_count != Rin_high
      &&  Ralt_count != Rin_low
      &&  Ralt_count != Rcount
      &&  Rxfer_bits != Rin_low
      &&  Rxfer_bits != Rin_high
      &&  Rxfer_bits != Rcount
      &&  Rxfer_bits != Rout_low
      &&  Rout_low   != Rin_high,
        "register alias checks");

  Label big_shift, done;

  // This code can be optimized to use the 64 bit shifts in V9.
  // Here we use the 32 bit shifts.

  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
  subcc(Rcount,   31, Ralt_count);
  br(greater, true, pn, big_shift);
  delayed()->
  dec(Ralt_count);

  // shift < 32 bits, Ralt_count = Rcount-31

  // We get the transfer bits by shifting right by 32-count the low
  // register. This is done by shifting right by 31-count and then by one
  // more to take care of the special (rare) case where count is zero
  // (shifting by 32 would not work).

  neg( Ralt_count );

  // The order of the next two instructions is critical in the case where
  // Rin and Rout are the same and should not be reversed.

  srl( Rin_low, Ralt_count, Rxfer_bits ); // shift right by 31-count
  if (Rcount != Rout_low) {
    sll( Rin_low, Rcount, Rout_low ); // low half
  }
  sll( Rin_high, Rcount, Rout_high );
  if (Rcount == Rout_low) {
    sll( Rin_low, Rcount, Rout_low ); // low half
  }
  srl( Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
  ba (false, done);
  delayed()->
  or3( Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low

  // shift >= 32 bits, Ralt_count = Rcount-32
  bind(big_shift);
  sll( Rin_low, Ralt_count, Rout_high  );
  clr( Rout_low                        );

  bind(done);
}
2198 2237
2199 2238
// 64-bit arithmetic right shift of a hi/low register pair by Rcount (taken
// mod 64), producing Rout_high:Rout_low; the sign is replicated from the
// high half.  Built from 32-bit shifts so it also works pre-V9.
void MacroAssembler::lshr( Register Rin_high,  Register Rin_low,
                           Register Rcount,
                           Register Rout_high, Register Rout_low,
                           Register Rtemp ) {

  Register Ralt_count = Rtemp;
  Register Rxfer_bits = Rtemp;

  assert( Ralt_count != Rin_high
      &&  Ralt_count != Rin_low
      &&  Ralt_count != Rcount
      &&  Rxfer_bits != Rin_low
      &&  Rxfer_bits != Rin_high
      &&  Rxfer_bits != Rcount
      &&  Rxfer_bits != Rout_high
      &&  Rout_high  != Rin_low,
        "register alias checks");

  Label big_shift, done;

  // This code can be optimized to use the 64 bit shifts in V9.
  // Here we use the 32 bit shifts.

  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
  subcc(Rcount,   31, Ralt_count);
  br(greater, true, pn, big_shift);
  delayed()->dec(Ralt_count);

  // shift < 32 bits, Ralt_count = Rcount-31

  // We get the transfer bits by shifting left by 32-count the high
  // register. This is done by shifting left by 31-count and then by one
  // more to take care of the special (rare) case where count is zero
  // (shifting by 32 would not work).

  neg( Ralt_count );
  if (Rcount != Rout_low) {
    srl( Rin_low, Rcount, Rout_low );
  }

  // The order of the next two instructions is critical in the case where
  // Rin and Rout are the same and should not be reversed.

  sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count
  sra( Rin_high,     Rcount, Rout_high  ); // high half
  sll( Rxfer_bits,        1, Rxfer_bits ); // shift left by one more
  if (Rcount == Rout_low) {
    srl( Rin_low, Rcount, Rout_low );
  }
  ba (false, done);
  delayed()->
  or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high

  // shift >= 32 bits, Ralt_count = Rcount-32
  bind(big_shift);

  sra( Rin_high, Ralt_count, Rout_low );
  sra( Rin_high,         31, Rout_high ); // sign into hi

  bind( done );
}
2261 2300
2262 2301
2263 2302
// 64-bit logical (unsigned) right shift of a hi/low register pair by
// Rcount (taken mod 64), producing Rout_high:Rout_low; zeroes shift in
// from the top.  Built from 32-bit shifts so it also works pre-V9.
void MacroAssembler::lushr( Register Rin_high,  Register Rin_low,
                            Register Rcount,
                            Register Rout_high, Register Rout_low,
                            Register Rtemp ) {

  Register Ralt_count = Rtemp;
  Register Rxfer_bits = Rtemp;

  assert( Ralt_count != Rin_high
      &&  Ralt_count != Rin_low
      &&  Ralt_count != Rcount
      &&  Rxfer_bits != Rin_low
      &&  Rxfer_bits != Rin_high
      &&  Rxfer_bits != Rcount
      &&  Rxfer_bits != Rout_high
      &&  Rout_high  != Rin_low,
        "register alias checks");

  Label big_shift, done;

  // This code can be optimized to use the 64 bit shifts in V9.
  // Here we use the 32 bit shifts.

  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
  subcc(Rcount,   31, Ralt_count);
  br(greater, true, pn, big_shift);
  delayed()->dec(Ralt_count);

  // shift < 32 bits, Ralt_count = Rcount-31

  // We get the transfer bits by shifting left by 32-count the high
  // register. This is done by shifting left by 31-count and then by one
  // more to take care of the special (rare) case where count is zero
  // (shifting by 32 would not work).

  neg( Ralt_count );
  if (Rcount != Rout_low) {
    srl( Rin_low, Rcount, Rout_low );
  }

  // The order of the next two instructions is critical in the case where
  // Rin and Rout are the same and should not be reversed.

  sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count
  srl( Rin_high,     Rcount, Rout_high  ); // high half
  sll( Rxfer_bits,        1, Rxfer_bits ); // shift left by one more
  if (Rcount == Rout_low) {
    srl( Rin_low, Rcount, Rout_low );
  }
  ba (false, done);
  delayed()->
  or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high

  // shift >= 32 bits, Ralt_count = Rcount-32
  bind(big_shift);

  srl( Rin_high, Ralt_count, Rout_low );
  clr( Rout_high );

  bind( done );
}
2325 2364
#ifdef _LP64
// Three-way signed compare of two 64-bit registers:
// Rresult = -1, 0, or 1 as Ra <, ==, > Rb (via xcc conditional moves).
void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
  cmp(Ra, Rb);
  mov( -1, Rresult);
  movcc(equal,   false, xcc, 0, Rresult);
  movcc(greater, false, xcc, 1, Rresult);
}
#endif
2334 2373
2335 2374
2336 2375 void MacroAssembler::load_sized_value(Address src, Register dst,
2337 2376 size_t size_in_bytes, bool is_signed) {
2338 2377 switch (size_in_bytes) {
2339 2378 case 8: ldx(src, dst); break;
2340 2379 case 4: ld( src, dst); break;
2341 2380 case 2: is_signed ? ldsh(src, dst) : lduh(src, dst); break;
2342 2381 case 1: is_signed ? ldsb(src, dst) : ldub(src, dst); break;
2343 2382 default: ShouldNotReachHere();
2344 2383 }
2345 2384 }
2346 2385
2347 2386
// Three-way floating compare: Rresult = -1, 0, or 1 as Fa <, ==, > Fb.
// unordered_result (-1 or 1) chooses which bucket an unordered (NaN)
// comparison falls into.  Uses conditional moves on V9, branches on V8.
void MacroAssembler::float_cmp( bool is_float, int unordered_result,
                                FloatRegister Fa, FloatRegister Fb,
                                Register Rresult) {

  fcmp(is_float ? FloatRegisterImpl::S : FloatRegisterImpl::D, fcc0, Fa, Fb);

  Condition lt = unordered_result == -1 ? f_unorderedOrLess    : f_less;
  Condition eq =                          f_equal;
  Condition gt = unordered_result ==  1 ? f_unorderedOrGreater : f_greater;

  if (VM_Version::v9_instructions_work()) {

    mov( -1, Rresult );
    movcc( eq, true, fcc0, 0, Rresult );
    movcc( gt, true, fcc0, 1, Rresult );

  } else {
    Label done;

    set( -1, Rresult );
    //fb(lt, true, pn, done); delayed()->set( -1, Rresult );
    fb( eq, true, pn, done);  delayed()->set( 0, Rresult );
    fb( gt, true, pn, done);  delayed()->set( 1, Rresult );

    bind (done);
  }
}
2375 2414
2376 2415
// Floating-point negate for any width.  V8 only has the single-precision
// form, so for D/Q the sign-carrying first single word is negated and the
// remaining words are copied with single-precision fmovs.
void MacroAssembler::fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
{
  if (VM_Version::v9_instructions_work()) {
    Assembler::fneg(w, s, d);
  } else {
    if (w == FloatRegisterImpl::S) {
      Assembler::fneg(w, s, d);
    } else if (w == FloatRegisterImpl::D) {
      // number() does a sanity check on the alignment.
      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");

      Assembler::fneg(FloatRegisterImpl::S, s, d);
      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
    } else {
      assert(w == FloatRegisterImpl::Q, "Invalid float register width");

      // number() does a sanity check on the alignment.
      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");

      Assembler::fneg(FloatRegisterImpl::S, s, d);
      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
    }
  }
}
2405 2444
// Floating-point register move for any width.  V8 only has the
// single-precision form, so D/Q moves are assembled from per-word
// single-precision fmovs over the aligned register group.
void MacroAssembler::fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
{
  if (VM_Version::v9_instructions_work()) {
    Assembler::fmov(w, s, d);
  } else {
    if (w == FloatRegisterImpl::S) {
      Assembler::fmov(w, s, d);
    } else if (w == FloatRegisterImpl::D) {
      // number() does a sanity check on the alignment.
      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");

      Assembler::fmov(FloatRegisterImpl::S, s, d);
      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
    } else {
      assert(w == FloatRegisterImpl::Q, "Invalid float register width");

      // number() does a sanity check on the alignment.
      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");

      Assembler::fmov(FloatRegisterImpl::S, s, d);
      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
    }
  }
}
2434 2473
// Floating-point absolute value for any width.  V8 only has the
// single-precision form, so for D/Q the sign-carrying first single word
// gets fabs and the remaining words are copied with single-precision
// fmovs.
void MacroAssembler::fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
{
  if (VM_Version::v9_instructions_work()) {
    Assembler::fabs(w, s, d);
  } else {
    if (w == FloatRegisterImpl::S) {
      Assembler::fabs(w, s, d);
    } else if (w == FloatRegisterImpl::D) {
      // number() does a sanity check on the alignment.
      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");

      Assembler::fabs(FloatRegisterImpl::S, s, d);
      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
    } else {
      assert(w == FloatRegisterImpl::Q, "Invalid float register width");

      // number() does a sanity check on the alignment.
      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");

      Assembler::fabs(FloatRegisterImpl::S, s, d);
      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
    }
  }
}
2463 2502
// Copy the global registers G1..G7 into locals L1..L7 so a C call (which
// may clobber globals) can be made; pair with
// restore_globals_from_locals().
void MacroAssembler::save_all_globals_into_locals() {
  mov(G1,L1);
  mov(G2,L2);
  mov(G3,L3);
  mov(G4,L4);
  mov(G5,L5);
  mov(G6,L6);
  mov(G7,L7);
}
2473 2512
// Inverse of save_all_globals_into_locals(): copy L1..L7 back into the
// global registers G1..G7.
void MacroAssembler::restore_globals_from_locals() {
  mov(L1,G1);
  mov(L2,G2);
  mov(L3,G3);
  mov(L4,G4);
  mov(L5,G5);
  mov(L6,G6);
  mov(L7,G7);
}
2483 2522
// Use for 64 bit operation.
// Compare-and-swap of a 64-bit value: a native casx on LP64, otherwise the
// software-lock emulation in cas_under_lock() below.
void MacroAssembler::casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
{
  // store ptr_reg as the new top value
#ifdef _LP64
  casx(top_ptr_reg, top_reg, ptr_reg);
#else
  cas_under_lock(top_ptr_reg, top_reg, ptr_reg, lock_addr, use_call_vm);
#endif // _LP64
}
2494 2533
2495 2534 // [RGV] This routine does not handle 64 bit operations.
2496 2535 // use casx_under_lock() or casx directly!!!
2497 2536 void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
2498 2537 {
2499 2538 // store ptr_reg as the new top value
2500 2539 if (VM_Version::v9_instructions_work()) {
2501 2540 cas(top_ptr_reg, top_reg, ptr_reg);
2502 2541 } else {
2503 2542
2504 2543 // If the register is not an out nor global, it is not visible
2505 2544 // after the save. Allocate a register for it, save its
2506 2545 // value in the register save area (the save may not flush
2507 2546 // registers to the save area).
2508 2547
2509 2548 Register top_ptr_reg_after_save;
2510 2549 Register top_reg_after_save;
2511 2550 Register ptr_reg_after_save;
2512 2551
2513 2552 if (top_ptr_reg->is_out() || top_ptr_reg->is_global()) {
2514 2553 top_ptr_reg_after_save = top_ptr_reg->after_save();
2515 2554 } else {
2516 2555 Address reg_save_addr = top_ptr_reg->address_in_saved_window();
2517 2556 top_ptr_reg_after_save = L0;
2518 2557 st(top_ptr_reg, reg_save_addr);
2519 2558 }
2520 2559
2521 2560 if (top_reg->is_out() || top_reg->is_global()) {
2522 2561 top_reg_after_save = top_reg->after_save();
2523 2562 } else {
2524 2563 Address reg_save_addr = top_reg->address_in_saved_window();
2525 2564 top_reg_after_save = L1;
2526 2565 st(top_reg, reg_save_addr);
2527 2566 }
2528 2567
2529 2568 if (ptr_reg->is_out() || ptr_reg->is_global()) {
2530 2569 ptr_reg_after_save = ptr_reg->after_save();
2531 2570 } else {
2532 2571 Address reg_save_addr = ptr_reg->address_in_saved_window();
2533 2572 ptr_reg_after_save = L2;
2534 2573 st(ptr_reg, reg_save_addr);
2535 2574 }
2536 2575
2537 2576 const Register& lock_reg = L3;
2538 2577 const Register& lock_ptr_reg = L4;
2539 2578 const Register& value_reg = L5;
2540 2579 const Register& yield_reg = L6;
2541 2580 const Register& yieldall_reg = L7;
2542 2581
2543 2582 save_frame();
2544 2583
2545 2584 if (top_ptr_reg_after_save == L0) {
2546 2585 ld(top_ptr_reg->address_in_saved_window().after_save(), top_ptr_reg_after_save);
2547 2586 }
2548 2587
2549 2588 if (top_reg_after_save == L1) {
2550 2589 ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
2551 2590 }
2552 2591
2553 2592 if (ptr_reg_after_save == L2) {
2554 2593 ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
2555 2594 }
2556 2595
2557 2596 Label(retry_get_lock);
2558 2597 Label(not_same);
2559 2598 Label(dont_yield);
2560 2599
2561 2600 assert(lock_addr, "lock_address should be non null for v8");
2562 2601 set((intptr_t)lock_addr, lock_ptr_reg);
2563 2602 // Initialize yield counter
2564 2603 mov(G0,yield_reg);
2565 2604 mov(G0, yieldall_reg);
2566 2605 set(StubRoutines::Sparc::locked, lock_reg);
2567 2606
2568 2607 bind(retry_get_lock);
2569 2608 cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
2570 2609 br(Assembler::less, false, Assembler::pt, dont_yield);
2571 2610 delayed()->nop();
2572 2611
2573 2612 if(use_call_vm) {
2574 2613 Untested("Need to verify global reg consistancy");
2575 2614 call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
2576 2615 } else {
2577 2616 // Save the regs and make space for a C call
2578 2617 save(SP, -96, SP);
2579 2618 save_all_globals_into_locals();
2580 2619 call(CAST_FROM_FN_PTR(address,os::yield_all));
2581 2620 delayed()->mov(yieldall_reg, O0);
2582 2621 restore_globals_from_locals();
2583 2622 restore();
2584 2623 }
2585 2624
2586 2625 // reset the counter
2587 2626 mov(G0,yield_reg);
2588 2627 add(yieldall_reg, 1, yieldall_reg);
2589 2628
2590 2629 bind(dont_yield);
2591 2630 // try to get lock
2592 2631 swap(lock_ptr_reg, 0, lock_reg);
2593 2632
2594 2633 // did we get the lock?
2595 2634 cmp(lock_reg, StubRoutines::Sparc::unlocked);
2596 2635 br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
2597 2636 delayed()->add(yield_reg,1,yield_reg);
2598 2637
2599 2638 // yes, got lock. do we have the same top?
2600 2639 ld(top_ptr_reg_after_save, 0, value_reg);
2601 2640 cmp(value_reg, top_reg_after_save);
2602 2641 br(Assembler::notEqual, false, Assembler::pn, not_same);
2603 2642 delayed()->nop();
2604 2643
2605 2644 // yes, same top.
2606 2645 st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
2607 2646 membar(Assembler::StoreStore);
2608 2647
2609 2648 bind(not_same);
2610 2649 mov(value_reg, ptr_reg_after_save);
2611 2650 st(lock_reg, lock_ptr_reg, 0); // unlock
2612 2651
2613 2652 restore();
2614 2653 }
2615 2654 }
2616 2655
2617 2656 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
2618 2657 Register tmp,
2619 2658 int offset) {
2620 2659 intptr_t value = *delayed_value_addr;
2621 2660 if (value != 0)
2622 2661 return RegisterOrConstant(value + offset);
2623 2662
2624 2663 // load indirectly to solve generation ordering problem
2625 2664 AddressLiteral a(delayed_value_addr);
2626 2665 load_ptr_contents(a, tmp);
2627 2666
2628 2667 #ifdef ASSERT
2629 2668 tst(tmp);
2630 2669 breakpoint_trap(zero, xcc);
2631 2670 #endif
2632 2671
2633 2672 if (offset != 0)
2634 2673 add(tmp, offset, tmp);
2635 2674
2636 2675 return RegisterOrConstant(tmp);
2637 2676 }
2638 2677
2639 2678
2640 2679 RegisterOrConstant MacroAssembler::regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
2641 2680 assert(d.register_or_noreg() != G0, "lost side effect");
2642 2681 if ((s2.is_constant() && s2.as_constant() == 0) ||
2643 2682 (s2.is_register() && s2.as_register() == G0)) {
2644 2683 // Do nothing, just move value.
2645 2684 if (s1.is_register()) {
2646 2685 if (d.is_constant()) d = temp;
2647 2686 mov(s1.as_register(), d.as_register());
2648 2687 return d;
2649 2688 } else {
2650 2689 return s1;
2651 2690 }
2652 2691 }
2653 2692
2654 2693 if (s1.is_register()) {
2655 2694 assert_different_registers(s1.as_register(), temp);
2656 2695 if (d.is_constant()) d = temp;
2657 2696 andn(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
2658 2697 return d;
2659 2698 } else {
2660 2699 if (s2.is_register()) {
2661 2700 assert_different_registers(s2.as_register(), temp);
2662 2701 if (d.is_constant()) d = temp;
2663 2702 set(s1.as_constant(), temp);
2664 2703 andn(temp, s2.as_register(), d.as_register());
2665 2704 return d;
2666 2705 } else {
2667 2706 intptr_t res = s1.as_constant() & ~s2.as_constant();
2668 2707 return res;
2669 2708 }
2670 2709 }
2671 2710 }
2672 2711
2673 2712 RegisterOrConstant MacroAssembler::regcon_inc_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
2674 2713 assert(d.register_or_noreg() != G0, "lost side effect");
2675 2714 if ((s2.is_constant() && s2.as_constant() == 0) ||
2676 2715 (s2.is_register() && s2.as_register() == G0)) {
2677 2716 // Do nothing, just move value.
2678 2717 if (s1.is_register()) {
2679 2718 if (d.is_constant()) d = temp;
2680 2719 mov(s1.as_register(), d.as_register());
2681 2720 return d;
2682 2721 } else {
2683 2722 return s1;
2684 2723 }
2685 2724 }
2686 2725
2687 2726 if (s1.is_register()) {
2688 2727 assert_different_registers(s1.as_register(), temp);
2689 2728 if (d.is_constant()) d = temp;
2690 2729 add(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
2691 2730 return d;
2692 2731 } else {
2693 2732 if (s2.is_register()) {
2694 2733 assert_different_registers(s2.as_register(), temp);
2695 2734 if (d.is_constant()) d = temp;
2696 2735 add(s2.as_register(), ensure_simm13_or_reg(s1, temp), d.as_register());
2697 2736 return d;
2698 2737 } else {
2699 2738 intptr_t res = s1.as_constant() + s2.as_constant();
2700 2739 return res;
2701 2740 }
2702 2741 }
2703 2742 }
2704 2743
2705 2744 RegisterOrConstant MacroAssembler::regcon_sll_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
2706 2745 assert(d.register_or_noreg() != G0, "lost side effect");
2707 2746 if (!is_simm13(s2.constant_or_zero()))
2708 2747 s2 = (s2.as_constant() & 0xFF);
2709 2748 if ((s2.is_constant() && s2.as_constant() == 0) ||
2710 2749 (s2.is_register() && s2.as_register() == G0)) {
2711 2750 // Do nothing, just move value.
2712 2751 if (s1.is_register()) {
2713 2752 if (d.is_constant()) d = temp;
2714 2753 mov(s1.as_register(), d.as_register());
2715 2754 return d;
2716 2755 } else {
2717 2756 return s1;
2718 2757 }
2719 2758 }
2720 2759
2721 2760 if (s1.is_register()) {
2722 2761 assert_different_registers(s1.as_register(), temp);
2723 2762 if (d.is_constant()) d = temp;
2724 2763 sll_ptr(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
2725 2764 return d;
2726 2765 } else {
2727 2766 if (s2.is_register()) {
2728 2767 assert_different_registers(s2.as_register(), temp);
2729 2768 if (d.is_constant()) d = temp;
2730 2769 set(s1.as_constant(), temp);
2731 2770 sll_ptr(temp, s2.as_register(), d.as_register());
2732 2771 return d;
2733 2772 } else {
2734 2773 intptr_t res = s1.as_constant() << s2.as_constant();
2735 2774 return res;
2736 2775 }
2737 2776 }
2738 2777 }
2739 2778
2740 2779
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
// Clobbers scan_temp; sethi_temp is only used to materialize a large
// itable offset.  The itable scan loop is peeled once so the common
// first-entry hit avoids the backward branch.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Register sethi_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;

  // scan_temp = number of vtable entries in the receiver klass
  lduw(recv_klass, instanceKlass::vtable_length_offset() * wordSize, scan_temp);
  // %%% We should store the aligned, prescaled offset in the klassoop.
  // Then the next several instructions would fold away.

  int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0);
  int itb_offset = vtable_base;
  if (round_to_unit != 0) {
    // hoist first instruction of round_to(scan_temp, BytesPerLong):
    itb_offset += round_to_unit - wordSize;
  }
  // Scale the vtable length into a byte offset past the vtable.
  int itb_scale = exact_log2(vtableEntry::size() * wordSize);
  sll(scan_temp, itb_scale, scan_temp);
  add(scan_temp, itb_offset, scan_temp);
  if (round_to_unit != 0) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    // Was: round_to(scan_temp, BytesPerLong);
    // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp);
    and3(scan_temp, -round_to_unit, scan_temp);
  }
  // scan_temp now points at the receiver's first itableOffsetEntry.
  add(recv_klass, scan_temp, scan_temp);

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  RegisterOrConstant itable_offset = itable_index;
  itable_offset = regcon_sll_ptr(itable_index, exact_log2(itableMethodEntry::size() * wordSize), itable_offset);
  itable_offset = regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes(), itable_offset);
  add(recv_klass, ensure_simm13_or_reg(itable_offset, sethi_temp), recv_klass);

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // peel == 1 emits the first iteration straight-line; peel == 0 emits
  // the looping tail bound at 'search'.
  for (int peel = 1; peel >= 0; peel--) {
    // %%%% Could load both offset and interface in one ldx, if they were
    // in the opposite order. This would save a load.
    ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result);

    // Check that this entry is non-null. A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    bpr(Assembler::rc_z, false, Assembler::pn, method_result, L_no_such_interface);
    delayed()->cmp(method_result, intf_klass);

    if (peel) {
      brx(Assembler::equal, false, Assembler::pt, found_method);
    } else {
      brx(Assembler::notEqual, false, Assembler::pn, search);
      // (invert the test to fall through to found_method...)
    }
    // Delay slot advances the scan pointer whether or not the branch is taken.
    delayed()->add(scan_temp, scan_step, scan_temp);

    if (!peel) break;

    bind(search);
  }

  bind(found_method);

  // Got a hit.
  int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
  // scan_temp[-scan_step] points to the vtable offset we need
  ito_offset -= scan_step;
  lduw(scan_temp, ito_offset, scan_temp);
  ld_ptr(recv_klass, scan_temp, method_result);
}
2830 2869
2831 2870
// Full subtype check: branches to L_success if sub_klass is a subtype of
// super_klass, otherwise falls through.  Emits the inline fast path and,
// for cases the fast path cannot decide, a slow path run inside a fresh
// register window so it can freely use L0..L5 as temporaries.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp2_reg,
                                         Label& L_success) {
  Label L_failure, L_pop_to_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass,
                                temp_reg, temp2_reg,
                                &L_success, &L_failure, NULL);
  // The slow path runs under save_frame, so non-global input registers
  // must be copied into locals visible after the save.
  Register sub_2 = sub_klass;
  Register sup_2 = super_klass;
  if (!sub_2->is_global()) sub_2 = L0;
  if (!sup_2->is_global()) sup_2 = L1;

  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
  check_klass_subtype_slow_path(sub_2, sup_2,
                                L2, L3, L4, L5,
                                NULL, &L_pop_to_failure);

  // on success:
  restore();
  ba(false, L_success);
  delayed()->nop();

  // on failure:
  bind(L_pop_to_failure);
  restore();
  bind(L_failure);
}
2861 2900
2862 2901
2863 2902 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2864 2903 Register super_klass,
2865 2904 Register temp_reg,
2866 2905 Register temp2_reg,
2867 2906 Label* L_success,
2868 2907 Label* L_failure,
2869 2908 Label* L_slow_path,
2870 2909 RegisterOrConstant super_check_offset,
2871 2910 Register instanceof_hack) {
2872 2911 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
2873 2912 Klass::secondary_super_cache_offset_in_bytes());
2874 2913 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2875 2914 Klass::super_check_offset_offset_in_bytes());
2876 2915
2877 2916 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
2878 2917 bool need_slow_path = (must_load_sco ||
2879 2918 super_check_offset.constant_or_zero() == sco_offset);
2880 2919
2881 2920 assert_different_registers(sub_klass, super_klass, temp_reg);
2882 2921 if (super_check_offset.is_register()) {
2883 2922 assert_different_registers(sub_klass, super_klass, temp_reg,
2884 2923 super_check_offset.as_register());
2885 2924 } else if (must_load_sco) {
2886 2925 assert(temp2_reg != noreg, "supply either a temp or a register offset");
2887 2926 }
2888 2927
2889 2928 Label L_fallthrough;
2890 2929 int label_nulls = 0;
2891 2930 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
2892 2931 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
2893 2932 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
2894 2933 assert(label_nulls <= 1 || instanceof_hack != noreg ||
2895 2934 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
2896 2935 "at most one NULL in the batch, usually");
2897 2936
2898 2937 // Support for the instanceof hack, which uses delay slots to
2899 2938 // set a destination register to zero or one.
2900 2939 bool do_bool_sets = (instanceof_hack != noreg);
2901 2940 #define BOOL_SET(bool_value) \
2902 2941 if (do_bool_sets && bool_value >= 0) \
2903 2942 set(bool_value, instanceof_hack)
2904 2943 #define DELAYED_BOOL_SET(bool_value) \
2905 2944 if (do_bool_sets && bool_value >= 0) \
2906 2945 delayed()->set(bool_value, instanceof_hack); \
2907 2946 else delayed()->nop()
2908 2947 // Hacked ba(), which may only be used just before L_fallthrough.
2909 2948 #define FINAL_JUMP(label, bool_value) \
2910 2949 if (&(label) == &L_fallthrough) { \
2911 2950 BOOL_SET(bool_value); \
2912 2951 } else { \
2913 2952 ba((do_bool_sets && bool_value >= 0), label); \
2914 2953 DELAYED_BOOL_SET(bool_value); \
2915 2954 }
2916 2955
2917 2956 // If the pointers are equal, we are done (e.g., String[] elements).
2918 2957 // This self-check enables sharing of secondary supertype arrays among
2919 2958 // non-primary types such as array-of-interface. Otherwise, each such
2920 2959 // type would need its own customized SSA.
2921 2960 // We move this check to the front of the fast path because many
2922 2961 // type checks are in fact trivially successful in this manner,
2923 2962 // so we get a nicely predicted branch right at the start of the check.
2924 2963 cmp(super_klass, sub_klass);
2925 2964 brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
2926 2965 DELAYED_BOOL_SET(1);
2927 2966
2928 2967 // Check the supertype display:
2929 2968 if (must_load_sco) {
2930 2969 // The super check offset is always positive...
2931 2970 lduw(super_klass, sco_offset, temp2_reg);
2932 2971 super_check_offset = RegisterOrConstant(temp2_reg);
2933 2972 // super_check_offset is register.
2934 2973 assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
2935 2974 }
2936 2975 ld_ptr(sub_klass, super_check_offset, temp_reg);
2937 2976 cmp(super_klass, temp_reg);
2938 2977
2939 2978 // This check has worked decisively for primary supers.
2940 2979 // Secondary supers are sought in the super_cache ('super_cache_addr').
2941 2980 // (Secondary supers are interfaces and very deeply nested subtypes.)
2942 2981 // This works in the same check above because of a tricky aliasing
2943 2982 // between the super_cache and the primary super display elements.
2944 2983 // (The 'super_check_addr' can address either, as the case requires.)
2945 2984 // Note that the cache is updated below if it does not help us find
2946 2985 // what we need immediately.
2947 2986 // So if it was a primary super, we can just fail immediately.
2948 2987 // Otherwise, it's the slow path for us (no success at this point).
2949 2988
2950 2989 if (super_check_offset.is_register()) {
2951 2990 brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
2952 2991 delayed(); if (do_bool_sets) BOOL_SET(1);
2953 2992 // if !do_bool_sets, sneak the next cmp into the delay slot:
2954 2993 cmp(super_check_offset.as_register(), sc_offset);
2955 2994
2956 2995 if (L_failure == &L_fallthrough) {
2957 2996 brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
2958 2997 delayed()->nop();
2959 2998 BOOL_SET(0); // fallthrough on failure
2960 2999 } else {
2961 3000 brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
2962 3001 DELAYED_BOOL_SET(0);
2963 3002 FINAL_JUMP(*L_slow_path, -1); // -1 => vanilla delay slot
2964 3003 }
2965 3004 } else if (super_check_offset.as_constant() == sc_offset) {
2966 3005 // Need a slow path; fast failure is impossible.
2967 3006 if (L_slow_path == &L_fallthrough) {
2968 3007 brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
2969 3008 DELAYED_BOOL_SET(1);
2970 3009 } else {
2971 3010 brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
2972 3011 delayed()->nop();
2973 3012 FINAL_JUMP(*L_success, 1);
2974 3013 }
2975 3014 } else {
2976 3015 // No slow path; it's a fast decision.
2977 3016 if (L_failure == &L_fallthrough) {
2978 3017 brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
2979 3018 DELAYED_BOOL_SET(1);
2980 3019 BOOL_SET(0);
2981 3020 } else {
2982 3021 brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
2983 3022 DELAYED_BOOL_SET(0);
2984 3023 FINAL_JUMP(*L_success, 1);
2985 3024 }
2986 3025 }
2987 3026
2988 3027 bind(L_fallthrough);
2989 3028
2990 3029 #undef final_jump
2991 3030 #undef bool_set
2992 3031 #undef DELAYED_BOOL_SET
2993 3032 #undef final_jump
2994 3033 }
2995 3034
2996 3035
// Slow path of the subtype check: linear scan of sub_klass's secondary
// supers array looking for super_klass.  Branches to L_success on a hit
// and L_failure on a miss (either may be NULL, meaning "fall through").
// On success the result is also cached in sub_klass's secondary super
// cache.  Clobbers count_temp, scan_temp, scratch_reg; coop_reg (may be
// noreg) holds the compressed super_klass when UseCompressedOops.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register count_temp,
                                                   Register scan_temp,
                                                   Register scratch_reg,
                                                   Register coop_reg,
                                                   Label* L_success,
                                                   Label* L_failure) {
  assert_different_registers(sub_klass, super_klass,
                             count_temp, scan_temp, scratch_reg, coop_reg);

  Label L_fallthrough, L_loop;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.

#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  inc_counter((address) pst_counter, count_temp, scan_temp);
#endif

  // We will consult the secondary-super array.
  ld_ptr(sub_klass, ss_offset, scan_temp);

  // Compress superclass if necessary, so the scan compares like with like.
  Register search_key = super_klass;
  bool decode_super_klass = false;
  if (UseCompressedOops) {
    if (coop_reg != noreg) {
      encode_heap_oop_not_null(super_klass, coop_reg);
      search_key = coop_reg;
    } else {
      // No spare register: compress super_klass in place and remember to
      // decode it again before caching/returning.
      encode_heap_oop_not_null(super_klass);
      decode_super_klass = true; // scarce temps!
    }
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here. Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
  }

  // Load the array length. (Positive movl does right thing on LP64.)
  lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);

  // Check for empty secondary super list
  tst(count_temp);

  // Top of search loop; loop condition was computed just above (tst or deccc).
  bind(L_loop);
  br(Assembler::equal, false, Assembler::pn, *L_failure);
  delayed()->add(scan_temp, heapOopSize, scan_temp);
  assert(heapOopSize != 0, "heapOopSize should be initialized");

  // Skip the array header in all array accesses.
  int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
  elem_offset -= heapOopSize;   // the scan pointer was pre-incremented also

  // Load next super to check
  if (UseCompressedOops) {
    // Don't use load_heap_oop; we don't want to decode the element.
    lduw( scan_temp, elem_offset, scratch_reg );
  } else {
    ld_ptr( scan_temp, elem_offset, scratch_reg );
  }

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  cmp(scratch_reg, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
  delayed()->deccc(count_temp); // decrement trip counter in delay slot

  // Falling out the bottom means we found a hit; we ARE a subtype
  if (decode_super_klass) decode_heap_oop(super_klass);

  // Success. Cache the super we found and proceed in triumph.
  st_ptr(super_klass, sub_klass, sc_offset);

  if (L_success != &L_fallthrough) {
    ba(false, *L_success);
    delayed()->nop();
  }

  bind(L_fallthrough);
}
3092 3131
3093 3132
3094 3133 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
3095 3134 Register temp_reg,
3096 3135 Label& wrong_method_type) {
3097 3136 assert_different_registers(mtype_reg, mh_reg, temp_reg);
3098 3137 // compare method type against that of the receiver
3099 3138 RegisterOrConstant mhtype_offset = delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg);
3100 3139 load_heap_oop(mh_reg, mhtype_offset, temp_reg);
3101 3140 cmp(temp_reg, mtype_reg);
3102 3141 br(Assembler::notEqual, false, Assembler::pn, wrong_method_type);
3103 3142 delayed()->nop();
3104 3143 }
3105 3144
3106 3145
3107 3146 // A method handle has a "vmslots" field which gives the size of its
3108 3147 // argument list in JVM stack slots. This field is either located directly
3109 3148 // in every method handle, or else is indirectly accessed through the
3110 3149 // method handle's MethodType. This macro hides the distinction.
3111 3150 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
3112 3151 Register temp_reg) {
3113 3152 assert_different_registers(vmslots_reg, mh_reg, temp_reg);
3114 3153 // load mh.type.form.vmslots
3115 3154 if (java_dyn_MethodHandle::vmslots_offset_in_bytes() != 0) {
3116 3155 // hoist vmslots into every mh to avoid dependent load chain
3117 3156 ld( Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3118 3157 } else {
3119 3158 Register temp2_reg = vmslots_reg;
3120 3159 load_heap_oop(Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)), temp2_reg);
3121 3160 load_heap_oop(Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg)), temp2_reg);
3122 3161 ld( Address(temp2_reg, delayed_value(java_dyn_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
3123 3162 }
3124 3163 }
3125 3164
3126 3165
// Tail-jump to the interpreted entry point of the method handle in mh_reg
// (which must be G3_method_handle).  Clobbers temp_reg.  When
// emit_delayed_nop is false, the caller is responsible for emitting the
// jmp's delay-slot instruction immediately after this call.
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg, bool emit_delayed_nop) {
  assert(mh_reg == G3_method_handle, "caller must put MH object in G3");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  ld_ptr(mh_reg, delayed_value(java_dyn_MethodHandle::vmentry_offset_in_bytes, temp_reg), temp_reg);

  // off we go...
  ld_ptr(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes(), temp_reg);
  jmp(temp_reg, 0);

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub

  // Some callers can fill the delay slot.
  if (emit_delayed_nop) {
    delayed()->nop();
  }
}
3147 3186
3148 3187
3149 3188 RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
3150 3189 int extra_slot_offset) {
3151 3190 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
3152 3191 int stackElementSize = Interpreter::stackElementSize;
3153 3192 int offset = extra_slot_offset * stackElementSize;
3154 3193 if (arg_slot.is_constant()) {
3155 3194 offset += arg_slot.as_constant() * stackElementSize;
3156 3195 return offset;
3157 3196 } else {
3158 3197 Register temp = arg_slot.as_register();
3159 3198 sll_ptr(temp, exact_log2(stackElementSize), temp);
3160 3199 if (offset != 0)
3161 3200 add(temp, offset, temp);
3162 3201 return temp;
3163 3202 }
3164 3203 }
3165 3204
3166 3205
3167 3206 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
3168 3207 int extra_slot_offset) {
3169 3208 return Address(Gargs, argument_offset(arg_slot, extra_slot_offset));
3170 3209 }
3171 3210
3172 3211
3173 3212 void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
3174 3213 Register temp_reg,
3175 3214 Label& done, Label* slow_case,
3176 3215 BiasedLockingCounters* counters) {
3177 3216 assert(UseBiasedLocking, "why call this otherwise?");
3178 3217
3179 3218 if (PrintBiasedLockingStatistics) {
3180 3219 assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
3181 3220 if (counters == NULL)
3182 3221 counters = BiasedLocking::counters();
3183 3222 }
3184 3223
3185 3224 Label cas_label;
3186 3225
3187 3226 // Biased locking
3188 3227 // See whether the lock is currently biased toward our thread and
3189 3228 // whether the epoch is still valid
3190 3229 // Note that the runtime guarantees sufficient alignment of JavaThread
3191 3230 // pointers to allow age to be placed into low bits
3192 3231 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
3193 3232 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
3194 3233 cmp(temp_reg, markOopDesc::biased_lock_pattern);
3195 3234 brx(Assembler::notEqual, false, Assembler::pn, cas_label);
3196 3235 delayed()->nop();
3197 3236
3198 3237 load_klass(obj_reg, temp_reg);
3199 3238 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3200 3239 or3(G2_thread, temp_reg, temp_reg);
3201 3240 xor3(mark_reg, temp_reg, temp_reg);
3202 3241 andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
3203 3242 if (counters != NULL) {
3204 3243 cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
3205 3244 // Reload mark_reg as we may need it later
3206 3245 ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
3207 3246 }
3208 3247 brx(Assembler::equal, true, Assembler::pt, done);
3209 3248 delayed()->nop();
3210 3249
3211 3250 Label try_revoke_bias;
3212 3251 Label try_rebias;
3213 3252 Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
3214 3253 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3215 3254
3216 3255 // At this point we know that the header has the bias pattern and
3217 3256 // that we are not the bias owner in the current epoch. We need to
3218 3257 // figure out more details about the state of the header in order to
3219 3258 // know what operations can be legally performed on the object's
3220 3259 // header.
3221 3260
3222 3261 // If the low three bits in the xor result aren't clear, that means
3223 3262 // the prototype header is no longer biased and we have to revoke
3224 3263 // the bias on this object.
3225 3264 btst(markOopDesc::biased_lock_mask_in_place, temp_reg);
3226 3265 brx(Assembler::notZero, false, Assembler::pn, try_revoke_bias);
3227 3266
3228 3267 // Biasing is still enabled for this data type. See whether the
3229 3268 // epoch of the current bias is still valid, meaning that the epoch
3230 3269 // bits of the mark word are equal to the epoch bits of the
3231 3270 // prototype header. (Note that the prototype header's epoch bits
3232 3271 // only change at a safepoint.) If not, attempt to rebias the object
3233 3272 // toward the current thread. Note that we must be absolutely sure
3234 3273 // that the current epoch is invalid in order to do this because
3235 3274 // otherwise the manipulations it performs on the mark word are
3236 3275 // illegal.
3237 3276 delayed()->btst(markOopDesc::epoch_mask_in_place, temp_reg);
3238 3277 brx(Assembler::notZero, false, Assembler::pn, try_rebias);
3239 3278
3240 3279 // The epoch of the current bias is still valid but we know nothing
3241 3280 // about the owner; it might be set or it might be clear. Try to
3242 3281 // acquire the bias of the object using an atomic operation. If this
3243 3282 // fails we will go in to the runtime to revoke the object's bias.
3244 3283 // Note that we first construct the presumed unbiased header so we
3245 3284 // don't accidentally blow away another thread's valid bias.
3246 3285 delayed()->and3(mark_reg,
3247 3286 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
3248 3287 mark_reg);
3249 3288 or3(G2_thread, mark_reg, temp_reg);
3250 3289 casn(mark_addr.base(), mark_reg, temp_reg);
3251 3290 // If the biasing toward our thread failed, this means that
3252 3291 // another thread succeeded in biasing it toward itself and we
3253 3292 // need to revoke that bias. The revocation will occur in the
3254 3293 // interpreter runtime in the slow case.
3255 3294 cmp(mark_reg, temp_reg);
3256 3295 if (counters != NULL) {
3257 3296 cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
3258 3297 }
3259 3298 if (slow_case != NULL) {
3260 3299 brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3261 3300 delayed()->nop();
3262 3301 }
3263 3302 br(Assembler::always, false, Assembler::pt, done);
3264 3303 delayed()->nop();
3265 3304
3266 3305 bind(try_rebias);
3267 3306 // At this point we know the epoch has expired, meaning that the
3268 3307 // current "bias owner", if any, is actually invalid. Under these
3269 3308 // circumstances _only_, we are allowed to use the current header's
3270 3309 // value as the comparison value when doing the cas to acquire the
3271 3310 // bias in the current epoch. In other words, we allow transfer of
3272 3311 // the bias from one thread to another directly in this situation.
3273 3312 //
3274 3313 // FIXME: due to a lack of registers we currently blow away the age
3275 3314 // bits in this situation. Should attempt to preserve them.
3276 3315 load_klass(obj_reg, temp_reg);
3277 3316 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3278 3317 or3(G2_thread, temp_reg, temp_reg);
3279 3318 casn(mark_addr.base(), mark_reg, temp_reg);
3280 3319 // If the biasing toward our thread failed, this means that
3281 3320 // another thread succeeded in biasing it toward itself and we
3282 3321 // need to revoke that bias. The revocation will occur in the
3283 3322 // interpreter runtime in the slow case.
3284 3323 cmp(mark_reg, temp_reg);
3285 3324 if (counters != NULL) {
3286 3325 cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
3287 3326 }
3288 3327 if (slow_case != NULL) {
3289 3328 brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
3290 3329 delayed()->nop();
3291 3330 }
3292 3331 br(Assembler::always, false, Assembler::pt, done);
3293 3332 delayed()->nop();
3294 3333
3295 3334 bind(try_revoke_bias);
3296 3335 // The prototype mark in the klass doesn't have the bias bit set any
3297 3336 // more, indicating that objects of this data type are not supposed
3298 3337 // to be biased any more. We are going to try to reset the mark of
3299 3338 // this object to the prototype value and fall through to the
3300 3339 // CAS-based locking scheme. Note that if our CAS fails, it means
3301 3340 // that another thread raced us for the privilege of revoking the
3302 3341 // bias of this particular object, so it's okay to continue in the
3303 3342 // normal locking code.
3304 3343 //
3305 3344 // FIXME: due to a lack of registers we currently blow away the age
3306 3345 // bits in this situation. Should attempt to preserve them.
3307 3346 load_klass(obj_reg, temp_reg);
3308 3347 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
3309 3348 casn(mark_addr.base(), mark_reg, temp_reg);
3310 3349 // Fall through to the normal CAS-based lock, because no matter what
3311 3350 // the result of the above CAS, some thread must have succeeded in
3312 3351 // removing the bias bit from the object's header.
3313 3352 if (counters != NULL) {
3314 3353 cmp(mark_reg, temp_reg);
3315 3354 cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg);
3316 3355 }
3317 3356
3318 3357 bind(cas_label);
3319 3358 }
3320 3359
// Emit the biased-locking unlock fast path: if the object's mark word still
// carries the biased-lock bit pattern, unlocking is a no-op and control
// branches to 'done'.  Clobbers temp_reg.  When allow_delay_slot_filling is
// true the caller promises to fill the branch delay slot itself; otherwise
// a nop is emitted into it here.
void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done,
                                          bool allow_delay_slot_filling) {
  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ld_ptr(mark_addr, temp_reg);
  and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
  cmp(temp_reg, markOopDesc::biased_lock_pattern);
  brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
  delayed();
  if (!allow_delay_slot_filling) {
    nop();
  }
}
3338 3377
3339 3378
3340 3379 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
3341 3380 // Solaris/SPARC's "as". Another apt name would be cas_ptr()
3342 3381
// Pointer-width compare-and-swap: atomically, if *addr_reg == cmp_reg then
// exchange *addr_reg with set_reg; set_reg receives the old memory value.
// Delegates to casx_under_lock with the global atomic-memory-operation lock
// address (used by the software fallback on platforms without a native cas).
void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
  casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ;
}
3346 3385
3347 3386
3348 3387
3349 3388 // compiler_lock_object() and compiler_unlock_object() are direct transliterations
3350 3389 // of i486.ad fast_lock() and fast_unlock(). See those methods for detailed comments.
3351 3390 // The code could be tightened up considerably.
3352 3391 //
3353 3392 // box->dhw disposition - post-conditions at DONE_LABEL.
3354 3393 // - Successful inflated lock: box->dhw != 0.
3355 3394 // Any non-zero value suffices.
3356 3395 // Consider G2_thread, rsp, boxReg, or unused_mark()
3357 3396 // - Successful Stack-lock: box->dhw == mark.
3358 3397 // box->dhw must contain the displaced mark word value
3359 3398 // - Failure -- icc.ZFlag == 0 and box->dhw is undefined.
3360 3399 // The slow-path fast_enter() and slow_enter() operators
3361 3400 // are responsible for setting box->dhw = NonZero (typically ::unused_mark).
3362 3401 // - Biased: box->dhw is undefined
3363 3402 //
3364 3403 // SPARC refworkload performance - specifically jetstream and scimark - are
3365 3404 // extremely sensitive to the size of the code emitted by compiler_lock_object
3366 3405 // and compiler_unlock_object. Critically, the key factor is code size, not path
3367 3406 // length. (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
3368 3407 // effect).
3369 3408
3370 3409
// Emit the compiler (C2) monitorenter fast path -- see the box->dhw
// disposition comment above for the displaced-header-word contract at 'done'.
//   Roop     - object to lock (a verified oop)
//   Rmark    - temp; holds the fetched mark word
//   Rbox     - the on-stack BasicLock box for this monitor site
//   Rscratch - temp
//   counters - optional BiasedLocking statistics counters (may be NULL)
//   try_bias - emit the biased-locking entry fast path first
// On exit the condition codes reflect lock acquisition (icc.Z set on
// success); callers branch on them.  The EmitSync diagnostic flag selects
// among several emission variants.  Code size is critical here (see the
// performance note above), so keep additions minimal.
void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
                                          Register Rbox, Register Rscratch,
                                          BiasedLockingCounters* counters,
                                          bool try_bias) {
  Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());

  verify_oop(Roop);
  Label done ;

  if (counters != NULL) {
    inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
  }

  // EmitSync & 1: degenerate diagnostic variant -- store a non-zero dhw and
  // leave icc.Z clear (SP is never 0), forcing the slow path.
  if (EmitSync & 1) {
    mov    (3, Rscratch) ;
    st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    cmp    (SP, G0) ;
    return ;
  }

  // EmitSync & 2: legacy stack-lock-only variant (no inflated-monitor
  // fast path).
  if (EmitSync & 2) {

    // Fetch object's markword
    ld_ptr(mark_addr, Rmark);

    if (try_bias) {
      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
    }

    // Save Rbox in Rscratch to be used for the cas operation
    mov(Rbox, Rscratch);

    // set Rmark to markOop | markOopDesc::unlocked_value
    or3(Rmark, markOopDesc::unlocked_value, Rmark);

    // Initialize the box.  (Must happen before we update the object mark!)
    st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());

    // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
    casx_under_lock(mark_addr.base(), Rmark, Rscratch,
      (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());

    // if compare/exchange succeeded we found an unlocked object and we now have locked it
    // hence we are done
    cmp(Rmark, Rscratch);
#ifdef _LP64
    sub(Rscratch, STACK_BIAS, Rscratch);
#endif
    brx(Assembler::equal, false, Assembler::pt, done);
    delayed()->sub(Rscratch, SP, Rscratch);  // pull next instruction into delay slot

    // we did not find an unlocked object so see if this is a recursive case
    // sub(Rscratch, SP, Rscratch);
    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    andcc(Rscratch, 0xfffff003, Rscratch);
    st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    bind (done) ;
    return ;
  }

  Label Egress ;

  // EmitSync & 256: store-mark-before-CAS variant (simpler ordering,
  // pays the ST-before-CAS penalty).  The default variant is in the
  // else-branch below.
  if (EmitSync & 256) {
    Label IsInflated ;

    ld_ptr (mark_addr, Rmark);           // fetch obj->mark
    // Triage: biased, stack-locked, neutral, inflated
    if (try_bias) {
      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
      // Invariant: if control reaches this point in the emitted stream
      // then Rmark has not been modified.
    }

    // Store mark into displaced mark field in the on-stack basic-lock "box"
    // Critically, this must happen before the CAS
    // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
    st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
    andcc  (Rmark, 2, G0) ;              // low bit 2 of the mark marks an inflated monitor
    brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
    delayed() ->

    // Try stack-lock acquisition.
    // Beware: the 1st instruction is in a delay slot
    mov    (Rbox, Rscratch);
    or3    (Rmark, markOopDesc::unlocked_value, Rmark);
    assert (mark_addr.disp() == 0, "cas must take a zero displacement");
    casn   (mark_addr.base(), Rmark, Rscratch) ;
    cmp    (Rmark, Rscratch);
    brx    (Assembler::equal, false, Assembler::pt, done);
    delayed()->sub(Rscratch, SP, Rscratch);

    // Stack-lock attempt failed - check for recursive stack-lock.
    // See the comments below about how we might remove this case.
#ifdef _LP64
    sub    (Rscratch, STACK_BIAS, Rscratch);
#endif
    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    andcc  (Rscratch, 0xfffff003, Rscratch);
    br     (Assembler::always, false, Assembler::pt, done) ;
    delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());

    bind   (IsInflated) ;
    if (EmitSync & 64) {
      // If m->owner != null goto IsLocked
      // Pessimistic form: Test-and-CAS vs CAS
      // The optimistic form avoids RTS->RTO cache line upgrades.
      // Monitor field offsets carry a -2 bias to cancel the inflated tag
      // (the bit tested by andcc(Rmark, 2) above) in the mark-word pointer.
      ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
      andcc  (Rscratch, Rscratch, G0) ;
      brx    (Assembler::notZero, false, Assembler::pn, done) ;
      delayed()->nop() ;
      // m->owner == null : it's unlocked.
    }

    // Try to CAS m->owner from null to Self
    // Invariant: if we acquire the lock then _recursions should be 0.
    add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
    mov    (G2_thread, Rscratch) ;
    casn   (Rmark, G0, Rscratch) ;
    cmp    (Rscratch, G0) ;
    // Intentional fall-through into done
  } else {
    // Aggressively avoid the Store-before-CAS penalty
    // Defer the store into box->dhw until after the CAS
    Label IsInflated, Recursive ;

    // Anticipate CAS -- Avoid RTS->RTO upgrade
    // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;

    ld_ptr (mark_addr, Rmark);           // fetch obj->mark
    // Triage: biased, stack-locked, neutral, inflated

    if (try_bias) {
      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
      // Invariant: if control reaches this point in the emitted stream
      // then Rmark has not been modified.
    }
    andcc  (Rmark, 2, G0) ;              // inflated-monitor tag test
    brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
    delayed()->                          // Beware - dangling delay-slot

    // Try stack-lock acquisition.
    // Transiently install BUSY (0) encoding in the mark word.
    // if the CAS of 0 into the mark was successful then we execute:
    //   ST box->dhw  = mark -- save fetched mark in on-stack basiclock box
    //   ST obj->mark = box  -- overwrite transient 0 value
    // This presumes TSO, of course.

    mov    (0, Rscratch) ;               // (this mov fills the dangling delay slot above)
    or3    (Rmark, markOopDesc::unlocked_value, Rmark);
    assert (mark_addr.disp() == 0, "cas must take a zero displacement");
    casn   (mark_addr.base(), Rmark, Rscratch) ;
    // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
    cmp    (Rscratch, Rmark) ;
    brx    (Assembler::notZero, false, Assembler::pn, Recursive) ;
    delayed() ->
      st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
    if (counters != NULL) {
      cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
    }
    br     (Assembler::always, false, Assembler::pt, done);
    delayed() ->
      st_ptr (Rbox, mark_addr) ;

    bind   (Recursive) ;
    // Stack-lock attempt failed - check for recursive stack-lock.
    // Tests show that we can remove the recursive case with no impact
    // on refworkload 0.83.  If we need to reduce the size of the code
    // emitted by compiler_lock_object() the recursive case is a perfect
    // candidate.
    //
    // A more extreme idea is to always inflate on stack-lock recursion.
    // This lets us eliminate the recursive checks in compiler_lock_object
    // and compiler_unlock_object and the (box->dhw == 0) encoding.
    // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
    // and showed a performance *increase*.  In the same experiment I eliminated
    // the fast-path stack-lock code from the interpreter and always passed
    // control to the "slow" operators in synchronizer.cpp.

    // RScratch contains the fetched obj->mark value from the failed CASN.
#ifdef _LP64
    sub    (Rscratch, STACK_BIAS, Rscratch);
#endif
    sub(Rscratch, SP, Rscratch);
    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    andcc  (Rscratch, 0xfffff003, Rscratch);
    if (counters != NULL) {
      // Accounting needs the Rscratch register
      st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
      cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
      br     (Assembler::always, false, Assembler::pt, done) ;
      delayed()->nop() ;
    } else {
      br     (Assembler::always, false, Assembler::pt, done) ;
      delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
    }

    bind   (IsInflated) ;
    if (EmitSync & 64) {
      // If m->owner != null goto IsLocked
      // Test-and-CAS vs CAS
      // Pessimistic form avoids futile (doomed) CAS attempts
      // The optimistic form avoids RTS->RTO cache line upgrades.
      ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
      andcc  (Rscratch, Rscratch, G0) ;
      brx    (Assembler::notZero, false, Assembler::pn, done) ;
      delayed()->nop() ;
      // m->owner == null : it's unlocked.
    }

    // Try to CAS m->owner from null to Self
    // Invariant: if we acquire the lock then _recursions should be 0.
    add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
    mov    (G2_thread, Rscratch) ;
    casn   (Rmark, G0, Rscratch) ;
    cmp    (Rscratch, G0) ;
    // ST box->displaced_header = NonZero.
    // Any non-zero value suffices:
    //   unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
    st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
    // Intentional fall-through into done
  }

  bind   (done) ;
}
3596 3635
// Emit the compiler (C2) monitorexit fast path; the counterpart of
// compiler_lock_object() above.
//   Roop     - object being unlocked
//   Rmark    - temp; fetched mark word
//   Rbox     - the on-stack BasicLock box used at lock time
//   Rscratch - temp
//   try_bias - emit the biased-locking exit check first
// On exit the condition codes indicate success/failure for the caller.
// The EmitSync diagnostic flag selects among emission variants.
void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
                                            Register Rbox, Register Rscratch,
                                            bool try_bias) {
  Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());

  Label done ;

  // EmitSync & 4: degenerate diagnostic variant -- leave icc.Z clear
  // (SP is never 0), forcing the slow path.
  if (EmitSync & 4) {
    cmp(SP, G0);
    return ;
  }

  // EmitSync & 8: legacy stack-lock-only variant.
  if (EmitSync & 8) {
    if (try_bias) {
      biased_locking_exit(mark_addr, Rscratch, done);
    }

    // Test first if it is a fast recursive unlock
    ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
    cmp(Rmark, G0);
    brx(Assembler::equal, false, Assembler::pt, done);
    delayed()->nop();

    // Check if it is still a light weight lock, this is true if we see
    // the stack address of the basicLock in the markOop of the object
    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
    casx_under_lock(mark_addr.base(), Rbox, Rmark,
      (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
    br (Assembler::always, false, Assembler::pt, done);
    delayed()->cmp(Rbox, Rmark);
    bind (done) ;
    return ;
  }

  // Beware ... If the aggregate size of the code emitted by CLO and CUO
  // is too large performance rolls abruptly off a cliff.
  // This could be related to inlining policies, code cache management, or
  // I$ effects.
  Label LStacked ;

  if (try_bias) {
    // TODO: eliminate redundant LDs of obj->mark
    biased_locking_exit(mark_addr, Rscratch, done);
  }

  ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ;
  ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
  andcc  (Rscratch, Rscratch, G0);
  brx    (Assembler::zero, false, Assembler::pn, done);
  delayed()-> nop() ;      // consider: relocate fetch of mark, above, into this DS
  andcc  (Rmark, 2, G0) ;  // inflated-monitor tag test
  brx    (Assembler::zero, false, Assembler::pt, LStacked) ;
  delayed()-> nop() ;

  // It's inflated
  // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
  // the ST of 0 into _owner which releases the lock.  This prevents loads
  // and stores within the critical section from reordering (floating)
  // past the store that releases the lock.  But TSO is a strong memory model
  // and that particular flavor of barrier is a noop, so we can safely elide it.
  // Note that we use 1-0 locking by default for the inflated case.  We
  // close the resultant (and rare) race by having contended threads in
  // monitorenter periodically poll _owner.
  // Monitor field offsets carry a -2 bias to cancel the inflated tag
  // (the bit tested by andcc(Rmark, 2) above) in the mark-word pointer.
  ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
  ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
  xor3   (Rscratch, G2_thread, Rscratch) ;
  orcc   (Rbox, Rscratch, Rbox) ;
  brx    (Assembler::notZero, false, Assembler::pn, done) ;
  delayed()->
  ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
  ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
  orcc   (Rbox, Rscratch, G0) ;
  if (EmitSync & 65536) {
    Label LSucc ;
    brx    (Assembler::notZero, false, Assembler::pn, LSucc) ;
    delayed()->nop() ;
    br     (Assembler::always, false, Assembler::pt, done) ;
    delayed()->
    st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);

    bind   (LSucc) ;
    st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
    if (os::is_MP()) { membar (StoreLoad) ; }
    ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
    andcc  (Rscratch, Rscratch, G0) ;
    brx    (Assembler::notZero, false, Assembler::pt, done) ;
    delayed()-> andcc (G0, G0, G0) ;
    add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
    mov    (G2_thread, Rscratch) ;
    casn   (Rmark, G0, Rscratch) ;
    cmp    (Rscratch, G0) ;
    // invert icc.zf and goto done
    brx    (Assembler::notZero, false, Assembler::pt, done) ;
    delayed() -> cmp (G0, G0) ;
    br     (Assembler::always, false, Assembler::pt, done);
    delayed() -> cmp (G0, 1) ;
  } else {
    brx    (Assembler::notZero, false, Assembler::pn, done) ;
    delayed()->nop() ;
    br     (Assembler::always, false, Assembler::pt, done) ;
    delayed()->
    st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
  }

  bind   (LStacked) ;
  // Consider: we could replace the expensive CAS in the exit
  // path with a simple ST of the displaced mark value fetched from
  // the on-stack basiclock box.  That admits a race where a thread T2
  // in the slow lock path -- inflating with monitor M -- could race a
  // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
  // More precisely T1 in the stack-lock unlock path could "stomp" the
  // inflated mark value M installed by T2, resulting in an orphan
  // object monitor M and T2 becoming stranded.  We can remedy that situation
  // by having T2 periodically poll the object's mark word using timed wait
  // operations.  If T2 discovers that a stomp has occurred it vacates
  // the monitor M and wakes any other threads stranded on the now-orphan M.
  // In addition the monitor scavenger, which performs deflation,
  // would also need to check for orphan monitors and stranded threads.
  //
  // Finally, inflation is also used when T2 needs to assign a hashCode
  // to O and O is stack-locked by T1.  The "stomp" race could cause
  // an assigned hashCode value to be lost.  We can avoid that condition
  // and provide the necessary hashCode stability invariants by ensuring
  // that hashCode generation is idempotent between copying GCs.
  // For example we could compute the hashCode of an object O as
  // O's heap address XOR some high quality RNG value that is refreshed
  // at GC-time.  The monitor scavenger would install the hashCode
  // found in any orphan monitors.  Again, the mechanism admits a
  // lost-update "stomp" WAW race but detects and recovers as needed.
  //
  // A prototype implementation showed excellent results, although
  // the scavenger and timeout code was rather involved.

  casn   (mark_addr.base(), Rbox, Rscratch) ;
  cmp    (Rbox, Rscratch);
  // Intentional fall through into done ...

  bind   (done) ;
}
3736 3775
3737 3776
3738 3777
// Placeholder: CPU-state dumping is not implemented on SPARC (no-op).
void MacroAssembler::print_CPU_state() {
  // %%%%% need to implement this
}
3742 3781
// Placeholder: FPU-state verification is not implemented on SPARC;
// both parameters are currently ignored (no-op).
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  // %%%%% need to implement this
}
3746 3785
// Placeholder: integer-unit state save is not implemented on SPARC (no-op).
void MacroAssembler::push_IU_state() {
  // %%%%% need to implement this
}
3750 3789
3751 3790
// Placeholder: integer-unit state restore is not implemented on SPARC (no-op).
void MacroAssembler::pop_IU_state() {
  // %%%%% need to implement this
}
3755 3794
3756 3795
// Placeholder: FPU state save is not implemented on SPARC (no-op).
void MacroAssembler::push_FPU_state() {
  // %%%%% need to implement this
}
3760 3799
3761 3800
// Placeholder: FPU state restore is not implemented on SPARC (no-op).
void MacroAssembler::pop_FPU_state() {
  // %%%%% need to implement this
}
3765 3804
3766 3805
// Placeholder: full CPU state save is not implemented on SPARC (no-op).
void MacroAssembler::push_CPU_state() {
  // %%%%% need to implement this
}
3770 3809
3771 3810
// Placeholder: full CPU state restore is not implemented on SPARC (no-op).
void MacroAssembler::pop_CPU_state() {
  // %%%%% need to implement this
}
3775 3814
3776 3815
3777 3816
// Debug-only sanity checks on the current thread's TLAB pointers:
// asserts start <= top <= end and that all three are object-aligned.
// Emitted only under ASSERT with UseTLAB && VerifyOops; opens a fresh
// register window (save_frame/restore) so L0-L2 can be used as temps
// without disturbing the caller's registers.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, next2, ok;
    Register t1 = L0;
    Register t2 = L1;
    Register t3 = L2;

    save_frame(0);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
    or3(t1, t2, t3);                 // t3 accumulates the OR of all pointers for the alignment check below
    cmp(t1, t2);
    br(Assembler::greaterEqual, false, Assembler::pn, next);
    delayed()->nop();
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
    or3(t3, t2, t3);                 // fold 'end' into the accumulated bits
    cmp(t1, t2);
    br(Assembler::lessEqual, false, Assembler::pn, next2);
    delayed()->nop();
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(next2);
    // A bit set below MinObjAlignmentInBytes in any pointer means misalignment.
    and3(t3, MinObjAlignmentInBytesMask, t3);
    cmp(t3, 0);
    br(Assembler::lessEqual, false, Assembler::pn, ok);
    delayed()->nop();
    stop("assert(aligned)");
    should_not_reach_here();

    bind(ok);
    restore();
  }
#endif
}
3819 3858
3820 3859
// Emit a fast-path allocation directly in the shared eden space using a
// CAS retry loop on the heap top pointer.  Branches to slow_case when eden
// allocation is unavailable (CMSIncrementalMode or no inline contiguous
// alloc) or when there is insufficient space.  On success, obj points to
// the newly allocated (uninitialized) memory.
void MacroAssembler::eden_allocate(
  Register obj,                        // result: pointer to object after successful allocation
  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes,          // object size in bytes if known at compile time
  Register t1,                         // temp register
  Register t2,                         // temp register
  Label&   slow_case                   // continuation point if fast allocation fails
){
  // make sure arguments make sense
  assert_different_registers(obj, var_size_in_bytes, t1, t2);
  assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    br(Assembler::always, false, Assembler::pt, slow_case);
    delayed()->nop();
  } else {
    // get eden boundaries
    // note: we need both top & top_addr!
    const Register top_addr = t1;
    const Register end      = t2;

    CollectedHeap* ch = Universe::heap();
    set((intx)ch->top_addr(), top_addr);
    // 'end' lives at a fixed delta from 'top' in the heap, so one materialized
    // address plus a displacement load covers both.
    intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
    ld_ptr(top_addr, delta, end);
    ld_ptr(top_addr, 0, obj);

    // try to allocate
    Label retry;
    bind(retry);
#ifdef ASSERT
    // make sure eden top is properly aligned
    {
      Label L;
      btst(MinObjAlignmentInBytesMask, obj);
      br(Assembler::zero, false, Assembler::pt, L);
      delayed()->nop();
      stop("eden top is not properly aligned");
      bind(L);
    }
#endif // ASSERT
    const Register free = end;
    sub(end, obj, free);                                   // compute amount of free space
    if (var_size_in_bytes->is_valid()) {
      // size is unknown at compile time
      cmp(free, var_size_in_bytes);
      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
      delayed()->add(obj, var_size_in_bytes, end);
    } else {
      // size is known at compile time
      cmp(free, con_size_in_bytes);
      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
      delayed()->add(obj, con_size_in_bytes, end);
    }
    // Compare obj with the value at top_addr; if still equal, swap the value of
    // end with the value at top_addr. If not equal, read the value at top_addr
    // into end.
    casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
    // if someone beat us on the allocation, try again, otherwise continue
    cmp(obj, end);
    brx(Assembler::notEqual, false, Assembler::pn, retry);
    delayed()->mov(end, obj); // nop if successful since obj == end

#ifdef ASSERT
    // make sure eden top is properly aligned
    {
      Label L;
      const Register top_addr = t1;

      set((intx)ch->top_addr(), top_addr);
      ld_ptr(top_addr, 0, top_addr);
      btst(MinObjAlignmentInBytesMask, top_addr);
      br(Assembler::zero, false, Assembler::pt, L);
      delayed()->nop();
      stop("eden top is not properly aligned");
      bind(L);
    }
#endif // ASSERT
  }
}
3903 3942
3904 3943
// Emit a fast-path allocation from the current thread's TLAB.  No CAS is
// needed since the TLAB is thread-private: the free-space check branches to
// slow_case on shortfall, and the new top is computed in the branch delay
// slot and stored back on the fall-through path.  On success, obj points
// to the newly allocated (uninitialized) memory.
void MacroAssembler::tlab_allocate(
  Register obj,                        // result: pointer to object after successful allocation
  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes,          // object size in bytes if known at compile time
  Register t1,                         // temp register
  Label&   slow_case                   // continuation point if fast allocation fails
){
  // make sure arguments make sense
  assert_different_registers(obj, var_size_in_bytes, t1);
  assert(0 <= con_size_in_bytes && is_simm13(con_size_in_bytes), "illegal object size");
  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");

  const Register free = t1;

  verify_tlab();

  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), obj);

  // calculate amount of free space
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), free);
  sub(free, obj, free);

  Label done;
  if (var_size_in_bytes == noreg) {
    cmp(free, con_size_in_bytes);
  } else {
    cmp(free, var_size_in_bytes);
  }
  br(Assembler::less, false, Assembler::pn, slow_case);
  // calculate the new top pointer
  if (var_size_in_bytes == noreg) {
    delayed()->add(obj, con_size_in_bytes, free);
  } else {
    delayed()->add(obj, var_size_in_bytes, free);
  }

  bind(done);

#ifdef ASSERT
  // make sure new free pointer is properly aligned
  {
    Label L;
    btst(MinObjAlignmentInBytesMask, free);
    br(Assembler::zero, false, Assembler::pt, L);
    delayed()->nop();
    stop("updated TLAB free is not properly aligned");
    bind(L);
  }
#endif // ASSERT

  // update the tlab top pointer
  st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
  verify_tlab();
}
3959 3998
3960 3999
// Refill the current thread's TLAB:
//  - branch to slow_case when inline contiguous eden allocation is unavailable;
//  - if the remaining free space is above the refill-waste limit, keep the
//    TLAB, bump the limit (so we don't get stuck here), and branch to try_eden;
//  - otherwise discard the TLAB by overlaying its unused tail with a dummy
//    int array (keeps the heap parseable for walkers) and allocate a fresh
//    TLAB from eden, then branch to retry.
// Clobbers O0, O1, G1, G3; preserves G4 and G5 (see assert below).
void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  Register top = O0;
  Register t1 = G1;
  Register t2 = G3;
  Register t3 = O1;
  assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    br(Assembler::always, false, Assembler::pt, slow_case);
    delayed()->nop();
  }

  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);

  // calculate amount of free space
  sub(t1, top, t1);
  srl_ptr(t1, LogHeapWordSize, t1);   // t1 := free space in heap words

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmp(t1, t2);
  brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);

  // increment waste limit to prevent getting stuck on this slow path
  delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
  st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  if (TLABStats) {
    // increment number of slow_allocations
    ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
    add(t2, 1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  }
  br(Assembler::always, false, Assembler::pt, try_eden);
  delayed()->nop();

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
    add(t2, 1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    // accumulate wastage
    ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
    add(t2, t1, t2);
    stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  br_null(top, false, Assembler::pn, do_refill);
  delayed()->nop();

  // Dummy object header: a hashed, unlocked mark word.
  set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
  st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
  // set klass to intArrayKlass
  // Convert t1 (free heap words) to the int-array length that covers the
  // unused tail plus the alignment reserve, minus the array header.
  sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
  add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
  sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
  st(t1, top, arrayOopDesc::length_offset_in_bytes());
  set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
  ld_ptr(t2, 0, t2);
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(t2, top);
  verify_oop(top);

  // refill the tlab with an eden allocation
  bind(do_refill);
  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
  sll_ptr(t1, LogHeapWordSize, t1);  // t1 := new tlab size in bytes
  // add object_size ??
  eden_allocate(top, t1, 0, t2, t3, slow_case);

  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
#ifdef ASSERT
  // check that tlab_size (t1) is still valid
  {
    Label ok;
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
    sll_ptr(t2, LogHeapWordSize, t2);
    cmp(t1, t2);
    br(Assembler::equal, false, Assembler::pt, ok);
    delayed()->nop();
    stop("assert(t1 == tlab_size)");
    should_not_reach_here();

    bind(ok);
  }
#endif // ASSERT
  add(top, t1, top); // t1 is tlab_size
  // Leave room at the end for the alignment reserve (dummy-array filler).
  sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
  verify_tlab();
  br(Assembler::always, false, Assembler::pt, retry);
  delayed()->nop();
}
4062 4101
4063 4102 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
4064 4103 switch (cond) {
4065 4104 // Note some conditions are synonyms for others
4066 4105 case Assembler::never: return Assembler::always;
4067 4106 case Assembler::zero: return Assembler::notZero;
4068 4107 case Assembler::lessEqual: return Assembler::greater;
4069 4108 case Assembler::less: return Assembler::greaterEqual;
4070 4109 case Assembler::lessEqualUnsigned: return Assembler::greaterUnsigned;
4071 4110 case Assembler::lessUnsigned: return Assembler::greaterEqualUnsigned;
4072 4111 case Assembler::negative: return Assembler::positive;
4073 4112 case Assembler::overflowSet: return Assembler::overflowClear;
4074 4113 case Assembler::always: return Assembler::never;
4075 4114 case Assembler::notZero: return Assembler::zero;
4076 4115 case Assembler::greater: return Assembler::lessEqual;
4077 4116 case Assembler::greaterEqual: return Assembler::less;
4078 4117 case Assembler::greaterUnsigned: return Assembler::lessEqualUnsigned;
4079 4118 case Assembler::greaterEqualUnsigned: return Assembler::lessUnsigned;
4080 4119 case Assembler::positive: return Assembler::negative;
4081 4120 case Assembler::overflowClear: return Assembler::overflowSet;
4082 4121 }
4083 4122
4084 4123 ShouldNotReachHere(); return Assembler::overflowClear;
4085 4124 }
4086 4125
4087 4126 void MacroAssembler::cond_inc(Assembler::Condition cond, address counter_ptr,
4088 4127 Register Rtmp1, Register Rtmp2 /*, Register Rtmp3, Register Rtmp4 */) {
4089 4128 Condition negated_cond = negate_condition(cond);
4090 4129 Label L;
4091 4130 brx(negated_cond, false, Assembler::pt, L);
4092 4131 delayed()->nop();
4093 4132 inc_counter(counter_ptr, Rtmp1, Rtmp2);
4094 4133 bind(L);
4095 4134 }
4096 4135
4097 4136 void MacroAssembler::inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2) {
4098 4137 AddressLiteral addrlit(counter_addr);
4099 4138 sethi(addrlit, Rtmp1); // Move hi22 bits into temporary register.
4100 4139 Address addr(Rtmp1, addrlit.low10()); // Build an address with low10 bits.
4101 4140 ld(addr, Rtmp2);
4102 4141 inc(Rtmp2);
4103 4142 st(Rtmp2, addr);
4104 4143 }
4105 4144
// Convenience overload: treat the int counter's address as a raw address.
void MacroAssembler::inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2) {
  inc_counter((address) counter_addr, Rtmp1, Rtmp2);
}
4109 4148
// RAII code-emission helper: the constructor emits a test of the byte flag
// at flag_addr and a branch (on 'condition') over the bracketed region to
// _label; the destructor binds _label at the region's end.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, Register temp, const bool* flag_addr,
    Assembler::Condition condition) {
  _masm = masm;
  AddressLiteral flag(flag_addr);
  _masm->sethi(flag, temp);               // high 22 bits of flag address
  _masm->ldub(temp, flag.low10(), temp);  // temp := *flag_addr (one byte)
  _masm->tst(temp);
  _masm->br(condition, false, Assembler::pt, _label);
  _masm->delayed()->nop();
}
4121 4160
// Bind the skip target at the end of the bracketed code region.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
4125 4164
4126 4165
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tsp and scratch.
void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp,
                                     Register Rscratch) {
  // Use stack pointer in temp stack pointer
  mov(SP, Rtsp);

  // Bang stack for total size given plus stack shadow page size.
  // Bang one page at a time because a large size can overflow yellow and
  // red zones (the bang will fail but stack overflow handling can't tell that
  // it was a stack overflow bang vs a regular segv).
  int offset = os::vm_page_size();
  Register Roffset = Rscratch;

  Label loop;
  bind(loop);
  // Touch one word a page below Rtsp; STACK_BIAS compensates for the
  // biased stack pointer on 64-bit SPARC.
  set((-offset)+STACK_BIAS, Rscratch);
  st(G0, Rtsp, Rscratch);
  set(offset, Roffset);
  sub(Rsize, Roffset, Rsize);
  cmp(Rsize, G0);
  br(Assembler::greater, false, Assembler::pn, loop);
  delayed()->sub(Rtsp, Roffset, Rtsp);  // move Rtsp down one page per iteration

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i < StackShadowPages-1; i++) {
    set((-i*offset)+STACK_BIAS, Rscratch);
    st(G0, Rtsp, Rscratch);
  }
}
4158 4197
4159 4198 ///////////////////////////////////////////////////////////////////////////////////
4160 4199 #ifndef SERIALGC
4161 4200
// Instrumentation counters for the G1 pre-barrier (debugging aid).
// NOTE(review): updated without synchronization, so the statistics are
// only approximate under concurrent stores.
static uint num_stores = 0;
static uint num_null_pre_stores = 0;

// Called from code generated under G1SATBBarrierPrintNullPreVals: counts
// how many pre-barrier stores saw a NULL previous value and periodically
// prints the running ratio.
static void count_null_pre_vals(void* pre_val) {
  num_stores++;
  if (pre_val == NULL) num_null_pre_stores++;
  if ((num_stores % 1000000) == 0) {
    tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
                  num_stores, num_null_pre_stores,
                  100.0*(float)num_null_pre_stores/(float)num_stores);
  }
}
4174 4213
// Entry points (and end addresses, kept for disassembly) of the lazily
// generated SATB enqueue stubs; see generate_satb_log_enqueue below.
static address satb_log_enqueue_with_frame = 0;
static u_char* satb_log_enqueue_with_frame_end = 0;

static address satb_log_enqueue_frameless = 0;
static u_char* satb_log_enqueue_frameless_end = 0;

// Code-buffer size (bytes) for each enqueue stub; debug builds emit more.
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?

// The calls to this don't work. We'd need to do a fair amount of work to
// make it work.
static void check_index(int ind) {
  // A queue index must lie within the 64K buffer and be oop-aligned.
  assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
         "Invariants.");
}
4189 4228
// Generate the out-of-line stub that pushes the pre-barrier value (passed
// in O0) onto the current thread's SATB mark queue.  Two flavors exist:
// with_frame == true opens its own register window; with_frame == false
// runs as a leaf.  A zero queue index means the buffer is full, in which
// case the stub calls into the VM for a fresh buffer and retries.
static void generate_satb_log_enqueue(bool with_frame) {
  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
  CodeBuffer buf(bb);
  MacroAssembler masm(&buf);
  address start = masm.pc();
  Register pre_val;

  Label refill, restart;
  if (with_frame) {
    masm.save_frame(0);
    pre_val = I0;  // Was O0 before the save.
  } else {
    pre_val = O0;
  }
  int satb_q_index_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             PtrQueue::byte_offset_of_index());
  int satb_q_buf_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             PtrQueue::byte_offset_of_buf());
  assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
         in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
         "check sizes in assembly below");

  masm.bind(restart);
  masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);

  // index == 0 means the buffer is full: go refill it.
  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
  // If the branch is taken, no harm in executing this in the delay slot.
  masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
  masm.sub(L0, oopSize, L0);  // the queue fills from high index toward zero

  masm.st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
  if (!with_frame) {
    // Use return-from-leaf
    masm.retl();
    masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  } else {
    // Not delayed.
    masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  }
  if (with_frame) {
    masm.ret();
    masm.delayed()->restore();
  }
  masm.bind(refill);

  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &SATBMarkQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  masm.mov(G1_scratch, L0);
  masm.mov(G3_scratch, L1);
  masm.mov(G4, L2);
  // We need the value of O0 above (for the write into the buffer), so we
  // save and restore it.
  masm.mov(O0, L3);
  // Since the call will overwrite O7, we save and restore that, as well.
  masm.mov(O7, L4);
  masm.call_VM_leaf(L5, handle_zero, G2_thread);
  masm.mov(L0, G1_scratch);
  masm.mov(L1, G3_scratch);
  masm.mov(L2, G4);
  masm.mov(L3, O0);
  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  masm.delayed()->mov(L4, O7);

  // Publish the stub's bounds for the lazy-generation check and debugging.
  if (with_frame) {
    satb_log_enqueue_with_frame = start;
    satb_log_enqueue_with_frame_end = masm.pc();
  } else {
    satb_log_enqueue_frameless = start;
    satb_log_enqueue_frameless_end = masm.pc();
  }
}
4266 4305
4267 4306 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
4268 4307 if (with_frame) {
4269 4308 if (satb_log_enqueue_with_frame == 0) {
4270 4309 generate_satb_log_enqueue(with_frame);
4271 4310 assert(satb_log_enqueue_with_frame != 0, "postcondition.");
4272 4311 if (G1SATBPrintStubs) {
4273 4312 tty->print_cr("Generated with-frame satb enqueue:");
4274 4313 Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
4275 4314 satb_log_enqueue_with_frame_end,
4276 4315 tty);
4277 4316 }
4278 4317 }
4279 4318 } else {
4280 4319 if (satb_log_enqueue_frameless == 0) {
4281 4320 generate_satb_log_enqueue(with_frame);
4282 4321 assert(satb_log_enqueue_frameless != 0, "postcondition.");
4283 4322 if (G1SATBPrintStubs) {
4284 4323 tty->print_cr("Generated frameless satb enqueue:");
4285 4324 Disassembler::decode((u_char*)satb_log_enqueue_frameless,
4286 4325 satb_log_enqueue_frameless_end,
4287 4326 tty);
4288 4327 }
4289 4328 }
4290 4329 }
4291 4330 }
4292 4331
// G1 pre-barrier (SATB).  Before a field write, record the field's previous
// value in the thread's SATB mark queue so concurrent marking sees a
// consistent snapshot.  Filtered out entirely when the queue's "active"
// flag is zero (no concurrent marking in progress) or when the previous
// value is NULL.  Exactly one of 'offset'/'index' addresses the field in
// 'obj'.  'tmp' is clobbered (it ends up holding the previous value).
void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
  assert(offset == 0 || index == noreg, "choose one");

  if (G1DisablePreBarrier) return;
  // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
  Label filtered;
  // satb_log_barrier_work0(tmp, filtered);
  // tmp := the queue's "active" flag (width differs by build).
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    ld(G2,
       in_bytes(JavaThread::satb_mark_queue_offset() +
                PtrQueue::byte_offset_of_active()),
       tmp);
  } else {
    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
              "Assumption");
    ldsb(G2,
         in_bytes(JavaThread::satb_mark_queue_offset() +
                  PtrQueue::byte_offset_of_active()),
         tmp);
  }

  // Check on whether to annul.  Not active => nothing to log.
  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
  delayed() -> nop();

  // satb_log_barrier_work1(tmp, offset);
  // Load the field's previous value into tmp.
  if (index == noreg) {
    if (Assembler::is_simm13(offset)) {
      load_heap_oop(obj, offset, tmp);
    } else {
      set(offset, tmp);
      load_heap_oop(obj, tmp, tmp);
    }
  } else {
    load_heap_oop(obj, index, tmp);
  }

  // satb_log_barrier_work2(obj, tmp, offset);

  // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);

  const Register pre_val = tmp;

  if (G1SATBBarrierPrintNullPreVals) {
    // Debug-only instrumentation: tally NULL previous values.
    save_frame(0);
    mov(pre_val, O0);
    // Save G-regs that target may use.
    mov(G1, L1);
    mov(G2, L2);
    mov(G3, L3);
    mov(G4, L4);
    mov(G5, L5);
    call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
    delayed()->nop();
    // Restore G-regs that target may have used.
    mov(L1, G1);
    mov(L2, G2);
    mov(L3, G3);
    mov(L4, G4);
    mov(L5, G5);
    restore(G0, G0, G0);
  }

  // Check on whether to annul.  A NULL previous value needs no logging.
  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
  delayed() -> nop();

  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
  // case, pre_val will be a scratch G-reg, but there's some cases in which
  // it's an O-reg.  In the first case, do a normal call.  In the latter,
  // do a save here and call the frameless version.

  guarantee(pre_val->is_global() || pre_val->is_out(),
            "Or we need to think harder.");
  if (pre_val->is_global() && !preserve_o_regs) {
    generate_satb_log_enqueue_if_necessary(true); // with frame.
    call(satb_log_enqueue_with_frame);
    delayed()->mov(pre_val, O0);
  } else {
    generate_satb_log_enqueue_if_necessary(false); // with frameless.
    save_frame(0);
    call(satb_log_enqueue_frameless);
    delayed()->mov(pre_val->after_save(), O0);
    restore();
  }

  bind(filtered);
}
4381 4420
// Instrumentation counters for the G1 post-barrier (G1PrintCTFilterStats);
// updated atomically by count_ct_writes below.
static jint num_ct_writes = 0;
static jint num_ct_writes_filtered_in_hr = 0;
static jint num_ct_writes_filtered_null = 0;
static G1CollectedHeap* g1 = NULL;  // lazily cached heap pointer
4386 4425
// Called from the barrier under G1PrintCTFilterStats: classifies each
// potential card-table write (filtered intra-region, filtered NULL store,
// or unfiltered) and periodically prints the running ratios.  Returns
// Thread::current() so the generated caller can restore G2 from the
// function's return value (see g1_write_barrier_post).
static Thread* count_ct_writes(void* filter_val, void* new_val) {
  Atomic::inc(&num_ct_writes);
  if (filter_val == NULL) {
    // filter_val is the caller's region-filter value ((addr ^ val) shifted
    // by the region-size log); zero means the store stayed in one region.
    Atomic::inc(&num_ct_writes_filtered_in_hr);
  } else if (new_val == NULL) {
    Atomic::inc(&num_ct_writes_filtered_null);
  } else {
    if (g1 == NULL) {
      // Cache the heap pointer on the first unfiltered write.
      g1 = G1CollectedHeap::heap();
    }
  }
  if ((num_ct_writes % 1000000) == 0) {
    jint num_ct_writes_filtered =
      num_ct_writes_filtered_in_hr +
      num_ct_writes_filtered_null;

    tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
                  " (%5.2f%% intra-HR, %5.2f%% null).",
                  num_ct_writes,
                  100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
                  100.0*(float)num_ct_writes_filtered_in_hr/
                  (float)num_ct_writes,
                  100.0*(float)num_ct_writes_filtered_null/
                  (float)num_ct_writes);
  }
  return Thread::current();
}
4414 4453
// Entry point (and end address, kept for disassembly) of the lazily
// generated dirty-card enqueue stub; see generate_dirty_card_log_enqueue.
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
4417 4456
// This gets to assume that o0 contains the object address.
// Generate the out-of-line stub for the G1 post-barrier: compute the card
// address for O0; if the card is already dirty, return; otherwise dirty it
// and push the card address onto the thread's dirty-card queue, calling
// into the VM for a fresh buffer when the queue index hits zero.
static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
  CodeBuffer buf(bb);
  MacroAssembler masm(&buf);
  address start = masm.pc();

  Label not_already_dirty, restart, refill;

  // O0 := card index of the store address.
#ifdef _LP64
  masm.srlx(O0, CardTableModRefBS::card_shift, O0);
#else
  masm.srl(O0, CardTableModRefBS::card_shift, O0);
#endif
  AddressLiteral addrlit(byte_map_base);
  masm.set(addrlit, O1); // O1 := <card table base>
  masm.ldub(O0, O1, O2); // O2 := [O0 + O1]

  masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
                      O2, not_already_dirty);
  // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
  // case, harmless if not.
  masm.delayed()->add(O0, O1, O3);

  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  masm.retl();
  masm.delayed()->nop();

  // Not dirty.
  masm.bind(not_already_dirty);
  // First, dirty it.
  masm.stb(G0, O3, G0);  // [cardPtr] := 0 (i.e., dirty).
  int dirty_card_q_index_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             PtrQueue::byte_offset_of_index());
  int dirty_card_q_buf_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             PtrQueue::byte_offset_of_buf());
  masm.bind(restart);
  masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);

  // index == 0 means the buffer is full: go refill it.
  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
                      L0, refill);
  // If the branch is taken, no harm in executing this in the delay slot.
  masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
  masm.sub(L0, oopSize, L0);  // the queue fills from high index toward zero

  masm.st_ptr(O3, L1, L0);  // [_buf + index] := I0
  // Use return-from-leaf
  masm.retl();
  masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);

  masm.bind(refill);
  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &DirtyCardQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  masm.mov(G1_scratch, L3);
  masm.mov(G3_scratch, L5);
  // We need the value of O3 above (for the write into the buffer), so we
  // save and restore it.
  masm.mov(O3, L6);
  // Since the call will overwrite O7, we save and restore that, as well.
  masm.mov(O7, L4);

  masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
  masm.mov(L3, G1_scratch);
  masm.mov(L5, G3_scratch);
  masm.mov(L6, O3);
  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  masm.delayed()->mov(L4, O7);

  // Publish the stub's bounds for the lazy-generation check and debugging.
  dirty_card_log_enqueue = start;
  dirty_card_log_enqueue_end = masm.pc();
  // XXX Should have a guarantee here about not going off the end!
  // Does it already do so?  Do an experiment...
}
4497 4536
4498 4537 static inline void
4499 4538 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
4500 4539 if (dirty_card_log_enqueue == 0) {
4501 4540 generate_dirty_card_log_enqueue(byte_map_base);
4502 4541 assert(dirty_card_log_enqueue != 0, "postcondition.");
4503 4542 if (G1SATBPrintStubs) {
4504 4543 tty->print_cr("Generated dirty_card enqueue:");
4505 4544 Disassembler::decode((u_char*)dirty_card_log_enqueue,
4506 4545 dirty_card_log_enqueue_end,
4507 4546 tty);
4508 4547 }
4509 4548 }
4510 4549 }
4511 4550
4512 4551
// G1 post-barrier.  After storing new_val at store_addr, dirty the card
// for store_addr and enqueue it (via the out-of-line stub), unless the
// write is filtered: constant-NULL stores, or (with
// G1RSBarrierRegionFilter) stores whose address and value lie in the same
// heap region.  'tmp' is clobbered.
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {

  Label filtered;
  MacroAssembler* post_filter_masm = this;

  if (new_val == G0) return;  // writing NULL needs no remembered-set work
  if (G1DisablePostBarrier) return;

  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::G1SATBCT ||
         bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
  if (G1RSBarrierRegionFilter) {
    // tmp := (store_addr ^ new_val) >> log2(region size); zero iff both
    // addresses fall in the same heap region, and the store can be skipped.
    xor3(store_addr, new_val, tmp);
#ifdef _LP64
    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#else
    srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#endif
    if (G1PrintCTFilterStats) {
      guarantee(tmp->is_global(), "Or stats won't work...");
      // This is a sleazy hack: I'm temporarily hijacking G2, which I
      // promise to restore.
      mov(new_val, G2);
      save_frame(0);
      mov(tmp, O0);
      mov(G2, O1);
      // Save G-regs that target may use.
      mov(G1, L1);
      mov(G2, L2);
      mov(G3, L3);
      mov(G4, L4);
      mov(G5, L5);
      call(CAST_FROM_FN_PTR(address, &count_ct_writes));
      delayed()->nop();
      // G2 is restored from the call's result: count_ct_writes returns
      // Thread::current() for exactly this purpose (hence no mov(L2, G2)).
      mov(O0, G2);
      // Restore G-regs that target may have used.
      mov(L1, G1);
      mov(L3, G3);
      mov(L4, G4);
      mov(L5, G5);
      restore(G0, G0, G0);
    }
    // XXX Should I predict this taken or not?  Does it matter?
    br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
    delayed()->nop();
  }

  // If the "store_addr" register is an "in" or "local" register, move it to
  // a scratch reg so we can pass it as an argument.
  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
  // Pick a scratch register different from "tmp".
  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
  // Make sure we use up the delay slot!
  if (use_scr) {
    post_filter_masm->mov(store_addr, scr);
  } else {
    post_filter_masm->nop();
  }
  generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
  save_frame(0);
  call(dirty_card_log_enqueue);
  if (use_scr) {
    delayed()->mov(scr, O0);
  } else {
    delayed()->mov(store_addr->after_save(), O0);
  }
  restore();

  bind(filtered);

}
4584 4623
4585 4624 #endif // SERIALGC
4586 4625 ///////////////////////////////////////////////////////////////////////////////////
4587 4626
// Non-G1 card-table post-barrier: dirty the card covering store_addr.
// 'tmp' is clobbered by the card-table write.
void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
  // If we're writing constant NULL, we can skip the write barrier.
  if (new_val == G0) return;
  CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef ||
         bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
  card_table_write(bs->byte_map_base, tmp, store_addr);
}
4596 4635
// Load the klass pointer of src_oop into 'klass', decompressing it when
// compressed oops are in use.
void MacroAssembler::load_klass(Register src_oop, Register klass) {
  // The number of bytes in this code is used by
  // MachCallDynamicJavaNode::ret_addr_offset()
  // if this changes, change that.
  if (UseCompressedOops) {
    lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass);
    decode_heap_oop_not_null(klass);
  } else {
    ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass);
  }
}
4608 4647
// Store 'klass' into dst_oop's header, compressing it first when
// compressed oops are in use.  NOTE: with compressed oops the 'klass'
// register is clobbered (left in encoded form).
void MacroAssembler::store_klass(Register klass, Register dst_oop) {
  if (UseCompressedOops) {
    assert(dst_oop != klass, "not enough registers");
    encode_heap_oop_not_null(klass);
    st(klass, dst_oop, oopDesc::klass_offset_in_bytes());
  } else {
    st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes());
  }
}
4618 4657
// With compressed oops the klass field is only 32 bits; store s into the
// adjacent gap word so the header is fully initialized.  No-op otherwise.
void MacroAssembler::store_klass_gap(Register s, Register d) {
  if (UseCompressedOops) {
    assert(s != d, "not enough registers");
    st(s, d, oopDesc::klass_gap_offset_in_bytes());
  }
}
4625 4664
// Load a (possibly compressed) heap oop from address s into d,
// decompressing it when compressed oops are in use.
void MacroAssembler::load_heap_oop(const Address& s, Register d) {
  if (UseCompressedOops) {
    lduw(s, d);
    decode_heap_oop(d);
  } else {
    ld_ptr(s, d);
  }
}
4634 4673
// Load a (possibly compressed) heap oop from [s1 + s2] into d,
// decompressing it when compressed oops are in use.
void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) {
  if (UseCompressedOops) {
    lduw(s1, s2, d);
    decode_heap_oop(d, d);
  } else {
    ld_ptr(s1, s2, d);
  }
}
4643 4682
// Load a (possibly compressed) heap oop from [s1 + simm13a] into d,
// decompressing it when compressed oops are in use.
void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) {
  if (UseCompressedOops) {
    lduw(s1, simm13a, d);
    decode_heap_oop(d, d);
  } else {
    ld_ptr(s1, simm13a, d);
  }
}
4652 4691
4653 4692 void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d) {
4654 4693 if (s2.is_constant()) load_heap_oop(s1, s2.as_constant(), d);
4655 4694 else load_heap_oop(s1, s2.as_register(), d);
4656 4695 }
4657 4696
// Store the heap oop in d to [s1 + s2], compressing it first when
// compressed oops are in use.  NOTE: with compressed oops, d is clobbered
// (left in encoded form).
void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) {
  if (UseCompressedOops) {
    assert(s1 != d && s2 != d, "not enough registers");
    encode_heap_oop(d);
    st(d, s1, s2);
  } else {
    st_ptr(d, s1, s2);
  }
}
4667 4706
// Store the heap oop in d to [s1 + simm13a], compressing it first when
// compressed oops are in use.  NOTE: with compressed oops, d is clobbered
// (left in encoded form).
void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) {
  if (UseCompressedOops) {
    assert(s1 != d, "not enough registers");
    encode_heap_oop(d);
    st(d, s1, simm13a);
  } else {
    st_ptr(d, s1, simm13a);
  }
}
4677 4716
// Store the heap oop in d to address a (+ offset), compressing it first
// when compressed oops are in use.  NOTE: with compressed oops, d is
// clobbered (left in encoded form).
void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {
  if (UseCompressedOops) {
    assert(a.base() != d, "not enough registers");
    encode_heap_oop(d);
    st(d, a, offset);
  } else {
    st_ptr(d, a, offset);
  }
}
4687 4726
4688 4727
// Compress the oop in src into dst, preserving NULL: NULL encodes to 0,
// a non-NULL oop to (oop - heap_base) >> LogMinObjAlignmentInBytes.
void MacroAssembler::encode_heap_oop(Register src, Register dst) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: encoding is a pure shift, and NULL (0)
    // maps to 0 with no branch needed.
    srlx(src, LogMinObjAlignmentInBytes, dst);
    return;
  }
  Label done;
  if (src == dst) {
    // optimize for frequent case src == dst
    bpr(rc_nz, true, Assembler::pt, src, done);
    delayed() -> sub(src, G6_heapbase, dst); // annuled if not taken
    bind(done);
    srlx(src, LogMinObjAlignmentInBytes, dst);
  } else {
    bpr(rc_z, false, Assembler::pn, src, done);
    delayed() -> mov(G0, dst);
    // could be moved before branch, and annulate delay,
    // but may add some unneeded work decoding null
    sub(src, G6_heapbase, dst);
    srlx(dst, LogMinObjAlignmentInBytes, dst);
    bind(done);
  }
}
4715 4754
4716 4755
4717 4756 void MacroAssembler::encode_heap_oop_not_null(Register r) {
4718 4757 assert (UseCompressedOops, "must be compressed");
4719 4758 assert (Universe::heap() != NULL, "java heap should be initialized");
4720 4759 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4721 4760 verify_oop(r);
4722 4761 if (Universe::narrow_oop_base() != NULL)
4723 4762 sub(r, G6_heapbase, r);
4724 4763 srlx(r, LogMinObjAlignmentInBytes, r);
4725 4764 }
4726 4765
4727 4766 void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
4728 4767 assert (UseCompressedOops, "must be compressed");
4729 4768 assert (Universe::heap() != NULL, "java heap should be initialized");
4730 4769 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
4731 4770 verify_oop(src);
4732 4771 if (Universe::narrow_oop_base() == NULL) {
4733 4772 srlx(src, LogMinObjAlignmentInBytes, dst);
4734 4773 } else {
4735 4774 sub(src, G6_heapbase, dst);
4736 4775 srlx(dst, LogMinObjAlignmentInBytes, dst);
4737 4776 }
4738 4777 }
4739 4778
// Same algorithm as oops.inline.hpp decode_heap_oop.
// Decompress the narrow oop in src into dst, preserving NULL: shift first,
// then add the heap base only for non-zero values (the add sits in an
// annulled delay slot, skipped when the value is zero).
void MacroAssembler::decode_heap_oop(Register src, Register dst) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  sllx(src, LogMinObjAlignmentInBytes, dst);
  if (Universe::narrow_oop_base() != NULL) {
    Label done;
    bpr(rc_nz, true, Assembler::pt, dst, done);
    delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
    bind(done);
  }
  verify_oop(dst);
}
4754 4793
// Decompress the known-non-NULL narrow oop in r, in place: shift by the
// object-alignment log and add the heap base (when one exists).
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
  // pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  sllx(r, LogMinObjAlignmentInBytes, r);
  if (Universe::narrow_oop_base() != NULL)
    add(r, G6_heapbase, r);
}
4766 4805
// Decompress the known-non-NULL narrow oop in src into dst: shift by the
// object-alignment log and add the heap base (when one exists).
void MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
  // pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  sllx(src, LogMinObjAlignmentInBytes, dst);
  if (Universe::narrow_oop_base() != NULL)
    add(dst, G6_heapbase, dst);
}
4778 4817
4779 4818 void MacroAssembler::reinit_heapbase() {
4780 4819 if (UseCompressedOops) {
4781 4820 // call indirectly to solve generation ordering problem
4782 4821 AddressLiteral base(Universe::narrow_oop_base_addr());
4783 4822 load_ptr_contents(base, G6_heapbase);
4784 4823 }
4785 4824 }
4786 4825
// Compare char[] arrays aligned to 4 bytes.
//
// On inequality, branches to Ldone with result set to 0; on equality,
// falls through with result NOT yet set — the caller is expected to emit
// the "result = 1" on the fall-through path (see note at the end).
// Registers: ary1/ary2 point at the array data, limit holds the byte
// count (must be non-zero), chr1/chr2 are scratch, and chr1 must alias
// result (asserted below).
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
  Label Lvector, Lloop;
  assert(chr1 == result, "should be the same");

  // Note: limit contains number of bytes (2*char_elements) != 0.
  andcc(limit, 0x2, chr1); // trailing character ?
  br(Assembler::zero, false, Assembler::pt, Lvector);
  delayed()->nop();

  // compare the trailing char (odd char count); this also 2-byte-aligns
  // limit for the 4-byte word loop below
  sub(limit, sizeof(jchar), limit);
  lduh(ary1, limit, chr1);
  lduh(ary2, limit, chr2);
  cmp(chr1, chr2);
  br(Assembler::notEqual, true, Assembler::pt, Ldone);
  delayed()->mov(G0, result); // not equal; annulled mov runs only if taken

  // only one char ?
  br_on_reg_cond(rc_z, true, Assembler::pn, limit, Ldone);
  delayed()->add(G0, 1, result); // zero-length arrays are equal

  // word by word compare, don't need alignment check
  bind(Lvector);
  // Shift ary1 and ary2 to the end of the arrays, negate limit
  // so that (aryN + limit) walks forward as limit counts up to zero
  add(ary1, limit, ary1);
  add(ary2, limit, ary2);
  neg(limit, limit);

  // first load hoisted ahead of the loop; subsequent iterations reload
  // it in the loop branch's delay slot
  lduw(ary1, limit, chr1);
  bind(Lloop);
  lduw(ary2, limit, chr2);
  cmp(chr1, chr2);
  br(Assembler::notEqual, true, Assembler::pt, Ldone);
  delayed()->mov(G0, result); // not equal
  inccc(limit, 2*sizeof(jchar));
  // annul LDUW if branch is not taken to prevent access past end of array
  br(Assembler::notZero, true, Assembler::pt, Lloop);
  delayed()->lduw(ary1, limit, chr1); // hoisted

  // Caller should set it:
  // add(G0, 1, result); // equals
}
4832 4871
↓ open down ↓ |
3393 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX