393 #ifndef AARCH64
394 ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
395 #endif // !AARCH64
396
397 b(loop, ne);
398
399 b(not_subtype);
400
401 bind(update_cache);
402 // Must be equal but missed in cache. Update cache.
403 str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset()));
404
405 bind(ok_is_subtype);
406 }
407
408
409 // The 1st part of the store check.
410 // Sets card_table_base register.
411 void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
412 // Check barrier set type (should be card table) and element size
413 BarrierSet* bs = Universe::heap()->barrier_set();
414 assert(bs->kind() == BarrierSet::CardTableBarrierSet,
415 "Wrong barrier set kind");
416
417 CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
418 CardTable* ct = ctbs->card_table();
419 assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "Adjust store check code");
420
421 // Load card table base address.
422
423 /* Performance note.
424
425 There is an alternative way of loading card table base address
426 from thread descriptor, which may look more efficient:
427
428 ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));
429
430 However, performance measurements of micro benchmarks and specJVM98
431 showed that loading of card table base from thread descriptor is
432 7-18% slower compared to loading of literal embedded into the code.
433 Possible cause is a cache miss (card table base address resides in a
460 ldrb(tmp, card_table_addr);
461 cbz(tmp, already_dirty);
462
463 set_card(card_table_base, card_table_addr, tmp);
464 bind(already_dirty);
465
466 } else {
467 #if INCLUDE_ALL_GCS
468 if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
469 membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
470 }
471 #endif
472 set_card(card_table_base, card_table_addr, tmp);
473 }
474 }
475
476 void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) {
477 #ifdef AARCH64
478 strb(ZR, card_table_addr);
479 #else
480 CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
481 CardTable* ct = ctbs->card_table();
482 if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) {
483 // Card table is aligned so the lowest byte of the table address base is zero.
484 // This works only if the code is not saved for later use, possibly
485 // in a context where the base would no longer be aligned.
486 strb(card_table_base, card_table_addr);
487 } else {
488 mov(tmp, 0);
489 strb(tmp, card_table_addr);
490 }
491 #endif // AARCH64
492 }
493
494 //////////////////////////////////////////////////////////////////////////////////
495
496
497 // Java Expression Stack
498
499 void InterpreterMacroAssembler::pop_ptr(Register r) {
500 assert(r != Rstack_top, "unpredictable instruction");
|
393 #ifndef AARCH64
394 ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
395 #endif // !AARCH64
396
397 b(loop, ne);
398
399 b(not_subtype);
400
401 bind(update_cache);
402 // Must be equal but missed in cache. Update cache.
403 str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset()));
404
405 bind(ok_is_subtype);
406 }
407
408
409 // The 1st part of the store check.
410 // Sets card_table_base register.
411 void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
412 // Check barrier set type (should be card table) and element size
413 BarrierSet* bs = BarrierSet::barrier_set();
414 assert(bs->kind() == BarrierSet::CardTableBarrierSet,
415 "Wrong barrier set kind");
416
417 CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
418 CardTable* ct = ctbs->card_table();
419 assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "Adjust store check code");
420
421 // Load card table base address.
422
423 /* Performance note.
424
425 There is an alternative way of loading card table base address
426 from thread descriptor, which may look more efficient:
427
428 ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));
429
430 However, performance measurements of micro benchmarks and specJVM98
431 showed that loading of card table base from thread descriptor is
432 7-18% slower compared to loading of literal embedded into the code.
433 Possible cause is a cache miss (card table base address resides in a
460 ldrb(tmp, card_table_addr);
461 cbz(tmp, already_dirty);
462
463 set_card(card_table_base, card_table_addr, tmp);
464 bind(already_dirty);
465
466 } else {
467 #if INCLUDE_ALL_GCS
468 if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
469 membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
470 }
471 #endif
472 set_card(card_table_base, card_table_addr, tmp);
473 }
474 }
475
476 void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) {
477 #ifdef AARCH64
478 strb(ZR, card_table_addr);
479 #else
480 CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
481 CardTable* ct = ctbs->card_table();
482 if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) {
483 // Card table is aligned so the lowest byte of the table address base is zero.
484 // This works only if the code is not saved for later use, possibly
485 // in a context where the base would no longer be aligned.
486 strb(card_table_base, card_table_addr);
487 } else {
488 mov(tmp, 0);
489 strb(tmp, card_table_addr);
490 }
491 #endif // AARCH64
492 }
493
494 //////////////////////////////////////////////////////////////////////////////////
495
496
497 // Java Expression Stack
498
499 void InterpreterMacroAssembler::pop_ptr(Register r) {
500 assert(r != Rstack_top, "unpredictable instruction");
|