src/share/vm/opto/graphKit.cpp

rev 4495: 8012715: Fix G1 bug in graphKit.cpp: Access of PtrQueue::_index was int not size_t.

In graphKit.cpp, 32-bit int operations were generated to access PtrQueue::_index,
which has type size_t and is therefore 64 bits wide on 64-bit machines. No problem
occurs on little-endian machines as long as the index fits into 32 bits, because
the narrow load happens to return the low half of the value. On big-endian machines,
however, the same load reads the upper half, which is always zero, so the compiled
"index != 0" fast-path test always fails. This leads to unnecessary branches to the
slow path in the runtime.

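The following minimal, standalone C++ sketch (illustrative only, not part of the
patch; the variable names are made up) shows the failure mode: a 32-bit load at the
address of a 64-bit size_t field returns the low half on a little-endian machine
but the high half on a big-endian one.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Stand-in for PtrQueue::_index on a 64-bit VM: a size_t holding a
  // small byte offset into the thread-local log buffer.
  size_t index = 24;

  // Mimic what the generated barrier code effectively did: a 32-bit
  // load at the base address of the 64-bit field.
  int32_t narrow;
  std::memcpy(&narrow, &index, sizeof(narrow));

  // Little endian: prints 24 (the low half, correct by accident).
  // Big endian:    prints 0 (the high half), so the "index != 0"
  // fast-path test always fails and every barrier falls through to
  // the g1_wb_pre / g1_wb_post runtime calls.
  std::printf("narrow view of index = %d\n", narrow);
  return 0;
}

Accordingly, the patch below loads _index at machine-word width (TypeX_X, SubXNode,
ConX) instead of as a 32-bit int (TypeInt::INT, SubI, ConI), which also makes the
explicit ConvI2L widening under #ifdef _LP64 unnecessary.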

--- old version (int-width access of _index):

3548   if (do_load) {
3549     // We need to generate the load of the previous value
3550     assert(obj != NULL, "must have a base");
3551     assert(adr != NULL, "where are we loading from?");
3552     assert(pre_val == NULL, "loaded already?");
3553     assert(val_type != NULL, "need a type");
3554   } else {
3555     // In this case both val_type and alias_idx are unused.
3556     assert(pre_val != NULL, "must be loaded already");
3557     assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
3558   }
3559   assert(bt == T_OBJECT, "or we shouldn't be here");
3560 
3561   IdealKit ideal(this, true);
3562 
3563   Node* tls = __ thread(); // ThreadLocalStorage
3564 
3565   Node* no_ctrl = NULL;
3566   Node* no_base = __ top();
3567   Node* zero = __ ConI(0);

3568 
3569   float likely  = PROB_LIKELY(0.999);
3570   float unlikely  = PROB_UNLIKELY(0.999);
3571 
3572   BasicType active_type = in_bytes(PtrQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
3573   assert(in_bytes(PtrQueue::byte_width_of_active()) == 4 || in_bytes(PtrQueue::byte_width_of_active()) == 1, "flag width");
3574 
3575   // Offsets into the thread
3576   const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() +  // 648
3577                                           PtrQueue::byte_offset_of_active());
3578   const int index_offset   = in_bytes(JavaThread::satb_mark_queue_offset() +  // 656
3579                                           PtrQueue::byte_offset_of_index());
3580   const int buffer_offset  = in_bytes(JavaThread::satb_mark_queue_offset() +  // 652
3581                                           PtrQueue::byte_offset_of_buf());
3582 
3583   // Now the actual pointers into the thread
3584   Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
3585   Node* buffer_adr  = __ AddP(no_base, tls, __ ConX(buffer_offset));
3586   Node* index_adr   = __ AddP(no_base, tls, __ ConX(index_offset));
3587 
3588   // Now some of the values
3589   Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
3590 
3591   // if (!marking)
3592   __ if_then(marking, BoolTest::ne, zero); {
3593     Node* index   = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);


3594 
3595     if (do_load) {
3596       // load original value
3597       // alias_idx correct??
3598       pre_val = __ load(no_ctrl, adr, val_type, bt, alias_idx);
3599     }
3600 
3601     // if (pre_val != NULL)
3602     __ if_then(pre_val, BoolTest::ne, null()); {
3603       Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
3604 
3605       // is the queue for this thread full?
3606       __ if_then(index, BoolTest::ne, zero, likely); {
3607 
3608         // decrement the index
3609         Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
3610         Node* next_indexX = next_index;
3611 #ifdef _LP64
3612         // We could refine the type for what it's worth
3613         // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
3614         next_indexX = _gvn.transform( new (C) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
3615 #endif
3616 
3617         // Now get the buffer location we will log the previous value into and store it
3618         Node *log_addr = __ AddP(no_base, buffer, next_indexX);
3619         __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw);
3620         // update the index
3621         __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
3622 
3623       } __ else_(); {
3624 
3625         // logging buffer is full, call the runtime
3626         const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
3627         __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
3628       } __ end_if();  // (!index)
3629     } __ end_if();  // (pre_val != NULL)
3630   } __ end_if();  // (!marking)
3631 
3632   // Final sync IdealKit and GraphKit.
3633   final_sync(ideal);
3634 }
3635 
3636 //
3637 // Update the card table and add card address to the queue
3638 //
3639 void GraphKit::g1_mark_card(IdealKit& ideal,
3640                             Node* card_adr,
3641                             Node* oop_store,
3642                             uint oop_alias_idx,
3643                             Node* index,
3644                             Node* index_adr,
3645                             Node* buffer,
3646                             const TypeFunc* tf) {
3647 
3648   Node* zero = __ ConI(0);

3649   Node* no_base = __ top();
3650   BasicType card_bt = T_BYTE;
3651   // Smash zero into card. MUST BE ORDERED WRT STORE
3652   __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
3653 
3654   //  Now do the queue work
3655   __ if_then(index, BoolTest::ne, zero); {
3656 
3657     Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t)));
3658     Node* next_indexX = next_index;
3659 #ifdef _LP64
3660     // We could refine the type for what it's worth
3661     // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
3662     next_indexX = _gvn.transform( new (C) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
3663 #endif // _LP64
3664     Node* log_addr = __ AddP(no_base, buffer, next_indexX);
3665 
3666     __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw);
3667     __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
3668 
3669   } __ else_(); {
3670     __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
3671   } __ end_if();
3672 
3673 }
3674 
3675 void GraphKit::g1_write_barrier_post(Node* oop_store,
3676                                      Node* obj,
3677                                      Node* adr,
3678                                      uint alias_idx,
3679                                      Node* val,
3680                                      BasicType bt,
3681                                      bool use_precise) {
3682   // If we are writing a NULL then we need no post barrier
3683 
3684   if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
3685     // Must be NULL
3686     const Type* t = val->bottom_type();
3687     assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");


3708 
3709   // Get the alias_index for raw card-mark memory
3710   const TypePtr* card_type = TypeRawPtr::BOTTOM;
3711 
3712   const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
3713 
3714   // Offsets into the thread
3715   const int index_offset  = in_bytes(JavaThread::dirty_card_queue_offset() +
3716                                      PtrQueue::byte_offset_of_index());
3717   const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() +
3718                                      PtrQueue::byte_offset_of_buf());
3719 
3720   // Pointers into the thread
3721 
3722   Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
3723   Node* index_adr =  __ AddP(no_base, tls, __ ConX(index_offset));
3724 
3725   // Now some values
3726   // Use ctrl to avoid hoisting these values past a safepoint, which could
3727   // potentially reset these fields in the JavaThread.
3728   Node* index  = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
3729   Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
3730 
3731   // Convert the store obj pointer to an int prior to doing math on it
3732   // Must use ctrl to prevent "integerized oop" existing across safepoint
3733   Node* cast =  __ CastPX(__ ctrl(), adr);
3734 
3735   // Divide pointer by card size
3736   Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
3737 
3738   // Combine card table base and card offset
3739   Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
3740 
3741   // If we know the value being stored, check whether it crosses regions
3742 
3743   if (val != NULL) {
3744     // Does the store cause us to cross regions?
3745 
3746     // Should be able to do an unsigned compare of region_size instead of
3747     // an extra shift. Do we have an unsigned compare??
3748     // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);


+++ new version (word-width access of _index):

3548   if (do_load) {
3549     // We need to generate the load of the previous value
3550     assert(obj != NULL, "must have a base");
3551     assert(adr != NULL, "where are we loading from?");
3552     assert(pre_val == NULL, "loaded already?");
3553     assert(val_type != NULL, "need a type");
3554   } else {
3555     // In this case both val_type and alias_idx are unused.
3556     assert(pre_val != NULL, "must be loaded already");
3557     assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
3558   }
3559   assert(bt == T_OBJECT, "or we shouldn't be here");
3560 
3561   IdealKit ideal(this, true);
3562 
3563   Node* tls = __ thread(); // ThreadLocalStorage
3564 
3565   Node* no_ctrl = NULL;
3566   Node* no_base = __ top();
3567   Node* zero  = __ ConI(0);
3568   Node* zeroX = __ ConX(0);
3569 
3570   float likely  = PROB_LIKELY(0.999);
3571   float unlikely  = PROB_UNLIKELY(0.999);
3572 
3573   BasicType active_type = in_bytes(PtrQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
3574   assert(in_bytes(PtrQueue::byte_width_of_active()) == 4 || in_bytes(PtrQueue::byte_width_of_active()) == 1, "flag width");
3575 
3576   // Offsets into the thread
3577   const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() +  // 648
3578                                           PtrQueue::byte_offset_of_active());
3579   const int index_offset   = in_bytes(JavaThread::satb_mark_queue_offset() +  // 656
3580                                           PtrQueue::byte_offset_of_index());
3581   const int buffer_offset  = in_bytes(JavaThread::satb_mark_queue_offset() +  // 652
3582                                           PtrQueue::byte_offset_of_buf());
3583 
3584   // Now the actual pointers into the thread
3585   Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
3586   Node* buffer_adr  = __ AddP(no_base, tls, __ ConX(buffer_offset));
3587   Node* index_adr   = __ AddP(no_base, tls, __ ConX(index_offset));
3588 
3589   // Now some of the values
3590   Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
3591 
3592   // if (!marking)
3593   __ if_then(marking, BoolTest::ne, zero); {
3594     BasicType index_bt = TypeX_X->basic_type();
3595     assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 PtrQueue::_index with wrong size.");
3596     Node* index   = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
3597 
3598     if (do_load) {
3599       // load original value
3600       // alias_idx correct??
3601       pre_val = __ load(no_ctrl, adr, val_type, bt, alias_idx);
3602     }
3603 
3604     // if (pre_val != NULL)
3605     __ if_then(pre_val, BoolTest::ne, null()); {
3606       Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
3607 
3608       // is the queue for this thread full?
3609       __ if_then(index, BoolTest::ne, zeroX, likely); {
3610 
3611         // decrement the index
3612         Node* next_index = _gvn.transform(new (C) SubXNode(index, __ ConX(sizeof(intptr_t))));






3613 
3614         // Now get the buffer location we will log the previous value into and store it
3615         Node *log_addr = __ AddP(no_base, buffer, next_index);
3616         __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw);
3617         // update the index
3618         __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw);
3619 
3620       } __ else_(); {
3621 
3622         // logging buffer is full, call the runtime
3623         const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
3624         __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
3625       } __ end_if();  // (!index)
3626     } __ end_if();  // (pre_val != NULL)
3627   } __ end_if();  // (!marking)
3628 
3629   // Final sync IdealKit and GraphKit.
3630   final_sync(ideal);
3631 }
3632 
3633 //
3634 // Update the card table and add card address to the queue
3635 //
3636 void GraphKit::g1_mark_card(IdealKit& ideal,
3637                             Node* card_adr,
3638                             Node* oop_store,
3639                             uint oop_alias_idx,
3640                             Node* index,
3641                             Node* index_adr,
3642                             Node* buffer,
3643                             const TypeFunc* tf) {
3644 
3645   Node* zero  = __ ConI(0);
3646   Node* zeroX = __ ConX(0);
3647   Node* no_base = __ top();
3648   BasicType card_bt = T_BYTE;
3649   // Smash zero into card. MUST BE ORDERED WRT STORE
3650   __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
3651 
3652   //  Now do the queue work
3653   __ if_then(index, BoolTest::ne, zeroX); {
3654 
3655     Node* next_index = _gvn.transform(new (C) SubXNode(index, __ ConX(sizeof(intptr_t))));
3656     Node* log_addr = __ AddP(no_base, buffer, next_index);






3657 
3658     __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw);
3659     __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw);
3660 
3661   } __ else_(); {
3662     __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
3663   } __ end_if();
3664 
3665 }
3666 
3667 void GraphKit::g1_write_barrier_post(Node* oop_store,
3668                                      Node* obj,
3669                                      Node* adr,
3670                                      uint alias_idx,
3671                                      Node* val,
3672                                      BasicType bt,
3673                                      bool use_precise) {
3674   // If we are writing a NULL then we need no post barrier
3675 
3676   if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
3677     // Must be NULL
3678     const Type* t = val->bottom_type();
3679     assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");


3700 
3701   // Get the alias_index for raw card-mark memory
3702   const TypePtr* card_type = TypeRawPtr::BOTTOM;
3703 
3704   const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
3705 
3706   // Offsets into the thread
3707   const int index_offset  = in_bytes(JavaThread::dirty_card_queue_offset() +
3708                                      PtrQueue::byte_offset_of_index());
3709   const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() +
3710                                      PtrQueue::byte_offset_of_buf());
3711 
3712   // Pointers into the thread
3713 
3714   Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
3715   Node* index_adr =  __ AddP(no_base, tls, __ ConX(index_offset));
3716 
3717   // Now some values
3718   // Use ctrl to avoid hoisting these values past a safepoint, which could
3719   // potentially reset these fields in the JavaThread.
3720   Node* index  = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
3721   Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
3722 
3723   // Convert the store obj pointer to an int prior to doing math on it
3724   // Must use ctrl to prevent "integerized oop" existing across safepoint
3725   Node* cast =  __ CastPX(__ ctrl(), adr);
3726 
3727   // Divide pointer by card size
3728   Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
3729 
3730   // Combine card table base and card offset
3731   Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
3732 
3733   // If we know the value being stored, check whether it crosses regions
3734 
3735   if (val != NULL) {
3736     // Does the store cause us to cross regions?
3737 
3738     // Should be able to do an unsigned compare of region_size instead of
3739     // an extra shift. Do we have an unsigned compare??
3740     // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);