
src/hotspot/cpu/sparc/templateTable_sparc.cpp

rev 47680 : [mq]: x86_tlab
rev 47682 : [mq]: sparc2


3241   __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243   __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244   // get InstanceKlass
3245   __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246 
3247   // make sure klass is fully initialized:
3248   __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249   __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251   __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252 
3253   // get instance_size in InstanceKlass (already aligned)
3254   //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255 
3256   // make sure klass does not have a finalizer, is not abstract, not an interface, and not java/lang/Class
3257   __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258   __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259   __ delayed()->nop();
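
For context, the btst above relies on the layout-helper encoding for instance klasses: when the low bit (Klass::_lh_instance_slow_path_bit) is clear, the remaining value is the already-aligned instance size in bytes, so Roffset can be used directly as the allocation size. A minimal C++ sketch of that decode, under that assumed encoding (slow_path_bit and fast_path_instance_size are illustrative names, not HotSpot API):

    // Hypothetical stand-in for Klass::_lh_instance_slow_path_bit (bit 0).
    const int slow_path_bit = 0x1;

    // Returns the instance size in bytes for the fast path, or 0 when the
    // slow-path bit is set (finalizer, abstract, interface, java/lang/Class).
    int fast_path_instance_size(int layout_helper) {
      if (layout_helper & slow_path_bit) {
        return 0;                    // corresponds to branching to slow_case
      }
      return layout_helper;          // already word-aligned size in bytes
    }
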
3260 
3261   // allocate the instance
3262   // 1) Try to allocate in the TLAB.
3263   // 2) If that fails and the TLAB still has too much free space to be worth discarding, allocate in the shared Eden.
3264   // 3) If the above fails (or is not applicable), go to the slow case
3265   //    (which creates a new TLAB, etc.)
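
As a hedged C++ sketch of the control flow the old code below emits (pseudocode only; the boolean parameters stand in for the run-time conditions the generated SPARC code tests):

    enum class AllocPath { TlabFast, EdenFast, SlowCase };

    AllocPath old_allocation_flow(bool use_tlab, bool allow_shared_alloc,
                                  bool fits_in_tlab, bool tlab_waste_acceptable,
                                  bool fits_in_eden) {
      if (use_tlab) {
        if (fits_in_tlab) {
          return AllocPath::TlabFast;     // bump tlab_top, no CAS, no clearing if ZeroTLAB
        }
        if (!allow_shared_alloc || tlab_waste_acceptable) {
          return AllocPath::SlowCase;     // let the runtime discard and refill the TLAB
        }
        // otherwise bump refill_waste_limit and fall through to the shared Eden
      }
      if (allow_shared_alloc && fits_in_eden) {
        return AllocPath::EdenFast;       // CAS on the Eden top pointer
      }
      return AllocPath::SlowCase;
    }
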







3266 
3267   const bool allow_shared_alloc =
3268     Universe::heap()->supports_inline_contig_alloc();
3269 
3270   if(UseTLAB) {
3271     Register RoldTopValue = RallocatedObject;
3272     Register RtlabWasteLimitValue = G3_scratch;
3273     Register RnewTopValue = G1_scratch;
3274     Register RendValue = Rscratch;
3275     Register RfreeValue = RnewTopValue;
3276 
3277     // check if we can allocate in the TLAB
3278     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RallocatedObject
3279     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3280     __ add(RoldTopValue, Roffset, RnewTopValue);
3281 
3282     // if there is enough space, we do not CAS and do not clear
3283     __ cmp(RnewTopValue, RendValue);
3284     if(ZeroTLAB) {
3285       // the fields have already been cleared
3286       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3287     } else {
3288       // initialize both the header and fields
3289       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3290     }
3291     __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3292 
3293     if (allow_shared_alloc) {
3294       // Check if tlab should be discarded (refill_waste_limit >= free)
3295       __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue);
3296       __ sub(RendValue, RoldTopValue, RfreeValue);
3297       __ srlx(RfreeValue, LogHeapWordSize, RfreeValue);
3298       __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small
3299 
3300       // increment waste limit to prevent getting stuck on this slow path
3301       if (Assembler::is_simm13(ThreadLocalAllocBuffer::refill_waste_limit_increment())) {
3302         __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
3303       } else {
3304       // set64 does not use the temp register if the given constant fits in 32 bits,
3305       // so any register can be passed here; if the constant were wider, using G0 as
3306       // the temp would cause its upper 32 bits to be ignored.
3307         __ set64(ThreadLocalAllocBuffer::refill_waste_limit_increment(), G4_scratch, G0);
3308         __ add(RtlabWasteLimitValue, G4_scratch, RtlabWasteLimitValue);
3309       }
3310       __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
3311     } else {
3312       // No allocation in the shared eden.
3313       __ ba_short(slow_case);
3314     }
3315   }
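
When shared-Eden allocation is possible, the block above falls back to the refill-waste heuristic: if at most refill_waste_limit words are left in the TLAB, discarding it is cheap, so the slow path is taken to refill it; otherwise the limit is incremented (by an immediate when it fits in a signed 13-bit simm13, else via a register set up with set64) and this one object is allocated in the shared Eden instead. A small C++ sketch of that decision, with an illustrative struct rather than the real ThreadLocalAllocBuffer API:

    #include <cstddef>

    struct TlabSketch {
      size_t free_words;          // (tlab_end - tlab_top) >> LogHeapWordSize
      size_t refill_waste_limit;  // in heap words
    };

    // Returns true when the TLAB should be discarded and refilled (slow path);
    // returns false when too much space would be wasted, in which case the
    // limit is bumped and the caller allocates directly in the shared Eden.
    bool should_discard_tlab(TlabSketch& t, size_t waste_limit_increment) {
      if (t.refill_waste_limit >= t.free_words) {
        return true;                                  // "tlab waste is small"
      }
      t.refill_waste_limit += waste_limit_increment;  // avoid looping here forever
      return false;
    }
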
3316 
3317   // Allocation in the shared Eden
3318   if (allow_shared_alloc) {
3319     Register RoldTopValue = G1_scratch;
3320     Register RtopAddr = G3_scratch;
3321     Register RnewTopValue = RallocatedObject;
3322     Register RendValue = Rscratch;
3323 
3324     __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3325 
3326     Label retry;
3327     __ bind(retry);
3328     __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3329     __ ld_ptr(RendValue, 0, RendValue);
3330     __ ld_ptr(RtopAddr, 0, RoldTopValue);
3331     __ add(RoldTopValue, Roffset, RnewTopValue);
3332 
3333     // RnewTopValue contains the top address after the new object
3334     // has been allocated.
3335     __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3336 
3337     __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3338 
3339     // if someone beat us to the allocation, try again, otherwise continue
3340     __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3341 
3342     // bump total bytes allocated by this thread
3343     // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3344     __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3345   }
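
The shared-Eden path above is a standard CAS bump-pointer loop: load top, compute top plus the object size, bail out to the slow case if that passes end, otherwise compare-and-swap the new top and retry if another thread got there first. A minimal C++ sketch using std::atomic (the names here are illustrative; the generated code uses cas_ptr on Universe::heap()->top_addr()):

    #include <atomic>
    #include <cstddef>

    // Returns the start of the newly allocated block, or nullptr to take slow_case.
    char* eden_cas_allocate(std::atomic<char*>& top, char* end, size_t size_in_bytes) {
      for (;;) {                                    // the 'retry' label above
        char* old_top = top.load();
        char* new_top = old_top + size_in_bytes;
        if (new_top > end) {
          return nullptr;                           // heap end reached
        }
        if (top.compare_exchange_weak(old_top, new_top)) {
          return old_top;                           // this thread owns [old_top, new_top)
        }
        // someone beat us to the allocation: try again
      }
    }
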

3346 
3347   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {


3348     // clear object fields
3349     __ bind(initialize_object);
3350     __ deccc(Roffset, sizeof(oopDesc));
3351     __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3352     __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3353 
3354     // initialize remaining object fields
3355     if (UseBlockZeroing) {
3356       // Use BIS for zeroing
3357       __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3358     } else {
3359       Label loop;
3360       __ subcc(Roffset, wordSize, Roffset);
3361       __ bind(loop);
3362       //__ subcc(Roffset, wordSize, Roffset);      // executed above loop or in delay slot
3363       __ st_ptr(G0, G3_scratch, Roffset);
3364       __ br(Assembler::notEqual, false, Assembler::pt, loop);
3365       __ delayed()->subcc(Roffset, wordSize, Roffset);
3366     }
3367     __ ba_short(initialize_header);
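
The non-BIS branch above is a descending word-clearing loop: by this point Roffset holds the instance size minus the header (sizeof(oopDesc)), G3_scratch points just past the header, and each iteration stores G0 (hardwired zero) and decrements the offset in the delay slot. An equivalent C++ sketch with illustrative names:

    #include <cstddef>
    #include <cstdint>

    // Zero 'body_size_in_bytes' bytes starting at 'body', one word at a time,
    // from the highest word offset down to offset 0. Assumes the size is a
    // multiple of the word size, as instance sizes are.
    void zero_instance_body(std::intptr_t* body, std::size_t body_size_in_bytes) {
      std::size_t offset = body_size_in_bytes;
      while (offset != 0) {
        offset -= sizeof(std::intptr_t);            // the subcc in the delay slot
        body[offset / sizeof(std::intptr_t)] = 0;   // st_ptr(G0, base, offset)
      }
    }
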




3241   __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243   __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244   // get InstanceKlass
3245   __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246 
3247   // make sure klass is fully initialized:
3248   __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249   __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251   __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252 
3253   // get instance_size in InstanceKlass (already aligned)
3254   //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255 
3256   // make sure klass does not have a finalizer, is not abstract, not an interface, and not java/lang/Class
3257   __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258   __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259   __ delayed()->nop();
3260 
3261   // Allocate the instance:
3262   // 1) If TLAB is enabled:
3263   //  a) Try to allocate in the TLAB
3264   //  b) If it fails, go to the slow path.
3265   // 2) Else TLAB is disabled:
3266   //  a) If inline contiguous allocations are enabled:
3267   //    i) Try to allocate in eden
3268   //    ii) If it fails because the heap end is reached, go to the slow path.
3269   // 3) If TLAB is enabled OR inline contiguous is enabled:
3270   //  a) Initialize the allocation
3271   //  b) Exit.
3272   // 4) If neither 1 nor 2 is applicable, go to the slow path.
3273 
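
The numbered comment above captures the behavioral change: TLAB and shared-Eden allocation are now mutually exclusive, and a failed TLAB allocation goes straight to the slow path instead of spilling into Eden (so the refill-waste-limit bookkeeping disappears from this fast path). A hedged C++ sketch, parallel to the old-flow sketch earlier; the booleans again stand in for the run-time conditions:

    enum class AllocPath { TlabFast, EdenFast, SlowCase };

    AllocPath new_allocation_flow(bool use_tlab, bool allow_shared_alloc,
                                  bool fits_in_tlab, bool fits_in_eden) {
      if (use_tlab) {
        // 1) TLAB only: either the object fits, or the slow path (which may refill) runs.
        return fits_in_tlab ? AllocPath::TlabFast : AllocPath::SlowCase;
      }
      if (allow_shared_alloc) {
        // 2) No TLAB: CAS-allocate in Eden; slow path if the heap end is reached.
        return fits_in_eden ? AllocPath::EdenFast : AllocPath::SlowCase;
      }
      // 4) Neither applies: slow path.
      return AllocPath::SlowCase;
    }
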
3274   const bool allow_shared_alloc =
3275     Universe::heap()->supports_inline_contig_alloc();
3276 
3277   if(UseTLAB) {
3278     Register RoldTopValue = RallocatedObject;
3279     Register RtlabWasteLimitValue = G3_scratch;
3280     Register RnewTopValue = G1_scratch;
3281     Register RendValue = Rscratch;
3282     Register RfreeValue = RnewTopValue;
3283 
3284     // check if we can allocate in the TLAB
3285     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RallocatedObject
3286     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3287     __ add(RoldTopValue, Roffset, RnewTopValue);
3288 
3289     // if there is enough space, we do not CAS and do not clear
3290     __ cmp(RnewTopValue, RendValue);
3291     if(ZeroTLAB) {
3292       // the fields have already been cleared
3293       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3294     } else {
3295       // initialize both the header and fields
3296       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3297     }
3298     __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3299 
3300     // Allocation does not fit in the TLAB.



















3301     __ ba_short(slow_case);
3302   } else {


3303     // Allocation in the shared Eden
3304     if (allow_shared_alloc) {
3305       Register RoldTopValue = G1_scratch;
3306       Register RtopAddr = G3_scratch;
3307       Register RnewTopValue = RallocatedObject;
3308       Register RendValue = Rscratch;
3309 
3310       __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3311 
3312       Label retry;
3313       __ bind(retry);
3314       __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3315       __ ld_ptr(RendValue, 0, RendValue);
3316       __ ld_ptr(RtopAddr, 0, RoldTopValue);
3317       __ add(RoldTopValue, Roffset, RnewTopValue);
3318 
3319       // RnewTopValue contains the top address after the new object
3320       // has been allocated.
3321       __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3322 
3323       __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3324 
3326       // if someone beat us to the allocation, try again, otherwise continue
3326       __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3327 
3328       // bump total bytes allocated by this thread
3329       // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3330       __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3331     }
3332   }
3333 
3334   // If UseTLAB or allow_shared_alloc is true, the object has been created above
3335   // and still needs to be initialized. Otherwise, skip to the slow path.
3336   if (UseTLAB || allow_shared_alloc) {
3337     // clear object fields
3338     __ bind(initialize_object);
3339     __ deccc(Roffset, sizeof(oopDesc));
3340     __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3341     __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3342 
3343     // initialize remaining object fields
3344     if (UseBlockZeroing) {
3345       // Use BIS for zeroing
3346       __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3347     } else {
3348       Label loop;
3349       __ subcc(Roffset, wordSize, Roffset);
3350       __ bind(loop);
3351       //__ subcc(Roffset, wordSize, Roffset);      // executed above loop or in delay slot
3352       __ st_ptr(G0, G3_scratch, Roffset);
3353       __ br(Assembler::notEqual, false, Assembler::pt, loop);
3354       __ delayed()->subcc(Roffset, wordSize, Roffset);
3355     }
3356     __ ba_short(initialize_header);

