< prev index next >

src/hotspot/cpu/sparc/templateTable_sparc.cpp

Print this page
rev 47680 : [mq]: x86_tlab


3241   __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243   __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244   // get InstanceKlass
3245   __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246 
3247   // make sure klass is fully initialized:
3248   __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249   __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251   __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252 
3253   // get instance_size in InstanceKlass (already aligned)
3254   //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255 
3256   // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class
3257   __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258   __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259   __ delayed()->nop();
3260 
3261   // allocate the instance
3262   // 1) Try to allocate in the TLAB
3263   // 2) if fail, and the TLAB is not full enough to discard, allocate in the shared Eden
3264   // 3) if the above fails (or is not applicable), go to a slow case
3265   // (creates a new TLAB, etc.)






3266 
3267   const bool allow_shared_alloc =
3268     Universe::heap()->supports_inline_contig_alloc();
3269 
3270   if(UseTLAB) {
3271     Register RoldTopValue = RallocatedObject;
3272     Register RtlabWasteLimitValue = G3_scratch;
3273     Register RnewTopValue = G1_scratch;
3274     Register RendValue = Rscratch;
3275     Register RfreeValue = RnewTopValue;
3276 
3277     // check if we can allocate in the TLAB
3278     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RallocatedObject
3279     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3280     __ add(RoldTopValue, Roffset, RnewTopValue);
3281 
3282     // if there is enough space, we do not CAS and do not clear
3283     __ cmp(RnewTopValue, RendValue);
3284     if(ZeroTLAB) {
3285       // the fields have already been cleared
3286       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3287     } else {
3288       // initialize both the header and fields
3289       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3290     }
3291     __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3292 
3293     if (allow_shared_alloc) {
3294       // Check if tlab should be discarded (refill_waste_limit >= free)
3295       __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue);
3296       __ sub(RendValue, RoldTopValue, RfreeValue);
3297       __ srlx(RfreeValue, LogHeapWordSize, RfreeValue);
3298       __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small
3299 
3300       // increment waste limit to prevent getting stuck on this slow path
3301       if (Assembler::is_simm13(ThreadLocalAllocBuffer::refill_waste_limit_increment())) {
3302         __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
3303       } else {
3304         // set64 does not use the temp register if the given constant is a 32-bit value, so
3305         // we can pass any register as the temp; passing G0 means the upper 32 bits
3306         // of that value are ignored.
3307         __ set64(ThreadLocalAllocBuffer::refill_waste_limit_increment(), G4_scratch, G0);
3308         __ add(RtlabWasteLimitValue, G4_scratch, RtlabWasteLimitValue);
3309       }
3310       __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
3311     } else {
3312       // No allocation in the shared eden.
3313       __ ba_short(slow_case);
3314     }
3315   }
3316 
3317   // Allocation in the shared Eden
3318   if (allow_shared_alloc) {
3319     Register RoldTopValue = G1_scratch;
3320     Register RtopAddr = G3_scratch;
3321     Register RnewTopValue = RallocatedObject;
3322     Register RendValue = Rscratch;
3323 
3324     __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3325 
3326     Label retry;
3327     __ bind(retry);
3328     __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3329     __ ld_ptr(RendValue, 0, RendValue);
3330     __ ld_ptr(RtopAddr, 0, RoldTopValue);
3331     __ add(RoldTopValue, Roffset, RnewTopValue);
3332 
3333     // RnewTopValue contains the top address after the new object
3334     // has been allocated.
3335     __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3336 
3337     __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3338 
3339     // if someone beat us on the allocation, try again, otherwise continue
3340     __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3341 
3342     // bump total bytes allocated by this thread
3343     // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3344     __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3345   }

3346 
3347   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {


3348     // clear object fields
3349     __ bind(initialize_object);
3350     __ deccc(Roffset, sizeof(oopDesc));
3351     __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3352     __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3353 
3354     // initialize remaining object fields
3355     if (UseBlockZeroing) {
3356       // Use BIS for zeroing
3357       __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3358     } else {
3359       Label loop;
3360       __ subcc(Roffset, wordSize, Roffset);
3361       __ bind(loop);
3362       //__ subcc(Roffset, wordSize, Roffset);      // executed above loop or in delay slot
3363       __ st_ptr(G0, G3_scratch, Roffset);
3364       __ br(Assembler::notEqual, false, Assembler::pt, loop);
3365       __ delayed()->subcc(Roffset, wordSize, Roffset);
3366     }
3367     __ ba_short(initialize_header);




3241   __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243   __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244   // get InstanceKlass
3245   __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246 
3247   // make sure klass is fully initialized:
3248   __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249   __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251   __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252 
3253   // get instance_size in InstanceKlass (already aligned)
3254   //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255 
3256   // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class
3257   __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258   __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259   __ delayed()->nop();
3260 
3261   // Allocate the instance:
3262   // 1) If TLAB is enabled:
3263   //  a) Try to allocate in the TLAB.
3264   //  b) If that fails, go to the slow path.
3265   // 2) Otherwise, if inline contiguous allocations are enabled:
3266   //  a) Try to allocate in eden.
3267   //  b) If that fails due to heap end, go to the slow path.
3268   // 3) If TLAB is enabled OR inline contiguous allocation is enabled:
3269   //  a) Initialize the allocation.
3270   //  b) Exit.
3271   // 4) If neither 1 nor 2 is applicable, go to the slow path.
3272 
3273   const bool allow_shared_alloc =
3274     Universe::heap()->supports_inline_contig_alloc();
3275 
3276   if(UseTLAB) {
3277     Register RoldTopValue = RallocatedObject;
3278     Register RtlabWasteLimitValue = G3_scratch;
3279     Register RnewTopValue = G1_scratch;
3280     Register RendValue = Rscratch;
3281     Register RfreeValue = RnewTopValue;
3282 
3283     // check if we can allocate in the TLAB
3284     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RallocatedObject
3285     __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3286     __ add(RoldTopValue, Roffset, RnewTopValue);
3287 
3288     // if there is enough space, we do not CAS and do not clear
3289     __ cmp(RnewTopValue, RendValue);
3290     if(ZeroTLAB) {
3291       // the fields have already been cleared
3292       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3293     } else {
3294       // initialize both the header and fields
3295       __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3296     }
3297     __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3298 
3299     // Allocation does not fit in the TLAB.



















3300     __ ba_short(slow_case);
3301   } else {


3302     // Allocation in the shared Eden
3303     if (allow_shared_alloc) {
3304       Register RoldTopValue = G1_scratch;
3305       Register RtopAddr = G3_scratch;
3306       Register RnewTopValue = RallocatedObject;
3307       Register RendValue = Rscratch;
3308 
3309       __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3310 
3311       Label retry;
3312       __ bind(retry);
3313       __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3314       __ ld_ptr(RendValue, 0, RendValue);
3315       __ ld_ptr(RtopAddr, 0, RoldTopValue);
3316       __ add(RoldTopValue, Roffset, RnewTopValue);
3317 
3318       // RnewTopValue contains the top address after the new object
3319       // has been allocated.
3320       __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3321 
3322       __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3323 
3324       // if someone beat us on the allocation, try again, otherwise continue
3325       __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3326 
3327       // bump total bytes allocated by this thread
3328       // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3329       __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3330     }
3331   }
3332 
3333   // If UseTLAB or allow_shared_alloc is true, the object has been allocated above
3334   // and needs to be initialized. Otherwise, skip and go to the slow path.
3335   if (UseTLAB || allow_shared_alloc) {
3336     // clear object fields
3337     __ bind(initialize_object);
3338     __ deccc(Roffset, sizeof(oopDesc));
3339     __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3340     __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3341 
3342     // initialize remaining object fields
3343     if (UseBlockZeroing) {
3344       // Use BIS for zeroing
3345       __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3346     } else {
3347       Label loop;
3348       __ subcc(Roffset, wordSize, Roffset);
3349       __ bind(loop);
3350       //__ subcc(Roffset, wordSize, Roffset);      // executed above loop or in delay slot
3351       __ st_ptr(G0, G3_scratch, Roffset);
3352       __ br(Assembler::notEqual, false, Assembler::pt, loop);
3353       __ delayed()->subcc(Roffset, wordSize, Roffset);
3354     }
3355     __ ba_short(initialize_header);


< prev index next >