3241 __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243 __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244 // get InstanceKlass
3245 __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246
3247 // make sure klass is fully initialized:
3248 __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249 __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251 __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252
3253 // get instance_size in InstanceKlass (already aligned)
3254 //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255
3256 // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class
3257 __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258 __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259 __ delayed()->nop();
3260
3261 // allocate the instance
3262 // 1) Try to allocate in the TLAB
3263 // 2) if that fails, and the TLAB is not full enough to discard, allocate in the shared Eden
3264 // 3) if the above fails (or is not applicable), go to the slow case
3265 // (creates a new TLAB, etc.)
3266
3267 const bool allow_shared_alloc =
3268 Universe::heap()->supports_inline_contig_alloc();
3269
3270 if(UseTLAB) {
3271 Register RoldTopValue = RallocatedObject;
3272 Register RtlabWasteLimitValue = G3_scratch;
3273 Register RnewTopValue = G1_scratch;
3274 Register RendValue = Rscratch;
3275 Register RfreeValue = RnewTopValue;
3276
3277 // check if we can allocate in the TLAB
3278 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RalocatedObject
3279 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3280 __ add(RoldTopValue, Roffset, RnewTopValue);
3281
3282 // if there is enough space, we do not CAS and do not clear
3283 __ cmp(RnewTopValue, RendValue);
3284 if(ZeroTLAB) {
3285 // the fields have already been cleared
3286 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3287 } else {
3288 // initialize both the header and fields
3289 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3290 }
3291 __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3292
3293 if (allow_shared_alloc) {
3294 // Check if tlab should be discarded (refill_waste_limit >= free)
3295 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue);
3296 __ sub(RendValue, RoldTopValue, RfreeValue);
3297 __ srlx(RfreeValue, LogHeapWordSize, RfreeValue);
3298 __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small
3299
3300 // increment waste limit to prevent getting stuck on this slow path
3301 if (Assembler::is_simm13(ThreadLocalAllocBuffer::refill_waste_limit_increment())) {
3302 __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
3303 } else {
3304 // set64 does not use the temp register if the given constant is 32 bit. So
3305 // we can just use any register; using G0 results in ignoring of the upper 32 bit
3306 // of that value.
3307 __ set64(ThreadLocalAllocBuffer::refill_waste_limit_increment(), G4_scratch, G0);
3308 __ add(RtlabWasteLimitValue, G4_scratch, RtlabWasteLimitValue);
3309 }
3310 __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
3311 } else {
3312 // No allocation in the shared eden.
3313 __ ba_short(slow_case);
3314 }
3315 }
3316
3317 // Allocation in the shared Eden
3318 if (allow_shared_alloc) {
3319 Register RoldTopValue = G1_scratch;
3320 Register RtopAddr = G3_scratch;
3321 Register RnewTopValue = RallocatedObject;
3322 Register RendValue = Rscratch;
3323
3324 __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3325
3326 Label retry;
3327 __ bind(retry);
3328 __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3329 __ ld_ptr(RendValue, 0, RendValue);
3330 __ ld_ptr(RtopAddr, 0, RoldTopValue);
3331 __ add(RoldTopValue, Roffset, RnewTopValue);
3332
3333 // RnewTopValue contains the top address after the new object
3334 // has been allocated.
3335 __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3336
3337 __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3338
3339 // if someone beat us on the allocation, try again, otherwise continue
3340 __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3341
3342 // bump total bytes allocated by this thread
3343 // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3344 __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3345 }
3346
3347 if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
3348 // clear object fields
3349 __ bind(initialize_object);
3350 __ deccc(Roffset, sizeof(oopDesc));
3351 __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3352 __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3353
3354 // initialize remaining object fields
3355 if (UseBlockZeroing) {
3356 // Use BIS for zeroing
3357 __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3358 } else {
3359 Label loop;
3360 __ subcc(Roffset, wordSize, Roffset);
3361 __ bind(loop);
3362 //__ subcc(Roffset, wordSize, Roffset); // executed above loop or in delay slot
3363 __ st_ptr(G0, G3_scratch, Roffset);
3364 __ br(Assembler::notEqual, false, Assembler::pt, loop);
3365 __ delayed()->subcc(Roffset, wordSize, Roffset);
3366 }
3367 __ ba_short(initialize_header);
|
3241 __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243 __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244 // get InstanceKlass
3245 __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246
3247 // make sure klass is fully initialized:
3248 __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249 __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251 __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252
3253 // get instance_size in InstanceKlass (already aligned)
3254 //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255
3256 // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class
3257 __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258 __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259 __ delayed()->nop();
3260
3261 // Allocate the instance:
3262 // 1) If TLAB is enabled:
3263 // a) Try to allocate in the TLAB.
3264 // b) If that fails, go to the slow path.
3265 // 2) Otherwise, if inline contiguous allocations are enabled:
3266 // a) Try to allocate in eden.
3267 // b) If that fails because the heap end is reached, go to the slow path.
3268 // 3) If TLAB is enabled OR inline contiguous allocation is enabled:
3269 // a) Initialize the allocation.
3270 // b) Exit.
3271 // 4) If neither 1 nor 2 is applicable, go to the slow path.
3272
3273 const bool allow_shared_alloc =
3274 Universe::heap()->supports_inline_contig_alloc();
3275
3276 if(UseTLAB) {
3277 Register RoldTopValue = RallocatedObject;
3278 Register RtlabWasteLimitValue = G3_scratch;
3279 Register RnewTopValue = G1_scratch;
3280 Register RendValue = Rscratch;
3281 Register RfreeValue = RnewTopValue;
3282
3283 // check if we can allocate in the TLAB
3284 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RalocatedObject
3285 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3286 __ add(RoldTopValue, Roffset, RnewTopValue);
3287
3288 // if there is enough space, we do not CAS and do not clear
3289 __ cmp(RnewTopValue, RendValue);
3290 if(ZeroTLAB) {
3291 // the fields have already been cleared
3292 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3293 } else {
3294 // initialize both the header and fields
3295 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3296 }
3297 __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3298
3299 // Allocation does not fit in the TLAB.
3300 __ ba_short(slow_case);
3301 } else {
3302 // Allocation in the shared Eden
3303 if (allow_shared_alloc) {
3304 Register RoldTopValue = G1_scratch;
3305 Register RtopAddr = G3_scratch;
3306 Register RnewTopValue = RallocatedObject;
3307 Register RendValue = Rscratch;
3308
3309 __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3310
3311 Label retry;
3312 __ bind(retry);
3313 __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3314 __ ld_ptr(RendValue, 0, RendValue);
3315 __ ld_ptr(RtopAddr, 0, RoldTopValue);
3316 __ add(RoldTopValue, Roffset, RnewTopValue);
3317
3318 // RnewTopValue contains the top address after the new object
3319 // has been allocated.
3320 __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3321
3322 __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3323
3324 // if someone beat us on the allocation, try again, otherwise continue
3325 __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3326
3327 // bump total bytes allocated by this thread
3328 // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3329 __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3330 }
3331 }
3332
3333 // If UseTLAB or allow_shared_alloc is true, the object is created above and
3334 // needs to be initialized. Otherwise, skip this and go to the slow path.
3335 if (UseTLAB || allow_shared_alloc) {
3336 // clear object fields
3337 __ bind(initialize_object);
3338 __ deccc(Roffset, sizeof(oopDesc));
3339 __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3340 __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3341
3342 // initialize remaining object fields
3343 if (UseBlockZeroing) {
3344 // Use BIS for zeroing
3345 __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3346 } else {
3347 Label loop;
3348 __ subcc(Roffset, wordSize, Roffset);
3349 __ bind(loop);
3350 //__ subcc(Roffset, wordSize, Roffset); // executed above loop or in delay slot
3351 __ st_ptr(G0, G3_scratch, Roffset);
3352 __ br(Assembler::notEqual, false, Assembler::pt, loop);
3353 __ delayed()->subcc(Roffset, wordSize, Roffset);
3354 }
3355 __ ba_short(initialize_header);
|