3241 __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243 __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244 // get InstanceKlass
3245 __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246
3247 // make sure klass is fully initialized:
3248 __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249 __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251 __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252
3253 // get instance_size in InstanceKlass (already aligned)
3254 //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255
3256 // make sure klass does not have a finalizer and is not abstract, an interface, or java/lang/Class
3257 __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258 __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259 __ delayed()->nop();
3260
3261 // allocate the instance
3262 // 1) Try to allocate in the TLAB
3263 // 2) if that fails, and the TLAB is not full enough to discard, allocate in the shared Eden
3264 // 3) if the above fails (or is not applicable), go to a slow case
3265 // (creates a new TLAB, etc.)
3266
3267 const bool allow_shared_alloc =
3268 Universe::heap()->supports_inline_contig_alloc();
3269
3270 if(UseTLAB) {
3271 Register RoldTopValue = RallocatedObject;
3272 Register RtlabWasteLimitValue = G3_scratch;
3273 Register RnewTopValue = G1_scratch;
3274 Register RendValue = Rscratch;
3275 Register RfreeValue = RnewTopValue;
3276
3277 // check if we can allocate in the TLAB
3278 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RalocatedObject
3279 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3280 __ add(RoldTopValue, Roffset, RnewTopValue);
3281
3282 // if there is enough space, we do not CAS and do not clear
3283 __ cmp(RnewTopValue, RendValue);
3284 if(ZeroTLAB) {
3285 // the fields have already been cleared
3286 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3287 } else {
3288 // initialize both the header and fields
3289 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3290 }
3291 __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3292
3293 if (allow_shared_alloc) {
3294 // Check if tlab should be discarded (refill_waste_limit >= free)
3295 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue);
3296 __ sub(RendValue, RoldTopValue, RfreeValue);
3297 __ srlx(RfreeValue, LogHeapWordSize, RfreeValue);
3298 __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small
3299
3300 // increment waste limit to prevent getting stuck on this slow path
3301 if (Assembler::is_simm13(ThreadLocalAllocBuffer::refill_waste_limit_increment())) {
3302 __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
3303 } else {
3304 // set64 does not use the temp register if the given constant is 32 bit. So
3305 // we can just use any register; using G0 results in ignoring of the upper 32 bit
3306 // of that value.
3307 __ set64(ThreadLocalAllocBuffer::refill_waste_limit_increment(), G4_scratch, G0);
3308 __ add(RtlabWasteLimitValue, G4_scratch, RtlabWasteLimitValue);
3309 }
3310 __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
3311 } else {
3312 // No allocation in the shared eden.
3313 __ ba_short(slow_case);
3314 }
3315 }
3316
3317 // Allocation in the shared Eden
3318 if (allow_shared_alloc) {
3319 Register RoldTopValue = G1_scratch;
3320 Register RtopAddr = G3_scratch;
3321 Register RnewTopValue = RallocatedObject;
3322 Register RendValue = Rscratch;
3323
3324 __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3325
3326 Label retry;
3327 __ bind(retry);
3328 __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3329 __ ld_ptr(RendValue, 0, RendValue);
3330 __ ld_ptr(RtopAddr, 0, RoldTopValue);
3331 __ add(RoldTopValue, Roffset, RnewTopValue);
3332
3333 // RnewTopValue contains the top address after the new object
3334 // has been allocated.
3335 __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3336
3337 __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3338
3339 // if someone beat us on the allocation, try again, otherwise continue
3340 __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3341
3342 // bump total bytes allocated by this thread
3343 // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3344 __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3345 }
3346
3347 if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
3348 // clear object fields
3349 __ bind(initialize_object);
3350 __ deccc(Roffset, sizeof(oopDesc));
3351 __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3352 __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3353
3354 // initialize remaining object fields
3355 if (UseBlockZeroing) {
3356 // Use BIS for zeroing
3357 __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3358 } else {
3359 Label loop;
3360 __ subcc(Roffset, wordSize, Roffset);
3361 __ bind(loop);
3362 //__ subcc(Roffset, wordSize, Roffset); // executed above loop or in delay slot
3363 __ st_ptr(G0, G3_scratch, Roffset);
3364 __ br(Assembler::notEqual, false, Assembler::pt, loop);
3365 __ delayed()->subcc(Roffset, wordSize, Roffset);
3366 }
3367 __ ba_short(initialize_header);
|
3241 __ cmp(G3_scratch, JVM_CONSTANT_Class);
3242 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3243 __ delayed()->sll(Roffset, LogBytesPerWord, Roffset);
3244 // get InstanceKlass
3245 __ load_resolved_klass_at_offset(Rscratch, Roffset, RinstanceKlass);
3246
3247 // make sure klass is fully initialized:
3248 __ ldub(RinstanceKlass, in_bytes(InstanceKlass::init_state_offset()), G3_scratch);
3249 __ cmp(G3_scratch, InstanceKlass::fully_initialized);
3250 __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
3251 __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3252
3253 // get instance_size in InstanceKlass (already aligned)
3254 //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
3255
3256 // make sure klass does not have a finalizer and is not abstract, an interface, or java/lang/Class
3257 __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
3258 __ br(Assembler::notZero, false, Assembler::pn, slow_case);
3259 __ delayed()->nop();
3260
3261 // Allocate the instance:
3262 // If TLAB is enabled:
3263 // Try to allocate in the TLAB.
3264 // If that fails, go to the slow path.
3265 // Else if inline contiguous allocations are enabled:
3266 // Try to allocate in eden.
3267 // If that fails because the heap end would be exceeded, go to the slow path.
3268 //
3269 // If TLAB is enabled OR inline contiguous is enabled:
3270 // Initialize the allocation.
3271 // Exit.
3272 //
3273 // Go to slow path.
3274
3275 const bool allow_shared_alloc =
3276 Universe::heap()->supports_inline_contig_alloc();
3277
3278 if(UseTLAB) {
3279 Register RoldTopValue = RallocatedObject;
3280 Register RtlabWasteLimitValue = G3_scratch;
3281 Register RnewTopValue = G1_scratch;
3282 Register RendValue = Rscratch;
3283 Register RfreeValue = RnewTopValue;
3284
3285 // check if we can allocate in the TLAB
3286 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), RoldTopValue); // sets up RalocatedObject
3287 __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), RendValue);
3288 __ add(RoldTopValue, Roffset, RnewTopValue);
3289
3290 // if there is enough space, we do not CAS and do not clear
3291 __ cmp(RnewTopValue, RendValue);
3292 if(ZeroTLAB) {
3293 // the fields have already been cleared
3294 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_header);
3295 } else {
3296 // initialize both the header and fields
3297 __ brx(Assembler::lessEqualUnsigned, true, Assembler::pt, initialize_object);
3298 }
3299 __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
3300
3301 // Allocation does not fit in the TLAB.
3302 __ ba_short(slow_case);
3303 } else {
3304 // Allocation in the shared Eden
3305 if (allow_shared_alloc) {
3306 Register RoldTopValue = G1_scratch;
3307 Register RtopAddr = G3_scratch;
3308 Register RnewTopValue = RallocatedObject;
3309 Register RendValue = Rscratch;
3310
3311 __ set((intptr_t)Universe::heap()->top_addr(), RtopAddr);
3312
3313 Label retry;
3314 __ bind(retry);
3315 __ set((intptr_t)Universe::heap()->end_addr(), RendValue);
3316 __ ld_ptr(RendValue, 0, RendValue);
3317 __ ld_ptr(RtopAddr, 0, RoldTopValue);
3318 __ add(RoldTopValue, Roffset, RnewTopValue);
3319
3320 // RnewTopValue contains the top address after the new object
3321 // has been allocated.
3322 __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
3323
3324 __ cas_ptr(RtopAddr, RoldTopValue, RnewTopValue);
3325
3326 // if someone beat us on the allocation, try again, otherwise continue
3327 __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
3328
3329 // bump total bytes allocated by this thread
3330 // RoldTopValue and RtopAddr are dead, so can use G1 and G3
3331 __ incr_allocated_bytes(Roffset, G1_scratch, G3_scratch);
3332 }
3333 }
3334
3335 // If UseTLAB or allow_shared_alloc is true, the object was allocated above and
3336 // still needs to be initialized. Otherwise, skip and go to the slow path.
3337 if (UseTLAB || allow_shared_alloc) {
3338 // clear object fields
3339 __ bind(initialize_object);
3340 __ deccc(Roffset, sizeof(oopDesc));
3341 __ br(Assembler::zero, false, Assembler::pt, initialize_header);
3342 __ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
3343
3344 // initialize remaining object fields
3345 if (UseBlockZeroing) {
3346 // Use BIS for zeroing
3347 __ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
3348 } else {
3349 Label loop;
3350 __ subcc(Roffset, wordSize, Roffset);
3351 __ bind(loop);
3352 //__ subcc(Roffset, wordSize, Roffset); // executed above loop or in delay slot
3353 __ st_ptr(G0, G3_scratch, Roffset);
3354 __ br(Assembler::notEqual, false, Assembler::pt, loop);
3355 __ delayed()->subcc(Roffset, wordSize, Roffset);
3356 }
3357 __ ba_short(initialize_header);
|