4317
4318 (addr >> RS) * STRIDE + (new_val >> RS) + [MATRIX_BASE - (HEAP_BASE >> RS) * (STRIDE + 1)]
4319
4320 Notice that the first two parts can be computed out-of-order, and only then merged with addition,
4321 which helps scheduling. If STRIDE is a power of two, then the addr computation can be folded with
4322 the region size shift. The third part is a constant and can be folded at compile time.
4323
4324 As long as STRIDE is less than 2^RS, we never overflow. As long as HEAP_BASE is aligned to
4325 the region size, the RS shifts are safe. Guarantee both:
4326 */
4327 HeapWord* heap_base = ShenandoahHeap::heap()->first_region_bottom();
4328 intx stride = matrix->stride();
4329 jint rs = ShenandoahHeapRegion::RegionSizeShift;
4330
4331 guarantee(stride < (intx)ShenandoahHeapRegion::RegionSizeBytes, "sanity");
4332 guarantee(is_ptr_aligned(heap_base, ShenandoahHeapRegion::RegionSizeBytes), "sanity");
4333
4334 Node* magic_con = MakeConX((jlong) matrix->matrix_addr() - ((jlong) heap_base >> rs) * (stride + 1));
4335
4336 // Compute addr part
4337 Node* adr_idx = _gvn.transform(new CastP2XNode(control(), adr));
4338 if (is_power_of_2(stride)) {
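4339 // TODO: Apparently, C2 cannot perform peephole opt like this. NOTE(review): a single shift by (rs - log2(stride)) leaves in-region offset bits of adr in the low log2(stride) bits, so it differs from (adr >> rs) * stride unless masked -- verify.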
4340 adr_idx = _gvn.transform(new URShiftXNode(adr_idx, intcon(rs - log2_intptr(stride))));
4341 } else {
4342 adr_idx = _gvn.transform(new URShiftXNode(adr_idx, intcon(rs)));
4343 adr_idx = _gvn.transform(new MulXNode(adr_idx, MakeConX(stride)));
4344 }
4345
4346 // Compute new_val part
4347 Node* val_idx = _gvn.transform(new CastP2XNode(control(), not_null_val));
4348 val_idx = _gvn.transform(new URShiftXNode(val_idx, intcon(rs)));
4349
4350 // Add everything up
4351 adr_idx = _gvn.transform(new AddXNode(adr_idx, val_idx));
4352 adr_idx = _gvn.transform(new CastX2PNode(adr_idx));
4353 Node* matrix_adr = _gvn.transform(new AddPNode(top(), adr_idx, magic_con));
4354
4355 // Load current value
4356 const TypePtr* adr_type = TypeRawPtr::BOTTOM;
4357 Node* current = _gvn.transform(LoadNode::make(_gvn, control(), memory(Compile::AliasIdxRaw),
4358 matrix_adr, adr_type, TypeInt::INT, T_BYTE, MemNode::unordered));
4359
4360 // Check if already set
4361 Node* cmp_set = _gvn.transform(new CmpINode(current, intcon(0)));
4362 Node* cmp_set_bool = _gvn.transform(new BoolNode(cmp_set, BoolTest::eq));
4363 IfNode* cmp_iff = create_and_map_if(control(), cmp_set_bool, PROB_FAIR, COUNT_UNKNOWN);
4364
|
4317
4318 (addr >> RS) * STRIDE + (new_val >> RS) + [MATRIX_BASE - (HEAP_BASE >> RS) * (STRIDE + 1)]
4319
4320 Notice that the first two parts can be computed out-of-order, and only then merged with addition,
4321 which helps scheduling. If STRIDE is a power of two, then the addr computation can be folded with
4322 the region size shift. The third part is a constant and can be folded at compile time.
4323
4324 As long as STRIDE is less than 2^RS, we never overflow. As long as HEAP_BASE is aligned to
4325 the region size, the RS shifts are safe. Guarantee both:
4326 */
4327 HeapWord* heap_base = ShenandoahHeap::heap()->first_region_bottom();
4328 intx stride = matrix->stride();
4329 jint rs = ShenandoahHeapRegion::RegionSizeShift;
4330
4331 guarantee(stride < (intx)ShenandoahHeapRegion::RegionSizeBytes, "sanity");
4332 guarantee(is_ptr_aligned(heap_base, ShenandoahHeapRegion::RegionSizeBytes), "sanity");
4333
4334 Node* magic_con = MakeConX((jlong) matrix->matrix_addr() - ((jlong) heap_base >> rs) * (stride + 1));
4335
4336 // Compute addr part
4337 // TODO: Might be worthwhile to change this to shift + mask
4338 Node* adr_idx = _gvn.transform(new CastP2XNode(control(), adr));
4339 adr_idx = _gvn.transform(new URShiftXNode(adr_idx, intcon(rs)));
4340 adr_idx = _gvn.transform(new MulXNode(adr_idx, MakeConX(stride)));
4341
4342 // Compute new_val part
4343 Node* val_idx = _gvn.transform(new CastP2XNode(control(), not_null_val));
4344 val_idx = _gvn.transform(new URShiftXNode(val_idx, intcon(rs)));
4345
4346 // Add everything up
4347 adr_idx = _gvn.transform(new AddXNode(adr_idx, val_idx));
4348 adr_idx = _gvn.transform(new CastX2PNode(adr_idx));
4349 Node* matrix_adr = _gvn.transform(new AddPNode(top(), adr_idx, magic_con));
4350
4351 // Load current value
4352 const TypePtr* adr_type = TypeRawPtr::BOTTOM;
4353 Node* current = _gvn.transform(LoadNode::make(_gvn, control(), memory(Compile::AliasIdxRaw),
4354 matrix_adr, adr_type, TypeInt::INT, T_BYTE, MemNode::unordered));
4355
4356 // Check if already set
4357 Node* cmp_set = _gvn.transform(new CmpINode(current, intcon(0)));
4358 Node* cmp_set_bool = _gvn.transform(new BoolNode(cmp_set, BoolTest::eq));
4359 IfNode* cmp_iff = create_and_map_if(control(), cmp_set_bool, PROB_FAIR, COUNT_UNKNOWN);
4360
|