// rawoop = alloc.RawAddress
// tile12 = 0x00010002
// rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12)
// init = (Initialize alloc.Control alloc.Memory rawoop rawstore12)
//
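// (Read as an illustration: the captured subword constant stores covering
//  offsets 12..15 are folded into one 32-bit StoreI of the merged bit pattern
//  tile12, issued against the allocation's raw address and memory and then
//  captured by the Initialize; the placement of each subword constant within
//  the tile follows the platform's byte order.)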
void
InitializeNode::coalesce_subword_stores(intptr_t header_size,
                                        Node* size_in_bytes,
                                        PhaseGVN* phase) {
  Compile* C = phase->C;

  assert(stores_are_sane(phase), "");
  // Note: After this pass, they are not completely sane,
  // since there may be some overlaps.

  int old_subword = 0, old_long = 0, new_int = 0, new_long = 0;

  intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
  intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit);
  size_limit = MIN2(size_limit, ti_limit);
  size_limit = align_size_up(size_limit, BytesPerLong);
  int num_tiles = size_limit / BytesPerLong;

  // allocate space for the tile map:
  const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small
  jlong  tiles_buf[small_len];
  Node*  nodes_buf[small_len];
  jlong  inits_buf[small_len];
  jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0]
                  : NEW_RESOURCE_ARRAY(jlong, num_tiles));
  Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0]
                  : NEW_RESOURCE_ARRAY(Node*, num_tiles));
  jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0]
                  : NEW_RESOURCE_ARRAY(jlong, num_tiles));
  // tiles: exact bitwise model of all primitive constants
  // nodes: last constant-storing node subsumed into the tiles model
  // inits: which bytes (in each tile) are touched by any initializations
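  // Illustration (hypothetical numbers): with a 12-byte header, a captured
  // (StoreC ... (+ rawoop 14) 1) falls into tile 1 (bytes 8..15).  Pass A would
  //   - merge the constant 1 into the two byte lanes of tiles[1] that
  //     correspond to offsets 14..15 (exact placement depends on byte order),
  //   - remember the store in nodes[1] as the latest constant contributor, and
  //   - mark those same byte lanes in inits[1] as initialized.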

  //// Pass A: Fill in the tile model with any relevant stores.

  Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles);

  // ...

// Find the offset of the next fully initialized (32-bit) word among the
// captured stores, scanning the store inputs beginning at index 'start'.  If there
// are no following full-word stores, or if something is fishy, return
// a negative value.
intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) {
  int       int_map = 0;
  intptr_t  int_map_off = 0;
  const int FULL_MAP = right_n_bits(BytesPerInt);  // the int_map we hope for
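  // FULL_MAP has one bit per byte of a 32-bit word: right_n_bits(4) == 0x0F.
  // A word counts as fully initialized once all four byte-lane bits are set.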

  for (uint i = start, limit = req(); i < limit; i++) {
    Node* st = in(i);

    intptr_t st_off = get_store_offset(st, phase);
    if (st_off < 0)  break;  // return conservative answer

    int st_size = st->as_Store()->memory_size();
    if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) {
      return st_off;              // we found a complete word init
    }

    // update the map:

    intptr_t this_int_off = align_size_down(st_off, BytesPerInt);
    if (this_int_off != int_map_off) {
      // reset the map:
      int_map = 0;
      int_map_off = this_int_off;
    }

    int subword_off = st_off - this_int_off;
    int_map |= right_n_bits(st_size) << subword_off;
    if ((int_map & FULL_MAP) == FULL_MAP) {
      return this_int_off;        // we found a complete word init
    }
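    // Example: a one-byte store at st_off == 13 lies in the word at offset 12,
    // so subword_off == 1 and int_map gains bit 1.  Once byte stores at 12, 13,
    // 14 and 15 have all been seen, int_map == 0x0F == FULL_MAP and the word
    // starting at offset 12 is reported as fully initialized.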

    // Did this store hit or cross the word boundary?
    intptr_t next_int_off = align_size_down(st_off + st_size, BytesPerInt);
    if (next_int_off == this_int_off + BytesPerInt) {
      // We passed the current int, without fully initializing it.
      int_map_off = next_int_off;
      int_map >>= BytesPerInt;
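      // Example: a 2-byte store at offset 15 covers byte 3 of this word and
      // byte 0 of the next; after the shift, its surviving bit marks byte 0
      // of the word now tracked at int_map_off.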
    } else if (next_int_off > this_int_off + BytesPerInt) {
      // We passed the current and next int.
      return this_int_off + BytesPerInt;
    }
  }

  return -1;
}


// Called when the associated AllocateNode is expanded into CFG.
// At this point, we may perform additional optimizations.
// Linearize the stores by ascending offset, to make memory
// activity as coherent as possible.
Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
                                      intptr_t header_size,
                                      // ...
      // byte[] a = { p,q,r,s }  =>  a[0]=p,a[1]=q,a[2]=r,a[3]=s
      // byte[] a = { x,y,0,0 }  =>  a[0..3] = 0, a[0]=x,a[1]=y
      // byte[] a = { 0,0,z,0 }  =>  a[0..3] = 0, a[2]=z
      //
      // Note: coalesce_subword_stores may have already done this,
      // if it was prompted by constant non-zero subword initializers.
      // But this case can still arise with non-constant stores.

      intptr_t next_full_store = find_next_fullword_store(i, phase);

      // In the examples above:
      //   in(i)          p   q   r   s     x   y     z
      //   st_off        12  13  14  15    12  13    14
      //   st_size        1   1   1   1     1   1     1
      //   next_full_s.  12  16  16  16    16  16    16
      //   z's_done      12  16  16  16    12  16    12
      //   z's_needed    12  16  16  16    16  16    16
      //   zsize          0   0   0   0     4   0     4
      if (next_full_store < 0) {
        // Conservative tack: Zero to end of current word.
        zeroes_needed = align_size_up(zeroes_needed, BytesPerInt);
      } else {
        // Zero to beginning of next fully initialized word.
        // Or, don't zero at all, if we are already in that word.
        assert(next_full_store >= zeroes_needed, "must go forward");
        assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary");
        zeroes_needed = next_full_store;
      }
    }

    if (zeroes_needed > zeroes_done) {
      intptr_t zsize = zeroes_needed - zeroes_done;
      // Do some incremental zeroing on rawmem, in parallel with inits.
      zeroes_done = align_size_down(zeroes_done, BytesPerInt);
      rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
                                            zeroes_done, zeroes_needed,
                                            phase);
      zeroes_done = zeroes_needed;
      if (zsize > InitArrayShortSize && ++big_init_gaps > 2)
        do_zeroing = false;   // leave the hole, next time
    }
  }

  // Collect the store and move on:
  st->set_req(MemNode::Memory, inits);
  inits = st;                   // put it on the linearized chain
  set_req(i, zmem);             // unhook from previous position

  if (zeroes_done == st_off)
    zeroes_done = next_init_off;

  assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any");

#ifdef ASSERT
  // Various order invariants.  Weaker than stores_are_sane because
  // a large constant tile can be filled in by smaller non-constant stores.
  assert(st_off >= last_init_off, "inits do not reverse");
  last_init_off = st_off;
  const Type* val = NULL;
  if (st_size >= BytesPerInt &&
      (val = phase->type(st->in(MemNode::ValueIn)))->singleton() &&
      (int)val->basic_type() < (int)T_OBJECT) {
    assert(st_off >= last_tile_end, "tiles do not overlap");
    assert(st_off >= last_init_end, "tiles do not overwrite inits");
    last_tile_end = MAX2(last_tile_end, next_init_off);
  } else {
    intptr_t st_tile_end = align_size_up(next_init_off, BytesPerLong);
    assert(st_tile_end >= last_tile_end, "inits stay with tiles");
    assert(st_off >= last_init_end, "inits do not overlap");
    last_init_end = next_init_off;  // it's a non-tile
  }
#endif //ASSERT
  }

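  // Each captured store has been unhooked from its input slot above (replaced
  // by zmem) and re-threaded through its Memory edge onto the 'inits' chain;
  // the head of that chain is appended here as a fresh input of this Initialize.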
  remove_extra_zeroes();        // clear out all the zmems left over
  add_req(inits);

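  // When TLABs are pre-zeroed (ZeroTLAB), freshly allocated memory is already
  // all zero, so no trailing clear is needed; otherwise finish the zeroing now.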
  if (!(UseTLAB && ZeroTLAB)) {
    // If anything remains to be zeroed, zero it all now.
    zeroes_done = align_size_down(zeroes_done, BytesPerInt);
    // if it is the last unused 4 bytes of an instance, forget about it
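    // (For a non-array instance, the layout helper encodes the exact instance
    //  size in bytes; if zeroing has already reached that size, the word that
    //  merely pads the allocation up to an 8-byte boundary can be skipped.)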
    intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint);
    if (zeroes_done + BytesPerLong >= size_limit) {
      AllocateNode* alloc = allocation();
      assert(alloc != NULL, "must be present");
      if (alloc != NULL && alloc->Opcode() == Op_Allocate) {
        Node* klass_node = alloc->in(AllocateNode::KlassNode);
        ciKlass* k = phase->type(klass_node)->is_klassptr()->klass();
        if (zeroes_done == k->layout_helper())
          zeroes_done = size_limit;
      }
    }
    if (zeroes_done < size_limit) {
      rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
                                            zeroes_done, size_in_bytes, phase);
    }
  }

  set_complete(phase);
  return rawmem;
}