src/share/vm/opto/memnode.cpp

3542 //   rawoop = alloc.RawAddress
3543 //   tile12 = 0x00010002
3544 //   rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12)
3545 //   init = (Initialize alloc.Control alloc.Memory rawoop rawstore12)
3546 //
3547 void
3548 InitializeNode::coalesce_subword_stores(intptr_t header_size,
3549                                         Node* size_in_bytes,
3550                                         PhaseGVN* phase) {
3551   Compile* C = phase->C;
3552 
3553   assert(stores_are_sane(phase), "");
3554   // Note:  After this pass, they are not completely sane,
3555   // since there may be some overlaps.
3556 
3557   int old_subword = 0, old_long = 0, new_int = 0, new_long = 0;
3558 
3559   intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
3560   intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit);
3561   size_limit = MIN2(size_limit, ti_limit);
3562   size_limit = align_size_up(size_limit, BytesPerLong);
3563   int num_tiles = size_limit / BytesPerLong;
3564 
3565   // allocate space for the tile map:
3566   const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small
3567   jlong  tiles_buf[small_len];
3568   Node*  nodes_buf[small_len];
3569   jlong  inits_buf[small_len];
3570   jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0]
3571                   : NEW_RESOURCE_ARRAY(jlong, num_tiles));
3572   Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0]
3573                   : NEW_RESOURCE_ARRAY(Node*, num_tiles));
3574   jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0]
3575                   : NEW_RESOURCE_ARRAY(jlong, num_tiles));
3576   // tiles: exact bitwise model of all primitive constants
3577   // nodes: last constant-storing node subsumed into the tiles model
3578   // inits: which bytes (in each tile) are touched by any initializations
3579 
3580   //// Pass A: Fill in the tile model with any relevant stores.
3581 
3582   Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles);
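
The header comment's example above (two 16-bit constants at offsets 12 and 14 coalescing into tile12 = 0x00010002) can be reproduced with a few lines of plain C++. The sketch below only illustrates the tile/inits bookkeeping that Pass A builds up, under an assumed little-endian host; the type TileSketch and the helper record_subword_constant are made up for the sketch and are not the HotSpot data structures:

    #include <cstdint>
    #include <cstring>
    #include <cassert>

    // Illustrative only: models one 8-byte "tile" of the tiles[]/inits[] arrays.
    struct TileSketch {
      uint64_t tile;    // bitwise image of the constant bytes seen so far
      uint64_t inits;   // 0xff in every byte position touched by a store

      // Record a constant store of 'size' bytes at byte offset 'off' within this
      // tile.  Assumes a little-endian host purely to keep the sketch short.
      void record_subword_constant(int off, int size, uint64_t con) {
        assert(off >= 0 && size > 0 && off + size <= 8);
        std::memcpy(reinterpret_cast<char*>(&tile) + off, &con, size);
        uint64_t mask = (size == 8) ? ~uint64_t(0) : ((uint64_t(1) << (size * 8)) - 1);
        inits |= mask << (off * 8);
      }
    };

    int main() {
      // The two short constants from the header comment: value 2 at offset 12 and
      // value 1 at offset 14, i.e. offsets 4 and 6 of the tile covering bytes 8..15.
      TileSketch t = { 0, 0 };
      t.record_subword_constant(4, 2, 2);
      t.record_subword_constant(6, 2, 1);

      // Read back the int at tile offset 4 (object offset 12): it is the coalesced
      // constant 0x00010002, matching "tile12 = 0x00010002" above.
      uint32_t word;
      std::memcpy(&word, reinterpret_cast<char*>(&t.tile) + 4, sizeof(word));
      assert(word == 0x00010002);
      return 0;
    }
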


3773 // are no following full-word stores, or if something is fishy, return
3774 // a negative value.
3775 intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) {
3776   int       int_map = 0;
3777   intptr_t  int_map_off = 0;
3778   const int FULL_MAP = right_n_bits(BytesPerInt);  // the int_map we hope for
3779 
3780   for (uint i = start, limit = req(); i < limit; i++) {
3781     Node* st = in(i);
3782 
3783     intptr_t st_off = get_store_offset(st, phase);
3784     if (st_off < 0)  break;  // return conservative answer
3785 
3786     int st_size = st->as_Store()->memory_size();
3787     if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) {
3788       return st_off;            // we found a complete word init
3789     }
3790 
3791     // update the map:
3792 
3793     intptr_t this_int_off = align_size_down(st_off, BytesPerInt);
3794     if (this_int_off != int_map_off) {
3795       // reset the map:
3796       int_map = 0;
3797       int_map_off = this_int_off;
3798     }
3799 
3800     int subword_off = st_off - this_int_off;
3801     int_map |= right_n_bits(st_size) << subword_off;
3802     if ((int_map & FULL_MAP) == FULL_MAP) {
3803       return this_int_off;      // we found a complete word init
3804     }
3805 
3806     // Did this store hit or cross the word boundary?
3807     intptr_t next_int_off = align_size_down(st_off + st_size, BytesPerInt);
3808     if (next_int_off == this_int_off + BytesPerInt) {
3809       // We passed the current int, without fully initializing it.
3810       int_map_off = next_int_off;
3811       int_map >>= BytesPerInt;
3812     } else if (next_int_off > this_int_off + BytesPerInt) {
3813       // We passed the current and next int.
3814       return this_int_off + BytesPerInt;
3815     }
3816   }
3817 
3818   return -1;
3819 }
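// A stripped-down, standalone sketch of the byte-coverage map used above: one
// bit per byte of the current 4-byte word, OR-ed in as subword stores arrive
// in ascending offset order.  The names (first_full_word, right_n_bits_sketch)
// and plain types are made up for the sketch; the real function above also
// handles word-sized stores and stores that cross the word boundary.
//
//   #include <cstddef>
//   #include <cassert>
//
//   static const long kBytesPerInt = 4;
//   static int right_n_bits_sketch(int n) { return (1 << n) - 1; }   // n small
//
//   struct SubwordStore { long off; int size; };    // byte offset and size (< 4)
//
//   // Returns the offset of the first 4-byte word completely covered by the
//   // stores, or -1 if no word ever becomes fully initialized.
//   static long first_full_word(const SubwordStore* st, std::size_t n) {
//     int  map     = 0;
//     long map_off = 0;
//     const int FULL = right_n_bits_sketch(kBytesPerInt);            // 0b1111
//     for (std::size_t i = 0; i < n; i++) {
//       long word_off = st[i].off & ~(kBytesPerInt - 1);             // align down
//       if (word_off != map_off) { map = 0; map_off = word_off; }    // new word
//       map |= right_n_bits_sketch(st[i].size) << (st[i].off - word_off);
//       if ((map & FULL) == FULL) return word_off;                   // complete
//     }
//     return -1;
//   }
//
//   int main() {
//     // The four byte stores of byte[] a = { p,q,r,s } at offsets 12..15: the
//     // word at 12 becomes fully covered, matching the "next_full_s." entry
//     // for p in the table further below.
//     SubwordStore a[] = { {12,1}, {13,1}, {14,1}, {15,1} };
//     assert(first_full_word(a, 4) == 12);
//     return 0;
//   }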
3820 
3821 
3822 // Called when the associated AllocateNode is expanded into CFG.
3823 // At this point, we may perform additional optimizations.
3824 // Linearize the stores by ascending offset, to make memory
3825 // activity as coherent as possible.
3826 Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
3827                                       intptr_t header_size,


3877         //   byte[] a = { p,q,r,s }  =>  a[0]=p,a[1]=q,a[2]=r,a[3]=s
3878         //   byte[] a = { x,y,0,0 }  =>  a[0..3] = 0, a[0]=x,a[1]=y
3879         //   byte[] a = { 0,0,z,0 }  =>  a[0..3] = 0, a[2]=z
3880         //
3881         // Note:  coalesce_subword_stores may have already done this,
3882         // if it was prompted by constant non-zero subword initializers.
3883         // But this case can still arise with non-constant stores.
3884 
3885         intptr_t next_full_store = find_next_fullword_store(i, phase);
3886 
3887         // In the examples above:
3888         //   in(i)          p   q   r   s     x   y     z
3889         //   st_off        12  13  14  15    12  13    14
3890         //   st_size        1   1   1   1     1   1     1
3891         //   next_full_s.  12  16  16  16    16  16    16
3892         //   z's_done      12  16  16  16    12  16    12
3893         //   z's_needed    12  16  16  16    16  16    16
3894         //   zsize          0   0   0   0     4   0     4
3895         if (next_full_store < 0) {
3896           // Conservative tack:  Zero to end of current word.
3897           zeroes_needed = align_size_up(zeroes_needed, BytesPerInt);
3898         } else {
3899           // Zero to beginning of next fully initialized word.
3900           // Or, don't zero at all, if we are already in that word.
3901           assert(next_full_store >= zeroes_needed, "must go forward");
3902           assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary");
3903           zeroes_needed = next_full_store;
3904         }
3905       }
3906 
3907       if (zeroes_needed > zeroes_done) {
3908         intptr_t zsize = zeroes_needed - zeroes_done;
3909         // Do some incremental zeroing on rawmem, in parallel with inits.
3910         zeroes_done = align_size_down(zeroes_done, BytesPerInt);
3911         rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
3912                                               zeroes_done, zeroes_needed,
3913                                               phase);
3914         zeroes_done = zeroes_needed;
3915         if (zsize > InitArrayShortSize && ++big_init_gaps > 2)
3916           do_zeroing = false;   // leave the hole, next time
3917       }
3918     }
3919 
3920     // Collect the store and move on:
3921     st->set_req(MemNode::Memory, inits);
3922     inits = st;                 // put it on the linearized chain
3923     set_req(i, zmem);           // unhook from previous position
3924 
3925     if (zeroes_done == st_off)
3926       zeroes_done = next_init_off;
3927 
3928     assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any");
3929 
3930     #ifdef ASSERT
3931     // Various order invariants.  Weaker than stores_are_sane because
3932     // a large constant tile can be filled in by smaller non-constant stores.
3933     assert(st_off >= last_init_off, "inits do not reverse");
3934     last_init_off = st_off;
3935     const Type* val = NULL;
3936     if (st_size >= BytesPerInt &&
3937         (val = phase->type(st->in(MemNode::ValueIn)))->singleton() &&
3938         (int)val->basic_type() < (int)T_OBJECT) {
3939       assert(st_off >= last_tile_end, "tiles do not overlap");
3940       assert(st_off >= last_init_end, "tiles do not overwrite inits");
3941       last_tile_end = MAX2(last_tile_end, next_init_off);
3942     } else {
3943       intptr_t st_tile_end = align_size_up(next_init_off, BytesPerLong);
3944       assert(st_tile_end >= last_tile_end, "inits stay with tiles");
3945       assert(st_off      >= last_init_end, "inits do not overlap");
3946       last_init_end = next_init_off;  // it's a non-tile
3947     }
3948     #endif //ASSERT
3949   }
3950 
3951   remove_extra_zeroes();        // clear out all the zmems left over
3952   add_req(inits);
3953 
3954   if (!(UseTLAB && ZeroTLAB)) {
3955     // If anything remains to be zeroed, zero it all now.
3956     zeroes_done = align_size_down(zeroes_done, BytesPerInt);
3957     // if it is the last unused 4 bytes of an instance, forget about it
3958     intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint);
3959     if (zeroes_done + BytesPerLong >= size_limit) {
3960       AllocateNode* alloc = allocation();
3961       assert(alloc != NULL, "must be present");
3962       if (alloc != NULL && alloc->Opcode() == Op_Allocate) {
3963         Node* klass_node = alloc->in(AllocateNode::KlassNode);
3964         ciKlass* k = phase->type(klass_node)->is_klassptr()->klass();
3965         if (zeroes_done == k->layout_helper())
3966           zeroes_done = size_limit;
3967       }
3968     }
3969     if (zeroes_done < size_limit) {
3970       rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
3971                                             zeroes_done, size_in_bytes, phase);
3972     }
3973   }
3974 
3975   set_complete(phase);
3976   return rawmem;
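
The worked table above (the x,y columns) can be replayed with a little arithmetic. The sketch below models only the zeroes_done / zeroes_needed bookkeeping for the byte[] a = { x,y,0,0 } case, feeding it the next_full_s. and z's_done values straight from the table; the InitStore struct and the starting offset 12 are assumptions made to keep it self-contained, and clear_memory is reduced to a comment:

    #include <cassert>

    static const long kBytesPerInt = 4;
    static long align_up_sketch(long x, long a)   { return (x + a - 1) & ~(a - 1); }
    static long align_down_sketch(long x, long a) { return x & ~(a - 1); }

    // One subword init store, as complete_stores() would see it.
    struct InitStore {
      long st_off;           // where the store writes
      long next_init_off;    // st_off + st_size
      long next_full_store;  // find_next_fullword_store(), taken from the table
    };

    int main() {
      // byte[] a = { x,y,0,0 }: non-constant x at 12 and y at 13.
      InitStore stores[]         = { { 12, 13, 16 }, { 13, 14, 16 } };
      long      expected_zsize[] = { 4, 0 };        // the "zsize" row of the table

      long zeroes_done = 12;                        // "z's_done" before x
      for (int i = 0; i < 2; i++) {
        long zeroes_needed = stores[i].next_init_off;
        if (stores[i].next_full_store < 0) {
          zeroes_needed = align_up_sketch(zeroes_needed, kBytesPerInt); // end of word
        } else {
          zeroes_needed = stores[i].next_full_store; // start of next full word
        }
        long zsize = (zeroes_needed > zeroes_done) ? zeroes_needed - zeroes_done : 0;
        assert(zsize == expected_zsize[i]);
        if (zeroes_needed > zeroes_done) {
          zeroes_done = align_down_sketch(zeroes_done, kBytesPerInt);
          // ClearArrayNode::clear_memory(..., zeroes_done, zeroes_needed, ...) here
          zeroes_done = zeroes_needed;
        }
      }
      return 0;
    }
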




3542 //   rawoop = alloc.RawAddress
3543 //   tile12 = 0x00010002
3544 //   rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12)
3545 //   init = (Initialize alloc.Control alloc.Memory rawoop rawstore12)
3546 //
3547 void
3548 InitializeNode::coalesce_subword_stores(intptr_t header_size,
3549                                         Node* size_in_bytes,
3550                                         PhaseGVN* phase) {
3551   Compile* C = phase->C;
3552 
3553   assert(stores_are_sane(phase), "");
3554   // Note:  After this pass, they are not completely sane,
3555   // since there may be some overlaps.
3556 
3557   int old_subword = 0, old_long = 0, new_int = 0, new_long = 0;
3558 
3559   intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
3560   intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit);
3561   size_limit = MIN2(size_limit, ti_limit);
3562   size_limit = align_up(size_limit, BytesPerLong);
3563   int num_tiles = size_limit / BytesPerLong;
3564 
3565   // allocate space for the tile map:
3566   const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small
3567   jlong  tiles_buf[small_len];
3568   Node*  nodes_buf[small_len];
3569   jlong  inits_buf[small_len];
3570   jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0]
3571                   : NEW_RESOURCE_ARRAY(jlong, num_tiles));
3572   Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0]
3573                   : NEW_RESOURCE_ARRAY(Node*, num_tiles));
3574   jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0]
3575                   : NEW_RESOURCE_ARRAY(jlong, num_tiles));
3576   // tiles: exact bitwise model of all primitive constants
3577   // nodes: last constant-storing node subsumed into the tiles model
3578   // inits: which bytes (in each tile) are touched by any initializations
3579 
3580   //// Pass A: Fill in the tile model with any relevant stores.
3581 
3582   Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles);


3773 // are no following full-word stores, or if something is fishy, return
3774 // a negative value.
3775 intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) {
3776   int       int_map = 0;
3777   intptr_t  int_map_off = 0;
3778   const int FULL_MAP = right_n_bits(BytesPerInt);  // the int_map we hope for
3779 
3780   for (uint i = start, limit = req(); i < limit; i++) {
3781     Node* st = in(i);
3782 
3783     intptr_t st_off = get_store_offset(st, phase);
3784     if (st_off < 0)  break;  // return conservative answer
3785 
3786     int st_size = st->as_Store()->memory_size();
3787     if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) {
3788       return st_off;            // we found a complete word init
3789     }
3790 
3791     // update the map:
3792 
3793     intptr_t this_int_off = align_down(st_off, BytesPerInt);
3794     if (this_int_off != int_map_off) {
3795       // reset the map:
3796       int_map = 0;
3797       int_map_off = this_int_off;
3798     }
3799 
3800     int subword_off = st_off - this_int_off;
3801     int_map |= right_n_bits(st_size) << subword_off;
3802     if ((int_map & FULL_MAP) == FULL_MAP) {
3803       return this_int_off;      // we found a complete word init
3804     }
3805 
3806     // Did this store hit or cross the word boundary?
3807     intptr_t next_int_off = align_down(st_off + st_size, BytesPerInt);
3808     if (next_int_off == this_int_off + BytesPerInt) {
3809       // We passed the current int, without fully initializing it.
3810       int_map_off = next_int_off;
3811       int_map >>= BytesPerInt;
3812     } else if (next_int_off > this_int_off + BytesPerInt) {
3813       // We passed the current and next int.
3814       return this_int_off + BytesPerInt;
3815     }
3816   }
3817 
3818   return -1;
3819 }
3820 
3821 
3822 // Called when the associated AllocateNode is expanded into CFG.
3823 // At this point, we may perform additional optimizations.
3824 // Linearize the stores by ascending offset, to make memory
3825 // activity as coherent as possible.
3826 Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
3827                                       intptr_t header_size,


3877         //   byte[] a = { p,q,r,s }  =>  a[0]=p,a[1]=q,a[2]=r,a[3]=s
3878         //   byte[] a = { x,y,0,0 }  =>  a[0..3] = 0, a[0]=x,a[1]=y
3879         //   byte[] a = { 0,0,z,0 }  =>  a[0..3] = 0, a[2]=z
3880         //
3881         // Note:  coalesce_subword_stores may have already done this,
3882         // if it was prompted by constant non-zero subword initializers.
3883         // But this case can still arise with non-constant stores.
3884 
3885         intptr_t next_full_store = find_next_fullword_store(i, phase);
3886 
3887         // In the examples above:
3888         //   in(i)          p   q   r   s     x   y     z
3889         //   st_off        12  13  14  15    12  13    14
3890         //   st_size        1   1   1   1     1   1     1
3891         //   next_full_s.  12  16  16  16    16  16    16
3892         //   z's_done      12  16  16  16    12  16    12
3893         //   z's_needed    12  16  16  16    16  16    16
3894         //   zsize          0   0   0   0     4   0     4
3895         if (next_full_store < 0) {
3896           // Conservative tack:  Zero to end of current word.
3897           zeroes_needed = align_up(zeroes_needed, BytesPerInt);
3898         } else {
3899           // Zero to beginning of next fully initialized word.
3900           // Or, don't zero at all, if we are already in that word.
3901           assert(next_full_store >= zeroes_needed, "must go forward");
3902           assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary");
3903           zeroes_needed = next_full_store;
3904         }
3905       }
3906 
3907       if (zeroes_needed > zeroes_done) {
3908         intptr_t zsize = zeroes_needed - zeroes_done;
3909         // Do some incremental zeroing on rawmem, in parallel with inits.
3910         zeroes_done = align_down(zeroes_done, BytesPerInt);
3911         rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
3912                                               zeroes_done, zeroes_needed,
3913                                               phase);
3914         zeroes_done = zeroes_needed;
3915         if (zsize > InitArrayShortSize && ++big_init_gaps > 2)
3916           do_zeroing = false;   // leave the hole, next time
3917       }
3918     }
3919 
3920     // Collect the store and move on:
3921     st->set_req(MemNode::Memory, inits);
3922     inits = st;                 // put it on the linearized chain
3923     set_req(i, zmem);           // unhook from previous position
3924 
3925     if (zeroes_done == st_off)
3926       zeroes_done = next_init_off;
3927 
3928     assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any");
3929 
3930     #ifdef ASSERT
3931     // Various order invariants.  Weaker than stores_are_sane because
3932     // a large constant tile can be filled in by smaller non-constant stores.
3933     assert(st_off >= last_init_off, "inits do not reverse");
3934     last_init_off = st_off;
3935     const Type* val = NULL;
3936     if (st_size >= BytesPerInt &&
3937         (val = phase->type(st->in(MemNode::ValueIn)))->singleton() &&
3938         (int)val->basic_type() < (int)T_OBJECT) {
3939       assert(st_off >= last_tile_end, "tiles do not overlap");
3940       assert(st_off >= last_init_end, "tiles do not overwrite inits");
3941       last_tile_end = MAX2(last_tile_end, next_init_off);
3942     } else {
3943       intptr_t st_tile_end = align_up(next_init_off, BytesPerLong);
3944       assert(st_tile_end >= last_tile_end, "inits stay with tiles");
3945       assert(st_off      >= last_init_end, "inits do not overlap");
3946       last_init_end = next_init_off;  // it's a non-tile
3947     }
3948     #endif //ASSERT
3949   }
3950 
3951   remove_extra_zeroes();        // clear out all the zmems left over
3952   add_req(inits);
3953 
3954   if (!(UseTLAB && ZeroTLAB)) {
3955     // If anything remains to be zeroed, zero it all now.
3956     zeroes_done = align_down(zeroes_done, BytesPerInt);
3957     // if it is the last unused 4 bytes of an instance, forget about it
3958     intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint);
3959     if (zeroes_done + BytesPerLong >= size_limit) {
3960       AllocateNode* alloc = allocation();
3961       assert(alloc != NULL, "must be present");
3962       if (alloc != NULL && alloc->Opcode() == Op_Allocate) {
3963         Node* klass_node = alloc->in(AllocateNode::KlassNode);
3964         ciKlass* k = phase->type(klass_node)->is_klassptr()->klass();
3965         if (zeroes_done == k->layout_helper())
3966           zeroes_done = size_limit;
3967       }
3968     }
3969     if (zeroes_done < size_limit) {
3970       rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
3971                                             zeroes_done, size_in_bytes, phase);
3972     }
3973   }
3974 
3975   set_complete(phase);
3976   return rawmem;
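
Finally, a sketch of the trailing-zeroing decision at the end of complete_stores, shown immediately above: skip it entirely when TLAB memory is pre-zeroed, otherwise clear from the (int-aligned) zeroes_done watermark up to the object size, except when only the final padding word of an instance remains. The name trailing_clear, the plain long types, and the illustrative sizes (a 20-byte instance padded to 24) are assumptions for the sketch; instance_size stands in for what the real code reads from the klass layout_helper:

    #include <cassert>
    #include <utility>

    static const long kBytesPerInt  = 4;
    static const long kBytesPerLong = 8;
    static long align_down_sketch(long x, long a) { return x & ~(a - 1); }

    // Decide what (if anything) is left to clear after the init stores have been
    // linearized.  Returns {from, to}; an empty range means no trailing clear is
    // emitted.  'tlab_pre_zeroed' models UseTLAB && ZeroTLAB, and 'instance_size'
    // models the layout_helper check for plain instance allocations.
    static std::pair<long, long> trailing_clear(long zeroes_done, long size_limit,
                                                bool tlab_pre_zeroed,
                                                long instance_size) {
      if (tlab_pre_zeroed) return { 0, 0 };            // memory is already zero
      zeroes_done = align_down_sketch(zeroes_done, kBytesPerInt);
      if (zeroes_done + kBytesPerLong >= size_limit && // only the last word is open
          zeroes_done == instance_size) {              // ...and it is just padding
        zeroes_done = size_limit;
      }
      if (zeroes_done < size_limit) return { zeroes_done, size_limit };
      return { 0, 0 };
    }

    int main() {
      // A 20-byte instance padded to 24: once the first 20 bytes are covered,
      // the final 4 bytes of padding need no trailing clear.
      auto r1 = trailing_clear(20, 24, false, 20);
      assert(r1.first == 0 && r1.second == 0);
      // The same object with only 16 bytes covered still gets a trailing clear.
      auto r2 = trailing_clear(16, 24, false, 20);
      assert(r2.first == 16 && r2.second == 24);
      return 0;
    }
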

