Udiff src/hotspot/share/opto/chaitin.cpp

src/hotspot/share/opto/chaitin.cpp

@@ -75,10 +75,11 @@
    tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());
    // Flags
    if( _is_oop ) tty->print("Oop ");
    if( _is_float ) tty->print("Float ");
    if( _is_vector ) tty->print("Vector ");
+   if( _is_scalable ) tty->print("Scalable ");
    if( _was_spilled1 ) tty->print("Spilled ");
    if( _was_spilled2 ) tty->print("Spilled2 ");
    if( _direct_conflict ) tty->print("Direct_conflict ");
    if( _fat_proj ) tty->print("Fat ");
    if( _was_lo ) tty->print("Lo ");

@@ -642,11 +643,19 @@
        } else {                  // Must be a register-set
          if (!lrg._fat_proj) {   // Must be aligned adjacent register set
            // Live ranges record the highest register in their mask.
            // We want the low register for the AD file writer's convenience.
            OptoReg::Name hi = lrg.reg(); // Get hi register
-           OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
+           int num_regs = lrg.num_regs();
+           if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
+             // For scalable vector registers, when they are allocated in physical
+             // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable
+             // vector. If they are allocated on stack, we need to get the actual
+             // num_regs, which reflects the physical length of scalable registers.
+             num_regs = lrg.scalable_reg_slots();
+           }
+           OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
            // We have to use pair [lo,lo+1] even for wide vectors because
            // the rest of code generation works only with pairs. It is safe
            // since for registers encoding only 'lo' is used.
            // Second reg from pair is used in ScheduleAndBundle on SPARC where
            // vector max size is 8 which corresponds to registers pair.

@@ -800,12 +809,23 @@
                  "oops must be in Op_RegP's" );
  
          // Check for vector live range (only if vector register is used).
          // On SPARC vector uses RegD which could be misaligned so it is not
          // processes as vector in RA.
-         if (RegMask::is_vector(ireg))
+         if (RegMask::is_vector(ireg)) {
            lrg._is_vector = 1;
+           if (ireg == Op_VecA) {
+             assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
+             lrg._is_scalable = 1;
+             // For scalable vector, when it is allocated in physical register,
+             // num_regs is RegMask::SlotsPerVecA for reg mask,
+             // which may not be the actual physical register size.
+             // If it is allocated in stack, we need to get the actual
+             // physical length of scalable vector register.
+             lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
+           }
+         }
          assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
                 "vector must be in vector registers");
  
          // Check for bound register masks
          const RegMask &lrgmask = lrg.mask();

@@ -903,10 +923,17 @@
          case Op_RegFlags:
          case 0:                 // not an ideal register
            lrg.set_num_regs(1);
            lrg.set_reg_pressure(1);
            break;
+         case Op_VecA:
+           assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+           assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
+           assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
+           lrg.set_num_regs(RegMask::SlotsPerVecA);
+           lrg.set_reg_pressure(1);
+           break;
          case Op_VecS:
            assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
            assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
            lrg.set_num_regs(RegMask::SlotsPerVecS);
            lrg.set_reg_pressure(1);

@@ -1303,10 +1330,50 @@
        return true;
    }
    return false;
  }
  
+ // Pick a register for 'lrg' out of 'mask' (a by-value copy, so edits are local).
+ // A scalable vector that is not given a physical register needs
+ // scalable_reg_slots() adjacent slots instead of the SlotsPerVecA in the mask.
+ static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
+   const OptoReg::Name first_fit = mask.find_first_set(lrg, lrg.num_regs());
+ 
+   // Non-scalable live ranges simply take what the mask offers.
+   if (!lrg.is_scalable()) {
+     return first_fit;
+   }
+   // a physical register is found
+   if (chunk == 0 && OptoReg::is_reg(first_fit)) {
+     return first_fit;
+   }
+   if (!lrg._is_vector) {
+     return first_fit;
+   }
+ 
+   // find available stack slots for scalable register
+   const int slots = lrg.scalable_reg_slots();
+   if (slots == RegMask::SlotsPerVecA) {
+     return first_fit; // actual scalable vector register is exactly SlotsPerVecA * 32 bits
+   }
+ 
+   // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but
+   // that does not work for the scalable size. Probe candidates (highest valid reg)
+   // until one has scalable_reg_slots() adjacent bits set instead of SlotsPerVecA.
+   for (OptoReg::Name hi = mask.find_first_set(lrg, slots);
+        OptoReg::is_valid(hi) && RegMask::can_represent(hi);
+        hi = mask.find_first_set(lrg, slots)) {
+     if (mask.is_valid_reg(hi, slots)) {
+       return hi;
+     }
+     // Candidate rejected: unmask its lowest reg, then re-align by SlotsPerVecA bits.
+     mask.Remove(hi - slots + 1);
+     mask.clear_to_sets(RegMask::SlotsPerVecA);
+   }
+   return OptoReg::Bad; // will cause chunk change, and retry next chunk
+ }
+ 
  // Choose a color using the biasing heuristic
  OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
  
    // Check for "at_risk" LRG's
    uint risk_lrg = _lrg_map.find(lrg._risk_bias);

@@ -1336,20 +1403,20 @@
      } else if( chunk == 0 ) {
        // Choose a color which is legal for him
        RegMask tempmask = lrg.mask();
        tempmask.AND(lrgs(copy_lrg).mask());
        tempmask.clear_to_sets(lrg.num_regs());
-       OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+       OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
        if (OptoReg::is_valid(reg))
          return reg;
      }
    }
  
    // If no bias info exists, just go with the register selection ordering
    if (lrg._is_vector || lrg.num_regs() == 2) {
      // Find an aligned set
-     return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
+     return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
    }
  
    // CNC - Fun hack.  Alternate 1st and 2nd selection.  Enables post-allocate
    // copy removal to remove many more copies, by preventing a just-assigned
    // register from being repeatedly assigned.

@@ -1400,11 +1467,10 @@
      // Pull next LRG from the simplified list - in reverse order of removal
      uint lidx = _simplified;
      LRG *lrg = &lrgs(lidx);
      _simplified = lrg->_next;
  
- 
  #ifndef PRODUCT
      if (trace_spilling()) {
        ttyLocker ttyl;
        tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
                      lrg->degrees_of_freedom());

@@ -1482,11 +1548,10 @@
      // a chunk-rollover event
      if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {
        // Bump register mask up to next stack chunk
        chunk += RegMask::CHUNK_SIZE;
        lrg->Set_All();
- 
        goto retry_next_chunk;
      }
  
      //---------------
      // Did we get a color?

@@ -1507,16 +1572,25 @@
        // to make.  In this case, the mask has more bits in it than the colors
        // chosen.  Restrict the mask to just what was picked.
        int n_regs = lrg->num_regs();
        assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
        if (n_regs == 1 || !lrg->_fat_proj) {
-         assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+         if (Matcher::supports_scalable_vector()) {
+           assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
+         } else {
+           assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+         }
          lrg->Clear();           // Clear the mask
          lrg->Insert(reg);       // Set regmask to match selected reg
          // For vectors and pairs, also insert the low bit of the pair
-         for (int i = 1; i < n_regs; i++)
+         // We always choose the high bit, then mask the low bits by register size
+         if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
+           n_regs = lrg->scalable_reg_slots();
+         }
+         for (int i = 1; i < n_regs; i++) {
            lrg->Insert(OptoReg::add(reg,-i));
+         }
          lrg->set_mask_size(n_regs);
        } else {                  // Else fatproj
          // mask must be equal to fatproj bits, by definition
        }
  #ifndef PRODUCT

< prev index next >