hg-jdk Udiff src/hotspot/share/opto/chaitin.cpp

src/hotspot/share/opto/chaitin.cpp

rev 60615 : 8231441: Initial SVE backend support
Reviewed-by: adinn, pli
Contributed-by: joshua.zhu@arm.com, yang.zhang@arm.com, ningsheng.jian@arm.com

@@ -75,10 +75,11 @@
   tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());
   // Flags
   if( _is_oop ) tty->print("Oop ");
   if( _is_float ) tty->print("Float ");
   if( _is_vector ) tty->print("Vector ");
+  if( _is_scalable ) tty->print("Scalable ");
   if( _was_spilled1 ) tty->print("Spilled ");
   if( _was_spilled2 ) tty->print("Spilled2 ");
   if( _direct_conflict ) tty->print("Direct_conflict ");
   if( _fat_proj ) tty->print("Fat ");
   if( _was_lo ) tty->print("Lo ");

@@ -642,11 +643,19 @@
       } else {                  // Must be a register-set
         if (!lrg._fat_proj) {   // Must be aligned adjacent register set
           // Live ranges record the highest register in their mask.
           // We want the low register for the AD file writer's convenience.
           OptoReg::Name hi = lrg.reg(); // Get hi register
-          OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
+          int num_regs = lrg.num_regs();
+          if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
+            // For scalable vector registers, when they are allocated in physical
+            // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable
+            // vector. If they are allocated on stack, we need to get the actual
+            // num_regs, which reflects the physical length of scalable registers.
+            num_regs = lrg.scalable_reg_slots();
+          }
+          OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
           // We have to use pair [lo,lo+1] even for wide vectors because
           // the rest of code generation works only with pairs. It is safe
           // since for registers encoding only 'lo' is used.
           // Second reg from pair is used in ScheduleAndBundle on SPARC where
           // vector max size is 8 which corresponds to registers pair.

@@ -800,12 +809,23 @@
                 "oops must be in Op_RegP's" );
 
         // Check for vector live range (only if vector register is used).
         // On SPARC vector uses RegD which could be misaligned so it is not
         // processes as vector in RA.
-        if (RegMask::is_vector(ireg))
+        if (RegMask::is_vector(ireg)) {
           lrg._is_vector = 1;
+          if (ireg == Op_VecA) {
+            assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
+            lrg._is_scalable = 1;
+            // For scalable vector, when it is allocated in physical register,
+            // num_regs is RegMask::SlotsPerVecA for reg mask,
+            // which may not be the actual physical register size.
+            // If it is allocated in stack, we need to get the actual
+            // physical length of scalable vector register.
+            lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
+          }
+        }
         assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
                "vector must be in vector registers");
 
         // Check for bound register masks
         const RegMask &lrgmask = lrg.mask();

@@ -903,10 +923,17 @@
         case Op_RegFlags:
         case 0:                 // not an ideal register
           lrg.set_num_regs(1);
           lrg.set_reg_pressure(1);
           break;
+        case Op_VecA:
+          assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
+          assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
+          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
+          lrg.set_num_regs(RegMask::SlotsPerVecA);
+          lrg.set_reg_pressure(1);
+          break;
         case Op_VecS:
           assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
           assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
           lrg.set_num_regs(RegMask::SlotsPerVecS);
           lrg.set_reg_pressure(1);

@@ -1303,10 +1330,50 @@
       return true;
   }
   return false;
 }
 
+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
+  int num_regs = lrg.num_regs();
+  OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
+
+  if (lrg.is_scalable()) {
+    // a physical register is found
+    if (chunk == 0 && OptoReg::is_reg(assigned)) {
+      return assigned;
+    }
+
+    // find available stack slots for scalable register
+    if (lrg._is_vector) {
+      num_regs = lrg.scalable_reg_slots();
+      // if actual scalable vector register is exactly SlotsPerVecA * 32 bits
+      if (num_regs == RegMask::SlotsPerVecA) {
+        return assigned;
+      }
+
+      // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it
+      // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits
+      // instead of SlotsPerVecA bits.
+      assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg
+      while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
+        // Verify the found reg has scalable_reg_slots() bits set.
+        if (mask.is_valid_reg(assigned, num_regs)) {
+          return assigned;
+        } else {
+          // Remove more for each iteration
+          mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg
+          mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits
+          assigned = mask.find_first_set(lrg, num_regs);
+        }
+      }
+      return OptoReg::Bad; // will cause chunk change, and retry next chunk
+    }
+  }
+
+  return assigned;
+}
+
 // Choose a color using the biasing heuristic
 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
 
   // Check for "at_risk" LRG's
   uint risk_lrg = _lrg_map.find(lrg._risk_bias);

@@ -1336,20 +1403,20 @@
     } else if( chunk == 0 ) {
       // Choose a color which is legal for him
       RegMask tempmask = lrg.mask();
       tempmask.AND(lrgs(copy_lrg).mask());
       tempmask.clear_to_sets(lrg.num_regs());
-      OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+      OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
       if (OptoReg::is_valid(reg))
         return reg;
     }
   }
 
   // If no bias info exists, just go with the register selection ordering
   if (lrg._is_vector || lrg.num_regs() == 2) {
     // Find an aligned set
-    return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
+    return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
   }
 
   // CNC - Fun hack.  Alternate 1st and 2nd selection.  Enables post-allocate
   // copy removal to remove many more copies, by preventing a just-assigned
   // register from being repeatedly assigned.

@@ -1400,11 +1467,10 @@
     // Pull next LRG from the simplified list - in reverse order of removal
     uint lidx = _simplified;
     LRG *lrg = &lrgs(lidx);
     _simplified = lrg->_next;
 
-
 #ifndef PRODUCT
     if (trace_spilling()) {
       ttyLocker ttyl;
       tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
                     lrg->degrees_of_freedom());

@@ -1482,11 +1548,10 @@
     // a chunk-rollover event
     if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {
       // Bump register mask up to next stack chunk
       chunk += RegMask::CHUNK_SIZE;
       lrg->Set_All();
-
       goto retry_next_chunk;
     }
 
     //---------------
     // Did we get a color?

@@ -1507,16 +1572,25 @@
       // to make.  In this case, the mask has more bits in it than the colors
       // chosen.  Restrict the mask to just what was picked.
       int n_regs = lrg->num_regs();
       assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
       if (n_regs == 1 || !lrg->_fat_proj) {
+        if (Matcher::supports_scalable_vector()) {
+          assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
+        } else {
         assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
+        }
         lrg->Clear();           // Clear the mask
         lrg->Insert(reg);       // Set regmask to match selected reg
         // For vectors and pairs, also insert the low bit of the pair
-        for (int i = 1; i < n_regs; i++)
+        // We always choose the high bit, then mask the low bits by register size
+        if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
+          n_regs = lrg->scalable_reg_slots();
+        }
+        for (int i = 1; i < n_regs; i++) {
           lrg->Insert(OptoReg::add(reg,-i));
+        }
         lrg->set_mask_size(n_regs);
       } else {                  // Else fatproj
         // mask must be equal to fatproj bits, by definition
       }
 #ifndef PRODUCT

< prev index next >