diff a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -67,11 +67,11 @@ // // follow the C1 compiler in making registers // // r0-r7,r10-r26 volatile (caller save) // r27-r32 system (no save, no allocate) -// r8-r9 invisible to the allocator (so we can use them as scratch regs) +// r8-r9 non-allocatable (so we can use them as scratch regs) // // as regards Java usage, we don't use any callee save registers // because this makes it difficult to de-optimise a frame (see comment // in x86 implementation of Deoptimization::unwind_callee_save_values) // @@ -92,10 +92,14 @@ reg_def R5_H ( SOC, SOC, Op_RegI, 5, r5->as_VMReg()->next() ); reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() ); reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() ); reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() ); reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() ); +reg_def R8 ( NS, SOC, Op_RegI, 8, r8->as_VMReg() ); // rscratch1, non-allocatable +reg_def R8_H ( NS, SOC, Op_RegI, 8, r8->as_VMReg()->next() ); +reg_def R9 ( NS, SOC, Op_RegI, 9, r9->as_VMReg() ); // rscratch2, non-allocatable +reg_def R9_H ( NS, SOC, Op_RegI, 9, r9->as_VMReg()->next() ); reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() ); reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next()); reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() ); reg_def R11_H ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next()); reg_def R12 ( SOC, SOC, Op_RegI, 12, r12->as_VMReg() ); @@ -138,11 +142,11 @@ reg_def R30_H ( NS, NS, Op_RegI, 30, r30->as_VMReg()->next()); reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); // ---------------------------- -// Float/Double Registers +// Float/Double/Vector Registers // ---------------------------- // Double Registers // The rules of ADL require that double registers be defined in pairs. @@ -159,169 +163,329 @@ // for Java use float registers v0-v15 are always saved on call (whereas // the platform ABI treats v8-v15 as callee save).
float registers // v16-v31 are SOC as per the platform spec - reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); - reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); - reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); - reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); - - reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); - reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); - reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); - reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); - - reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); - reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); - reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); - reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); - - reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); - reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); - reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); - reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); - - reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); - reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); - reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); - reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); - - reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); - reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); - reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); - reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); - - reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); - reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); - reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); - reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); - - reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); - reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); - reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); - reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); - - reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); - reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); - reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); - reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); - - reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); - reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); - reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); - reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); - - reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); - reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); - reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); - reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); - - reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); - reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); - reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); - reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); - - reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); - reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); - reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); - reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); - - reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); - reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); - reg_def V13_J( SOC, SOC, Op_RegF, 13, 
v13->as_VMReg()->next(2)); - reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); - - reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); - reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); - reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); - reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); - - reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); - reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); - reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); - reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); - - reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); - reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); - reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); - reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); - - reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); - reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); - reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); - reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); - - reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); - reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); - reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); - reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); - - reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); - reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); - reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); - reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); - - reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); - reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); - reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); - reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); - - reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); - reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); - reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); - reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); - - reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); - reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); - reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); - reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); - - reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); - reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); - reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); - reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); - - reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); - reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); - reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); - reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); - - reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); - reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); - reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); - reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); - - reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); - reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); - reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); - reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3)); - - reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); - 
reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); - reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2)); - reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3)); - - reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); - reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); - reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2)); - reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3)); - - reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); - reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); - reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2)); - reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3)); - - reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); - reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); - reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2)); - reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3)); - - reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); - reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); - reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2)); - reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3)); +// For SVE vector registers, we simply extend the vector register size to 8 +// slots. A vector register using only its lower 4 slots denotes a 128-bit +// NEON vector register, while a vector register using all 8 slots denotes an +// SVE scalable vector register with a vector size >= 128 bits (128 to 2048 +// bits, in multiples of 128 bits). A 128-bit SVE vector register also has 8 +// slots, but its actual size is 128 bits, the same as a NEON vector register. +// Since the real SVE vector register size can be detected during JIT +// compilation, the register allocator is able to do the right thing with the +// real register size, e.g. for spilling/unspilling.
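To make the slot accounting in the comment above concrete, here is a minimal standalone sketch (not part of the patch; the constant and function names are illustrative only) contrasting the 8 mask slots every vector register now owns with the number of slots a spill actually needs at the vector length detected during JIT compilation:

#include <cassert>
#include <cstdio>

const int kSlotBits     = 32; // one ADLC register slot covers 32 bits
const int kSlotsPerVecA = 8;  // mask slots modelled per vector register

// Slots needed to spill a scalable register at the detected vector
// length vl_bits (128 to 2048 bits, in multiples of 128 bits).
int scalable_spill_slots(int vl_bits) {
  assert(vl_bits >= 128 && vl_bits <= 2048 && vl_bits % 128 == 0);
  return vl_bits / kSlotBits;
}

int main() {
  // On 128-bit SVE hardware a register owns 8 mask slots but a spill
  // needs only 4 slots -- the same footprint as a NEON Q register.
  printf("VL=128: mask slots %d, spill slots %d\n",
         kSlotsPerVecA, scalable_spill_slots(128)); // prints 8, 4
  // At VL=512 a spill needs 16 slots, more than the 8 mask slots, which
  // is why the allocator must query the real size (see chaitin.cpp below).
  printf("VL=512: spill slots %d\n", scalable_spill_slots(512)); // prints 16
  return 0;
}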
+ + reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); + reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); + reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); + reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); + reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) ); + reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) ); + reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) ); + reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) ); + + reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); + reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); + reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); + reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); + reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) ); + reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) ); + reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) ); + reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) ); + + reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); + reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); + reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); + reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); + reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) ); + reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) ); + reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) ); + reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) ); + + reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); + reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); + reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); + reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); + reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) ); + reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) ); + reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) ); + reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) ); + + reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); + reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); + reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); + reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); + reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) ); + reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) ); + reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) ); + reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) ); + + reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); + reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); + reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); + reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); + reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) ); + reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) ); + reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) ); + reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) ); + + reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); + reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); + reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); + reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); + reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) ); + reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) ); + reg_def V6_N ( SOC, SOC, Op_RegF, 6, 
v6->as_VMReg()->next(6) ); + reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) ); + + reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); + reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); + reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); + reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); + reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) ); + reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) ); + reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) ); + reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) ); + + reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); + reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); + reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); + reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); + reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) ); + reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) ); + reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) ); + reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) ); + + reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); + reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); + reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); + reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); + reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) ); + reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) ); + reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) ); + reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) ); + + reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); + reg_def V10_H ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); + reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) ); + reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) ); + reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) ); + reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) ); + reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) ); + reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) ); + + reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); + reg_def V11_H ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); + reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) ); + reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) ); + reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) ); + reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) ); + reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) ); + reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) ); + + reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); + reg_def V12_H ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); + reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) ); + reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) ); + reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) ); + reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) ); + reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) ); + reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) ); + + reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); + reg_def V13_H ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); + reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) ); + reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) ); + reg_def V13_L 
( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) ); + reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) ); + reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) ); + reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) ); + + reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); + reg_def V14_H ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); + reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) ); + reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) ); + reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) ); + reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) ); + reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) ); + reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) ); + + reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); + reg_def V15_H ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); + reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) ); + reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) ); + reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) ); + reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) ); + reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) ); + reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) ); + + reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); + reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); + reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) ); + reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) ); + reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) ); + reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) ); + reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) ); + reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) ); + + reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); + reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); + reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) ); + reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) ); + reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) ); + reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) ); + reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) ); + reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) ); + + reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); + reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); + reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) ); + reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) ); + reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) ); + reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) ); + reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) ); + reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) ); + + reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); + reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); + reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) ); + reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) ); + reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) ); + reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) ); + reg_def V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) ); + reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) ); + + reg_def V20 ( SOC, SOC, Op_RegF, 20, 
v20->as_VMReg() ); + reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); + reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) ); + reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) ); + reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) ); + reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) ); + reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) ); + reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) ); + + reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); + reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); + reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) ); + reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) ); + reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) ); + reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) ); + reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) ); + reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) ); + + reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); + reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); + reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) ); + reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) ); + reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) ); + reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) ); + reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) ); + reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) ); + + reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); + reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); + reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) ); + reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) ); + reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) ); + reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) ); + reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) ); + reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) ); + + reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); + reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); + reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) ); + reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) ); + reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) ); + reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) ); + reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) ); + reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) ); + + reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); + reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); + reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) ); + reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) ); + reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) ); + reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) ); + reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) ); + reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) ); + + reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); + reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); + reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) ); + reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) ); + reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) ); + 
reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) ); + reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) ); + reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) ); + + reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); + reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); + reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) ); + reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) ); + reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) ); + reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) ); + reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) ); + reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) ); + + reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); + reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); + reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) ); + reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) ); + reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) ); + reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) ); + reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) ); + reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) ); + + reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); + reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); + reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) ); + reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) ); + reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) ); + reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) ); + reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) ); + reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) ); + + reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); + reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); + reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) ); + reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) ); + reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) ); + reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) ); + reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) ); + reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) ); + + reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); + reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); + reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) ); + reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) ); + reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) ); + reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) ); + reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) ); + reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) ); + + +// ---------------------------- +// SVE Predicate Registers +// ---------------------------- + reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg()); + reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg()); + reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg()); + reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg()); + reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg()); + reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg()); + reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg()); + reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg()); + reg_def P8 (SOC, SOC, Op_RegVMask, 8, p8->as_VMReg()); + reg_def P9 (SOC, SOC, 
Op_RegVMask, 9, p9->as_VMReg()); + reg_def P10 (SOC, SOC, Op_RegVMask, 10, p10->as_VMReg()); + reg_def P11 (SOC, SOC, Op_RegVMask, 11, p11->as_VMReg()); + reg_def P12 (SOC, SOC, Op_RegVMask, 12, p12->as_VMReg()); + reg_def P13 (SOC, SOC, Op_RegVMask, 13, p13->as_VMReg()); + reg_def P14 (SOC, SOC, Op_RegVMask, 14, p14->as_VMReg()); + reg_def P15 (SOC, SOC, Op_RegVMask, 15, p15->as_VMReg()); // ---------------------------- // Special Registers // ---------------------------- @@ -331,11 +495,10 @@ // appear as an operand (a code identifying the FSPR occurs as an // immediate value in the instruction). reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); - // Specify priority of register selection within phases of register // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine // instructions, like EAX and EDX on I486, and choose no-save registers // before save-on-call, & save-on-call before save-on-entry. Registers @@ -379,54 +542,76 @@ R27, R27_H, // heapbase R28, R28_H, // thread R29, R29_H, // fp R30, R30_H, // lr R31, R31_H, // sp + R8, R8_H, // rscratch1 + R9, R9_H, // rscratch2 ); alloc_class chunk1( // no save - V16, V16_H, V16_J, V16_K, - V17, V17_H, V17_J, V17_K, - V18, V18_H, V18_J, V18_K, - V19, V19_H, V19_J, V19_K, - V20, V20_H, V20_J, V20_K, - V21, V21_H, V21_J, V21_K, - V22, V22_H, V22_J, V22_K, - V23, V23_H, V23_J, V23_K, - V24, V24_H, V24_J, V24_K, - V25, V25_H, V25_J, V25_K, - V26, V26_H, V26_J, V26_K, - V27, V27_H, V27_J, V27_K, - V28, V28_H, V28_J, V28_K, - V29, V29_H, V29_J, V29_K, - V30, V30_H, V30_J, V30_K, - V31, V31_H, V31_J, V31_K, + V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, + V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, + V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, + V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, + V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, + V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, + V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, + V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, + V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, + V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, + V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, + V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, + V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, + V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, + V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, + V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, // arg registers - V0, V0_H, V0_J, V0_K, - V1, V1_H, V1_J, V1_K, - V2, V2_H, V2_J, V2_K, - V3, V3_H, V3_J, V3_K, - V4, V4_H, V4_J, V4_K, - V5, V5_H, V5_J, V5_K, - V6, V6_H, V6_J, V6_K, - V7, V7_H, V7_J, V7_K, + V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, + V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, + V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, + V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, + V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, + V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, + V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, + V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, // non-volatiles - V8, V8_H, V8_J, V8_K, - V9, V9_H, V9_J, V9_K, - V10, V10_H, V10_J, V10_K, - V11, V11_H, V11_J, V11_K, - V12, V12_H, V12_J, V12_K, - V13, V13_H, V13_J, V13_K, - V14, V14_H, V14_J, V14_K, - V15, V15_H, V15_J, V15_K, + V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, + V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, + V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, + V11, V11_H, V11_J, 
V11_K, V11_L, V11_M, V11_N, V11_O, + V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, + V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, + V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, + V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, ); -alloc_class chunk2(RFLAGS); +alloc_class chunk2 ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + P7, + + P8, + P9, + P10, + P11, + P12, + P13, + P14, + P15, +); + +alloc_class chunk3(RFLAGS); //----------Architecture Description Register Classes-------------------------- // Several register classes are automatically defined based upon information in // this architecture description. // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) @@ -706,10 +891,46 @@ V29, V29_H, V30, V30_H, V31, V31_H ); +// Class for all SVE vector registers. +reg_class vectora_reg ( + V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, + V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, + V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, + V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, + V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, + V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, + V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, + V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, + V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, + V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, + V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, + V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O, + V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, + V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, + V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, + V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, + V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, + V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, + V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, + V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, + V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, + V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, + V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, + V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, + V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, + V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, + V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, + V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, + V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, + V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, + V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, + V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, +); + // Class for all 64bit vector registers reg_class vectord_reg( V0, V0_H, V1, V1_H, V2, V2_H, @@ -938,10 +1159,43 @@ // Class for 128 bit register v31 reg_class v31_reg( V31, V31_H ); +// Class for all SVE predicate registers. +reg_class pr_reg ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + P7, + P8, + P9, + P10, + P11, + P12, + P13, + P14, + P15 +); + +// Class for SVE governing predicate registers, which are used +// to determine the active elements of a predicated instruction. +reg_class gov_pr ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + P7 +); + // Singleton class for condition codes reg_class int_flags(RFLAGS); %} @@ -1740,32 +1994,37 @@ //============================================================================= // Figure out which register class each belongs in: rc_int, rc_float or // rc_stack. 
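Before reading the rc_class() change in the next hunk, it helps to work out the OptoReg slot ranges that the definitions above imply: 32 int registers at 2 slots each, 32 vector registers at 8 slots each, and 16 predicate registers at 1 slot each. The sketch below is an illustration of that partition with the boundaries written out as literals (derived from the reg_defs above, not copied from HotSpot):

// Slot layout implied by the register definitions above:
//   int regs:       32 * 2 = 64  -> slots [  0,  64) -> rc_int
//   vector regs:    32 * 8 = 256 -> slots [ 64, 320) -> rc_float
//   predicate regs: 16 * 1 = 16  -> slots [320, 336) -> rc_predicate
//   the flags register and stack slots follow        -> rc_stack
enum RC_sketch { rc_bad_s, rc_int_s, rc_float_s, rc_predicate_s, rc_stack_s };

RC_sketch rc_class_sketch(int reg) {
  if (reg < 0)             return rc_bad_s;
  if (reg < 64)            return rc_int_s;
  if (reg < 64 + 256)      return rc_float_s;
  if (reg < 64 + 256 + 16) return rc_predicate_s;
  return rc_stack_s; // flags handling elided for brevity
}

Note that rc_int now spans all 32 general-purpose registers: with R8 and R9 defined above (as non-allocatable), the old "number_of_registers - 2" correction disappears.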
-enum RC { rc_bad, rc_int, rc_float, rc_stack }; +enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack }; static enum RC rc_class(OptoReg::Name reg) { if (reg == OptoReg::Bad) { return rc_bad; } - // we have 30 int registers * 2 halves - // (rscratch1 and rscratch2 are omitted) - int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); + // we have 32 int registers * 2 halves + int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers; if (reg < slots_of_int_registers) { return rc_int; } - // we have 32 float register * 4 halves - if (reg < slots_of_int_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) { + // we have 32 float registers * 8 slots + int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; if (reg < slots_of_int_registers + slots_of_float_registers) { return rc_float; } - // Between float regs & stack is the flags regs. + int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers; + if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) { + return rc_predicate; + } + + // Between predicate regs & stack is the flags register. assert(OptoReg::is_stack(reg), "blow up if spilling flags"); return rc_stack; } @@ -1800,12 +2059,14 @@ int src_offset = ra_->reg2offset(src_lo); int dst_offset = ra_->reg2offset(dst_lo); if (bottom_type()->isa_vect() != NULL) { uint ireg = ideal_reg(); - assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); - if (cbuf) { + if (ireg == Op_VecA && cbuf) { + Unimplemented(); + } else if (cbuf) { + assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); C2_MacroAssembler _masm(cbuf); assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { // stack->stack assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset"); @@ -1819,16 +2080,16 @@ __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]), ireg == Op_VecD ? __ T8B : __ T16B, as_FloatRegister(Matcher::_regEncode[src_lo])); } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), - ireg == Op_VecD ? __ D : __ Q, - ra_->reg2offset(dst_lo)); + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(dst_lo)); } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), - ireg == Op_VecD ? __ D : __ Q, - ra_->reg2offset(src_lo)); + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(src_lo)); } else { ShouldNotReachHere(); } } } else if (cbuf) { @@ -1909,13 +2170,28 @@ st->print("[sp, #%d]", ra_->reg2offset(dst_lo)); } else { st->print("%s", Matcher::regName[dst_lo]); } if (bottom_type()->isa_vect() != NULL) { - st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128); + int vsize = 0; + switch (ideal_reg()) { + case Op_VecD: + vsize = 64; + break; + case Op_VecX: + vsize = 128; + break; + case Op_VecA: + vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; + break; + default: + assert(false, "bad register type for spill"); + ShouldNotReachHere(); + } + st->print("\t# vector spill size = %d", vsize); } else { - st->print("\t# spill size = %d", is64 ? 
64 : 32); } } return 0; @@ -2095,11 +2371,11 @@ return true; // Per default match rules are supported. } const bool Matcher::has_predicated_vectors(void) { - return false; + return UseSVE > 0; } const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -2131,11 +2407,11 @@ return true; } // Vector width in bytes. const int Matcher::vector_width_in_bytes(BasicType bt) { - int size = MIN2(16,(int)MaxVectorSize); + int size = MIN2(16, (int)MaxVectorSize); // Minimum 2 values in vector if (size < 2*type2aelembytes(bt)) size = 0; // But never < 4 if (size < 4) size = 0; return size; @@ -2144,18 +2420,36 @@ // Limits on vector size (number of elements) loaded into vector. const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { -// For the moment limit the vector size to 8 bytes + int max_size = max_vector_size(bt); + if ((UseSVE > 0) && (MaxVectorSize >= 16)) { + // Currently, vector lengths less than the SVE vector register size are not supported. + return max_size; + } else { + // For the moment limit the vector size to 8 bytes with NEON. int size = 8 / type2aelembytes(bt); if (size < 2) size = 2; return size; + } +} + +const bool Matcher::supports_scalable_vector() { + return UseSVE > 0; +} + +// Actual max scalable vector register length, in elements of the given type. +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return Matcher::max_vector_size(bt); } // Vector ideal reg. const uint Matcher::vector_ideal_reg(int len) { + if (UseSVE > 0 && 16 <= len && len <= 256) { + return Op_VecA; + } switch(len) { case 8: return Op_VecD; case 16: return Op_VecX; } ShouldNotReachHere(); @@ -4856,10 +5150,19 @@ op_cost(0); format %{ %} interface(REG_INTER); %} +operand vecA() +%{ + constraint(ALLOC_IN_RC(vectora_reg)); + match(VecA); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + operand vecD() %{ constraint(ALLOC_IN_RC(vectord_reg)); match(VecD); @@ -5164,10 +5467,19 @@ op_cost(0); format %{ %} interface(REG_INTER); %} +operand pRegGov() +%{ + constraint(ALLOC_IN_RC(gov_pr)); + match(RegVMask); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + // Flags register, used as output of signed compare instructions // note that on AArch64 we also use this register as the output // for floating point compare instructions (CmpF CmpD).
this ensures // that ordered inequality tests use GT, GE, LT or LE none of which diff a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -271,10 +271,18 @@ void rf(FloatRegister r, int lsb) { f(r->encoding_nocheck(), lsb + 4, lsb); } + void prf(PRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 3, lsb); + } + + void pgrf(PRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 2, lsb); + } + unsigned get(int msb = 31, int lsb = 0) { int nbits = msb - lsb + 1; unsigned mask = ((1U << nbits) - 1) << lsb; assert_cond((bits & mask) == mask); return (insn & mask) >> lsb; @@ -676,10 +684,16 @@ current->zrf(reg, lsb); } void rf(FloatRegister reg, int lsb) { current->rf(reg, lsb); } + void prf(PRegister reg, int lsb) { + current->prf(reg, lsb); + } + void pgrf(PRegister reg, int lsb) { + current->pgrf(reg, lsb); + } void fixed(unsigned value, unsigned mask) { current->fixed(value, mask); } void emit() { diff a/src/hotspot/cpu/aarch64/register_aarch64.cpp b/src/hotspot/cpu/aarch64/register_aarch64.cpp --- a/src/hotspot/cpu/aarch64/register_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. @@ -31,10 +31,13 @@ const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; +const int ConcreteRegisterImpl::max_pr + = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers; + const char* RegisterImpl::name() const { const char* names[number_of_registers] = { "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", "rscratch1", "rscratch2", "r10", "r11", "r12", "r13", "r14", "r15", "r16", @@ -52,5 +55,13 @@ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" }; return is_valid() ? names[encoding()] : "noreg"; } + +const char* PRegisterImpl::name() const { + const char* names[number_of_registers] = { + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15" + }; + return is_valid() ? names[encoding()] : "noreg"; +} diff a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp --- a/src/hotspot/cpu/aarch64/register_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as @@ -127,13 +127,14 @@ // The implementation of floating point registers for the architecture class FloatRegisterImpl: public AbstractRegisterImpl { public: enum { number_of_registers = 32, - max_slots_per_register = 4, + max_slots_per_register = 8, save_slots_per_register = 2, - extra_save_slots_per_register = max_slots_per_register - save_slots_per_register + slots_per_neon_register = 4, + extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register }; // construction inline friend FloatRegister as_FloatRegister(int encoding); @@ -185,10 +186,92 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v28 , (28)); CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29)); CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30)); CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31)); +// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0]. +CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31)); + + +class PRegisterImpl; +typedef PRegisterImpl* PRegister; +inline PRegister as_PRegister(int encoding) { + return (PRegister)(intptr_t)encoding; +} + +// The implementation of predicate registers for the architecture +class PRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 16, + max_slots_per_register = 1 + }; + + // construction + inline friend PRegister as_PRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + PRegister successor() const { return as_PRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid 
register"); return (intptr_t)this; } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + +// The predicate registers of SVE. +CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0)); +CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1)); +CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2)); +CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3)); +CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4)); +CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5)); +CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6)); +CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7)); +CONSTANT_REGISTER_DECLARATION(PRegister, p8, ( 8)); +CONSTANT_REGISTER_DECLARATION(PRegister, p9, ( 9)); +CONSTANT_REGISTER_DECLARATION(PRegister, p10, (10)); +CONSTANT_REGISTER_DECLARATION(PRegister, p11, (11)); +CONSTANT_REGISTER_DECLARATION(PRegister, p12, (12)); +CONSTANT_REGISTER_DECLARATION(PRegister, p13, (13)); +CONSTANT_REGISTER_DECLARATION(PRegister, p14, (14)); +CONSTANT_REGISTER_DECLARATION(PRegister, p15, (15)); + // Need to know the total number of registers of all sorts for SharedInfo. // Define a class that exports it. class ConcreteRegisterImpl : public AbstractRegisterImpl { public: enum { @@ -197,16 +280,18 @@ // There is no requirement that any ordering here matches any ordering c2 gives // it's optoregs. number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + + PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers + 1) // flags }; // added to make it compile static const int max_gpr; static const int max_fpr; + static const int max_pr; }; // A set of registers class RegSet { uint32_t _bitset; diff a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp --- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp @@ -152,5 +152,55 @@ REGISTER_DEFINITION(Register, rfp); REGISTER_DEFINITION(Register, rthread); REGISTER_DEFINITION(Register, rheapbase); REGISTER_DEFINITION(Register, r31_sp); + +REGISTER_DEFINITION(FloatRegister, z0); +REGISTER_DEFINITION(FloatRegister, z1); +REGISTER_DEFINITION(FloatRegister, z2); +REGISTER_DEFINITION(FloatRegister, z3); +REGISTER_DEFINITION(FloatRegister, z4); +REGISTER_DEFINITION(FloatRegister, z5); +REGISTER_DEFINITION(FloatRegister, z6); +REGISTER_DEFINITION(FloatRegister, z7); +REGISTER_DEFINITION(FloatRegister, z8); +REGISTER_DEFINITION(FloatRegister, z9); +REGISTER_DEFINITION(FloatRegister, z10); +REGISTER_DEFINITION(FloatRegister, z11); +REGISTER_DEFINITION(FloatRegister, z12); +REGISTER_DEFINITION(FloatRegister, z13); +REGISTER_DEFINITION(FloatRegister, z14); +REGISTER_DEFINITION(FloatRegister, z15); +REGISTER_DEFINITION(FloatRegister, z16); +REGISTER_DEFINITION(FloatRegister, z17); +REGISTER_DEFINITION(FloatRegister, z18); +REGISTER_DEFINITION(FloatRegister, z19); +REGISTER_DEFINITION(FloatRegister, z20); +REGISTER_DEFINITION(FloatRegister, z21); +REGISTER_DEFINITION(FloatRegister, z22); +REGISTER_DEFINITION(FloatRegister, z23); +REGISTER_DEFINITION(FloatRegister, z24); +REGISTER_DEFINITION(FloatRegister, z25); +REGISTER_DEFINITION(FloatRegister, z26); +REGISTER_DEFINITION(FloatRegister, z27); +REGISTER_DEFINITION(FloatRegister, z28); +REGISTER_DEFINITION(FloatRegister, z29); 
+REGISTER_DEFINITION(FloatRegister, z30); +REGISTER_DEFINITION(FloatRegister, z31); + +REGISTER_DEFINITION(PRegister, p0); +REGISTER_DEFINITION(PRegister, p1); +REGISTER_DEFINITION(PRegister, p2); +REGISTER_DEFINITION(PRegister, p3); +REGISTER_DEFINITION(PRegister, p4); +REGISTER_DEFINITION(PRegister, p5); +REGISTER_DEFINITION(PRegister, p6); +REGISTER_DEFINITION(PRegister, p7); +REGISTER_DEFINITION(PRegister, p8); +REGISTER_DEFINITION(PRegister, p9); +REGISTER_DEFINITION(PRegister, p10); +REGISTER_DEFINITION(PRegister, p11); +REGISTER_DEFINITION(PRegister, p12); +REGISTER_DEFINITION(PRegister, p13); +REGISTER_DEFINITION(PRegister, p14); +REGISTER_DEFINITION(PRegister, p15); diff a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -116,11 +116,11 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { #if COMPILER2_OR_JVMCI if (save_vectors) { // Save upper half of vector registers - int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register / + int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_neon_register / VMRegImpl::slots_per_word; additional_frame_words += vect_words; } #else assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); diff a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp --- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp @@ -1,8 +1,8 @@ /* - * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. @@ -34,6 +34,10 @@ inline VMReg FloatRegisterImpl::as_VMReg() { return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ConcreteRegisterImpl::max_gpr); } +inline VMReg PRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr); +} + #endif // CPU_AARCH64_VMREG_AARCH64_INLINE_HPP diff a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -1004,10 +1004,18 @@ // Vector width in bytes const int Matcher::vector_width_in_bytes(BasicType bt) { return MaxVectorSize; } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // Vector ideal reg corresponding to specified size in bytes const uint Matcher::vector_ideal_reg(int size) { assert(MaxVectorSize >= size, ""); switch(size) { case 8: return Op_VecD; diff a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -2377,10 +2377,18 @@ const int Matcher::min_vector_size(const BasicType bt) { return max_vector_size(bt); // Same as max. 
} +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // PPC implementation uses VSX load/store instructions (if // SuperwordUseVSX) which support 4 byte but not arbitrary alignment const bool Matcher::misaligned_vectors_ok() { return false; } diff a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1608,10 +1608,18 @@ const int Matcher::min_vector_size(const BasicType bt) { return max_vector_size(bt); // Same as max. } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // z/Architecture does support misaligned store/load at minimal extra cost. const bool Matcher::misaligned_vectors_ok() { return true; } diff a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1,7 +1,7 @@ // -// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License version 2 only, as // published by the Free Software Foundation. @@ -1613,10 +1613,18 @@ // Min size which can be loaded into vector is 4 bytes. int size = (type2aelembytes(bt) == 1) ? 4 : 2; return MIN2(size,max_size); } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // Vector ideal reg corresponding to specified size in bytes const uint Matcher::vector_ideal_reg(int size) { assert(MaxVectorSize >= size, ""); switch(size) { case 4: return Op_VecS; diff a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -2832,11 +2832,11 @@ OptoReg::Bad, // Op_RegF XMM0b_num, // Op_RegD RAX_H_num // Op_RegL }; // Excluded flags and vector registers. - assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); + assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); %} %} //----------ATTRIBUTES--------------------------------------------------------- diff a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp --- a/src/hotspot/share/adlc/archDesc.cpp +++ b/src/hotspot/share/adlc/archDesc.cpp @@ -1,7 +1,7 @@ // -// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License version 2 only, as // published by the Free Software Foundation. @@ -932,20 +932,25 @@ } // Match Vector types. 
if (strncmp(idealOp, "Vec",3)==0) { switch(last_char) { + case 'A': return "TypeVect::VECTA"; case 'S': return "TypeVect::VECTS"; case 'D': return "TypeVect::VECTD"; case 'X': return "TypeVect::VECTX"; case 'Y': return "TypeVect::VECTY"; case 'Z': return "TypeVect::VECTZ"; default: internal_err("Vector type %s with unrecognized type\n",idealOp); } } + if (strncmp(idealOp, "RegVMask", 8) == 0) { + return "Type::BOTTOM"; + } + // !!!!! switch(last_char) { case 'I': return "TypeInt::INT"; case 'P': return "TypePtr::BOTTOM"; case 'N': return "TypeNarrowOop::BOTTOM"; diff a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -3940,10 +3940,12 @@ strcmp(opType,"RegP")==0 || strcmp(opType,"RegN")==0 || strcmp(opType,"RegL")==0 || strcmp(opType,"RegF")==0 || strcmp(opType,"RegD")==0 || + strcmp(opType,"RegVMask")==0 || + strcmp(opType,"VecA")==0 || strcmp(opType,"VecS")==0 || strcmp(opType,"VecD")==0 || strcmp(opType,"VecX")==0 || strcmp(opType,"VecY")==0 || strcmp(opType,"VecZ")==0 || diff a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp --- a/src/hotspot/share/opto/chaitin.cpp +++ b/src/hotspot/share/opto/chaitin.cpp @@ -75,10 +75,11 @@ tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score()); // Flags if( _is_oop ) tty->print("Oop "); if( _is_float ) tty->print("Float "); if( _is_vector ) tty->print("Vector "); + if( _is_scalable ) tty->print("Scalable "); if( _was_spilled1 ) tty->print("Spilled "); if( _was_spilled2 ) tty->print("Spilled2 "); if( _direct_conflict ) tty->print("Direct_conflict "); if( _fat_proj ) tty->print("Fat "); if( _was_lo ) tty->print("Lo "); @@ -642,11 +643,19 @@ } else { // Must be a register-set if (!lrg._fat_proj) { // Must be aligned adjacent register set // Live ranges record the highest register in their mask. // We want the low register for the AD file writer's convenience. OptoReg::Name hi = lrg.reg(); // Get hi register - OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo + int num_regs = lrg.num_regs(); + if (lrg.is_scalable() && OptoReg::is_stack(hi)) { + // For scalable vector registers, when they are allocated in physical + // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable + // vector. If they are allocated on stack, we need to get the actual + // num_regs, which reflects the physical length of scalable registers. + num_regs = lrg.scalable_reg_slots(); + } + OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo // We have to use pair [lo,lo+1] even for wide vectors because // the rest of code generation works only with pairs. It is safe // since for registers encoding only 'lo' is used. // Second reg from pair is used in ScheduleAndBundle on SPARC where // vector max size is 8 which corresponds to registers pair. @@ -800,12 +809,23 @@ "oops must be in Op_RegP's" ); // Check for vector live range (only if vector register is used). // On SPARC vector uses RegD which could be misaligned so it is not // processes as vector in RA. - if (RegMask::is_vector(ireg)) + if (RegMask::is_vector(ireg)) { lrg._is_vector = 1; + if (ireg == Op_VecA) { + assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); + lrg._is_scalable = 1; + // For scalable vector, when it is allocated in physical register, + // num_regs is RegMask::SlotsPerVecA for reg mask, + // which may not be the actual physical register size. 
+ // If it is allocated in stack, we need to get the actual + // physical length of scalable vector register. + lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); + } + } assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, "vector must be in vector registers"); // Check for bound register masks const RegMask &lrgmask = lrg.mask(); @@ -903,10 +923,17 @@ case Op_RegFlags: case 0: // not an ideal register lrg.set_num_regs(1); lrg.set_reg_pressure(1); break; + case Op_VecA: + assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); + assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); + assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); + lrg.set_num_regs(RegMask::SlotsPerVecA); + lrg.set_reg_pressure(1); + break; case Op_VecS: assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); lrg.set_num_regs(RegMask::SlotsPerVecS); lrg.set_reg_pressure(1); @@ -1303,10 +1330,50 @@ return true; } return false; } +static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { + int num_regs = lrg.num_regs(); + OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); + + if (lrg.is_scalable()) { + // a physical register is found + if (chunk == 0 && OptoReg::is_reg(assigned)) { + return assigned; + } + + // find available stack slots for scalable register + if (lrg._is_vector) { + num_regs = lrg.scalable_reg_slots(); + // if actual scalable vector register is exactly SlotsPerVecA * 32 bits + if (num_regs == RegMask::SlotsPerVecA) { + return assigned; + } + + // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it + // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits + // instead of SlotsPerVecA bits. + assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg + while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { + // Verify the found reg has scalable_reg_slots() bits set. + if (mask.is_valid_reg(assigned, num_regs)) { + return assigned; + } else { + // Remove more for each iteration + mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg + mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits + assigned = mask.find_first_set(lrg, num_regs); + } + } + return OptoReg::Bad; // will cause chunk change, and retry next chunk + } + } + + return assigned; +} + // Choose a color using the biasing heuristic OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { // Check for "at_risk" LRG's uint risk_lrg = _lrg_map.find(lrg._risk_bias); @@ -1336,20 +1403,20 @@ } else if( chunk == 0 ) { // Choose a color which is legal for him RegMask tempmask = lrg.mask(); tempmask.AND(lrgs(copy_lrg).mask()); tempmask.clear_to_sets(lrg.num_regs()); - OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); + OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); if (OptoReg::is_valid(reg)) return reg; } } // If no bias info exists, just go with the register selection ordering if (lrg._is_vector || lrg.num_regs() == 2) { // Find an aligned set - return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); + return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); } // CNC - Fun hack. Alternate 1st and 2nd selection. 
Enables post-allocate // copy removal to remove many more copies, by preventing a just-assigned // register from being repeatedly assigned. @@ -1400,11 +1467,10 @@ // Pull next LRG from the simplified list - in reverse order of removal uint lidx = _simplified; LRG *lrg = &lrgs(lidx); _simplified = lrg->_next; - #ifndef PRODUCT if (trace_spilling()) { ttyLocker ttyl; tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(), lrg->degrees_of_freedom()); @@ -1482,11 +1548,10 @@ // a chunk-rollover event if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) { // Bump register mask up to next stack chunk chunk += RegMask::CHUNK_SIZE; lrg->Set_All(); - goto retry_next_chunk; } //--------------- // Did we get a color? @@ -1507,16 +1572,25 @@ // to make. In this case, the mask has more bits in it than the colors // chosen. Restrict the mask to just what was picked. int n_regs = lrg->num_regs(); assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); if (n_regs == 1 || !lrg->_fat_proj) { - assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); + if (Matcher::supports_scalable_vector()) { + assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); + } else { + assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); + } lrg->Clear(); // Clear the mask lrg->Insert(reg); // Set regmask to match selected reg // For vectors and pairs, also insert the low bit of the pair - for (int i = 1; i < n_regs; i++) + // We always choose the high bit, then mask the low bits by register size + if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack + n_regs = lrg->scalable_reg_slots(); + } + for (int i = 1; i < n_regs; i++) { lrg->Insert(OptoReg::add(reg,-i)); + } lrg->set_mask_size(n_regs); } else { // Else fatproj // mask must be equal to fatproj bits, by definition } #ifndef PRODUCT diff a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp --- a/src/hotspot/share/opto/chaitin.hpp +++ b/src/hotspot/share/opto/chaitin.hpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. @@ -112,11 +112,13 @@ _mask_size = size; #ifdef ASSERT _msize_valid=1; if (_is_vector) { assert(!_fat_proj, "sanity"); - assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); + if (!(_is_scalable && OptoReg::is_stack(_reg))) { + assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); + } } else if (_num_regs == 2 && !_fat_proj) { assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); } #endif } @@ -135,18 +137,41 @@ void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) } void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) } void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) } - // Number of registers this live range uses when it colors private: + // Number of registers this live range uses when it colors uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else // except _num_regs is kill count for fat_proj + + // For scalable register, num_regs may not be the actual physical register size. 
+ // We need to get the actual physical length of scalable register when scalable + // register is spilled. The size of one slot is 32-bit. + uint _scalable_reg_slots; // Actual scalable register length of slots. + // Meaningful only when _is_scalable is true. public: int num_regs() const { return _num_regs; } void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } + uint scalable_reg_slots() { return _scalable_reg_slots; } + void set_scalable_reg_slots(uint slots) { + assert(_is_scalable, "scalable register"); + assert(slots > 0, "slots of scalable register is not valid"); + _scalable_reg_slots = slots; + } + + bool is_scalable() { +#ifdef ASSERT + if (_is_scalable) { + // Should only be a vector for now, but it could also be a RegVMask in future. + assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); + } +#endif + return _is_scalable; + } + private: // Number of physical registers this live range uses when it colors // Architecture and register-set dependent uint16_t _reg_pressure; public: @@ -168,10 +193,12 @@ bool just_lo_degree () const { return degree() == degrees_of_freedom(); } uint _is_oop:1, // Live-range holds an oop _is_float:1, // True if in float registers _is_vector:1, // True if in vector registers + _is_scalable:1, // True if register size is scalable + // e.g. Arm SVE vector/predicate registers. _was_spilled1:1, // True if prior spilling on def _was_spilled2:1, // True if twice prior spilling on def _is_bound:1, // live range starts life with no // degrees of freedom. _direct_conflict:1, // True if def and use registers in conflict diff a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -86,10 +86,11 @@ idealreg2spillmask [Op_RegN] = NULL; idealreg2spillmask [Op_RegL] = NULL; idealreg2spillmask [Op_RegF] = NULL; idealreg2spillmask [Op_RegD] = NULL; idealreg2spillmask [Op_RegP] = NULL; + idealreg2spillmask [Op_VecA] = NULL; idealreg2spillmask [Op_VecS] = NULL; idealreg2spillmask [Op_VecD] = NULL; idealreg2spillmask [Op_VecX] = NULL; idealreg2spillmask [Op_VecY] = NULL; idealreg2spillmask [Op_VecZ] = NULL; @@ -99,10 +100,11 @@ idealreg2debugmask [Op_RegN] = NULL; idealreg2debugmask [Op_RegL] = NULL; idealreg2debugmask [Op_RegF] = NULL; idealreg2debugmask [Op_RegD] = NULL; idealreg2debugmask [Op_RegP] = NULL; + idealreg2debugmask [Op_VecA] = NULL; idealreg2debugmask [Op_VecS] = NULL; idealreg2debugmask [Op_VecD] = NULL; idealreg2debugmask [Op_VecX] = NULL; idealreg2debugmask [Op_VecY] = NULL; idealreg2debugmask [Op_VecZ] = NULL; @@ -112,10 +114,11 @@ idealreg2mhdebugmask[Op_RegN] = NULL; idealreg2mhdebugmask[Op_RegL] = NULL; idealreg2mhdebugmask[Op_RegF] = NULL; idealreg2mhdebugmask[Op_RegD] = NULL; idealreg2mhdebugmask[Op_RegP] = NULL; + idealreg2mhdebugmask[Op_VecA] = NULL; idealreg2mhdebugmask[Op_VecS] = NULL; idealreg2mhdebugmask[Op_VecD] = NULL; idealreg2mhdebugmask[Op_VecX] = NULL; idealreg2mhdebugmask[Op_VecY] = NULL; idealreg2mhdebugmask[Op_VecZ] = NULL; @@ -425,11 +428,11 @@ rms[TypeFunc::ReturnAdr] = ret_adr; rms[TypeFunc::FramePtr ] = fp; return rms; } -#define NOF_STACK_MASKS (3*6+5) +#define NOF_STACK_MASKS (3*6+6) // Create the initial stack mask used by values spilling to the stack. // Disallow any debug info in outgoing argument areas by setting the // initial mask accordingly. 
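To summarize the chaitin.hpp changes above: a scalable live range now carries two sizes, the fixed mask granule in _num_regs and the real spill footprint in _scalable_reg_slots, and consumers pick one based on where the value was colored. A toy model of that bookkeeping (a stand-in, not HotSpot's LRG), including the hi-to-lo slot arithmetic from the chaitin.cpp hunk earlier:

    #include <cassert>
    #include <cstdint>

    // SlotsPerVecA is the patch's fixed mask granule; 16 models the
    // 32-bit slot count of an assumed 512-bit machine.
    struct ToyLRG {
      uint16_t _num_regs;            // mask granule (SlotsPerVecA for VecA)
      uint16_t _scalable_reg_slots;  // real spill footprint, set from Matcher
      bool     _is_scalable;

      int  num_regs() const           { return _num_regs; }
      int  scalable_reg_slots() const { return _scalable_reg_slots; }
      void set_scalable_reg_slots(int slots) {
        assert(_is_scalable && slots > 0);
        _scalable_reg_slots = (uint16_t)slots;
      }
      // Slots actually covered at the location the allocator chose.
      int slots_at(bool on_stack) const {
        return (_is_scalable && on_stack) ? scalable_reg_slots() : num_regs();
      }
    };

    int main() {
      ToyLRG lrg = {8, 0, true};       // SlotsPerVecA == 8
      lrg.set_scalable_reg_slots(16);  // e.g. scalable_vector_reg_size(T_FLOAT)
      // The "find lo from hi" arithmetic from the chaitin.cpp hunk above:
      int hi_in_reg = 7, hi_on_stack = 95;
      assert(hi_in_reg   - (lrg.slots_at(false) - 1) == 0);  // 8-slot register set
      assert(hi_on_stack - (lrg.slots_at(true)  - 1) == 80); // 16 real stack slots
      return 0;
    }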
void Matcher::init_first_stack_mask() { @@ -461,15 +464,16 @@ idealreg2mhdebugmask[Op_RegL] = &rms[14]; idealreg2mhdebugmask[Op_RegF] = &rms[15]; idealreg2mhdebugmask[Op_RegD] = &rms[16]; idealreg2mhdebugmask[Op_RegP] = &rms[17]; - idealreg2spillmask [Op_VecS] = &rms[18]; - idealreg2spillmask [Op_VecD] = &rms[19]; - idealreg2spillmask [Op_VecX] = &rms[20]; - idealreg2spillmask [Op_VecY] = &rms[21]; - idealreg2spillmask [Op_VecZ] = &rms[22]; + idealreg2spillmask [Op_VecA] = &rms[18]; + idealreg2spillmask [Op_VecS] = &rms[19]; + idealreg2spillmask [Op_VecD] = &rms[20]; + idealreg2spillmask [Op_VecX] = &rms[21]; + idealreg2spillmask [Op_VecY] = &rms[22]; + idealreg2spillmask [Op_VecZ] = &rms[23]; OptoReg::Name i; // At first, start with the empty mask C->FIRST_STACK_mask().Clear(); @@ -492,10 +496,11 @@ // Make spill masks. Registers for their class, plus FIRST_STACK_mask. RegMask aligned_stack_mask = C->FIRST_STACK_mask(); // Keep spill masks aligned. aligned_stack_mask.clear_to_pairs(); assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + RegMask scalable_stack_mask = aligned_stack_mask; *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; #ifdef _LP64 *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN]; idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask()); @@ -562,32 +567,52 @@ aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecZ); assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); } - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. - // On sparc for instance we may have to use 64 bit moves will - // kill 2 registers when used with F0-F31. - idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); - idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); + + if (Matcher::supports_scalable_vector()) { + int k = 1; + OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); + // Exclude last input arg stack slots to avoid spilling vector register there, + // otherwise vector spills could stomp over stack slots in caller frame. + for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { + scalable_stack_mask.Remove(in); + in = OptoReg::add(in, -1); + } + + // For VecA + scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); + assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; + idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); + } else { + *idealreg2spillmask[Op_VecA] = RegMask::Empty; + } + + if (UseFPUForSpilling) { + // This mask logic assumes that the spill operations are + // symmetric and that the registers involved are the same size. + // On sparc for instance we may have to use 64 bit moves will + // kill 2 registers when used with F0-F31. 
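The supports_scalable_vector() branch above trims the tail of the incoming-argument area out of the scalable stack mask, so that no scalable spill drawn from that mask can reach into stack slots owned by the caller's frame. A sketch of the carve-out, with sets of slot numbers standing in for RegMask bits:

    #include <cassert>
    #include <set>

    // Slot numbers are arbitrary stand-ins for OptoReg names.
    std::set<int> scalable_stack_mask(int init_in, int in_arg_limit,
                                      int spill_lo, int spill_hi,
                                      int scalable_slots) {
      std::set<int> mask;
      for (int s = init_in; s < in_arg_limit; s++) mask.insert(s);  // incoming args
      for (int s = spill_lo; s <= spill_hi; s++)  mask.insert(s);   // spill area
      // Mirror the patch's loop: drop the last (scalable_slots - 1) arg slots.
      int in = in_arg_limit - 1;
      for (int k = 1; in >= init_in && k < scalable_slots; k++, in--) {
        mask.erase(in);
      }
      return mask;
    }

    int main() {
      // 8 incoming-arg slots (0..7), spill area 8..63, 16-slot scalable spills:
      // every arg slot is within 15 of the boundary, so all are removed.
      std::set<int> m = scalable_stack_mask(0, 8, 8, 63, 16);
      assert(m.count(7) == 0 && m.count(0) == 0 && m.count(8) == 1);
      return 0;
    }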
+ idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); #ifdef _LP64 - idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); - idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); - idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); - idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); + idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); #else - idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); #ifdef ARM - // ARM has support for moving 64bit values between a pair of - // integer registers and a double register - idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); - idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); + // ARM has support for moving 64bit values between a pair of + // integer registers and a double register + idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); #endif #endif - } + } // Make up debug masks. Any spill slot plus callee-save (SOE) registers. // Caller-save (SOC, AS) registers are assumed to be trashable by the various // inline-cache fixup routines. *idealreg2debugmask [Op_RegN] = *idealreg2spillmask[Op_RegN]; @@ -876,10 +901,11 @@ idealreg2regmask[Op_RegI] = regmask_for_ideal_register(Op_RegI, ret); idealreg2regmask[Op_RegP] = regmask_for_ideal_register(Op_RegP, ret); idealreg2regmask[Op_RegF] = regmask_for_ideal_register(Op_RegF, ret); idealreg2regmask[Op_RegD] = regmask_for_ideal_register(Op_RegD, ret); idealreg2regmask[Op_RegL] = regmask_for_ideal_register(Op_RegL, ret); + idealreg2regmask[Op_VecA] = regmask_for_ideal_register(Op_VecA, ret); idealreg2regmask[Op_VecS] = regmask_for_ideal_register(Op_VecS, ret); idealreg2regmask[Op_VecD] = regmask_for_ideal_register(Op_VecD, ret); idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret); idealreg2regmask[Op_VecY] = regmask_for_ideal_register(Op_VecY, ret); idealreg2regmask[Op_VecZ] = regmask_for_ideal_register(Op_VecZ, ret); @@ -1561,11 +1587,10 @@ control = Label_Root(m, s, control, mem); if (C->failing()) return NULL; } } - // Call DFA to match this node, and return svec->DFA( n->Opcode(), n ); #ifdef ASSERT uint x; @@ -2411,11 +2436,11 @@ // Compute RegMask for an ideal register. 
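As with the other vector classes, the VecA spill mask built above is simply the class's register mask OR'ed with the (carved-down) scalable stack mask, letting the allocator color a VecA live range either to a vector register or to an aligned stack area. Modeled with slot-number sets:

    #include <cassert>
    #include <set>

    // Slot-number sets stand in for RegMask bit sets.
    std::set<int> make_spillmask(const std::set<int>& regmask,
                                 const std::set<int>& stackmask) {
      std::set<int> spill = regmask;                    // *spillmask = *regmask
      spill.insert(stackmask.begin(), stackmask.end()); // spillmask->OR(stack)
      return spill;
    }

    int main() {
      std::set<int> vec_regs = {100, 108, 116};  // made-up v-register slot bases
      std::set<int> stack    = {8, 16, 24};      // aligned scalable stack slots
      std::set<int> spill    = make_spillmask(vec_regs, stack);
      assert(spill.count(108) == 1 && spill.count(16) == 1); // either location legal
      return 0;
    }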
const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) { const Type* t = Type::mreg2type[ideal_reg]; if (t == NULL) { - assert(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg); + assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg); return NULL; // not supported } Node* fp = ret->in(TypeFunc::FramePtr); Node* mem = ret->in(TypeFunc::Memory); const TypePtr* atp = TypePtr::BOTTOM; @@ -2428,10 +2453,11 @@ case Op_RegP: spill = new LoadPNode(NULL, mem, fp, atp, t->is_ptr(), mo); break; case Op_RegF: spill = new LoadFNode(NULL, mem, fp, atp, t, mo); break; case Op_RegD: spill = new LoadDNode(NULL, mem, fp, atp, t, mo); break; case Op_RegL: spill = new LoadLNode(NULL, mem, fp, atp, t->is_long(), mo); break; + case Op_VecA: // fall-through case Op_VecS: // fall-through case Op_VecD: // fall-through case Op_VecX: // fall-through case Op_VecY: // fall-through case Op_VecZ: spill = new LoadVectorNode(NULL, mem, fp, atp, t->is_vect()); break; diff a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -336,10 +336,14 @@ static const bool vector_size_supported(const BasicType bt, int size) { return (Matcher::max_vector_size(bt) >= size && Matcher::min_vector_size(bt) <= size); } + static const bool supports_scalable_vector(); + // Actual max scalable vector register length. + static const int scalable_vector_reg_size(const BasicType bt); + // Vector ideal reg static const uint vector_ideal_reg(int len); // CPU supports misaligned vectors store/load. static const bool misaligned_vectors_ok(); diff a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp --- a/src/hotspot/share/opto/opcodes.cpp +++ b/src/hotspot/share/opto/opcodes.cpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. @@ -36,16 +36,18 @@ "RegI", "RegP", "RegF", "RegD", "RegL", - "RegFlags", + "VecA", "VecS", "VecD", "VecX", "VecY", "VecZ", + "RegVMask", + "RegFlags", "_last_machine_leaf", #include "classes.hpp" "_last_class_name", }; #undef macro diff a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp --- a/src/hotspot/share/opto/opcodes.hpp +++ b/src/hotspot/share/opto/opcodes.hpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. 
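The names[] table in opcodes.cpp above must stay in lock-step with the enum in opcodes.hpp that follows; inserting VecA and RegVMask is exactly what moved _last_machine_leaf by two and forced the x86_64.ad assert from "- 6" to "- 8". One common way to make such enum/name pairs self-checking (a general X-macro technique, not what adlc actually generates):

    #include <cassert>

    #define MACHINE_LEAVES(m) \
      m(RegI) m(RegP) m(RegF) m(RegD) m(RegL) \
      m(VecA) m(VecS) m(VecD) m(VecX) m(VecY) m(VecZ) \
      m(RegVMask) m(RegFlags)

    #define AS_ENUM(x) Op_##x,
    #define AS_NAME(x) #x,

    enum Opcodes { MACHINE_LEAVES(AS_ENUM) _last_machine_leaf };
    static const char* const names[] = { MACHINE_LEAVES(AS_NAME) "_last_machine_leaf" };

    static_assert(sizeof(names) / sizeof(names[0]) == _last_machine_leaf + 1,
                  "names[] and the opcode enum are out of sync");

    int main() {
      assert(names[Op_RegVMask][0] == 'R'); // one table safely indexes the other
      return 0;
    }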
@@ -35,15 +35,17 @@ macro(RegI) // Machine integer register macro(RegP) // Machine pointer register macro(RegF) // Machine float register macro(RegD) // Machine double register macro(RegL) // Machine long register + macro(VecA) // Machine vectora register macro(VecS) // Machine vectors register macro(VecD) // Machine vectord register macro(VecX) // Machine vectorx register macro(VecY) // Machine vectory register macro(VecZ) // Machine vectorz register + macro(RegVMask) // Vector mask/predicate register macro(RegFlags) // Machine flags register _last_machine_leaf, // Split between regular opcodes and machine #include "classes.hpp" _last_opcode }; diff a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp --- a/src/hotspot/share/opto/postaloc.cpp +++ b/src/hotspot/share/opto/postaloc.cpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. @@ -264,13 +264,13 @@ // intermediate copies might be illegal, i.e., value is stored down to stack // then reloaded BUT survives in a register the whole way. Node *val = skip_copies(n->in(k)); if (val == x) return blk_adjust; // No progress? - int n_regs = RegMask::num_registers(val->ideal_reg()); uint val_idx = _lrg_map.live_range_id(val); OptoReg::Name val_reg = lrgs(val_idx).reg(); + int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); // See if it happens to already be in the correct register! // (either Phi's direct register, or the common case of the name // never-clobbered original-def register) if (register_contains_value(val, val_reg, n_regs, value)) { @@ -303,12 +303,30 @@ } if (ignore_self) continue; } Node *vv = value[reg]; + // For scalable register, number of registers may be inconsistent between + // "val_reg" and "reg". For example, when "val" resides in register + // but "reg" is located in stack. + if (lrgs(val_idx).is_scalable()) { + assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + if (OptoReg::is_stack(reg)) { + n_regs = lrgs(val_idx).scalable_reg_slots(); + } else { + n_regs = RegMask::SlotsPerVecA; + } + } if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set - uint last = (n_regs-1); // Looking for the last part of a set + uint last; + if (lrgs(val_idx).is_scalable()) { + assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + // For scalable vector register, regmask is always SlotsPerVecA bits aligned + last = RegMask::SlotsPerVecA - 1; + } else { + last = (n_regs-1); // Looking for the last part of a set + } if ((reg&last) != last) continue; // Wrong part of a set if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value } if( vv == val || // Got a direct hit? (t && vv && vv->bottom_type() == t && vv->is_Mach() && @@ -589,11 +607,11 @@ // For all Phi's for (j = 1; j < phi_dex; j++) { uint k; Node *phi = block->get_node(j); uint pidx = _lrg_map.live_range_id(phi); - OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); + OptoReg::Name preg = lrgs(pidx).reg(); // Remove copies remaining on edges. Check for junk phi. Node *u = NULL; for (k = 1; k < phi->req(); k++) { Node *x = phi->in(k); @@ -617,11 +635,11 @@ // the new values. 
Not illegal by itself but throws the over-strong // assert in scheduling. if( pidx ) { value.map(preg,phi); regnd.map(preg,phi); - int n_regs = RegMask::num_registers(phi->ideal_reg()); + int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); for (int l = 1; l < n_regs; l++) { OptoReg::Name preg_lo = OptoReg::add(preg,-l); value.map(preg_lo,phi); regnd.map(preg_lo,phi); } @@ -661,11 +679,11 @@ Node *valdef = skip_copies(def); // tighten up val through non-useless copies value.map(ureg,valdef); // record improved reaching-def info regnd.map(ureg, def); // Record other half of doubles uint def_ideal_reg = def->ideal_reg(); - int n_regs = RegMask::num_registers(def_ideal_reg); + int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); for (int l = 1; l < n_regs; l++) { OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); if (!value[ureg_lo] && (!RegMask::can_represent(ureg_lo) || lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent @@ -705,11 +723,11 @@ regnd.map(nreg, NULL); value.map(nreg, NULL); } uint n_ideal_reg = n->ideal_reg(); - int n_regs = RegMask::num_registers(n_ideal_reg); + int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); if (n_regs == 1) { // If Node 'n' does not change the value mapped by the register, // then 'n' is a useless copy. Do not update the register->node // mapping so 'n' will go dead. if( value[nreg] != val ) { diff a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp --- a/src/hotspot/share/opto/regmask.cpp +++ b/src/hotspot/share/opto/regmask.cpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. @@ -22,10 +22,11 @@ * */ #include "precompiled.hpp" #include "opto/ad.hpp" +#include "opto/chaitin.hpp" #include "opto/compile.hpp" #include "opto/matcher.hpp" #include "opto/node.hpp" #include "opto/regmask.hpp" #include "utilities/population_count.hpp" @@ -57,34 +58,51 @@ 0 ); //============================================================================= bool RegMask::is_vector(uint ireg) { - return (ireg == Op_VecS || ireg == Op_VecD || + return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); } int RegMask::num_registers(uint ireg) { switch(ireg) { case Op_VecZ: - return 16; + return SlotsPerVecZ; case Op_VecY: - return 8; + return SlotsPerVecY; case Op_VecX: - return 4; + return SlotsPerVecX; case Op_VecD: + return SlotsPerVecD; case Op_RegD: case Op_RegL: #ifdef _LP64 case Op_RegP: #endif return 2; + case Op_VecA: + assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); + return SlotsPerVecA; } // Op_VecS and the rest ideal registers. 
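The postaloc changes above only examine a multi-slot value at the highest slot of its aligned set, and for scalable vectors that alignment stays at the SlotsPerVecA granule even when the value's real footprint differs. The (reg & last) == last test in isolation:

    #include <cassert>

    const int SlotsPerVecA = 8; // the patch's fixed VecA mask granule

    // Power-of-two set sizes make "is this the top slot of its set?" a
    // simple bit mask.
    bool is_high_slot_of_set(int reg, bool is_scalable, int n_regs) {
      int last = is_scalable ? SlotsPerVecA - 1 : n_regs - 1;
      return (reg & last) == last;
    }

    int main() {
      assert(is_high_slot_of_set(15, true, 16));  // tops an 8-aligned set
      assert(!is_high_slot_of_set(12, true, 16)); // interior slot: skipped
      assert(is_high_slot_of_set(3, false, 4));   // plain 4-slot vector set
      return 0;
    }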
return 1; } +int RegMask::num_registers(uint ireg, LRG &lrg) { + int n_regs = num_registers(ireg); + + // 'assigned' is the OptoReg selected by the register allocator + OptoReg::Name assigned = lrg.reg(); + assert(OptoReg::is_valid(assigned), "should be valid opto register"); + + if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { + n_regs = lrg.scalable_reg_slots(); + } + return n_regs; +} + // Clear out partial bits; leave only bit pairs void RegMask::clear_to_pairs() { assert(valid_watermarks(), "sanity"); for (int i = _lwm; i <= _hwm; i++) { int bits = _A[i]; @@ -155,19 +173,34 @@ } else if (is_bound1() || is_bound_pair()) { return true; } return false; } +// Check whether the 'size' registers ending at 'reg' (the highest number +// in the set) are all members of this regmask. +bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { + for (int i = 0; i < size; i++) { + if (!Member(reg - i)) { + return false; + } + } + return true; +} // only indicies of power 2 are accessed, so index 3 is only filled in for storage. static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. // Works also for size 1. -OptoReg::Name RegMask::find_first_set(const int size) const { - assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); +OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { + if (lrg.is_scalable()) { + // For a scalable vector register, the regmask is aligned by SlotsPerVecA bits. + assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); + } else { + assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + } assert(valid_watermarks(), "sanity"); for (int i = _lwm; i <= _hwm; i++) { if (_A[i]) { // Found some bits // Convert to bit number, return hi bit in pair return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(_A[i]) + (size - 1)); @@ -243,16 +276,20 @@ for (int i = _lwm; i <= _hwm; i++) { int bits = _A[i]; while (bits) { // Check bits for pairing int bit = bits & -bits; // Extract low bit // Low bit is not odd means its mis-aligned. - if ((bit & low_bits_mask) == 0) return false; + if ((bit & low_bits_mask) == 0) { + return false; + } // Do extra work since (bit << size) may overflow. int hi_bit = bit << (size-1); // high bit int set = hi_bit + ((hi_bit-1) & ~(bit-1)); // Check for aligned adjacent bits in this set - if ((bits & set) != set) return false; + if ((bits & set) != set) { + return false; + } bits -= set; // Remove this set } } return true; } diff a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp --- a/src/hotspot/share/opto/regmask.hpp +++ b/src/hotspot/share/opto/regmask.hpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation.
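When a scalable live range must go to the stack, find_first_set() above can no longer assume that clear_to_sets(SlotsPerVecA) left runs of the full spill size, hence the retry loop added in PhaseChaitin earlier in this patch. A flat-word model of the search it performs:

    #include <cassert>
    #include <cstdint>

    // Flat 64-bit model of the stack-mask search (the real code walks
    // RegMask chunks and re-aligns with clear_to_sets between attempts).
    // Bit i set means stack slot i is available; return the HIGHEST slot
    // of the first run of 'size' adjacent free slots, or -1 if none.
    int find_slot_run(uint64_t mask, int size) {
      uint64_t run = (size == 64) ? ~0ULL : ((1ULL << size) - 1);
      for (int lo = 0; lo + size <= 64; lo++) {
        if (((mask >> lo) & run) == run) return lo + size - 1;
      }
      return -1;
    }

    int main() {
      // Slots 0..7 free, slot 8 taken, slots 9..24 free: an 8-slot request
      // fits at the bottom, but a 16-slot scalable spill must skip ahead.
      uint64_t mask = 0xFFULL | 0x1FFFE00ULL;
      assert(find_slot_run(mask, 8) == 7);
      assert(find_slot_run(mask, 16) == 24);
      return 0;
    }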
@@ -28,10 +28,12 @@ #include "code/vmreg.hpp" #include "opto/optoreg.hpp" #include "utilities/count_leading_zeros.hpp" #include "utilities/count_trailing_zeros.hpp" +class LRG; + //-------------Non-zero bit search methods used by RegMask--------------------- // Find lowest 1, undefined if empty/0 static int find_lowest_bit(uint32_t mask) { return count_trailing_zeros(mask); } @@ -89,15 +91,17 @@ // If we ever go to quad-word allocations, SlotsPerQuad will become // the controlling alignment constraint. Note that this alignment // requirement is internal to the allocator, and independent of any // particular platform. enum { SlotsPerLong = 2, + SlotsPerVecA = 8, SlotsPerVecS = 1, SlotsPerVecD = 2, SlotsPerVecX = 4, SlotsPerVecY = 8, - SlotsPerVecZ = 16 }; + SlotsPerVecZ = 16, + }; // A constructor only used by the ADLC output. All mask fields are filled // in directly. Calls to this look something like RM(1,2,3,4); RegMask( # define BODY(I) int a##I, @@ -217,14 +221,18 @@ // Test for a single adjacent pair bool is_bound_pair() const; // Test for a single adjacent set of ideal register's size. bool is_bound(uint ireg) const; + // Check whether the 'size' registers ending at 'reg' (the highest number + // in the set) are all members of this regmask. + bool is_valid_reg(OptoReg::Name reg, const int size) const; + // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. // Assert that the mask contains only bit sets. - OptoReg::Name find_first_set(const int size) const; + OptoReg::Name find_first_set(LRG &lrg, const int size) const; // Clear out partial bits; leave only aligned adjacent bit sets of size. void clear_to_sets(const int size); // Smear out partial bits to aligned adjacent bit sets. void smear_to_sets(const int size); @@ -234,10 +242,11 @@ // Test for a single adjacent set int is_bound_set(const int size) const; static bool is_vector(uint ireg); static int num_registers(uint ireg); + static int num_registers(uint ireg, LRG &lrg); // Fast overlap test. Non-zero if any registers in common. int overlap(const RegMask &rm) const { assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); int hwm = MIN2(_hwm, rm._hwm); diff a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -72,10 +72,11 @@ { Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ #else // all other + { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY { Bad, T_ILLEGAL, "vectorz:", false, Op_VecZ, relocInfo::none }, // VectorZ @@ -644,10 +645,14 @@ _zero_type[T_VOID] = Type::TOP; // the only void value is no value at all // get_zero_type() should not happen for T_CONFLICT _zero_type[T_CONFLICT]= NULL; + if (Matcher::supports_scalable_vector()) { + TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); + } + + // Vector predefined types, it needs initialized _const_basic_type[].
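Note the two different sizes now in play: SlotsPerVecA above is a fixed register-allocator granule, while TypeVect::VECTA's element count is computed from the hardware vector length once Matcher::supports_scalable_vector() reports true. Illustrative arithmetic for an assumed 512-bit machine:

    #include <cassert>

    const int SlotsPerVecA = 8; // compile-time RA granule from the enum above

    // Element count for a scalable vector type, from the probed vector length.
    int vecta_length(int vl_bytes, int elem_bytes) { return vl_bytes / elem_bytes; }

    int main() {
      int vl_bytes = 64;                       // assumption: a 512-bit machine
      assert(vecta_length(vl_bytes, 1) == 64); // TypeVect::make(T_BYTE, ...) length
      int spill_slots = vl_bytes / 4;          // 32-bit slots really occupied
      assert(spill_slots == 16 && spill_slots != SlotsPerVecA); // granule != footprint
      return 0;
    }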
if (Matcher::vector_size_supported(T_BYTE,4)) { TypeVect::VECTS = TypeVect::make(T_BYTE,4); } if (Matcher::vector_size_supported(T_FLOAT,2)) { @@ -660,10 +665,12 @@ TypeVect::VECTY = TypeVect::make(T_FLOAT,8); } if (Matcher::vector_size_supported(T_FLOAT,16)) { TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); } + + mreg2type[Op_VecA] = TypeVect::VECTA; mreg2type[Op_VecS] = TypeVect::VECTS; mreg2type[Op_VecD] = TypeVect::VECTD; mreg2type[Op_VecX] = TypeVect::VECTX; mreg2type[Op_VecY] = TypeVect::VECTY; mreg2type[Op_VecZ] = TypeVect::VECTZ; @@ -979,10 +986,11 @@ Bad, // NarrowOop - handled in v-call Bad, // NarrowKlass - handled in v-call Bad, // Tuple - handled in v-call Bad, // Array - handled in v-call + Bad, // VectorA - handled in v-call Bad, // VectorS - handled in v-call Bad, // VectorD - handled in v-call Bad, // VectorX - handled in v-call Bad, // VectorY - handled in v-call Bad, // VectorZ - handled in v-call @@ -1879,11 +1887,10 @@ const TypeTuple *TypeTuple::INT_PAIR; const TypeTuple *TypeTuple::LONG_PAIR; const TypeTuple *TypeTuple::INT_CC_PAIR; const TypeTuple *TypeTuple::LONG_CC_PAIR; - //------------------------------make------------------------------------------- // Make a TypeTuple from the range of a method signature const TypeTuple *TypeTuple::make_range(ciSignature* sig) { ciType* return_type = sig->return_type(); uint arg_cnt = return_type->size(); @@ -2250,24 +2257,26 @@ return false; } //==============================TypeVect======================================= // Convenience common pre-built types. +const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors //------------------------------make------------------------------------------- const TypeVect* TypeVect::make(const Type *elem, uint length) { BasicType elem_bt = elem->array_element_basic_type(); assert(is_java_primitive(elem_bt), "only primitive types in vector"); - assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); int size = length * type2aelembytes(elem_bt); switch (Matcher::vector_ideal_reg(size)) { + case Op_VecA: + return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); case Op_VecS: return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); case Op_RegL: case Op_VecD: case Op_RegD: @@ -2295,11 +2304,11 @@ case Bottom: // Ye Olde Default return t; default: // All else is a mistake typerr(t); - + case VectorA: case VectorS: case VectorD: case VectorX: case VectorY: case VectorZ: { // Meeting 2 vectors? @@ -2350,10 +2359,12 @@ //------------------------------dump2------------------------------------------ #ifndef PRODUCT void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { switch (base()) { + case VectorA: + st->print("vectora["); break; case VectorS: st->print("vectors["); break; case VectorD: st->print("vectord["); break; case VectorX: diff a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp --- a/src/hotspot/share/opto/type.hpp +++ b/src/hotspot/share/opto/type.hpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. @@ -51,10 +51,11 @@ class TypeNarrowOop; class TypeNarrowKlass; class TypeAry; class TypeTuple; class TypeVect; +class TypeVectA; class TypeVectS; class TypeVectD; class TypeVectX; class TypeVectY; class TypeVectZ; @@ -85,10 +86,11 @@ NarrowOop, // Compressed oop pointer NarrowKlass, // Compressed klass pointer Tuple, // Method signature or object layout Array, // Array types + VectorA, // (Scalable) Vector types for vector length agnostic VectorS, // 32bit Vector types VectorD, // 64bit Vector types VectorX, // 128bit Vector types VectorY, // 256bit Vector types VectorZ, // 512bit Vector types @@ -755,10 +757,11 @@ static const TypeVect *make(const Type* elem, uint length); virtual const Type *xmeet( const Type *t) const; virtual const Type *xdual() const; // Compute dual right now. + static const TypeVect *VECTA; static const TypeVect *VECTS; static const TypeVect *VECTD; static const TypeVect *VECTX; static const TypeVect *VECTY; static const TypeVect *VECTZ; @@ -766,10 +769,15 @@ #ifndef PRODUCT virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping #endif }; +class TypeVectA : public TypeVect { + friend class TypeVect; + TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} +}; + class TypeVectS : public TypeVect { friend class TypeVect; TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} }; @@ -1620,16 +1628,16 @@ inline const TypeAry *Type::isa_ary() const { return ((_base == Array) ? (TypeAry*)this : NULL); } inline const TypeVect *Type::is_vect() const { - assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); + assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); return (TypeVect*)this; } inline const TypeVect *Type::isa_vect() const { - return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; + return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL; } inline const TypePtr *Type::is_ptr() const { // AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between. assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer");
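The relaxed is_vect()/isa_vect() range checks above depend on the vector Base enumerators staying contiguous, with VectorA first. A compile-time guard for that ordering (enumerator values here are illustrative, not Type's real ones):

    // Enumerator values are illustrative, not Type::Base's actual layout.
    enum Base { Array, VectorA, VectorS, VectorD, VectorX, VectorY, VectorZ, AnyPtr };

    static_assert(VectorA + 1 == VectorS && VectorS + 1 == VectorD &&
                  VectorD + 1 == VectorX && VectorX + 1 == VectorY &&
                  VectorY + 1 == VectorZ,
                  "vector Base values must stay contiguous, starting at VectorA");

    bool isa_vect(Base b) { return b >= VectorA && b <= VectorZ; }

    int main() { return (isa_vect(VectorA) && !isa_vect(Array)) ? 0 : 1; }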