// NOTE(review): interior fragment of SuperWord::vector_opd(Node_List* p, int opd_idx) —
// produces (or reuses) a vector operand for input position opd_idx of pack p.
// 'p0' (the pack's first node), 'opd' (its opd_idx-th input) and 'vlen' (the
// vector length) are defined earlier in the function, outside this fragment.
1439 if (same_inputs(p, opd_idx)) {
// All pack members share the identical input at this position.
1440 if (opd->is_Vector() || opd->is_LoadVector()) {
// The shared input is already a vector — reuse it directly.  A vector
// shift count would be illegal here (counts stay scalar), hence the assert.
1441 assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
1442 return opd; // input is matching vector
1443 }
// Special case: the scalar shift count (input 2) of a vector shift.
1444 if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
1445 Compile* C = _phase->C;
1446 Node* cnt = opd;
1447 // Vector instructions do not mask shift count, do it here.
1448 juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
1449 const TypeInt* t = opd->find_int_type();
1450 if (t != NULL && t->is_con()) {
// Constant count: fold the masking at compile time when out of range.
1451 juint shift = t->get_con();
1452 if (shift > mask) { // Unsigned cmp
1453 cnt = ConNode::make(C, TypeInt::make(shift & mask));
1454 }
1455 } else {
// Non-constant count: emit an explicit AndI only when the count's type
// range does not already guarantee it lies in [0, mask].
1456 if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
1457 cnt = ConNode::make(C, TypeInt::make(mask));
1458 _igvn.register_new_node_with_optimizer(cnt);
1459 cnt = new (C) AndINode(opd, cnt);
1460 _igvn.register_new_node_with_optimizer(cnt);
1461 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1462 }
1463 assert(opd->bottom_type()->isa_int(), "int type only");
1464 // Move non constant shift count into vector register.
1465 cnt = VectorNode::shift_count(C, p0, cnt, vlen, velt_basic_type(p0));
1466 }
// Register any newly created count node and give it the operand's ctrl.
1467 if (cnt != opd) {
1468 _igvn.register_new_node_with_optimizer(cnt);
1469 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1470 }
1471 return cnt;
1472 }
1473 assert(!opd->is_StoreVector(), "such vector is not expected here");
1474 // Convert scalar input to vector with the same number of elements as
1475 // p0's vector. Use p0's type because size of operand's container in
1476 // vector should match p0's size regardless operand's size.
1477 const Type* p0_t = velt_type(p0);
1478 VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);
1479 
// NOTE(review): tail of SuperWord::align_initial_loop_index — builds an ideal-graph
// expression for a new pre-loop limit so the main loop starts with align_to_ref
// vector-aligned.  'lim0', 'orig_limit', 'pre_ctrl', 'pre_opaq' and
// 'align_to_ref_p' are established earlier in the function (not visible here).
2050 // lim = lim0 - N
2051 // (e - lim) % V == 0
2052 // Solving for lim:
2053 // (e - lim0 + N) % V == 0
2054 // N = (V - (e - lim0)) % V
2055 // lim = lim0 - (V - (e - lim0)) % V
2056 
2057 int vw = vector_width_in_bytes(align_to_ref);
2058 int stride = iv_stride();
2059 int scale = align_to_ref_p.scale_in_bytes();
2060 int elt_size = align_to_ref_p.memory_size();
2061 int v_align = vw / elt_size;
2062 assert(v_align > 1, "sanity");
// Offset is expressed in elements, not bytes.
2063 int offset = align_to_ref_p.offset_in_bytes() / elt_size;
2064 Node *offsn = _igvn.intcon(offset);
2065 
2066 Node *e = offsn;
2067 if (align_to_ref_p.invar() != NULL) {
2068 // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
2069 Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
2070 Node* aref = new (_phase->C) URShiftINode(align_to_ref_p.invar(), log2_elt);
2071 _igvn.register_new_node_with_optimizer(aref);
2072 _phase->set_ctrl(aref, pre_ctrl);
2073 if (align_to_ref_p.negate_invar()) {
2074 e = new (_phase->C) SubINode(e, aref);
2075 } else {
2076 e = new (_phase->C) AddINode(e, aref);
2077 }
2078 _igvn.register_new_node_with_optimizer(e);
2079 _phase->set_ctrl(e, pre_ctrl);
2080 }
// When the vector is wider than the object alignment, the base address
// contributes to alignment too, so fold in (base & (vw-1)) >>> log2(elt).
2081 if (vw > ObjectAlignmentInBytes) {
2082 // incorporate base e +/- base && Mask >>> log2(elt)
2083 Node* xbase = new(_phase->C) CastP2XNode(NULL, align_to_ref_p.base());
2084 _igvn.register_new_node_with_optimizer(xbase);
2085 #ifdef _LP64
2086 xbase = new (_phase->C) ConvL2INode(xbase);
2087 _igvn.register_new_node_with_optimizer(xbase);
2088 #endif
2089 Node* mask = _igvn.intcon(vw-1);
2090 Node* masked_xbase = new (_phase->C) AndINode(xbase, mask);
2091 _igvn.register_new_node_with_optimizer(masked_xbase);
2092 Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
2093 Node* bref = new (_phase->C) URShiftINode(masked_xbase, log2_elt);
2094 _igvn.register_new_node_with_optimizer(bref);
2095 _phase->set_ctrl(bref, pre_ctrl);
2096 e = new (_phase->C) AddINode(e, bref);
2097 _igvn.register_new_node_with_optimizer(e);
2098 _phase->set_ctrl(e, pre_ctrl);
2099 }
2100 
2101 // compute e +/- lim0
2102 if (scale < 0) {
2103 e = new (_phase->C) SubINode(e, lim0);
2104 } else {
2105 e = new (_phase->C) AddINode(e, lim0);
2106 }
2107 _igvn.register_new_node_with_optimizer(e);
2108 _phase->set_ctrl(e, pre_ctrl);
2109 
// NOTE(review): when stride and scale have the same sign the remaining
// distance to alignment is V - e (per the derivation above) — confirm
// against the full derivation earlier in the function.
2110 if (stride * scale > 0) {
2111 // compute V - (e +/- lim0)
2112 Node* va = _igvn.intcon(v_align);
2113 e = new (_phase->C) SubINode(va, e);
2114 _igvn.register_new_node_with_optimizer(e);
2115 _phase->set_ctrl(e, pre_ctrl);
2116 }
2117 // compute N = (exp) % V
// The AndI with (v_align - 1) implements "% V"; this presumes v_align is a
// power of two (vector width / element size) — see assert above for > 1.
2118 Node* va_msk = _igvn.intcon(v_align - 1);
2119 Node* N = new (_phase->C) AndINode(e, va_msk);
2120 _igvn.register_new_node_with_optimizer(N);
2121 _phase->set_ctrl(N, pre_ctrl);
2122 
2123 // substitute back into (1), so that new limit
2124 // lim = lim0 + N
2125 Node* lim;
2126 if (stride < 0) {
2127 lim = new (_phase->C) SubINode(lim0, N);
2128 } else {
2129 lim = new (_phase->C) AddINode(lim0, N);
2130 }
2131 _igvn.register_new_node_with_optimizer(lim);
2132 _phase->set_ctrl(lim, pre_ctrl);
// Clamp against the original limit so the adjusted pre-loop never runs
// past the full trip count (Min for up-counting, Max for down-counting).
2133 Node* constrained =
2134 (stride > 0) ? (Node*) new (_phase->C) MinINode(lim, orig_limit)
2135 : (Node*) new (_phase->C) MaxINode(lim, orig_limit);
2136 _igvn.register_new_node_with_optimizer(constrained);
2137 _phase->set_ctrl(constrained, pre_ctrl);
// Install the constrained limit as the input of the pre-loop's opaque node.
2138 _igvn.hash_delete(pre_opaq);
2139 pre_opaq->set_req(1, constrained);
2140 }
2141
2142 //----------------------------get_pre_loop_end---------------------------
2143 // Find pre loop end from main loop. Returns null if none.
2144 CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) {
2145 Node *ctrl = cl->in(LoopNode::EntryControl);
2146 if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL;
2147 Node *iffm = ctrl->in(0);
2148 if (!iffm->is_If()) return NULL;
2149 Node *p_f = iffm->in(0);
2150 if (!p_f->is_IfFalse()) return NULL;
2151 if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
2152 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2153 CountedLoopNode* loop_node = pre_end->loopnode();
2154 if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
2155 return pre_end;
|
// NOTE(review): interior fragment of SuperWord::vector_opd(Node_List* p, int opd_idx) —
// produces (or reuses) a vector operand for input position opd_idx of pack p.
// 'p0' (the pack's first node), 'opd' (its opd_idx-th input) and 'vlen' (the
// vector length) are defined earlier in the function, outside this fragment.
1439 if (same_inputs(p, opd_idx)) {
// All pack members share the identical input at this position.
1440 if (opd->is_Vector() || opd->is_LoadVector()) {
// The shared input is already a vector — reuse it directly.  A vector
// shift count would be illegal here (counts stay scalar), hence the assert.
1441 assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
1442 return opd; // input is matching vector
1443 }
// Special case: the scalar shift count (input 2) of a vector shift.
1444 if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
1445 Compile* C = _phase->C;
1446 Node* cnt = opd;
1447 // Vector instructions do not mask shift count, do it here.
1448 juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
1449 const TypeInt* t = opd->find_int_type();
1450 if (t != NULL && t->is_con()) {
// Constant count: fold the masking at compile time when out of range.
1451 juint shift = t->get_con();
1452 if (shift > mask) { // Unsigned cmp
1453 cnt = ConNode::make(C, TypeInt::make(shift & mask));
1454 }
1455 } else {
// Non-constant count: emit an explicit AndI only when the count's type
// range does not already guarantee it lies in [0, mask].
1456 if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
1457 cnt = ConNode::make(C, TypeInt::make(mask));
1458 _igvn.register_new_node_with_optimizer(cnt);
1459 cnt = new AndINode(opd, cnt);
1460 _igvn.register_new_node_with_optimizer(cnt);
1461 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1462 }
1463 assert(opd->bottom_type()->isa_int(), "int type only");
1464 // Move non constant shift count into vector register.
1465 cnt = VectorNode::shift_count(C, p0, cnt, vlen, velt_basic_type(p0));
1466 }
// Register any newly created count node and give it the operand's ctrl.
1467 if (cnt != opd) {
1468 _igvn.register_new_node_with_optimizer(cnt);
1469 _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
1470 }
1471 return cnt;
1472 }
1473 assert(!opd->is_StoreVector(), "such vector is not expected here");
1474 // Convert scalar input to vector with the same number of elements as
1475 // p0's vector. Use p0's type because size of operand's container in
1476 // vector should match p0's size regardless operand's size.
1477 const Type* p0_t = velt_type(p0);
1478 VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);
1479 
// NOTE(review): tail of SuperWord::align_initial_loop_index — builds an ideal-graph
// expression for a new pre-loop limit so the main loop starts with align_to_ref
// vector-aligned.  'lim0', 'orig_limit', 'pre_ctrl', 'pre_opaq' and
// 'align_to_ref_p' are established earlier in the function (not visible here).
2050 // lim = lim0 - N
2051 // (e - lim) % V == 0
2052 // Solving for lim:
2053 // (e - lim0 + N) % V == 0
2054 // N = (V - (e - lim0)) % V
2055 // lim = lim0 - (V - (e - lim0)) % V
2056 
2057 int vw = vector_width_in_bytes(align_to_ref);
2058 int stride = iv_stride();
2059 int scale = align_to_ref_p.scale_in_bytes();
2060 int elt_size = align_to_ref_p.memory_size();
2061 int v_align = vw / elt_size;
2062 assert(v_align > 1, "sanity");
// Offset is expressed in elements, not bytes.
2063 int offset = align_to_ref_p.offset_in_bytes() / elt_size;
2064 Node *offsn = _igvn.intcon(offset);
2065 
2066 Node *e = offsn;
2067 if (align_to_ref_p.invar() != NULL) {
2068 // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
2069 Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
2070 Node* aref = new URShiftINode(align_to_ref_p.invar(), log2_elt);
2071 _igvn.register_new_node_with_optimizer(aref);
2072 _phase->set_ctrl(aref, pre_ctrl);
2073 if (align_to_ref_p.negate_invar()) {
2074 e = new SubINode(e, aref);
2075 } else {
2076 e = new AddINode(e, aref);
2077 }
2078 _igvn.register_new_node_with_optimizer(e);
2079 _phase->set_ctrl(e, pre_ctrl);
2080 }
// When the vector is wider than the object alignment, the base address
// contributes to alignment too, so fold in (base & (vw-1)) >>> log2(elt).
2081 if (vw > ObjectAlignmentInBytes) {
2082 // incorporate base e +/- base && Mask >>> log2(elt)
2083 Node* xbase = new CastP2XNode(NULL, align_to_ref_p.base());
2084 _igvn.register_new_node_with_optimizer(xbase);
2085 #ifdef _LP64
2086 xbase = new ConvL2INode(xbase);
2087 _igvn.register_new_node_with_optimizer(xbase);
2088 #endif
2089 Node* mask = _igvn.intcon(vw-1);
2090 Node* masked_xbase = new AndINode(xbase, mask);
2091 _igvn.register_new_node_with_optimizer(masked_xbase);
2092 Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
2093 Node* bref = new URShiftINode(masked_xbase, log2_elt);
2094 _igvn.register_new_node_with_optimizer(bref);
2095 _phase->set_ctrl(bref, pre_ctrl);
2096 e = new AddINode(e, bref);
2097 _igvn.register_new_node_with_optimizer(e);
2098 _phase->set_ctrl(e, pre_ctrl);
2099 }
2100 
2101 // compute e +/- lim0
2102 if (scale < 0) {
2103 e = new SubINode(e, lim0);
2104 } else {
2105 e = new AddINode(e, lim0);
2106 }
2107 _igvn.register_new_node_with_optimizer(e);
2108 _phase->set_ctrl(e, pre_ctrl);
2109 
// NOTE(review): when stride and scale have the same sign the remaining
// distance to alignment is V - e (per the derivation above) — confirm
// against the full derivation earlier in the function.
2110 if (stride * scale > 0) {
2111 // compute V - (e +/- lim0)
2112 Node* va = _igvn.intcon(v_align);
2113 e = new SubINode(va, e);
2114 _igvn.register_new_node_with_optimizer(e);
2115 _phase->set_ctrl(e, pre_ctrl);
2116 }
2117 // compute N = (exp) % V
// The AndI with (v_align - 1) implements "% V"; this presumes v_align is a
// power of two (vector width / element size) — see assert above for > 1.
2118 Node* va_msk = _igvn.intcon(v_align - 1);
2119 Node* N = new AndINode(e, va_msk);
2120 _igvn.register_new_node_with_optimizer(N);
2121 _phase->set_ctrl(N, pre_ctrl);
2122 
2123 // substitute back into (1), so that new limit
2124 // lim = lim0 + N
2125 Node* lim;
2126 if (stride < 0) {
2127 lim = new SubINode(lim0, N);
2128 } else {
2129 lim = new AddINode(lim0, N);
2130 }
2131 _igvn.register_new_node_with_optimizer(lim);
2132 _phase->set_ctrl(lim, pre_ctrl);
// Clamp against the original limit so the adjusted pre-loop never runs
// past the full trip count (Min for up-counting, Max for down-counting).
2133 Node* constrained =
2134 (stride > 0) ? (Node*) new MinINode(lim, orig_limit)
2135 : (Node*) new MaxINode(lim, orig_limit);
2136 _igvn.register_new_node_with_optimizer(constrained);
2137 _phase->set_ctrl(constrained, pre_ctrl);
// Install the constrained limit as the input of the pre-loop's opaque node.
2138 _igvn.hash_delete(pre_opaq);
2139 pre_opaq->set_req(1, constrained);
2140 }
2141
2142 //----------------------------get_pre_loop_end---------------------------
2143 // Find pre loop end from main loop. Returns null if none.
2144 CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) {
2145 Node *ctrl = cl->in(LoopNode::EntryControl);
2146 if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL;
2147 Node *iffm = ctrl->in(0);
2148 if (!iffm->is_If()) return NULL;
2149 Node *p_f = iffm->in(0);
2150 if (!p_f->is_IfFalse()) return NULL;
2151 if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
2152 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
2153 CountedLoopNode* loop_node = pre_end->loopnode();
2154 if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
2155 return pre_end;
|