< prev index next >
src/share/vm/opto/superword.cpp
Print this page
*** 64,78 ****
_nlist(arena(), 8, 0, NULL), // scratch list of nodes
_lpt(NULL), // loop tree node
_lp(NULL), // LoopNode
_bb(NULL), // basic block
_iv(NULL), // induction var
! _race_possible(false) // cases where SDMU is true
{}
//------------------------------transform_loop---------------------------
! void SuperWord::transform_loop(IdealLoopTree* lpt) {
assert(UseSuperWord, "should be");
// Do vectors exist on this architecture?
if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
assert(lpt->_head->is_CountedLoop(), "must be");
--- 64,79 ----
_nlist(arena(), 8, 0, NULL), // scratch list of nodes
_lpt(NULL), // loop tree node
_lp(NULL), // LoopNode
_bb(NULL), // basic block
_iv(NULL), // induction var
! _race_possible(false), // cases where SDMU is true
! _early_return(true)
{}
//------------------------------transform_loop---------------------------
! void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
assert(UseSuperWord, "should be");
// Do vectors exist on this architecture?
if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
assert(lpt->_head->is_CountedLoop(), "must be");
*** 103,114 ****
--- 104,117 ----
set_lp(cl);
// For now, define one block which is the entire loop body
set_bb(cl);
+ if (do_optimization) {
assert(_packset.length() == 0, "packset must be empty");
SLP_extract();
+ }
}
//------------------------------SLP_extract---------------------------
// Extract the superword level parallelism
//
*** 208,223 ****
// this reference to a vector-aligned address.
best_align_to_mem_ref = mem_ref;
best_iv_adjustment = iv_adjustment;
}
! SWPointer align_to_ref_p(mem_ref, this);
// Set alignment relative to "align_to_ref" for all related memory operations.
for (int i = memops.size() - 1; i >= 0; i--) {
MemNode* s = memops.at(i)->as_Mem();
if (isomorphic(s, mem_ref)) {
! SWPointer p2(s, this);
if (p2.comparable(align_to_ref_p)) {
int align = memory_alignment(s, iv_adjustment);
set_alignment(s, align);
}
}
--- 211,226 ----
// this reference to a vector-aligned address.
best_align_to_mem_ref = mem_ref;
best_iv_adjustment = iv_adjustment;
}
! SWPointer align_to_ref_p(mem_ref, this, NULL, false);
// Set alignment relative to "align_to_ref" for all related memory operations.
for (int i = memops.size() - 1; i >= 0; i--) {
MemNode* s = memops.at(i)->as_Mem();
if (isomorphic(s, mem_ref)) {
! SWPointer p2(s, this, NULL, false);
if (p2.comparable(align_to_ref_p)) {
int align = memory_alignment(s, iv_adjustment);
set_alignment(s, align);
}
}
*** 342,361 ****
GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
// Count number of comparable memory ops
for (uint i = 0; i < memops.size(); i++) {
MemNode* s1 = memops.at(i)->as_Mem();
! SWPointer p1(s1, this);
// Discard if pre loop can't align this reference
if (!ref_is_alignable(p1)) {
*cmp_ct.adr_at(i) = 0;
continue;
}
for (uint j = i+1; j < memops.size(); j++) {
MemNode* s2 = memops.at(j)->as_Mem();
if (isomorphic(s1, s2)) {
! SWPointer p2(s2, this);
if (p1.comparable(p2)) {
(*cmp_ct.adr_at(i))++;
(*cmp_ct.adr_at(j))++;
}
}
--- 345,364 ----
GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
// Count number of comparable memory ops
for (uint i = 0; i < memops.size(); i++) {
MemNode* s1 = memops.at(i)->as_Mem();
! SWPointer p1(s1, this, NULL, false);
// Discard if pre loop can't align this reference
if (!ref_is_alignable(p1)) {
*cmp_ct.adr_at(i) = 0;
continue;
}
for (uint j = i+1; j < memops.size(); j++) {
MemNode* s2 = memops.at(j)->as_Mem();
if (isomorphic(s1, s2)) {
! SWPointer p2(s2, this, NULL, false);
if (p1.comparable(p2)) {
(*cmp_ct.adr_at(i))++;
(*cmp_ct.adr_at(j))++;
}
}
*** 372,382 ****
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Store()) {
int vw = vector_width_in_bytes(s);
assert(vw > 1, "sanity");
! SWPointer p(s, this);
if (cmp_ct.at(j) > max_ct ||
cmp_ct.at(j) == max_ct &&
(vw > max_vw ||
vw == max_vw &&
(data_size(s) < min_size ||
--- 375,385 ----
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Store()) {
int vw = vector_width_in_bytes(s);
assert(vw > 1, "sanity");
! SWPointer p(s, this, NULL, false);
if (cmp_ct.at(j) > max_ct ||
cmp_ct.at(j) == max_ct &&
(vw > max_vw ||
vw == max_vw &&
(data_size(s) < min_size ||
*** 395,405 ****
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Load()) {
int vw = vector_width_in_bytes(s);
assert(vw > 1, "sanity");
! SWPointer p(s, this);
if (cmp_ct.at(j) > max_ct ||
cmp_ct.at(j) == max_ct &&
(vw > max_vw ||
vw == max_vw &&
(data_size(s) < min_size ||
--- 398,408 ----
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Load()) {
int vw = vector_width_in_bytes(s);
assert(vw > 1, "sanity");
! SWPointer p(s, this, NULL, false);
if (cmp_ct.at(j) > max_ct ||
cmp_ct.at(j) == max_ct &&
(vw > max_vw ||
vw == max_vw &&
(data_size(s) < min_size ||
*** 480,490 ****
}
//---------------------------get_iv_adjustment---------------------------
// Calculate the loop's iv adjustment for this memory op.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
! SWPointer align_to_ref_p(mem_ref, this);
int offset = align_to_ref_p.offset_in_bytes();
int scale = align_to_ref_p.scale_in_bytes();
int vw = vector_width_in_bytes(mem_ref);
assert(vw > 1, "sanity");
int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
--- 483,493 ----
}
//---------------------------get_iv_adjustment---------------------------
// Calculate the loop's iv adjustment for this memory op.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
! SWPointer align_to_ref_p(mem_ref, this, NULL, false);
int offset = align_to_ref_p.offset_in_bytes();
int scale = align_to_ref_p.scale_in_bytes();
int vw = vector_width_in_bytes(mem_ref);
assert(vw > 1, "sanity");
int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
*** 540,556 ****
// If no dependency yet, use slice
if (_dg.dep(s1)->in_cnt() == 0) {
_dg.make_edge(slice, s1);
}
! SWPointer p1(s1->as_Mem(), this);
bool sink_dependent = true;
for (int k = j - 1; k >= 0; k--) {
Node* s2 = _nlist.at(k);
if (s1->is_Load() && s2->is_Load())
continue;
! SWPointer p2(s2->as_Mem(), this);
int cmp = p1.cmp(p2);
if (SuperWordRTDepCheck &&
p1.base() != p2.base() && p1.valid() && p2.valid()) {
// Create a runtime check to disambiguate
--- 543,559 ----
// If no dependency yet, use slice
if (_dg.dep(s1)->in_cnt() == 0) {
_dg.make_edge(slice, s1);
}
! SWPointer p1(s1->as_Mem(), this, NULL, false);
bool sink_dependent = true;
for (int k = j - 1; k >= 0; k--) {
Node* s2 = _nlist.at(k);
if (s1->is_Load() && s2->is_Load())
continue;
! SWPointer p2(s2->as_Mem(), this, NULL, false);
int cmp = p1.cmp(p2);
if (SuperWordRTDepCheck &&
p1.base() != p2.base() && p1.valid() && p2.valid()) {
// Create a runtime check to disambiguate
*** 686,697 ****
// FIXME - co_locate_pack fails on Stores in different mem-slices, so
// only pack memops that are in the same alias set until that's fixed.
if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
_phase->C->get_alias_index(s2->as_Mem()->adr_type()))
return false;
! SWPointer p1(s1->as_Mem(), this);
! SWPointer p2(s2->as_Mem(), this);
if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
return diff == data_size(s1);
}
--- 689,700 ----
// FIXME - co_locate_pack fails on Stores in different mem-slices, so
// only pack memops that are in the same alias set until that's fixed.
if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
_phase->C->get_alias_index(s2->as_Mem()->adr_type()))
return false;
! SWPointer p1(s1->as_Mem(), this, NULL, false);
! SWPointer p2(s2->as_Mem(), this, NULL, false);
if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
return diff == data_size(s1);
}
*** 1495,1511 ****
Node* first = executed_first(p);
int opc = n->Opcode();
if (n->is_Load()) {
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
! SWPointer p1(n->as_Mem(), this);
// Identify the memory dependency for the new loadVector node by
// walking up through memory chain.
// This is done to give flexibility to the new loadVector node so that
// it can move above independent storeVector nodes.
while (mem->is_StoreVector()) {
! SWPointer p2(mem->as_Mem(), this);
int cmp = p1.cmp(p2);
if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
mem = mem->in(MemNode::Memory);
} else {
break; // dependent memory
--- 1498,1514 ----
Node* first = executed_first(p);
int opc = n->Opcode();
if (n->is_Load()) {
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
! SWPointer p1(n->as_Mem(), this, NULL, false);
// Identify the memory dependency for the new loadVector node by
// walking up through memory chain.
// This is done to give flexibility to the new loadVector node so that
// it can move above independent storeVector nodes.
while (mem->is_StoreVector()) {
! SWPointer p2(mem->as_Mem(), this, NULL, false);
int cmp = p1.cmp(p2);
if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
mem = mem->in(MemNode::Memory);
} else {
break; // dependent memory
*** 2018,2028 ****
}
//------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference
int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
! SWPointer p(s, this);
if (!p.valid()) {
return bottom_align;
}
int vw = vector_width_in_bytes(s);
if (vw < 2) {
--- 2021,2031 ----
}
//------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference
int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
! SWPointer p(s, this, NULL, false);
if (!p.valid()) {
return bottom_align;
}
int vw = vector_width_in_bytes(s);
if (vw < 2) {
*** 2182,2192 ****
// Ensure the original loop limit is available from the
// pre-loop Opaque1 node.
Node *orig_limit = pre_opaq->original_loop_limit();
assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
! SWPointer align_to_ref_p(align_to_ref, this);
assert(align_to_ref_p.valid(), "sanity");
// Given:
// lim0 == original pre loop limit
// V == v_align (power of 2)
--- 2185,2195 ----
// Ensure the original loop limit is available from the
// pre-loop Opaque1 node.
Node *orig_limit = pre_opaq->original_loop_limit();
assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
! SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
assert(align_to_ref_p.valid(), "sanity");
// Given:
// lim0 == original pre loop limit
// V == v_align (power of 2)
*** 2353,2362 ****
--- 2356,2366 ----
_align_to_ref = NULL;
_lpt = NULL;
_lp = NULL;
_bb = NULL;
_iv = NULL;
+ _early_return = false;
}
//------------------------------print_packset---------------------------
void SuperWord::print_packset() {
#ifndef PRODUCT
*** 2409,2421 ****
//==============================SWPointer===========================
//----------------------------SWPointer------------------------
! SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
_mem(mem), _slp(slp), _base(NULL), _adr(NULL),
! _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
Node* adr = mem->in(MemNode::Address);
if (!adr->is_AddP()) {
assert(!valid(), "too complex");
return;
--- 2413,2427 ----
//==============================SWPointer===========================
//----------------------------SWPointer------------------------
! SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
_mem(mem), _slp(slp), _base(NULL), _adr(NULL),
! _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
! _nstack(nstack), _analyze_only(analyze_only),
! _stack_idx(0) {
Node* adr = mem->in(MemNode::Address);
if (!adr->is_AddP()) {
assert(!valid(), "too complex");
return;
*** 2444,2454 ****
// Following is used to create a temporary object during
// the pattern match of an address expression.
SWPointer::SWPointer(SWPointer* p) :
_mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
! _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}
//------------------------scaled_iv_plus_offset--------------------
// Match: k*iv + offset
// where: k is a constant that may be zero, and
// offset is (k2 [+/- invariant]) where k2 may be zero and invariant is optional
--- 2450,2462 ----
// Following is used to create a temporary object during
// the pattern match of an address expression.
SWPointer::SWPointer(SWPointer* p) :
_mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
! _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
! _nstack(p->_nstack), _analyze_only(p->_analyze_only),
! _stack_idx(p->_stack_idx) {}
//------------------------scaled_iv_plus_offset--------------------
// Match: k*iv + offset
// where: k is a constant that may be zero, and
// offset is (k2 [+/- invariant]) where k2 may be zero and invariant is optional
*** 2487,2496 ****
--- 2495,2507 ----
}
if (n == iv()) {
_scale = 1;
return true;
}
+ if (_analyze_only && (invariant(n) == false)) {
+ _nstack->push(n, _stack_idx++);
+ }
int opc = n->Opcode();
if (opc == Op_MulI) {
if (n->in(1) == iv() && n->in(2)->is_Con()) {
_scale = n->in(2)->get_int();
return true;
*** 2544,2553 ****
--- 2555,2567 ----
return true;
}
return false;
}
if (_invar != NULL) return false; // already have an invariant
+ if (_analyze_only && (invariant(n) == false)) {
+ _nstack->push(n, _stack_idx++);
+ }
if (opc == Op_AddI) {
if (n->in(2)->is_Con() && invariant(n->in(1))) {
_negate_invar = negate;
_invar = n->in(1);
_offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
< prev index next >