1 /*
2 * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
308 bt = n->as_Mem()->memory_type();
309 } else {
310 bt = n->bottom_type()->basic_type();
311 }
312
313 if (post_loop_allowed) {
314 if (!small_basic_type) {
315 switch (bt) {
316 case T_CHAR:
317 case T_BYTE:
318 case T_SHORT:
319 small_basic_type = true;
320 break;
321
322 case T_LONG:
323 // TODO: Remove when support completed for mask context with LONG.
324 // Support needs to be augmented for logical qword operations, currently we map to dword
325 // buckets for vectors on logicals as these were legacy.
326 small_basic_type = true;
327 break;
328 }
329 }
330 }
331
332 if (is_java_primitive(bt) == false) continue;
333
334 int cur_max_vector = Matcher::max_vector_size(bt);
335
336 // If a max vector exists which is not larger than _local_loop_unroll_factor
337 // stop looking, we already have the max vector to map to.
338 if (cur_max_vector < local_loop_unroll_factor) {
339 is_slp = false;
340 if (TraceSuperWordLoopUnrollAnalysis) {
341 tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
342 }
343 break;
344 }
345
346 // Map the maximal common vector
347 if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
735 (*cmp_ct.adr_at(i))++;
736 (*cmp_ct.adr_at(j))++;
737 }
738 }
739 }
740 }
741
742 // Find Store (or Load) with the greatest number of "comparable" references,
743 // biggest vector size, smallest data size and smallest iv offset.
744 int max_ct = 0;
745 int max_vw = 0;
746 int max_idx = -1;
747 int min_size = max_jint;
748 int min_iv_offset = max_jint;
749 for (uint j = 0; j < memops.size(); j++) {
750 MemNode* s = memops.at(j)->as_Mem();
751 if (s->is_Store()) {
752 int vw = vector_width_in_bytes(s);
753 assert(vw > 1, "sanity");
754 SWPointer p(s, this, NULL, false);
755 if (cmp_ct.at(j) > max_ct ||
756 cmp_ct.at(j) == max_ct &&
757 (vw > max_vw ||
758 vw == max_vw &&
759 (data_size(s) < min_size ||
760 data_size(s) == min_size &&
761 (p.offset_in_bytes() < min_iv_offset)))) {
762 max_ct = cmp_ct.at(j);
763 max_vw = vw;
764 max_idx = j;
765 min_size = data_size(s);
766 min_iv_offset = p.offset_in_bytes();
767 }
768 }
769 }
770 // If no stores, look at loads
771 if (max_ct == 0) {
772 for (uint j = 0; j < memops.size(); j++) {
773 MemNode* s = memops.at(j)->as_Mem();
774 if (s->is_Load()) {
775 int vw = vector_width_in_bytes(s);
776 assert(vw > 1, "sanity");
777 SWPointer p(s, this, NULL, false);
778 if (cmp_ct.at(j) > max_ct ||
779 cmp_ct.at(j) == max_ct &&
780 (vw > max_vw ||
781 vw == max_vw &&
782 (data_size(s) < min_size ||
783 data_size(s) == min_size &&
784 (p.offset_in_bytes() < min_iv_offset)))) {
785 max_ct = cmp_ct.at(j);
786 max_vw = vw;
787 max_idx = j;
788 min_size = data_size(s);
789 min_iv_offset = p.offset_in_bytes();
790 }
791 }
792 }
793 }
794
795 #ifdef ASSERT
796 if (TraceSuperWord && Verbose) {
797 tty->print_cr("\nVector memops after find_align_to_ref");
798 for (uint i = 0; i < memops.size(); i++) {
799 MemNode* s = memops.at(i)->as_Mem();
800 s->dump();
801 }
802 }
803 #endif
804
908
909 #ifndef PRODUCT
910 if (TraceSuperWord) {
911 tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
912 mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
913 mem_ref->dump();
914 }
915 #endif
916 return iv_adjustment;
917 }
918
919 //---------------------------dependence_graph---------------------------
920 // Construct dependency graph.
921 // Add dependence edges to load/store nodes for memory dependence
922 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
923 void SuperWord::dependence_graph() {
924 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
925 // First, assign a dependence node to each memory node
926 for (int i = 0; i < _block.length(); i++ ) {
927 Node *n = _block.at(i);
928 if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) {
929 _dg.make_node(n);
930 }
931 }
932
933 // For each memory slice, create the dependences
934 for (int i = 0; i < _mem_slice_head.length(); i++) {
935 Node* n = _mem_slice_head.at(i);
936 Node* n_tail = _mem_slice_tail.at(i);
937
938 // Get slice in predecessor order (last is first)
939 if (cl->is_main_loop()) {
940 mem_slice_preds(n_tail, n, _nlist);
941 }
942
943 #ifndef PRODUCT
944 if(TraceSuperWord && Verbose) {
945 tty->print_cr("SuperWord::dependence_graph: built a new mem slice");
946 for (int j = _nlist.length() - 1; j >= 0 ; j--) {
947 _nlist.at(j)->dump();
948 }
1728 _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool
1729 }
1730 _sw->_packset.remove(cmovd_pk);
1731 _sw->_packset.remove(bool_pk);
1732 _sw->_packset.remove(cmpd_pk);
1733 _sw->_packset.append(new_cmpd_pk);
1734 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})
1735 return new_cmpd_pk;
1736 }
1737
1738 bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) {
1739 Node* cmpd0 = cmpd_pk->at(0);
1740 assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode");
1741 assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD");
1742 assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size");
1743 Node* in1 = cmpd0->in(1);
1744 Node* in2 = cmpd0->in(2);
1745 Node_List* in1_pk = _sw->my_pack(in1);
1746 Node_List* in2_pk = _sw->my_pack(in2);
1747
1748 if (in1_pk != NULL && in1_pk->size() != cmpd_pk->size()
1749 || in2_pk != NULL && in2_pk->size() != cmpd_pk->size() ) {
1750 return false;
1751 }
1752
1753 // test if "all" in1 are in the same pack or the same node
1754 if (in1_pk == NULL) {
1755 for (uint j = 1; j < cmpd_pk->size(); j++) {
1756 if (cmpd_pk->at(j)->in(1) != in1) {
1757 return false;
1758 }
1759 }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1)
1760 }
1761 // test if "all" in2 are in the same pack or the same node
1762 if (in2_pk == NULL) {
1763 for (uint j = 1; j < cmpd_pk->size(); j++) {
1764 if (cmpd_pk->at(j)->in(2) != in2) {
1765 return false;
1766 }
1767 }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2)
1768 }
1769 //now check if cmpd_pk may be subsumed in vector built for cmovd_pk
4021 _current = _dep_next->pred()->node();
4022 _dep_next = _dep_next->next_in();
4023 } else if (_next_idx < _end_idx) {
4024 _current = _n->in(_next_idx++);
4025 } else {
4026 _done = true;
4027 }
4028 }
4029
4030 // =========================== DepSuccs =========================
4031 // Iterator over successor edges in the dependence graph.
4032
4033 //------------------------------DepSuccs---------------------------
4034 DepSuccs::DepSuccs(Node* n, DepGraph& dg) {
4035 _n = n;
4036 _done = false;
4037 if (_n->is_Load()) {
4038 _next_idx = 0;
4039 _end_idx = _n->outcnt();
4040 _dep_next = dg.dep(_n)->out_head();
4041 } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) {
4042 _next_idx = 0;
4043 _end_idx = 0;
4044 _dep_next = dg.dep(_n)->out_head();
4045 } else {
4046 _next_idx = 0;
4047 _end_idx = _n->outcnt();
4048 _dep_next = NULL;
4049 }
4050 next();
4051 }
4052
4053 //-------------------------------next---------------------------
4054 void DepSuccs::next() {
4055 if (_dep_next != NULL) {
4056 _current = _dep_next->succ()->node();
4057 _dep_next = _dep_next->next_out();
4058 } else if (_next_idx < _end_idx) {
4059 _current = _n->raw_out(_next_idx++);
4060 } else {
4061 _done = true;
|
1 /*
2 * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
308 bt = n->as_Mem()->memory_type();
309 } else {
310 bt = n->bottom_type()->basic_type();
311 }
312
313 if (post_loop_allowed) {
314 if (!small_basic_type) {
315 switch (bt) {
316 case T_CHAR:
317 case T_BYTE:
318 case T_SHORT:
319 small_basic_type = true;
320 break;
321
322 case T_LONG:
323 // TODO: Remove when support completed for mask context with LONG.
324 // Support needs to be augmented for logical qword operations, currently we map to dword
325 // buckets for vectors on logicals as these were legacy.
326 small_basic_type = true;
327 break;
328
329 default:
330 break;
331 }
332 }
333 }
334
335 if (is_java_primitive(bt) == false) continue;
336
337 int cur_max_vector = Matcher::max_vector_size(bt);
338
339 // If a max vector exists which is not larger than _local_loop_unroll_factor
340 // stop looking, we already have the max vector to map to.
341 if (cur_max_vector < local_loop_unroll_factor) {
342 is_slp = false;
343 if (TraceSuperWordLoopUnrollAnalysis) {
344 tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
345 }
346 break;
347 }
348
349 // Map the maximal common vector
350 if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
738 (*cmp_ct.adr_at(i))++;
739 (*cmp_ct.adr_at(j))++;
740 }
741 }
742 }
743 }
744
745 // Find Store (or Load) with the greatest number of "comparable" references,
746 // biggest vector size, smallest data size and smallest iv offset.
747 int max_ct = 0;
748 int max_vw = 0;
749 int max_idx = -1;
750 int min_size = max_jint;
751 int min_iv_offset = max_jint;
752 for (uint j = 0; j < memops.size(); j++) {
753 MemNode* s = memops.at(j)->as_Mem();
754 if (s->is_Store()) {
755 int vw = vector_width_in_bytes(s);
756 assert(vw > 1, "sanity");
757 SWPointer p(s, this, NULL, false);
758 if ( cmp_ct.at(j) > max_ct ||
759 (cmp_ct.at(j) == max_ct &&
760 ( vw > max_vw ||
761 (vw == max_vw &&
762 ( data_size(s) < min_size ||
763 (data_size(s) == min_size &&
764 p.offset_in_bytes() < min_iv_offset)))))) {
765 max_ct = cmp_ct.at(j);
766 max_vw = vw;
767 max_idx = j;
768 min_size = data_size(s);
769 min_iv_offset = p.offset_in_bytes();
770 }
771 }
772 }
773 // If no stores, look at loads
774 if (max_ct == 0) {
775 for (uint j = 0; j < memops.size(); j++) {
776 MemNode* s = memops.at(j)->as_Mem();
777 if (s->is_Load()) {
778 int vw = vector_width_in_bytes(s);
779 assert(vw > 1, "sanity");
780 SWPointer p(s, this, NULL, false);
781 if ( cmp_ct.at(j) > max_ct ||
782 (cmp_ct.at(j) == max_ct &&
783 ( vw > max_vw ||
784 (vw == max_vw &&
785 ( data_size(s) < min_size ||
786 (data_size(s) == min_size &&
787 p.offset_in_bytes() < min_iv_offset)))))) {
788 max_ct = cmp_ct.at(j);
789 max_vw = vw;
790 max_idx = j;
791 min_size = data_size(s);
792 min_iv_offset = p.offset_in_bytes();
793 }
794 }
795 }
796 }
797
798 #ifdef ASSERT
799 if (TraceSuperWord && Verbose) {
800 tty->print_cr("\nVector memops after find_align_to_ref");
801 for (uint i = 0; i < memops.size(); i++) {
802 MemNode* s = memops.at(i)->as_Mem();
803 s->dump();
804 }
805 }
806 #endif
807
911
912 #ifndef PRODUCT
913 if (TraceSuperWord) {
914 tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
915 mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
916 mem_ref->dump();
917 }
918 #endif
919 return iv_adjustment;
920 }
921
922 //---------------------------dependence_graph---------------------------
923 // Construct dependency graph.
924 // Add dependence edges to load/store nodes for memory dependence
925 // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
926 void SuperWord::dependence_graph() {
927 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
928 // First, assign a dependence node to each memory node
929 for (int i = 0; i < _block.length(); i++ ) {
930 Node *n = _block.at(i);
931 if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
932 _dg.make_node(n);
933 }
934 }
935
936 // For each memory slice, create the dependences
937 for (int i = 0; i < _mem_slice_head.length(); i++) {
938 Node* n = _mem_slice_head.at(i);
939 Node* n_tail = _mem_slice_tail.at(i);
940
941 // Get slice in predecessor order (last is first)
942 if (cl->is_main_loop()) {
943 mem_slice_preds(n_tail, n, _nlist);
944 }
945
946 #ifndef PRODUCT
947 if(TraceSuperWord && Verbose) {
948 tty->print_cr("SuperWord::dependence_graph: built a new mem slice");
949 for (int j = _nlist.length() - 1; j >= 0 ; j--) {
950 _nlist.at(j)->dump();
951 }
1731 _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool
1732 }
1733 _sw->_packset.remove(cmovd_pk);
1734 _sw->_packset.remove(bool_pk);
1735 _sw->_packset.remove(cmpd_pk);
1736 _sw->_packset.append(new_cmpd_pk);
1737 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})
1738 return new_cmpd_pk;
1739 }
1740
1741 bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) {
1742 Node* cmpd0 = cmpd_pk->at(0);
1743 assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode");
1744 assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD");
1745 assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size");
1746 Node* in1 = cmpd0->in(1);
1747 Node* in2 = cmpd0->in(2);
1748 Node_List* in1_pk = _sw->my_pack(in1);
1749 Node_List* in2_pk = _sw->my_pack(in2);
1750
1751 if ( (in1_pk != NULL && in1_pk->size() != cmpd_pk->size())
1752 || (in2_pk != NULL && in2_pk->size() != cmpd_pk->size()) ) {
1753 return false;
1754 }
1755
1756 // test if "all" in1 are in the same pack or the same node
1757 if (in1_pk == NULL) {
1758 for (uint j = 1; j < cmpd_pk->size(); j++) {
1759 if (cmpd_pk->at(j)->in(1) != in1) {
1760 return false;
1761 }
1762 }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1)
1763 }
1764 // test if "all" in2 are in the same pack or the same node
1765 if (in2_pk == NULL) {
1766 for (uint j = 1; j < cmpd_pk->size(); j++) {
1767 if (cmpd_pk->at(j)->in(2) != in2) {
1768 return false;
1769 }
1770 }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2)
1771 }
1772 //now check if cmpd_pk may be subsumed in vector built for cmovd_pk
4024 _current = _dep_next->pred()->node();
4025 _dep_next = _dep_next->next_in();
4026 } else if (_next_idx < _end_idx) {
4027 _current = _n->in(_next_idx++);
4028 } else {
4029 _done = true;
4030 }
4031 }
4032
4033 // =========================== DepSuccs =========================
4034 // Iterator over successor edges in the dependence graph.
4035
4036 //------------------------------DepSuccs---------------------------
4037 DepSuccs::DepSuccs(Node* n, DepGraph& dg) {
4038 _n = n;
4039 _done = false;
4040 if (_n->is_Load()) {
4041 _next_idx = 0;
4042 _end_idx = _n->outcnt();
4043 _dep_next = dg.dep(_n)->out_head();
4044 } else if (_n->is_Mem() || (_n->is_Phi() && _n->bottom_type() == Type::MEMORY)) {
4045 _next_idx = 0;
4046 _end_idx = 0;
4047 _dep_next = dg.dep(_n)->out_head();
4048 } else {
4049 _next_idx = 0;
4050 _end_idx = _n->outcnt();
4051 _dep_next = NULL;
4052 }
4053 next();
4054 }
4055
4056 //-------------------------------next---------------------------
4057 void DepSuccs::next() {
4058 if (_dep_next != NULL) {
4059 _current = _dep_next->succ()->node();
4060 _dep_next = _dep_next->next_out();
4061 } else if (_next_idx < _end_idx) {
4062 _current = _n->raw_out(_next_idx++);
4063 } else {
4064 _done = true;
|