< prev index next >
src/share/vm/opto/superword.hpp
Print this page
rev 8530 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord - before making Trace class.
rev 8531 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord - before making Trace class.
Copied print msg from pristine. No Tracer yet.
rev 8532 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord. Added Tracer. Tabulation not fixed.
rev 8533 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord - before making Trace class.
Printing (debug+trace) functions still here.
rev 8534 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord.
Extra printing removed. Bug fixing for invariant and scale still here.
rev 8542 : SIMD: RFR(S): 8085932: added merge_packs_to_cmovd, CMoveDVNode is not built yet.
rev 8543 : SIMD: RFR(S): 8085932: Passed "Unimplemented". Failed as "Unprofitable".
rev 8544 : SIMD: RFR(S): 8085932: passed profitability. Need the correct .ad file.
Has FIXME! in places where stepping by 1 needs to be corrected to stepping by 3. May revisit this code and find a better solution.
rev 8545 : SIMD: RFR(S): 8085932: Ideal Graph builds OK and the code is generated. .ad file is not correct yet.
rev 8700 : Merge
rev 8701 : SIMD: RFR(S): 8085932: fixing "friend class"
rev 8706 : SIMD: RFR(S): 8085932: fixing "friend class"
Added initialization to ctor, trailing spaces removed.
rev 8707 : Merge
rev 8708 : SIMD: CMove update - from c:\Java\openjdk-clone-060315\hotspot\
rev 8709 : Merge
rev 8710 : SIMD: small cleanup
rev 8711 : SIMD: CMoveVD - produces some code, but actually garbage in .ad file.
No reshaping in Matcher.
rev 8717 : SIMD: CMoveVD - .ad is good (need to be tested).
Added class CMoveVD_map. Removing Flag_is_CMove on a way.
rev 8718 : SIMD: CMoveVD - clean-up - created normal constructor in CMoveVD_map.
rev 8719 : SIMD: CMoveVD - cleanup.
rev 8720 : SIMD: added is_Bool_candidate, is_CmpD_candidate
rev 8721 : SIMD: cleanup
rev 8722 : SIMD: created class CMoveKit
rev 8723 : SIMD: cleanup
rev 8724 : SIMD: small fix for linux compilation
rev 8725 : SIMD: small changes ...
rev 8726 : SIMD: use insert instead of push, since the index is known.
rev 8728 : SIMD: cleanup
rev 8729 : SIMD: removed constant "3", need cleanup.
rev 8733 : SIMD: cleanup. src/cpu/x86/vm/x86.ad needs more.
Some !FIXME! are remaining, mostly for second thought
rev 8889 : SIMD: added option DoReserveCopyInSuperWord
rev 8930 : SIMD: cleanup - trailing spaces, tabs
rev 8937 : SIMD: added option DoReserveCopyInSuperWordTest for testing switching to reversed copy.
Much better functionality description of LoopReserveKit in loopnode.hpp
Cleanup in loopUnswitch.cpp
rev 8938 : SIMD: some functions are renamed, some cleanup
rev 9037 : SIMD: added SuperWord code for testing CountedLoopReserveKit
rev 9039 : Merge
rev 9048 : SIMD: mberg review fixes 2
rev 9098 : SIMD: rename _CountedLoopReserveKit_test to _CountedLoopReserveKit_debug
rev 9101 : Merge
rev 9150 : SIMD: fixing trace/debug printout
rev 9158 : SIMD: restore from 9150; 'release' test results: passed 520; failed 22; error 6.
fastdebug produces 'load vector' and 17 vs 28 performance gain on -XX+UseCMov
rev 9160 : SIMD: again same output for release and fastdebug as 9158
*** 27,36 ****
--- 27,37 ----
#include "opto/loopnode.hpp"
#include "opto/node.hpp"
#include "opto/phaseX.hpp"
#include "opto/vectornode.hpp"
#include "utilities/growableArray.hpp"
+ #include "libadt/dict.hpp"
//
// S U P E R W O R D T R A N S F O R M
//
// SuperWords are short, fixed length vectors.
*** 198,211 ****
--- 199,232 ----
SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {}
static const SWNodeInfo initial;
};
+ class SuperWord;
+ class CMoveKit { // helper owned by SuperWord (member _cmovev_kit): keeps a Node* -> Node_List* dictionary used for CMove vectorization
+ friend class SuperWord; // SuperWord is the only client; every member below is private
+ private:
+ SuperWord* _sw; // the SuperWord instance handed to the ctor
+ Dict* _dict; // maps a Node* key to its Node_List* pack, see map()/unmap()/pack()
+ CMoveKit(Arena* a, SuperWord* sw) : _sw(sw) {_dict = new Dict(cmpkey, hashkey, a);} // the dictionary is created with cmpkey/hashkey and arena 'a'
+ void* _2p(Node* key) const { return (void*)(intptr_t)key; } // converts a Node* key to the void* form passed to the Dict; the cast through intptr_t is there to make gcc happy
+ Dict* dict() const { return _dict; } // raw access to the underlying dictionary
+ void map(Node* key, Node_List* val) { assert(_dict->operator[](_2p(key)) == NULL, "key existed"); _dict->Insert(_2p(key), (void*)val); } // register val for key; asserts that key has no entry yet
+ void unmap(Node* key) { _dict->Delete(_2p(key)); } // remove the entry for key
+ Node_List* pack(Node* key) const { return (Node_List*)_dict->operator[](_2p(key)); } // look up the pack registered for key (presumably NULL when absent, as the assert in map() implies - confirm in dict.hpp)
+ Node* is_Bool_candidate(Node* nd) const; // if nd is the right candidate, return the corresponding CMove*,
+ Node* is_CmpD_candidate(Node* nd) const; // otherwise return NULL (applies to both is_*_candidate functions)
+ Node_List* make_cmovevd_pack(Node_List* cmovd_pk); // defined out of line; NOTE(review): presumably builds the vector CMove pack from a CMoveD pack - confirm in superword.cpp
+ bool test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk); // defined out of line; NOTE(review): presumably checks that cmpd_pk fits cmovd_pk - confirm in superword.cpp
+ };//class CMoveKit
+
+
// -----------------------------SuperWord---------------------------------
// Transforms scalar operations into packed (superword) operations.
class SuperWord : public ResourceObj {
friend class SWPointer;
+ friend class CMoveKit;
private:
PhaseIdealLoop* _phase;
Arena* _arena;
PhaseIterGVN &_igvn;
*** 221,231 ****
GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi
GrowableArray<Node*> _iteration_last; // nodes in the generation that has deps to phi
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
CloneMap& _clone_map; // map of nodes created in cloning
!
MemNode* _align_to_ref; // Memory reference that pre-loop will align to
GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
DepGraph _dg; // Dependence graph
--- 242,252 ----
GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi
GrowableArray<Node*> _iteration_last; // nodes in the generation that has deps to phi
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
CloneMap& _clone_map; // map of nodes created in cloning
! CMoveKit _cmovev_kit; // support for vectorization of CMov
MemNode* _align_to_ref; // Memory reference that pre-loop will align to
GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
DepGraph _dg; // Dependence graph
*** 255,281 ****
--- 276,307 ----
bool is_debug() { return _vector_loop_debug > 0; }
bool is_trace_alignment() { return (_vector_loop_debug & 2) > 0; }
bool is_trace_mem_slice() { return (_vector_loop_debug & 4) > 0; }
bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; }
bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; }
+ bool is_trace_cmov() { return (_vector_loop_debug & 32) > 0; }
+ bool is_trace_loop_reverse() { return (_vector_loop_debug & 64) > 0; }
#endif
bool do_vector_loop() { return _do_vector_loop; }
+ bool do_reserve_copy() { return _do_reserve_copy; }
private:
IdealLoopTree* _lpt; // Current loop tree node
LoopNode* _lp; // Current LoopNode
Node* _bb; // Current basic block
PhiNode* _iv; // Induction var
bool _race_possible; // In cases where SDMU is true
bool _early_return; // True if we do not initialize
bool _do_vector_loop; // whether to do vectorization/simd style
+ bool _do_reserve_copy; // do reserve copy of the graph(loop) before final modification in output
int _num_work_vecs; // Number of non memory vector operations
int _num_reductions; // Number of reduction expressions applied
int _ii_first; // generation with direct deps from mem phi
int _ii_last; // generation with direct deps to mem phi
GrowableArray<int> _ii_order;
#ifndef PRODUCT
uintx _vector_loop_debug; // provide more printing in debug mode
+ //uintx _CountedLoopReserveKit_debug; // for debugging CountedLoopReserveKit
#endif
// Accessors
Arena* arena() { return _arena; }
*** 334,344 ****
bool same_velt_type(Node* n1, Node* n2);
// my_pack
Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
!
// CloneMap utilities
bool same_origin_idx(Node* a, Node* b) const;
bool same_generation(Node* a, Node* b) const;
// methods
--- 360,374 ----
bool same_velt_type(Node* n1, Node* n2);
// my_pack
Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
! // is pack good for converting into one vector node replacing 12 nodes of Cmp, Bool, CMov
! bool is_cmov_pack(Node_List* p);
! bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); }
! // For pack p, are all idx operands the same?
! bool same_inputs(Node_List* p, int idx);
// CloneMap utilities
bool same_origin_idx(Node* a, Node* b) const;
bool same_generation(Node* a, Node* b) const;
// methods
*** 348,357 ****
--- 378,388 ----
// Find the adjacent memory references and create pack pairs for them.
void find_adjacent_refs();
// Tracing support
#ifndef PRODUCT
void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment);
+ void print_loop(bool whole);
#endif
// Find a memory reference to align the loop induction variable to.
MemNode* find_align_to_ref(Node_List &memops);
// Calculate loop's iv adjustment for this memory ops.
int get_iv_adjustment(MemNode* mem);
*** 410,419 ****
--- 441,452 ----
void combine_packs();
// Construct the map from nodes to packs.
void construct_my_pack_map();
// Remove packs that are not implemented or not profitable.
void filter_packs();
+ // Merge CMoveD into new vector-nodes
+ void merge_packs_to_cmovd();
// Adjust the memory graph for the packed operations
void schedule();
// Remove "current" from its current position in the memory graph and insert
// it after the appropriate insert points (lip or uip);
void remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &schd_before);
< prev index next >