< prev index next >

src/share/vm/opto/superword.hpp

Print this page
rev 8530 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord - before making Trace class.
rev 8531 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord - before making Trace class.
Copied print msg from pristine. No Tracer yet.
rev 8532 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord. Added Tracer. Tabulation not fixed.
rev 8533 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord - before making Trace class.
Printing (debug+trace) functions still here.
rev 8534 : SIMD: RFR(S): 8085932: Fixing bugs in detecting memory alignments in SuperWord.
Extra printing removed. Bug fixing for invariant and scale still here.
rev 8542 : SIMD: RFR(S): 8085932: added merge_packs_to_cmovd, CMoveDVNode is not built yet.
rev 8543 : SIMD: RFR(S): 8085932: Passed "Unimplemented". Failed as "Unprofitable".
rev 8544 : SIMD: RFR(S): 8085932: passed profitability. Need the correct .ad file.
Has FIXME! in places where stepping by 1 needs to be corrected by stepping by 3. May revisit this code and find a better solution.
rev 8545 : SIMD: RFR(S): 8085932: Ideal Graph builds OK and the code is generated. .ad file is not correct yet.
rev 8700 : Merge
rev 8701 : SIMD: RFR(S): 8085932: fixing "friend class"
rev 8706 : SIMD: RFR(S): 8085932: fixing "friend class"
Added initialization to ctor, trailing spaces removed.
rev 8707 : Merge
rev 8708 : SIMD: CMove update - from c:\Java\openjdk-clone-060315\hotspot\
rev 8709 : Merge
rev 8710 : SIMD: small cleanup
rev 8711 : SIMD: CMoveVD - produces some code, but actually garbage in .ad file.
No reshaping in Matcher.
rev 8717 : SIMD: CMoveVD - .ad is good (need to be tested).
Added class CMoveVD_map. Removing Flag_is_CMove on a way.
rev 8718 : SIMD: CMoveVD - clean-up - created normal constructor in CMoveVD_map.
rev 8719 : SIMD: CMoveVD - cleanup.
rev 8720 : SIMD: added is_Bool_candidate, is_CmpD_candidate
rev 8721 : SIMD: cleanup
rev 8722 : SIMD: created class CMoveKit
rev 8723 : SIMD: cleanup
rev 8724 : SIMD: small fix for linux compilation
rev 8725 : SIMD: small changes ...
rev 8726 : SIMD: use insert instead of push, since the index is known.
rev 8728 : SIMD: cleanup
rev 8729 : SIMD: removed constant "3", need cleanup.
rev 8733 : SIMD: cleanup. src/cpu/x86/vm/x86.ad needs more.
Some !FIXME! are remaining, mostly for second thought
rev 8889 : SIMD: added option DoReserveCopyInSuperWord
rev 8930 : SIMD: cleanup - trailing spaces, tabs
rev 8937 : SIMD: added option DoReserveCopyInSuperWordTest for testing switching to reversed copy.
Much better functionality description of LoopReserveKit in loopnode.hpp
Cleanup in loopUnswitch.cpp
rev 8938 : SIMD: some functions are renamed, some cleanup
rev 9037 : SIMD: added SuperWord code for testing CountedLoopReserveKit
rev 9039 : Merge
rev 9048 : SIMD: mberg review fixes 2
rev 9098 : SIMD: rename _CountedLoopReserveKit_test to _CountedLoopReserveKit_debug
rev 9101 : Merge
rev 9150 : SIMD: fixing trace/debug printout
rev 9158 : SIMD restore from 9150, 'release' test results: passed 520; failed 22; error 6.
fastdebug produces 'load vector' and 17 vs 28 performance gain on -XX:+UseCMov
rev 9160 : SIMD: again same output for release and fastdebug as 9158

*** 27,36 **** --- 27,37 ---- #include "opto/loopnode.hpp" #include "opto/node.hpp" #include "opto/phaseX.hpp" #include "opto/vectornode.hpp" #include "utilities/growableArray.hpp" + #include "libadt/dict.hpp" // // S U P E R W O R D T R A N S F O R M // // SuperWords are short, fixed length vectors.
*** 198,211 **** --- 199,232 ---- SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {} static const SWNodeInfo initial; }; + class SuperWord; + class CMoveKit { + friend class SuperWord; + private: + SuperWord* _sw; + Dict* _dict; + CMoveKit(Arena* a, SuperWord* sw) : _sw(sw) {_dict = new Dict(cmpkey, hashkey, a);} + void* _2p(Node* key) const { return (void*)(intptr_t)key; } // 2 conversion functions to make gcc happy + Dict* dict() const { return _dict; } + void map(Node* key, Node_List* val) { assert(_dict->operator[](_2p(key)) == NULL, "key existed"); _dict->Insert(_2p(key), (void*)val); } + void unmap(Node* key) { _dict->Delete(_2p(key)); } + Node_List* pack(Node* key) const { return (Node_List*)_dict->operator[](_2p(key)); } + Node* is_Bool_candidate(Node* nd) const; // if it is the right candidate return corresponding CMove* , + Node* is_CmpD_candidate(Node* nd) const; // otherwise return NULL + Node_List* make_cmovevd_pack(Node_List* cmovd_pk); + bool test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk); + };//class CMoveKit + + // -----------------------------SuperWord--------------------------------- // Transforms scalar operations into packed (superword) operations. class SuperWord : public ResourceObj { friend class SWPointer; + friend class CMoveKit; private: PhaseIdealLoop* _phase; Arena* _arena; PhaseIterGVN &_igvn;
*** 221,231 **** GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi GrowableArray<Node*> _iteration_last; // nodes in the generation that has deps to phi GrowableArray<SWNodeInfo> _node_info; // Info needed per node CloneMap& _clone_map; // map of nodes created in cloning ! MemNode* _align_to_ref; // Memory reference that pre-loop will align to GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs DepGraph _dg; // Dependence graph --- 242,252 ---- GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi GrowableArray<Node*> _iteration_last; // nodes in the generation that has deps to phi GrowableArray<SWNodeInfo> _node_info; // Info needed per node CloneMap& _clone_map; // map of nodes created in cloning ! CMoveKit _cmovev_kit; // support for vectorization of CMov MemNode* _align_to_ref; // Memory reference that pre-loop will align to GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs DepGraph _dg; // Dependence graph
*** 255,281 **** --- 276,307 ---- bool is_debug() { return _vector_loop_debug > 0; } bool is_trace_alignment() { return (_vector_loop_debug & 2) > 0; } bool is_trace_mem_slice() { return (_vector_loop_debug & 4) > 0; } bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; } bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; } + bool is_trace_cmov() { return (_vector_loop_debug & 32) > 0; } + bool is_trace_loop_reverse() { return (_vector_loop_debug & 64) > 0; } #endif bool do_vector_loop() { return _do_vector_loop; } + bool do_reserve_copy() { return _do_reserve_copy; } private: IdealLoopTree* _lpt; // Current loop tree node LoopNode* _lp; // Current LoopNode Node* _bb; // Current basic block PhiNode* _iv; // Induction var bool _race_possible; // In cases where SDMU is true bool _early_return; // True if we do not initialize bool _do_vector_loop; // whether to do vectorization/simd style + bool _do_reserve_copy; // do reserve copy of the graph(loop) before final modification in output int _num_work_vecs; // Number of non memory vector operations int _num_reductions; // Number of reduction expressions applied int _ii_first; // generation with direct deps from mem phi int _ii_last; // generation with direct deps to mem phi GrowableArray<int> _ii_order; #ifndef PRODUCT uintx _vector_loop_debug; // provide more printing in debug mode + //uintx _CountedLoopReserveKit_debug; // for debugging CountedLoopReserveKit #endif // Accessors Arena* arena() { return _arena; }
*** 334,344 **** bool same_velt_type(Node* n1, Node* n2); // my_pack Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } ! // CloneMap utilities bool same_origin_idx(Node* a, Node* b) const; bool same_generation(Node* a, Node* b) const; // methods --- 360,374 ---- bool same_velt_type(Node* n1, Node* n2); // my_pack Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } ! // is pack good for converting into one vector node replacing 12 nodes of Cmp, Bool, CMov ! bool is_cmov_pack(Node_List* p); ! bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); } ! // For pack p, are all idx operands the same? ! bool same_inputs(Node_List* p, int idx); // CloneMap utilities bool same_origin_idx(Node* a, Node* b) const; bool same_generation(Node* a, Node* b) const; // methods
*** 348,357 **** --- 378,388 ---- // Find the adjacent memory references and create pack pairs for them. void find_adjacent_refs(); // Tracing support #ifndef PRODUCT void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment); + void print_loop(bool whole); #endif // Find a memory reference to align the loop induction variable to. MemNode* find_align_to_ref(Node_List &memops); // Calculate loop's iv adjustment for this memory ops. int get_iv_adjustment(MemNode* mem);
*** 410,419 **** --- 441,452 ---- void combine_packs(); // Construct the map from nodes to packs. void construct_my_pack_map(); // Remove packs that are not implemented or not profitable. void filter_packs(); + // Merge CMoveD into new vector-nodes + void merge_packs_to_cmovd(); // Adjust the memory graph for the packed operations void schedule(); // Remove "current" from its current position in the memory graph and insert // it after the appropriate insert points (lip or uip); void remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &schd_before);
< prev index next >