< prev index next >

src/share/vm/opto/library_call.cpp

Print this page
rev 8502 : 8046943: JEP 246: Leverage CPU Instructions for GHASH and RSA
Summary: Add montgomeryMultiply intrinsic
Reviewed-by: kvn

*** 291,300 **** --- 291,301 ---- bool inline_updateBytesCRC32(); bool inline_updateByteBufferCRC32(); bool inline_multiplyToLen(); bool inline_squareToLen(); bool inline_mulAdd(); + bool inline_montgomeryMultiply(); bool inline_profileBoolean(); bool inline_isCompileConstant(); };
*** 502,511 **** --- 503,516 ---- case vmIntrinsics::_mulAdd: if (!UseMulAddIntrinsic) return NULL; break; + case vmIntrinsics::_montgomeryMultiply: + if (!UseMontgomeryMultiplyIntrinsic) return NULL; + break; + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: if (!UseAESIntrinsics) return NULL; // these two require the predicated logic predicates = 1;
*** 927,936 **** --- 932,944 ---- return inline_squareToLen(); case vmIntrinsics::_mulAdd: return inline_mulAdd(); + case vmIntrinsics::_montgomeryMultiply: + return inline_montgomeryMultiply(); + case vmIntrinsics::_encodeISOArray: return inline_encodeISOArray(); case vmIntrinsics::_updateCRC32: return inline_updateCRC32();
*** 5414,5423 **** --- 5422,5566 ---- Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); set_result(result); return true; } + //-------------inline_montgomeryMultiply----------------------------------- + bool LibraryCallKit::inline_montgomeryMultiply() { + address stubAddr = StubRoutines::montgomeryMultiply(); + assert(UseMontgomeryMultiplyIntrinsic, "not implementated on this platform"); + const char* stubName = "montgomery_multiply"; + + assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters"); + + Node* a = argument(1); + Node* b = argument(2); + Node* n = argument(3); + Node* len = argument(4); + Node* inv = argument(5); + Node* m = argument(7); + + const Type* a_type = a->Value(&_gvn); + const Type* b_type = b->Value(&_gvn); + const TypeAryPtr* top_a = a_type->isa_aryptr(); + const TypeAryPtr* top_b = a_type->isa_aryptr(); + const Type* n_type = a->Value(&_gvn); + const TypeAryPtr* top_n = n_type->isa_aryptr(); + if (top_a == NULL || top_b->klass() == NULL || + top_a == NULL || top_b->klass() == NULL || + top_n == NULL || top_n->klass() == NULL) { + // failed array check + return false; + } + + BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType b_elem = b_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT) { + return false; + } + + // Set the original stack and the reexecute bit for the interpreter + // to reexecute the bytecode that invokes + // BigInteger.montgomeryMultiply() if deoptimization happens on the + // return from array allocation in the runtime. + { PreserveReexecuteState preexecs(this); + jvms()->set_should_reexecute(true); + + Node* a_start = array_element_address(a, intcon(0), a_elem); + Node* b_start = array_element_address(b, intcon(0), b_elem); + Node* n_start = array_element_address(n, intcon(0), n_elem); + + // Allocate the result array + ciKlass* klass = ciTypeArrayKlass::make(T_INT); + Node* klass_node = makecon(TypeKlassPtr::make(klass)); + + ciKlass* long_array_klass = ciTypeArrayKlass::make(T_LONG); + Node* long_array_klass_node = makecon(TypeKlassPtr::make(long_array_klass)); + + IdealKit ideal(this); + + #define __ ideal. + + // Cast the type of len to the type of an array index. We do it + // here because otherwise new_array() will generate a CastII node + // inside a conditional. If that happens this CastII will not + // dominate its use (in the call to montgomeryMultiply()) so the + // compilation will fail. + const TypeAryPtr* ary_type = n_type->isa_aryptr(); + const TypeInt* len_type = _gvn.find_int_type(len); + const TypeInt* narrow_len_type = ary_type->narrow_size_type(len_type); + len = __ CastII(len, narrow_len_type); + + Node* one = __ ConI(1); + Node* zero = __ ConI(0); + IdealVariable need_alloc(ideal), m_alloc(ideal), + scratch_start(ideal), scratch_alloc(ideal); __ declarations_done(); + + __ set(need_alloc, zero); + __ set(m_alloc, m); + __ if_then(m, BoolTest::eq, null(), PROB_STATIC_INFREQUENT); { + __ increment (need_alloc, one); + } __ else_(); { + // Update graphKit memory and control from IdealKit. + sync_kit(ideal); + Node* mlen_arg = load_array_length(m); + // Update IdealKit memory and control from graphKit. + __ sync_kit(this); + __ if_then(mlen_arg, BoolTest::lt, len, PROB_MIN); { + __ increment (need_alloc, one); + } __ end_if(); + } __ end_if(); + + __ if_then(__ value(need_alloc), BoolTest::ne, zero, PROB_STATIC_INFREQUENT); { + // Update graphKit memory and control from IdealKit. + sync_kit(ideal); + Node * narr = new_array(klass_node, len, 1); + // Update IdealKit memory and control from graphKit. + __ sync_kit(this); + __ set(m_alloc, narr); + } __ end_if(); + + // We need some scratch space. If len is reasonably small + // montgomeryMultiply() will allocate scratch on its own stack; if + // len is large we must do it here or we risk a stack overflow. + __ set(scratch_start, null()); + __ if_then(len, BoolTest::gt, intcon(512), PROB_MIN); { + // Update graphKit memory and control from IdealKit. + sync_kit(ideal); + Node * narr = new_array(long_array_klass_node, len, 1); + // Update IdealKit memory and control from graphKit. + __ sync_kit(this); + __ set(scratch_alloc, narr); + __ set(scratch_start, + array_element_address(narr, intcon(0), T_LONG)); + } __ end_if(); + + m = __ value(m_alloc); + // Can't use TypeAryPtr::INTS which uses Bottom offset. + _gvn.set_type(m, TypeOopPtr::make_from_klass(klass)); + + Node *scratch = __ value(scratch_start); + + // Final sync IdealKit and GraphKit. + final_sync(ideal); + #undef __ + + Node* m_start = array_element_address(m, intcon(0), T_INT); + + Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::montgomeryMultiply_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + a_start, b_start, n_start, len, inv, top(), + scratch, m_start); + } // original reexecute is set back here + + C->set_has_split_ifs(true); // Has chance for split-if optimization + set_result(m); + return true; + } + /** * Calculate CRC32 for byte. * int java.util.zip.CRC32.update(int crc, int b) */
< prev index next >