< prev index next >
src/share/vm/opto/library_call.cpp
Print this page
rev 8502 : 8046943: JEP 246: Leverage CPU Instructions for GHASH and RSA
Summary: Add montgomeryMultiply intrinsic
Reviewed-by: kvn
*** 291,300 ****
--- 291,301 ----
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();
bool inline_multiplyToLen();
bool inline_squareToLen();
bool inline_mulAdd();
+ bool inline_montgomeryMultiply();
bool inline_profileBoolean();
bool inline_isCompileConstant();
};
*** 502,511 ****
--- 503,516 ----
case vmIntrinsics::_mulAdd:
if (!UseMulAddIntrinsic) return NULL;
break;
+ case vmIntrinsics::_montgomeryMultiply:
+ if (!UseMontgomeryMultiplyIntrinsic) return NULL;
+ break;
+
case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
if (!UseAESIntrinsics) return NULL;
// these two require the predicated logic
predicates = 1;
*** 927,936 ****
--- 932,944 ----
return inline_squareToLen();
case vmIntrinsics::_mulAdd:
return inline_mulAdd();
+ case vmIntrinsics::_montgomeryMultiply:
+ return inline_montgomeryMultiply();
+
case vmIntrinsics::_encodeISOArray:
return inline_encodeISOArray();
case vmIntrinsics::_updateCRC32:
return inline_updateCRC32();
*** 5414,5423 ****
--- 5422,5566 ----
Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
set_result(result);
return true;
}
+ //-------------inline_montgomeryMultiply-----------------------------------
+ // Expands the montgomeryMultiply intrinsic into a leaf call to the
+ // StubRoutines::montgomeryMultiply() stub.
+ //
+ // Incoming arguments (by slot): a (1), b (2), n (3), len (4), inv (5), m (7).
+ // a, b and n must be statically known int arrays, otherwise the intrinsic
+ // is not applied.  m is the destination array; if it is null or shorter
+ // than len a fresh int[len] is allocated and used instead.  When len > 512
+ // a long[len] scratch array is allocated and handed to the stub; otherwise
+ // a null scratch pointer is passed.
+ //
+ // Returns true (with the result set to the destination array) when the
+ // intrinsic was expanded, false when it was not.
+ bool LibraryCallKit::inline_montgomeryMultiply() {
+   address stubAddr = StubRoutines::montgomeryMultiply();
+   assert(UseMontgomeryMultiplyIntrinsic, "not implementated on this platform");
+   if (stubAddr == NULL) {
+     return false; // no stub to call, so do not expand the intrinsic
+   }
+   const char* stubName = "montgomery_multiply";
+
+   assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");
+
+   Node* a   = argument(1);
+   Node* b   = argument(2);
+   Node* n   = argument(3);
+   Node* len = argument(4);
+   Node* inv = argument(5);
+   // Slot 6 is skipped: inv presumably occupies two slots (its second half
+   // is passed as top() in the runtime call below).
+   Node* m   = argument(7);
+
+   // Each of a, b and n must be known to be an array with a known klass.
+   const Type* a_type = a->Value(&_gvn);
+   const Type* b_type = b->Value(&_gvn);
+   const Type* n_type = n->Value(&_gvn);
+   const TypeAryPtr* top_a = a_type->isa_aryptr();
+   const TypeAryPtr* top_b = b_type->isa_aryptr();
+   const TypeAryPtr* top_n = n_type->isa_aryptr();
+   if (top_a == NULL || top_a->klass() == NULL ||
+       top_b == NULL || top_b->klass() == NULL ||
+       top_n == NULL || top_n->klass() == NULL) {
+     // failed array check
+     return false;
+   }
+
+   // ... and each of them must be an int array.
+   BasicType a_elem = top_a->klass()->as_array_klass()->element_type()->basic_type();
+   BasicType b_elem = top_b->klass()->as_array_klass()->element_type()->basic_type();
+   BasicType n_elem = top_n->klass()->as_array_klass()->element_type()->basic_type();
+   if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT) {
+     return false;
+   }
+
+   // Set the original stack and the reexecute bit for the interpreter
+   // to reexecute the bytecode that invokes
+   // BigInteger.montgomeryMultiply() if deoptimization happens on the
+   // return from array allocation in the runtime.
+   { PreserveReexecuteState preexecs(this);
+     jvms()->set_should_reexecute(true);
+
+     Node* a_start = array_element_address(a, intcon(0), a_elem);
+     Node* b_start = array_element_address(b, intcon(0), b_elem);
+     Node* n_start = array_element_address(n, intcon(0), n_elem);
+
+     // Klass constants for the two arrays that may have to be allocated:
+     // int[] for the result and long[] for the scratch space.
+     ciKlass* klass = ciTypeArrayKlass::make(T_INT);
+     Node* klass_node = makecon(TypeKlassPtr::make(klass));
+
+     ciKlass* long_array_klass = ciTypeArrayKlass::make(T_LONG);
+     Node* long_array_klass_node = makecon(TypeKlassPtr::make(long_array_klass));
+
+     IdealKit ideal(this);
+
+ #define __ ideal.
+
+     // Cast the type of len to the type of an array index.  We do it
+     // here because otherwise new_array() will generate a CastII node
+     // inside a conditional.  If that happens this CastII will not
+     // dominate its use (in the call to montgomeryMultiply()) so the
+     // compilation will fail.
+     const TypeInt* len_type = _gvn.find_int_type(len);
+     const TypeInt* narrow_len_type = top_n->narrow_size_type(len_type);
+     len = __ CastII(len, narrow_len_type);
+
+     Node* one = __ ConI(1);
+     Node* zero = __ ConI(0);
+     IdealVariable need_alloc(ideal), m_alloc(ideal),
+       scratch_start(ideal), scratch_alloc(ideal);  __ declarations_done();
+
+     // need_alloc becomes non-zero when the caller's m cannot be used:
+     // either it is null or its length is less than len.
+     __ set(need_alloc, zero);
+     __ set(m_alloc, m);
+     __ if_then(m, BoolTest::eq, null(), PROB_STATIC_INFREQUENT); {
+       __ increment (need_alloc, one);
+     } __ else_(); {
+       // Update graphKit memory and control from IdealKit.
+       sync_kit(ideal);
+       Node* mlen_arg = load_array_length(m);
+       // Update IdealKit memory and control from graphKit.
+       __ sync_kit(this);
+       __ if_then(mlen_arg, BoolTest::lt, len, PROB_MIN); {
+         __ increment (need_alloc, one);
+       } __ end_if();
+     } __ end_if();
+
+     // Allocate the result array if the supplied one was rejected above.
+     __ if_then(__ value(need_alloc), BoolTest::ne, zero, PROB_STATIC_INFREQUENT); {
+       // Update graphKit memory and control from IdealKit.
+       sync_kit(ideal);
+       Node * narr = new_array(klass_node, len, 1);
+       // Update IdealKit memory and control from graphKit.
+       __ sync_kit(this);
+       __ set(m_alloc, narr);
+     } __ end_if();
+
+     // We need some scratch space.  If len is reasonably small
+     // montgomeryMultiply() will allocate scratch on its own stack; if
+     // len is large we must do it here or we risk a stack overflow.
+     __ set(scratch_start, null());
+     __ if_then(len, BoolTest::gt, intcon(512), PROB_MIN); {
+       // Update graphKit memory and control from IdealKit.
+       sync_kit(ideal);
+       Node * narr = new_array(long_array_klass_node, len, 1);
+       // Update IdealKit memory and control from graphKit.
+       __ sync_kit(this);
+       // NOTE(review): scratch_alloc is only written, never read, in this
+       // function -- confirm whether it is still needed.
+       __ set(scratch_alloc, narr);
+       __ set(scratch_start,
+              array_element_address(narr, intcon(0), T_LONG));
+     } __ end_if();
+
+     // From here on m is whichever array was selected above.
+     m = __ value(m_alloc);
+     // Can't use TypeAryPtr::INTS which uses Bottom offset.
+     _gvn.set_type(m, TypeOopPtr::make_from_klass(klass));
+
+     Node *scratch = __ value(scratch_start);
+
+     // Final sync IdealKit and GraphKit.
+     final_sync(ideal);
+ #undef __
+
+     Node* m_start = array_element_address(m, intcon(0), T_INT);
+
+     // Leaf call into the stub; the call node itself is not needed
+     // afterwards because the result is the array m.
+     make_runtime_call(RC_LEAF|RC_NO_FP,
+                       OptoRuntime::montgomeryMultiply_Type(),
+                       stubAddr, stubName, TypePtr::BOTTOM,
+                       a_start, b_start, n_start, len, inv, top(),
+                       scratch, m_start);
+   } // original reexecute is set back here
+
+   C->set_has_split_ifs(true); // Has chance for split-if optimization
+   set_result(m);
+   return true;
+ }
+
/**
* Calculate CRC32 for byte.
* int java.util.zip.CRC32.update(int crc, int b)
*/
< prev index next >