< prev index next >

src/share/vm/opto/library_call.cpp

Print this page
rev 8502 : 8046943: Leverage CPU Instructions for GHASH and RSA
Summary: Add montgomeryMultiply intrinsics
Reviewed-by: kvn

@@ -291,10 +291,12 @@
   bool inline_updateBytesCRC32();
   bool inline_updateByteBufferCRC32();
   bool inline_multiplyToLen();
   bool inline_squareToLen();
   bool inline_mulAdd();
+  bool inline_montgomeryMultiply();
+  bool inline_montgomerySquare();
 
   bool inline_profileBoolean();
   bool inline_isCompileConstant();
 };
 

@@ -502,10 +504,17 @@
 
   case vmIntrinsics::_mulAdd:
     if (!UseMulAddIntrinsic) return NULL;
     break;
 
+  case vmIntrinsics::_montgomeryMultiply:
+     if (!UseMontgomeryMultiplyIntrinsic) return NULL;
+    break;
+  case vmIntrinsics::_montgomerySquare:
+     if (!UseMontgomerySquareIntrinsic) return NULL;
+    break;
+
   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     if (!UseAESIntrinsics) return NULL;
     // these two require the predicated logic
     predicates = 1;

@@ -927,10 +936,15 @@
     return inline_squareToLen();
 
   case vmIntrinsics::_mulAdd:
     return inline_mulAdd();
 
+  case vmIntrinsics::_montgomeryMultiply:
+    return inline_montgomeryMultiply();
+  case vmIntrinsics::_montgomerySquare:
+    return inline_montgomerySquare();
+
   case vmIntrinsics::_encodeISOArray:
     return inline_encodeISOArray();
 
   case vmIntrinsics::_updateCRC32:
     return inline_updateCRC32();

@@ -5414,10 +5428,274 @@
   Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
   set_result(result);
   return true;
 }
 
+//-------------inline_montgomeryMultiply-----------------------------------
+// Intrinsic for BigInteger.montgomeryMultiply(): emits a leaf call to the
+// StubRoutines::montgomeryMultiply() stub in place of the Java method.
+//
+// Arguments read from the call site:
+//   a, b, n (slots 1-3) - must each be statically known to be int[]
+//   len     (slot 4)    - length used for the result and scratch arrays
+//   inv     (slot 5)    - forwarded to the stub, followed by top() as the
+//                         second half of a two-slot value
+//   m       (slot 7)    - caller-supplied result array; replaced by a newly
+//                         allocated int[len] when it is null or shorter
+//                         than len
+//
+// Returns false (intrinsic declined) when no stub is available or when any
+// of a, b, n is not known to be an int array.  Otherwise the result is set
+// to the result array (m or its replacement) and true is returned.
+bool LibraryCallKit::inline_montgomeryMultiply() {
+  address stubAddr = StubRoutines::montgomeryMultiply();
+  if (stubAddr == NULL) {
+    return false; // No stub to call: decline instead of calling through NULL.
+  }
+  assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform");
+  const char* stubName = "montgomery_multiply";
+
+  assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");
+
+  Node* a    = argument(1);
+  Node* b    = argument(2);
+  Node* n    = argument(3);
+  Node* len  = argument(4);
+  Node* inv  = argument(5);
+  Node* m    = argument(7);
+
+  // Every array operand is validated against its own type.  Each pointer is
+  // null-checked immediately before its klass() is read.
+  const Type* a_type = a->Value(&_gvn);
+  const Type* b_type = b->Value(&_gvn);
+  const Type* n_type = n->Value(&_gvn);
+  const TypeAryPtr* top_a = a_type->isa_aryptr();
+  const TypeAryPtr* top_b = b_type->isa_aryptr();
+  const TypeAryPtr* top_n = n_type->isa_aryptr();
+  if (top_a == NULL || top_a->klass() == NULL ||
+      top_b == NULL || top_b->klass() == NULL ||
+      top_n == NULL || top_n->klass() == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType a_elem = top_a->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType b_elem = top_b->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType n_elem = top_n->klass()->as_array_klass()->element_type()->basic_type();
+  if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT) {
+    return false;
+  }
+
+  // Set the original stack and the reexecute bit for the interpreter
+  // to reexecute the bytecode that invokes
+  // BigInteger.montgomeryMultiply() if deoptimization happens on the
+  // return from array allocation in the runtime.
+  { PreserveReexecuteState preexecs(this);
+    jvms()->set_should_reexecute(true);
+
+    Node* a_start = array_element_address(a, intcon(0), a_elem);
+    Node* b_start = array_element_address(b, intcon(0), b_elem);
+    Node* n_start = array_element_address(n, intcon(0), n_elem);
+
+    // Klass constants for the two arrays that may be allocated below:
+    // int[] for the result and long[] for the scratch space.
+    ciKlass* klass = ciTypeArrayKlass::make(T_INT);
+    Node* klass_node = makecon(TypeKlassPtr::make(klass));
+
+    ciKlass* long_array_klass = ciTypeArrayKlass::make(T_LONG);
+    Node* long_array_klass_node = makecon(TypeKlassPtr::make(long_array_klass));
+
+    IdealKit ideal(this);
+
+#define __ ideal.
+
+    // Cast the type of len to the type of an array index.  We do it
+    // here because otherwise new_array() will generate a CastII node
+    // inside a conditional.  If that happens this CastII will not
+    // dominate its use (in the call to montgomeryMultiply()) so the
+    // compilation will fail.
+    const TypeInt* len_type = _gvn.find_int_type(len);
+    const TypeInt* narrow_len_type = top_n->narrow_size_type(len_type);
+    len = __ CastII(len, narrow_len_type);
+
+    Node* one = __ ConI(1);
+    Node* zero = __ ConI(0);
+    IdealVariable need_alloc(ideal), m_alloc(ideal),
+      scratch_start(ideal), scratch_alloc(ideal);  __ declarations_done();
+
+    // need_alloc becomes non-zero when the caller's m cannot be used:
+    // either it is null or its length is less than len.
+    __ set(need_alloc, zero);
+    __ set(m_alloc, m);
+    __ if_then(m, BoolTest::eq, null(), PROB_STATIC_INFREQUENT); {
+      __ increment (need_alloc, one);
+    } __ else_(); {
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+      Node* mlen_arg = load_array_length(m);
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+      __ if_then(mlen_arg, BoolTest::lt, len, PROB_MIN); {
+        __ increment (need_alloc, one);
+      } __ end_if();
+    } __ end_if();
+
+    // Allocate a fresh int[len] result array only when required.
+    __ if_then(__ value(need_alloc), BoolTest::ne, zero, PROB_STATIC_INFREQUENT); {
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+      Node * narr = new_array(klass_node, len, 1);
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+      __ set(m_alloc, narr);
+    } __ end_if();
+
+    // We need some scratch space.  If len is reasonably small
+    // montgomeryMultiply() will allocate scratch on its own stack; if
+    // len is large we must do it here or we risk a stack overflow.
+    // The stub receives a null scratch pointer unless len > 512.
+    __ set(scratch_start, null());
+    __ if_then(len, BoolTest::gt, intcon(512), PROB_MIN); {
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+      Node * narr = new_array(long_array_klass_node, len, 1);
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+      __ set(scratch_alloc, narr);
+      __ set(scratch_start,
+             array_element_address(narr, intcon(0), T_LONG));
+    } __ end_if();
+
+    // From here on m is the merged value: the caller's array or the new one.
+    m = __ value(m_alloc);
+    // Can't use TypeAryPtr::INTS which uses Bottom offset.
+    _gvn.set_type(m, TypeOopPtr::make_from_klass(klass));
+
+    Node *scratch = __ value(scratch_start);
+
+    // Final sync IdealKit and GraphKit.
+    final_sync(ideal);
+#undef __
+
+    Node* m_start = array_element_address(m, intcon(0), T_INT);
+
+    // top() fills the second slot of the two-slot inv argument.
+    make_runtime_call(RC_LEAF|RC_NO_FP,
+                      OptoRuntime::montgomeryMultiply_Type(),
+                      stubAddr, stubName, TypePtr::BOTTOM,
+                      a_start, b_start, n_start, len, inv, top(),
+                      scratch, m_start);
+  } // original reexecute is set back here
+
+  C->set_has_split_ifs(true); // Has chance for split-if optimization
+  set_result(m);
+  return true;
+}
+
+
+//-------------inline_montgomerySquare-------------------------------------
+// Intrinsic for BigInteger.montgomerySquare(): emits a leaf call to the
+// StubRoutines::montgomerySquare() stub in place of the Java method.
+//
+// Arguments read from the call site:
+//   a, n (slots 1-2) - must each be statically known to be int[]
+//   len  (slot 3)    - length used for the result and scratch arrays
+//   inv  (slot 4)    - forwarded to the stub, followed by top() as the
+//                      second half of a two-slot value
+//   m    (slot 6)    - caller-supplied result array; replaced by a newly
+//                      allocated int[len] when it is null or shorter
+//                      than len
+//
+// Returns false (intrinsic declined) when no stub is available or when
+// a or n is not known to be an int array.  Otherwise the result is set to
+// the result array (m or its replacement) and true is returned.
+bool LibraryCallKit::inline_montgomerySquare() {
+  address stubAddr = StubRoutines::montgomerySquare();
+  if (stubAddr == NULL) {
+    return false; // No stub to call: decline instead of calling through NULL.
+  }
+  assert(UseMontgomerySquareIntrinsic, "not implemented on this platform");
+  const char* stubName = "montgomery_square";
+
+  assert(callee()->signature()->size() == 6, "montgomerySquare has 6 parameters");
+
+  Node* a    = argument(1);
+  Node* n    = argument(2);
+  Node* len  = argument(3);
+  Node* inv  = argument(4);
+  Node* m    = argument(6);
+
+  // Both array operands are validated against their own types.
+  const Type* a_type = a->Value(&_gvn);
+  const Type* n_type = n->Value(&_gvn);
+  const TypeAryPtr* top_a = a_type->isa_aryptr();
+  const TypeAryPtr* top_n = n_type->isa_aryptr();
+  if (top_a == NULL || top_a->klass() == NULL ||
+      top_n == NULL || top_n->klass() == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType a_elem = top_a->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType n_elem = top_n->klass()->as_array_klass()->element_type()->basic_type();
+  if (a_elem != T_INT || n_elem != T_INT) {
+    return false;
+  }
+
+  // Set the original stack and the reexecute bit for the interpreter
+  // to reexecute the bytecode that invokes
+  // BigInteger.montgomerySquare() if deoptimization happens on the
+  // return from array allocation in the runtime.
+  { PreserveReexecuteState preexecs(this);
+    jvms()->set_should_reexecute(true);
+
+    Node* a_start = array_element_address(a, intcon(0), a_elem);
+    Node* n_start = array_element_address(n, intcon(0), n_elem);
+
+    // Klass constants for the two arrays that may be allocated below:
+    // int[] for the result and long[] for the scratch space.
+    ciKlass* klass = ciTypeArrayKlass::make(T_INT);
+    Node* klass_node = makecon(TypeKlassPtr::make(klass));
+
+    ciKlass* long_array_klass = ciTypeArrayKlass::make(T_LONG);
+    Node* long_array_klass_node = makecon(TypeKlassPtr::make(long_array_klass));
+
+    IdealKit ideal(this);
+
+#define __ ideal.
+
+    // Cast the type of len to the type of an array index.  We do it
+    // here because otherwise new_array() will generate a CastII node
+    // inside a conditional.  If that happens this CastII will not
+    // dominate its use (in the call to montgomerySquare()) so the
+    // compilation will fail.
+    const TypeInt* len_type = _gvn.find_int_type(len);
+    const TypeInt* narrow_len_type = top_n->narrow_size_type(len_type);
+    len = __ CastII(len, narrow_len_type);
+
+    Node* one = __ ConI(1);
+    Node* zero = __ ConI(0);
+    IdealVariable need_alloc(ideal), m_alloc(ideal),
+      scratch_start(ideal), scratch_alloc(ideal);  __ declarations_done();
+
+    // need_alloc becomes non-zero when the caller's m cannot be used:
+    // either it is null or its length is less than len.
+    __ set(need_alloc, zero);
+    __ set(m_alloc, m);
+    __ if_then(m, BoolTest::eq, null(), PROB_STATIC_INFREQUENT); {
+      __ increment (need_alloc, one);
+    } __ else_(); {
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+      Node* mlen_arg = load_array_length(m);
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+      __ if_then(mlen_arg, BoolTest::lt, len, PROB_MIN); {
+        __ increment (need_alloc, one);
+      } __ end_if();
+    } __ end_if();
+
+    // Allocate a fresh int[len] result array only when required.
+    __ if_then(__ value(need_alloc), BoolTest::ne, zero, PROB_STATIC_INFREQUENT); {
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+      Node * narr = new_array(klass_node, len, 1);
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+      __ set(m_alloc, narr);
+    } __ end_if();
+
+    // We need some scratch space.  If len is reasonably small
+    // montgomerySquare() will allocate scratch on its own stack; if
+    // len is large we must do it here or we risk a stack overflow.
+    // The stub receives a null scratch pointer unless len > 512.
+    __ set(scratch_start, null());
+    __ if_then(len, BoolTest::gt, intcon(512), PROB_MIN); {
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+      Node * narr = new_array(long_array_klass_node, len, 1);
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+      __ set(scratch_alloc, narr);
+      __ set(scratch_start,
+             array_element_address(narr, intcon(0), T_LONG));
+    } __ end_if();
+
+    // From here on m is the merged value: the caller's array or the new one.
+    m = __ value(m_alloc);
+    // Can't use TypeAryPtr::INTS which uses Bottom offset.
+    _gvn.set_type(m, TypeOopPtr::make_from_klass(klass));
+
+    Node *scratch = __ value(scratch_start);
+
+    // Final sync IdealKit and GraphKit.
+    final_sync(ideal);
+#undef __
+
+    Node* m_start = array_element_address(m, intcon(0), T_INT);
+
+    // top() fills the second slot of the two-slot inv argument.
+    make_runtime_call(RC_LEAF|RC_NO_FP,
+                      OptoRuntime::montgomerySquare_Type(),
+                      stubAddr, stubName, TypePtr::BOTTOM,
+                      a_start, n_start, len, inv, top(),
+                      scratch, m_start);
+  } // original reexecute is set back here
+
+  C->set_has_split_ifs(true); // Has chance for split-if optimization
+  set_result(m);
+  return true;
+}
+
 
 /**
  * Calculate CRC32 for byte.
  * int java.util.zip.CRC32.update(int crc, int b)
  */
< prev index next >