< prev index next >

src/share/vm/opto/library_call.cpp

Print this page
rev 8502 : 8046943: JEP 246: Leverage CPU Instructions for GHASH and RSA
Summary: Add montgomeryMultiply intrinsic
Reviewed-by: kvn


 276   bool inline_aescrypt_Block(vmIntrinsics::ID id);
 277   bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
 278   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
 279   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
 280   Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
 281   bool inline_sha_implCompress(vmIntrinsics::ID id);
 282   bool inline_digestBase_implCompressMB(int predicate);
 283   bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
 284                                  bool long_state, address stubAddr, const char *stubName,
 285                                  Node* src_start, Node* ofs, Node* limit);
 286   Node* get_state_from_sha_object(Node *sha_object);
 287   Node* get_state_from_sha5_object(Node *sha_object);
 288   Node* inline_digestBase_implCompressMB_predicate(int predicate);
 289   bool inline_encodeISOArray();
 290   bool inline_updateCRC32();
 291   bool inline_updateBytesCRC32();
 292   bool inline_updateByteBufferCRC32();
 293   bool inline_multiplyToLen();
 294   bool inline_squareToLen();
 295   bool inline_mulAdd();

 296 
 297   bool inline_profileBoolean();
 298   bool inline_isCompileConstant();
 299 };
 300 
 301 
 302 //---------------------------make_vm_intrinsic----------------------------
 303 CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
 304   vmIntrinsics::ID id = m->intrinsic_id();
 305   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
 306 
 307   ccstr disable_intr = NULL;
 308 
 309   if ((DisableIntrinsic[0] != '\0'
 310        && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
 311       (method_has_option_value("DisableIntrinsic", disable_intr)
 312        && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) {
 313     // disabled by a user request on the command line:
 314     // example: -XX:DisableIntrinsic=_hashCode,_getClass
 315     return NULL;


 487     break;
 488 #endif
 489 
 490   case vmIntrinsics::_aescrypt_encryptBlock:
 491   case vmIntrinsics::_aescrypt_decryptBlock:
 492     if (!UseAESIntrinsics) return NULL;
 493     break;
 494 
 495   case vmIntrinsics::_multiplyToLen:
 496     if (!UseMultiplyToLenIntrinsic) return NULL;
 497     break;
 498 
 499   case vmIntrinsics::_squareToLen:
 500     if (!UseSquareToLenIntrinsic) return NULL;
 501     break;
 502 
 503   case vmIntrinsics::_mulAdd:
 504     if (!UseMulAddIntrinsic) return NULL;
 505     break;
 506 




 507   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
 508   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
 509     if (!UseAESIntrinsics) return NULL;
 510     // these two require the predicated logic
 511     predicates = 1;
 512     break;
 513 
 514   case vmIntrinsics::_sha_implCompress:
 515     if (!UseSHA1Intrinsics) return NULL;
 516     break;
 517 
 518   case vmIntrinsics::_sha2_implCompress:
 519     if (!UseSHA256Intrinsics) return NULL;
 520     break;
 521 
 522   case vmIntrinsics::_sha5_implCompress:
 523     if (!UseSHA512Intrinsics) return NULL;
 524     break;
 525 
 526   case vmIntrinsics::_digestBase_implCompressMB:


 912   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
 913     return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
 914 
 915   case vmIntrinsics::_sha_implCompress:
 916   case vmIntrinsics::_sha2_implCompress:
 917   case vmIntrinsics::_sha5_implCompress:
 918     return inline_sha_implCompress(intrinsic_id());
 919 
 920   case vmIntrinsics::_digestBase_implCompressMB:
 921     return inline_digestBase_implCompressMB(predicate);
 922 
 923   case vmIntrinsics::_multiplyToLen:
 924     return inline_multiplyToLen();
 925 
 926   case vmIntrinsics::_squareToLen:
 927     return inline_squareToLen();
 928 
 929   case vmIntrinsics::_mulAdd:
 930     return inline_mulAdd();
 931 



 932   case vmIntrinsics::_encodeISOArray:
 933     return inline_encodeISOArray();
 934 
 935   case vmIntrinsics::_updateCRC32:
 936     return inline_updateCRC32();
 937   case vmIntrinsics::_updateBytesCRC32:
 938     return inline_updateBytesCRC32();
 939   case vmIntrinsics::_updateByteBufferCRC32:
 940     return inline_updateByteBufferCRC32();
 941 
 942   case vmIntrinsics::_profileBoolean:
 943     return inline_profileBoolean();
 944   case vmIntrinsics::_isCompileConstant:
 945     return inline_isCompileConstant();
 946 
 947   default:
 948     // If you get here, it may be that someone has added a new intrinsic
 949     // to the list in vmSymbols.hpp without implementing it here.
 950 #ifndef PRODUCT
 951     if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {


5399   BasicType out_elem = out_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
5400   BasicType in_elem = in_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
5401   if (out_elem != T_INT || in_elem != T_INT) {
5402     return false;
5403   }
5404 
5405   Node* outlen = load_array_length(out);
5406   Node* new_offset = _gvn.transform(new SubINode(outlen, offset));
5407   Node* out_start = array_element_address(out, intcon(0), out_elem);
5408   Node* in_start = array_element_address(in, intcon(0), in_elem);
5409 
5410   Node*  call = make_runtime_call(RC_LEAF|RC_NO_FP,
5411                                   OptoRuntime::mulAdd_Type(),
5412                                   stubAddr, stubName, TypePtr::BOTTOM,
5413                                   out_start,in_start, new_offset, len, k);
5414   Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
5415   set_result(result);
5416   return true;
5417 }
5418 







































































































































5419 
5420 /**
5421  * Calculate CRC32 for byte.
5422  * int java.util.zip.CRC32.update(int crc, int b)
5423  */
5424 bool LibraryCallKit::inline_updateCRC32() {
5425   assert(UseCRC32Intrinsics, "need AVX and LCMUL instructions support");
5426   assert(callee()->signature()->size() == 2, "update has 2 parameters");
5427   // no receiver since it is static method
5428   Node* crc  = argument(0); // type: int
5429   Node* b    = argument(1); // type: int
5430 
5431   /*
5432    *    int c = ~ crc;
5433    *    b = timesXtoThe32[(b ^ c) & 0xFF];
5434    *    b = b ^ (c >>> 8);
5435    *    crc = ~b;
5436    */
5437 
5438   Node* M1 = intcon(-1);




 276   bool inline_aescrypt_Block(vmIntrinsics::ID id);
 277   bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
 278   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
 279   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
 280   Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
 281   bool inline_sha_implCompress(vmIntrinsics::ID id);
 282   bool inline_digestBase_implCompressMB(int predicate);
 283   bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
 284                                  bool long_state, address stubAddr, const char *stubName,
 285                                  Node* src_start, Node* ofs, Node* limit);
 286   Node* get_state_from_sha_object(Node *sha_object);
 287   Node* get_state_from_sha5_object(Node *sha_object);
 288   Node* inline_digestBase_implCompressMB_predicate(int predicate);
 289   bool inline_encodeISOArray();
 290   bool inline_updateCRC32();
 291   bool inline_updateBytesCRC32();
 292   bool inline_updateByteBufferCRC32();
 293   bool inline_multiplyToLen();
 294   bool inline_squareToLen();
 295   bool inline_mulAdd();
 296   bool inline_montgomeryMultiply();
 297 
 298   bool inline_profileBoolean();
 299   bool inline_isCompileConstant();
 300 };
 301 
 302 
 303 //---------------------------make_vm_intrinsic----------------------------
 304 CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
 305   vmIntrinsics::ID id = m->intrinsic_id();
 306   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
 307 
 308   ccstr disable_intr = NULL;
 309 
 310   if ((DisableIntrinsic[0] != '\0'
 311        && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
 312       (method_has_option_value("DisableIntrinsic", disable_intr)
 313        && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) {
 314     // disabled by a user request on the command line:
 315     // example: -XX:DisableIntrinsic=_hashCode,_getClass
 316     return NULL;


 488     break;
 489 #endif
 490 
 491   case vmIntrinsics::_aescrypt_encryptBlock:
 492   case vmIntrinsics::_aescrypt_decryptBlock:
 493     if (!UseAESIntrinsics) return NULL;
 494     break;
 495 
 496   case vmIntrinsics::_multiplyToLen:
 497     if (!UseMultiplyToLenIntrinsic) return NULL;
 498     break;
 499 
 500   case vmIntrinsics::_squareToLen:
 501     if (!UseSquareToLenIntrinsic) return NULL;
 502     break;
 503 
 504   case vmIntrinsics::_mulAdd:
 505     if (!UseMulAddIntrinsic) return NULL;
 506     break;
 507 
 508   case vmIntrinsics::_montgomeryMultiply:
 509      if (!UseMontgomeryMultiplyIntrinsic) return NULL;
 510     break;
 511 
 512   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
 513   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
 514     if (!UseAESIntrinsics) return NULL;
 515     // these two require the predicated logic
 516     predicates = 1;
 517     break;
 518 
 519   case vmIntrinsics::_sha_implCompress:
 520     if (!UseSHA1Intrinsics) return NULL;
 521     break;
 522 
 523   case vmIntrinsics::_sha2_implCompress:
 524     if (!UseSHA256Intrinsics) return NULL;
 525     break;
 526 
 527   case vmIntrinsics::_sha5_implCompress:
 528     if (!UseSHA512Intrinsics) return NULL;
 529     break;
 530 
 531   case vmIntrinsics::_digestBase_implCompressMB:


 917   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
 918     return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
 919 
 920   case vmIntrinsics::_sha_implCompress:
 921   case vmIntrinsics::_sha2_implCompress:
 922   case vmIntrinsics::_sha5_implCompress:
 923     return inline_sha_implCompress(intrinsic_id());
 924 
 925   case vmIntrinsics::_digestBase_implCompressMB:
 926     return inline_digestBase_implCompressMB(predicate);
 927 
 928   case vmIntrinsics::_multiplyToLen:
 929     return inline_multiplyToLen();
 930 
 931   case vmIntrinsics::_squareToLen:
 932     return inline_squareToLen();
 933 
 934   case vmIntrinsics::_mulAdd:
 935     return inline_mulAdd();
 936 
 937   case vmIntrinsics::_montgomeryMultiply:
 938     return inline_montgomeryMultiply();
 939 
 940   case vmIntrinsics::_encodeISOArray:
 941     return inline_encodeISOArray();
 942 
 943   case vmIntrinsics::_updateCRC32:
 944     return inline_updateCRC32();
 945   case vmIntrinsics::_updateBytesCRC32:
 946     return inline_updateBytesCRC32();
 947   case vmIntrinsics::_updateByteBufferCRC32:
 948     return inline_updateByteBufferCRC32();
 949 
 950   case vmIntrinsics::_profileBoolean:
 951     return inline_profileBoolean();
 952   case vmIntrinsics::_isCompileConstant:
 953     return inline_isCompileConstant();
 954 
 955   default:
 956     // If you get here, it may be that someone has added a new intrinsic
 957     // to the list in vmSymbols.hpp without implementing it here.
 958 #ifndef PRODUCT
 959     if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {


5407   BasicType out_elem = out_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
5408   BasicType in_elem = in_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
5409   if (out_elem != T_INT || in_elem != T_INT) {
5410     return false;
5411   }
5412 
5413   Node* outlen = load_array_length(out);
5414   Node* new_offset = _gvn.transform(new SubINode(outlen, offset));
5415   Node* out_start = array_element_address(out, intcon(0), out_elem);
5416   Node* in_start = array_element_address(in, intcon(0), in_elem);
5417 
5418   Node*  call = make_runtime_call(RC_LEAF|RC_NO_FP,
5419                                   OptoRuntime::mulAdd_Type(),
5420                                   stubAddr, stubName, TypePtr::BOTTOM,
5421                                   out_start,in_start, new_offset, len, k);
5422   Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
5423   set_result(result);
5424   return true;
5425 }
5426 
5427 //-------------inline_montgomeryMultiply-----------------------------------
5428 bool LibraryCallKit::inline_montgomeryMultiply() {
5429   address stubAddr = StubRoutines::montgomeryMultiply();
5430   assert(UseMontgomeryMultiplyIntrinsic, "not implementated on this platform");
5431   const char* stubName = "montgomery_multiply";
5432 
5433   assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");
5434 
5435   Node* a    = argument(1);
5436   Node* b    = argument(2);
5437   Node* n    = argument(3);
5438   Node* len  = argument(4);
5439   Node* inv  = argument(5);
5440   Node* m    = argument(7);
5441 
5442   const Type* a_type = a->Value(&_gvn);
5443   const Type* b_type = b->Value(&_gvn);
5444   const TypeAryPtr* top_a = a_type->isa_aryptr();
5445   const TypeAryPtr* top_b = a_type->isa_aryptr();
5446   const Type* n_type = a->Value(&_gvn);
5447   const TypeAryPtr* top_n = n_type->isa_aryptr();
5448   if (top_a  == NULL || top_b->klass()  == NULL ||
5449       top_a == NULL || top_b->klass() == NULL ||
5450       top_n == NULL || top_n->klass()  == NULL) {
5451     // failed array check
5452     return false;
5453   }
5454 
5455   BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
5456   BasicType b_elem = b_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
5457   BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
5458   if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT) {
5459     return false;
5460   }
5461 
5462   // Set the original stack and the reexecute bit for the interpreter
5463   // to reexecute the bytecode that invokes
5464   // BigInteger.montgomeryMultiply() if deoptimization happens on the
5465   // return from array allocation in the runtime.
5466   { PreserveReexecuteState preexecs(this);
5467     jvms()->set_should_reexecute(true);
5468 
5469     Node* a_start = array_element_address(a, intcon(0), a_elem);
5470     Node* b_start = array_element_address(b, intcon(0), b_elem);
5471     Node* n_start = array_element_address(n, intcon(0), n_elem);
5472 
5473     // Allocate the result array
5474     ciKlass* klass = ciTypeArrayKlass::make(T_INT);
5475     Node* klass_node = makecon(TypeKlassPtr::make(klass));
5476 
5477     ciKlass* long_array_klass = ciTypeArrayKlass::make(T_LONG);
5478     Node* long_array_klass_node = makecon(TypeKlassPtr::make(long_array_klass));
5479 
5480     IdealKit ideal(this);
5481 
5482 #define __ ideal.
5483 
5484     // Cast the type of len to the type of an array index.  We do it
5485     // here because otherwise new_array() will generate a CastII node
5486     // inside a conditional.  If that happens this CastII will not
5487     // dominate its use (in the call to montgomeryMultiply()) so the
5488     // compilation will fail.
5489     const TypeAryPtr* ary_type = n_type->isa_aryptr();
5490     const TypeInt* len_type = _gvn.find_int_type(len);
5491     const TypeInt* narrow_len_type = ary_type->narrow_size_type(len_type);
5492     len = __ CastII(len, narrow_len_type);
5493 
5494     Node* one = __ ConI(1);
5495     Node* zero = __ ConI(0);
5496     IdealVariable need_alloc(ideal), m_alloc(ideal),
5497       scratch_start(ideal), scratch_alloc(ideal);  __ declarations_done();
5498 
5499     __ set(need_alloc, zero);
5500     __ set(m_alloc, m);
5501     __ if_then(m, BoolTest::eq, null(), PROB_STATIC_INFREQUENT); {
5502       __ increment (need_alloc, one);
5503     } __ else_(); {
5504       // Update graphKit memory and control from IdealKit.
5505       sync_kit(ideal);
5506       Node* mlen_arg = load_array_length(m);
5507       // Update IdealKit memory and control from graphKit.
5508       __ sync_kit(this);
5509       __ if_then(mlen_arg, BoolTest::lt, len, PROB_MIN); {
5510         __ increment (need_alloc, one);
5511       } __ end_if();
5512     } __ end_if();
5513 
5514     __ if_then(__ value(need_alloc), BoolTest::ne, zero, PROB_STATIC_INFREQUENT); {
5515       // Update graphKit memory and control from IdealKit.
5516       sync_kit(ideal);
5517       Node * narr = new_array(klass_node, len, 1);
5518       // Update IdealKit memory and control from graphKit.
5519       __ sync_kit(this);
5520       __ set(m_alloc, narr);
5521     } __ end_if();
5522 
5523     // We need some scratch space.  If len is reasonably small
5524     // montgomeryMultiply() will allocate scratch on its own stack; if
5525     // len is large we must do it here or we risk a stack overflow.
5526     __ set(scratch_start, null());
5527     __ if_then(len, BoolTest::gt, intcon(512), PROB_MIN); {
5528       // Update graphKit memory and control from IdealKit.
5529       sync_kit(ideal);
5530       Node * narr = new_array(long_array_klass_node, len, 1);
5531       // Update IdealKit memory and control from graphKit.
5532       __ sync_kit(this);
5533       __ set(scratch_alloc, narr);
5534       __ set(scratch_start,
5535              array_element_address(narr, intcon(0), T_LONG));
5536     } __ end_if();
5537 
5538     m = __ value(m_alloc);
5539     // Can't use TypeAryPtr::INTS which uses Bottom offset.
5540     _gvn.set_type(m, TypeOopPtr::make_from_klass(klass));
5541 
5542     Node *scratch = __ value(scratch_start);
5543 
5544     // Final sync IdealKit and GraphKit.
5545     final_sync(ideal);
5546 #undef __
5547 
5548     Node* m_start = array_element_address(m, intcon(0), T_INT);
5549 
5550     Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
5551                                    OptoRuntime::montgomeryMultiply_Type(),
5552                                    stubAddr, stubName, TypePtr::BOTTOM,
5553                                    a_start, b_start, n_start, len, inv, top(),
5554                                    scratch, m_start);
5555   } // original reexecute is set back here
5556 
5557   C->set_has_split_ifs(true); // Has chance for split-if optimization
5558   set_result(m);
5559   return true;
5560 }
5561 
5562 
5563 /**
5564  * Calculate CRC32 for byte.
5565  * int java.util.zip.CRC32.update(int crc, int b)
5566  */
5567 bool LibraryCallKit::inline_updateCRC32() {
5568   assert(UseCRC32Intrinsics, "need AVX and LCMUL instructions support");
5569   assert(callee()->signature()->size() == 2, "update has 2 parameters");
5570   // no receiver since it is static method
5571   Node* crc  = argument(0); // type: int
5572   Node* b    = argument(1); // type: int
5573 
5574   /*
5575    *    int c = ~ crc;
5576    *    b = timesXtoThe32[(b ^ c) & 0xFF];
5577    *    b = b ^ (c >>> 8);
5578    *    crc = ~b;
5579    */
5580 
5581   Node* M1 = intcon(-1);


< prev index next >