< prev index next >

src/hotspot/cpu/s390/macroAssembler_s390.cpp

Print this page




 919   // Simply use distance from start of const section (should be patched in the end).
 920   long disp = toc_distance();
 921 
 922   RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
 923   relocate(rspec);
 924   z_larl(Rtoc, RelAddr::pcrel_off32(disp));  // Offset is in halfwords.
 925 }
 926 
 927 // PC-relative TOC access.
 928 // Load from anywhere pc-relative (with relocation of the load instruction).
     //   Rdst:         destination register passed to the emitted z_lgrl.
     //   dataLocation: address to load from. Its distance to the current pc must be even and
     //                 nonzero (asserted) and must fit the RelAddr32 range (guaranteed).
 929 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
 930   address          pc             = this->pc();
 931   ptrdiff_t        total_distance = dataLocation - pc;
 932   RelocationHolder rspec          = internal_word_Relocation::spec(dataLocation);
 933 
 934   assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
 935   assert(total_distance != 0, "sanity");
 936 
 937   // Some extra safety net: guarantee() is used so the range is checked regardless of the asserts above.
 938   if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
 939     guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away");
 940   }
 941 
     // Record the relocation first, then emit the load it applies to.
 942   (this)->relocate(rspec, relocInfo::pcrel_addr_format);
 943   z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
 944 }
 945 
 946 
 947 // PC-relative TOC access.
 948 // Load from anywhere pc-relative (with relocation of the load instruction).
 949 // The loaded address has to be relocated when added to the constant pool.
     //   Rdst:         destination register passed to the emitted z_lgrl.
     //   addrLocation: location to load from. Its distance to the current pc must be even
     //                 (asserted) and must fit the RelAddr32 range (guaranteed).
 950 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
 951   address          pc             = this->pc();
 952   ptrdiff_t        total_distance = addrLocation - pc;
 953   RelocationHolder rspec          = internal_word_Relocation::spec(addrLocation);
 954 
 955   assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
 956 
 957   // Some extra safety net: guarantee() is used so the range is checked regardless of the assert above.
 958   if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
 959     guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away");
 960   }
 961 
     // Record the relocation first, then emit the load it applies to.
 962   (this)->relocate(rspec, relocInfo::pcrel_addr_format);
 963   z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
 964 }
 965 
 966 // Generic operation: load a value from memory and test.
 967 // CondCode indicates the sign (<0, ==0, >0) of the loaded value.
     //   dst: register receiving the loaded value.
     //   a:   memory operand to load from.
 968 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
 969   z_lb(dst, a);          // Load step.
 970   z_ltr(dst, dst);       // Test step: dst is both source and target, so only the CC is of interest.
 971 }
 972 
 973 void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
 974   int64_t disp = a.disp20();
 975   if (Displacement::is_shortDisp(disp)) {
 976     z_lh(dst, a);
 977   } else if (Displacement::is_longDisp(disp)) {
 978     z_lhy(dst, a);
 979   } else {


1008   } else {
1009     ShouldNotReachHere();
1010   }
1011 }
1012 
1013 // Test a single bit of a register; the outcome is reported via the condition code.
1014 void MacroAssembler::testbit(Register r, unsigned int bitPos) {
1015   // Each z_tm* variant receives a mask positioned within one 16-bit slice of the register.
1016   const unsigned int slice_mask = 1U << (bitPos % 16);
1017   switch (bitPos / 16) {
1018     case 0:  z_tmll(r, slice_mask); break;  // bitPos  0..15
1019     case 1:  z_tmlh(r, slice_mask); break;  // bitPos 16..31
1020     case 2:  z_tmhl(r, slice_mask); break;  // bitPos 32..47
1021     case 3:  z_tmhh(r, slice_mask); break;  // bitPos 48..63
1022     default:
1023       ShouldNotReachHere();                 // bitPos >= 64 is not a valid bit position.
1024       break;
1025   }
1026 }
1027 







1028 // Zero a register, i.e. load the constant zero into it.
1029 // Returns the length (in bytes) of the generated instruction(s).
1030 //   whole_reg: true clears all 64 bits, false clears 32 bits only.
1031 //   set_cc:    true selects the instruction that sets the condition code.
1032 int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
1033   const unsigned int code_begin = offset();
1034   if (whole_reg && set_cc)  { z_xgr(r, r);       }  // 64 bit, CC requested.
1035   else if (whole_reg)       { z_laz(r, 0, Z_R0); }  // 64 bit, no CC requested.
1036   else if (set_cc)          { z_xr(r, r);        }  // 32 bit, CC requested.
1037   else                      { z_lhi(r, 0);       }  // 32 bit, no CC requested.
1038 
1039   return offset() - code_begin;
1040 }
1041 
1042 #ifdef ASSERT
1043 int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
1044   switch (pattern_len) {
1045     case 1:
1046       pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
1047     case 2:


4879   if (VM_Version::has_ExecuteExtensions()) {
4880     z_exrl(Z_R1, MVC_template);
4881   } else {
4882     z_ex(tmp1_reg, 0, Z_R0, Z_R1);
4883   }
4884 
4885   bind(done);
4886 
4887   BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");
4888 
4889   int block_end = offset();
4890   return block_end - block_start;
4891 }
4892 
4893 //------------------------------------------------------
4894 //   Special String Intrinsics. Implementation
4895 //------------------------------------------------------
4896 
4897 // Intrinsics for CompactStrings
4898 
4899 // Compress char[] to byte[]. odd_reg contains cnt. Kills dst. Early clobber: result
4900 // The result is the number of characters copied before the first incompatible character was found.
4901 // If tmp2 is provided and the compression fails, the compression stops exactly at this point and the result is precise.
4902 //
4903 // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
4904 // - Different number of characters may have been written to dead array (if tmp2 not provided).
4905 // - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
     //
     // The C++ return value is the size of the emitted code (offset() delta), not the compress result.
     // Z_R0 and Z_R1 are used as scratch registers by the emitted code.
4906 unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register odd_reg,
4907                                              Register even_reg, Register tmp, Register tmp2) {
4908   int block_start = offset();
4909   Label Lloop1, Lloop2, Lslow, Ldone;
4910   const Register addr2 = dst, ind1 = result, mask = tmp;
4911   const bool precise = (tmp2 != noreg);   // Precise mode needs tmp2 to check before storing.
4912 
4913   BLOCK_COMMENT("string_compress {");
4914 
4915   z_sll(odd_reg, 1);       // Number of bytes to read. (Must be a positive simm32.)
4916   clear_reg(ind1);         // Index to read.
4917   z_llilf(mask, 0xFF00FF00);
4918   z_ahi(odd_reg, -16);     // Last possible index for fast loop.
4919   z_brl(Lslow);            // Fewer than 16 bytes: go straight to the slow loop.
4920 
4921   // ind1: index, even_reg: index increment, odd_reg: index limit
4922   z_iihf(mask, 0xFF00FF00); // Complete the 64-bit mask covering the high byte of each of 4 chars.
4923   z_lhi(even_reg, 16);
4924 
4925   bind(Lloop1); // 8 Characters per iteration.
4926   z_lg(Z_R0, Address(src, ind1));
4927   z_lg(Z_R1, Address(src, ind1, 8));
4928   if (precise) {
         // Check all 8 chars before anything is stored.
4929     if (VM_Version::has_DistinctOpnds()) {
4930       z_ogrk(tmp2, Z_R0, Z_R1);
4931     } else {
4932       z_lgr(tmp2, Z_R0);
4933       z_ogr(tmp2, Z_R1);
4934     }
4935     z_ngr(tmp2, mask);
4936     z_brne(Lslow);         // Failed fast case, retry slowly.
4937   }
4938   z_stcmh(Z_R0, 5, 0, addr2);
4939   z_stcm(Z_R0, 5, 2, addr2);
4940   if (!precise) { z_ogr(Z_R0, Z_R1); }   // Z_R0 has been stored; reuse it to combine both words for the check.
4941   z_stcmh(Z_R1, 5, 4, addr2);
4942   z_stcm(Z_R1, 5, 6, addr2);
4943   if (!precise) {
4944     z_ngr(Z_R0, mask);
4945     z_brne(Ldone);         // Failed (more than needed was written).
4946   }
4947   z_aghi(addr2, 8);
4948   z_brxle(ind1, even_reg, Lloop1);
4949 
4950   bind(Lslow);
4951   // Compute index limit and skip if negative.
4952   z_ahi(odd_reg, 16-2);    // Last possible index for slow loop.
4953   z_lhi(even_reg, 2);
4954   z_cr(ind1, odd_reg);
4955   z_brh(Ldone);
4956 
4957   bind(Lloop2); // 1 Character per iteration.
4958   z_llh(Z_R0, Address(src, ind1));
4959   z_tmll(Z_R0, 0xFF00);
4960   z_brnaz(Ldone);          // Failed slow case: Return number of written characters.
4961   z_stc(Z_R0, Address(addr2));
4962   z_aghi(addr2, 1);
4963   z_brxle(ind1, even_reg, Lloop2);
4964 
4965   bind(Ldone);             // result = ind1 = 2*cnt
4966   z_srl(ind1, 1);          // Convert the byte index into a character count.
4967 
4968   BLOCK_COMMENT("} string_compress");
4969 
4970   return offset() - block_start;
4971 }
4972 
4973 // Inflate byte[] to char[], implemented via translate_ot with a table address in Z_R1.
     //   dst/cnt must form an even/odd register pair (asserted below).
     //   Z_R0 (stop character) and Z_R1 (table address) are overwritten; tmp receives a copy of src.
     // The C++ return value is the size of the emitted code (offset() delta).
4974 unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
4975   int block_start = offset();
4976 
4977   BLOCK_COMMENT("string_inflate {");
4978 
4979   Register stop_char = Z_R0;
4980   Register table     = Z_R1;
4981   Register src_addr  = tmp;
4982 
4983   assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
4984   assert(dst->encoding()%2 == 0, "must be even reg");
4985   assert(cnt->encoding()%2 == 1, "must be odd reg");
4986   assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");
4987 
4988   StubRoutines::zarch::generate_load_trot_table_addr(this, table);  // kills Z_R0 (if ASSERT)
4989   clear_reg(stop_char);  // Stop character. Not used here, but initialized to have a defined value.
4990   lgr_if_needed(src_addr, src);
4991   z_llgfr(cnt, cnt);     // # src characters, must be a positive simm32.
4992 
4993   translate_ot(dst, src_addr, /* mask = */ 0x0001);
4994 
4995   BLOCK_COMMENT("} string_inflate");
4996 
4997   return offset() - block_start;
4998 }
4999 
5000 // Inflate byte[] to char[]. odd_reg contains cnt. Kills src.
     //   even_reg/odd_reg serve as index increment/index limit for the z_brxle loops.
     //   tmp holds the write index; Z_R0 and Z_R1 are used as scratch registers.
     // The C++ return value is the size of the emitted code (offset() delta).
5001 unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register odd_reg,
5002                                             Register even_reg, Register tmp) {
5003   int block_start = offset();
5004 
5005   BLOCK_COMMENT("string_inflate {");
5006 
5007   Label Lloop1, Lloop2, Lslow, Ldone;
5008   const Register addr1 = src, ind2 = tmp;
5009 
5010   z_sll(odd_reg, 1);       // Number of bytes to write. (Must be a positive simm32.)
5011   clear_reg(ind2);         // Index to write.
5012   z_ahi(odd_reg, -16);     // Last possible index for fast loop.
5013   z_brl(Lslow);            // Fewer than 16 bytes to write: go straight to the slow loop.
5014 
5015   // ind2: index, even_reg: index increment, odd_reg: index limit
5016   clear_reg(Z_R0);         // Cleared up front; the loop's inserts use mask 5 and leave the other bytes untouched.
5017   clear_reg(Z_R1);
5018   z_lhi(even_reg, 16);
5019 
5020   bind(Lloop1); // 8 Characters per iteration.
5021   z_icmh(Z_R0, 5, 0, addr1);
5022   z_icmh(Z_R1, 5, 4, addr1);
5023   z_icm(Z_R0, 5, 2, addr1);
5024   z_icm(Z_R1, 5, 6, addr1);
5025   z_aghi(addr1, 8);
5026   z_stg(Z_R0, Address(dst, ind2));
5027   z_stg(Z_R1, Address(dst, ind2, 8));
5028   z_brxle(ind2, even_reg, Lloop1);
5029 
5030   bind(Lslow);
5031   // Compute index limit and skip if negative.
5032   z_ahi(odd_reg, 16-2);    // Last possible index for slow loop.
5033   z_lhi(even_reg, 2);
5034   z_cr(ind2, odd_reg);
5035   z_brh(Ldone);
5036 
5037   bind(Lloop2); // 1 Character per iteration.
5038   z_llc(Z_R0, Address(addr1));
5039   z_sth(Z_R0, Address(dst, ind2));
5040   z_aghi(addr1, 1);
5041   z_brxle(ind2, even_reg, Lloop2);
5042 
5043   bind(Ldone);
5044 
5045   BLOCK_COMMENT("} string_inflate");
5046 
5047   return offset() - block_start;
5048 }
5049 
5050 // Kills src.
5051 unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt,
5052                                            Register odd_reg, Register even_reg, Register tmp) {
5053   int block_start = offset();
5054   Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
5055   const Register addr = src, mask = tmp;
5056 
5057   BLOCK_COMMENT("has_negatives {");
5058 
5059   z_llgfr(Z_R1, cnt);      // Number of bytes to read. (Must be a positive simm32.)
5060   z_llilf(mask, 0x80808080);
5061   z_lhi(result, 1);        // Assume true.
5062   // Last possible addr for fast loop.
5063   z_lay(odd_reg, -16, Z_R1, src);
5064   z_chi(cnt, 16);
5065   z_brl(Lslow);
5066 




 919   // Simply use distance from start of const section (should be patched in the end).
 920   long disp = toc_distance();
 921 
 922   RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
 923   relocate(rspec);
 924   z_larl(Rtoc, RelAddr::pcrel_off32(disp));  // Offset is in halfwords.
 925 }
 926 
 927 // PC-relative TOC access: emit a pc-relative load (z_lgrl) from dataLocation into Rdst,
 928 // together with an internal_word relocation for the load instruction.
 929 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
 930   const address    insn_addr      = pc();
 931   const ptrdiff_t  total_distance = dataLocation - insn_addr;
 932   RelocationHolder data_reloc     = internal_word_Relocation::spec(dataLocation);
 933 
 934   assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
 935   assert(total_distance != 0, "sanity");
 936 
 937   // Range check that stays active even where the asserts above are compiled out.
 938   if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
 939     guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
 940   }
 941 
 942   relocate(data_reloc, relocInfo::pcrel_addr_format);  // Relocation first, then the instruction it covers.
 943   z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
 944 }
 945 
 946 
 947 // PC-relative TOC access.
 948 // Load from anywhere pc-relative (with relocation of the load instruction).
 949 // The loaded address has to be relocated when added to the constant pool.
     //   Rdst:         destination register passed to the emitted z_lgrl.
     //   addrLocation: location to load from. Its distance to the current pc must be even
     //                 (asserted) and must fit the RelAddr32 range (guaranteed).
 950 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
 951   address          pc             = this->pc();
 952   ptrdiff_t        total_distance = addrLocation - pc;
 953   RelocationHolder rspec          = internal_word_Relocation::spec(addrLocation);
 954 
 955   assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
 956 
 957   // Some extra safety net. The message names this function so a failure is attributed correctly.
 958   if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
 959     guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_addr_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
 960   }
 961 
     // Record the relocation first, then emit the load it applies to.
 962   (this)->relocate(rspec, relocInfo::pcrel_addr_format);
 963   z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
 964 }
 965 
 966 // Generic operation: load a value from memory and test.
 967 // CondCode indicates the sign (<0, ==0, >0) of the loaded value.
     //   dst: register receiving the loaded value.
     //   a:   memory operand to load from.
 968 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
 969   z_lb(dst, a);          // Load step.
 970   z_ltr(dst, dst);       // Test step: dst is both source and target, so only the CC is of interest.
 971 }
 972 
 973 void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
 974   int64_t disp = a.disp20();
 975   if (Displacement::is_shortDisp(disp)) {
 976     z_lh(dst, a);
 977   } else if (Displacement::is_longDisp(disp)) {
 978     z_lhy(dst, a);
 979   } else {


1008   } else {
1009     ShouldNotReachHere();
1010   }
1011 }
1012 
1013 // Test a single bit of a register; the outcome is reported via the condition code.
1014 void MacroAssembler::testbit(Register r, unsigned int bitPos) {
1015   // Each z_tm* variant receives a mask positioned within one 16-bit slice of the register.
1016   const unsigned int slice_mask = 1U << (bitPos % 16);
1017   switch (bitPos / 16) {
1018     case 0:  z_tmll(r, slice_mask); break;  // bitPos  0..15
1019     case 1:  z_tmlh(r, slice_mask); break;  // bitPos 16..31
1020     case 2:  z_tmhl(r, slice_mask); break;  // bitPos 32..47
1021     case 3:  z_tmhh(r, slice_mask); break;  // bitPos 48..63
1022     default:
1023       ShouldNotReachHere();                 // bitPos >= 64 is not a valid bit position.
1024       break;
1025   }
1026 }
1027 
     // Emit a z_pfd for the memory operand a, using code 1.
     // NOTE(review): code 1 presumably requests a prefetch for read access - confirm against the PFD definition.
1028 void MacroAssembler::prefetch_read(Address a) {
1029   z_pfd(1, a.disp20(), a.indexOrR0(), a.base());
1030 }
     // Emit a z_pfd for the memory operand a, using code 2.
     // NOTE(review): code 2 presumably requests a prefetch for update (store) access - confirm against the PFD definition.
1031 void MacroAssembler::prefetch_update(Address a) {
1032   z_pfd(2, a.disp20(), a.indexOrR0(), a.base());
1033 }
1034 
1035 // Zero a register, i.e. load the constant zero into it.
1036 // Returns the length (in bytes) of the generated instruction(s).
1037 //   whole_reg: true clears all 64 bits, false clears 32 bits only.
1038 //   set_cc:    true selects the instruction that sets the condition code.
1039 int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
1040   const unsigned int code_begin = offset();
1041   if (whole_reg && set_cc)  { z_xgr(r, r);       }  // 64 bit, CC requested.
1042   else if (whole_reg)       { z_laz(r, 0, Z_R0); }  // 64 bit, no CC requested.
1043   else if (set_cc)          { z_xr(r, r);        }  // 32 bit, CC requested.
1044   else                      { z_lhi(r, 0);       }  // 32 bit, no CC requested.
1045 
1046   return offset() - code_begin;
1047 }
1048 
1049 #ifdef ASSERT
1050 int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
1051   switch (pattern_len) {
1052     case 1:
1053       pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
1054     case 2:


4886   if (VM_Version::has_ExecuteExtensions()) {
4887     z_exrl(Z_R1, MVC_template);
4888   } else {
4889     z_ex(tmp1_reg, 0, Z_R0, Z_R1);
4890   }
4891 
4892   bind(done);
4893 
4894   BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");
4895 
4896   int block_end = offset();
4897   return block_end - block_start;
4898 }
4899 
4900 //------------------------------------------------------
4901 //   Special String Intrinsics. Implementation
4902 //------------------------------------------------------
4903 
4904 // Intrinsics for CompactStrings
4905 
4906 // Compress char[] to byte[].
4907 //   Restores: src, dst
4908 //   Uses:     cnt
4909 //   Kills:    tmp, Z_R0, Z_R1.
     //             Vector registers Z_V16..Z_V23 are used as well if VM_Version::has_VectorFacility().
4910 //   Early clobber: result.
4911 // Note:
4912 //   cnt is signed int. Do not rely on high word!
4913 //       counts # characters, not bytes.
4914 // The result is the number of characters copied before the first incompatible character was found.
4915 // If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
4916 // by a few bytes. The result always indicates the number of copied characters.
4917 //
4918 // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
4919 // - Different number of characters may have been written to dead array (if precise is false).
4920 // - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
     //
     // The C++ return value is the size of the emitted code (offset() delta), not the compress result.
4921 unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
4922                                              Register tmp,    bool precise) {
4923   assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);
4924 
4925   if (precise) {
4926     BLOCK_COMMENT("encode_iso_array {");
4927   } else {
4928     BLOCK_COMMENT("string_compress {");
4929   }
4930   int  block_start = offset();
4931 
4932   Register       Rsrc  = src;
4933   Register       Rdst  = dst;
4934   Register       Rix   = tmp;
4935   Register       Rcnt  = cnt;
4936   Register       Rmask = result;  // holds incompatibility check mask until result value is stored.
4937   Label          ScalarShortcut, AllDone;
4938 
     // 64-bit mask selecting the high byte of each of the 4 chars held in a register.
4939   z_iilf(Rmask, 0xFF00FF00);
4940   z_iihf(Rmask, 0xFF00FF00);
4941 
4942 #if 0  // Sacrifice shortcuts for code compactness
4943   {
4944     //---<  shortcuts for short strings (very frequent)   >---
4945     //   Strings with 4 and 8 characters were found to occur very frequently.
4946     //   Therefore, we handle them right away with minimal overhead.
4947     Label     skipShortcut, skip4Shortcut, skip8Shortcut;
4948     Register  Rout = Z_R0;
4949     z_chi(Rcnt, 4);
4950     z_brne(skip4Shortcut);                 // 4 characters are very frequent
4951       z_lg(Z_R0, 0, Rsrc);                 // Treat exactly 4 characters specially.
4952       if (VM_Version::has_DistinctOpnds()) {
4953         Rout = Z_R0;
4954         z_ngrk(Rix, Z_R0, Rmask);
4955       } else {
4956         Rout = Rix;
4957         z_lgr(Rix, Z_R0);
4958         z_ngr(Z_R0, Rmask);
4959       }
4960       z_brnz(skipShortcut);
4961       z_stcmh(Rout, 5, 0, Rdst);
4962       z_stcm(Rout,  5, 2, Rdst);
4963       z_lgfr(result, Rcnt);
4964       z_bru(AllDone);
4965     bind(skip4Shortcut);
4966 
4967     z_chi(Rcnt, 8);
4968     z_brne(skip8Shortcut);                 // There's more to do...
4969       z_lmg(Z_R0, Z_R1, 0, Rsrc);          // Treat exactly 8 characters specially.
4970       if (VM_Version::has_DistinctOpnds()) {
4971         Rout = Z_R0;
4972         z_ogrk(Rix, Z_R0, Z_R1);
4973         z_ngr(Rix, Rmask);
4974       } else {
4975         Rout = Rix;
4976         z_lgr(Rix, Z_R0);
4977         z_ogr(Z_R0, Z_R1);
4978         z_ngr(Z_R0, Rmask);
4979       }
4980       z_brnz(skipShortcut);
4981       z_stcmh(Rout, 5, 0, Rdst);
4982       z_stcm(Rout,  5, 2, Rdst);
4983       z_stcmh(Z_R1, 5, 4, Rdst);
4984       z_stcm(Z_R1,  5, 6, Rdst);
4985       z_lgfr(result, Rcnt);
4986       z_bru(AllDone);
4987 
4988     bind(skip8Shortcut);
4989     clear_reg(Z_R0, true, false);          // #characters already processed (none). Precond for scalar loop.
4990     z_brl(ScalarShortcut);                 // Just a few characters
4991 
4992     bind(skipShortcut);
4993   }
4994 #endif
4995   clear_reg(Z_R0);                         // make sure register is properly initialized.
     // From here on, Z_R0 tracks the # chars already processed by the preceding loop(s).
4996 
4997   if (VM_Version::has_VectorFacility()) {
4998     const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
4999                                            // Otherwise just do nothing in vector mode.
5000                                            // Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)).
5001     const int  log_min_vcnt = exact_log2(min_vcnt);
5002     Label      VectorLoop, VectorDone, VectorBreak;
5003 
5004     VectorRegister Vtmp1      = Z_V16;
5005     VectorRegister Vtmp2      = Z_V17;
5006     VectorRegister Vmask      = Z_V18;
5007     VectorRegister Vzero      = Z_V19;
5008     VectorRegister Vsrc_first = Z_V20;
5009     VectorRegister Vsrc_last  = Z_V23;
5010 
5011     assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error");
5012     assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
5013     z_srak(Rix, Rcnt, log_min_vcnt);       // # vector loop iterations
5014     z_brz(VectorDone);                     // not enough data for vector loop
5015 
5016     z_vzero(Vzero);                        // all zeroes
5017     z_vgmh(Vmask, 0, 7);                   // generate 0xff00 mask for all 2-byte elements
5018     z_sllg(Z_R0, Rix, log_min_vcnt);       // remember #chars that will be processed by vector loop
5019 
5020     bind(VectorLoop);
5021       z_vlm(Vsrc_first, Vsrc_last, 0, Rsrc);
5022       add2reg(Rsrc, min_vcnt*2);
5023 
5024       //---<  check for incompatible character  >---
5025       z_vo(Vtmp1, Z_V20, Z_V21);
5026       z_vo(Vtmp2, Z_V22, Z_V23);
5027       z_vo(Vtmp1, Vtmp1, Vtmp2);
5028       z_vn(Vtmp1, Vtmp1, Vmask);
5029       z_vceqhs(Vtmp1, Vtmp1, Vzero);       // high half of all chars must be zero for successful compress.
5030       z_brne(VectorBreak);                 // break vector loop, incompatible character found.
5031                                            // re-process data from current iteration in break handler.
5032 
5033       //---<  pack & store characters  >---
5034       z_vpkh(Vtmp1, Z_V20, Z_V21);         // pack (src1, src2) -> tmp1
5035       z_vpkh(Vtmp2, Z_V22, Z_V23);         // pack (src3, src4) -> tmp2
5036       z_vstm(Vtmp1, Vtmp2, 0, Rdst);       // store packed string
5037       add2reg(Rdst, min_vcnt);
5038 
5039       z_brct(Rix, VectorLoop);
5040 
5041     z_bru(VectorDone);
5042 
5043     bind(VectorBreak);
5044       z_sll(Rix, log_min_vcnt);            // # chars not yet processed by VectorLoop, incl. current iteration.
5045       z_sr(Z_R0, Rix);                     // correct # chars processed in total.
5046 
5047     bind(VectorDone);
5048   }
5049 
5050   {
5051     const int  min_cnt     =  8;           // Minimum #characters required to use unrolled loop.
5052                                            // Otherwise just do nothing in unrolled loop.
5053                                            // Must be multiple of 8.
5054     const int  log_min_cnt = exact_log2(min_cnt);
5055     Label      UnrolledLoop, UnrolledDone, UnrolledBreak;
5056 
5057     if (VM_Version::has_DistinctOpnds()) {
5058       z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to compress in unrolled loop
5059     } else {
5060       z_lr(Rix, Rcnt);
5061       z_sr(Rix, Z_R0);
5062     }
5063     z_sra(Rix, log_min_cnt);             // unrolled loop count
5064     z_brz(UnrolledDone);
5065 
5066     bind(UnrolledLoop);
5067       z_lmg(Z_R0, Z_R1, 0, Rsrc);
5068       if (precise) {
5069         z_ogr(Z_R1, Z_R0);                 // check all 8 chars for incompatibility
5070         z_ngr(Z_R1, Rmask);
5071         z_brnz(UnrolledBreak);
5072 
5073         z_lg(Z_R1, 8, Rsrc);               // reload destroyed register
5074         z_stcmh(Z_R0, 5, 0, Rdst);
5075         z_stcm(Z_R0,  5, 2, Rdst);
5076       } else {
           // Not precise: the first 4 chars are stored before the check of all 8 chars.
5077         z_stcmh(Z_R0, 5, 0, Rdst);
5078         z_stcm(Z_R0,  5, 2, Rdst);
5079 
5080         z_ogr(Z_R0, Z_R1);
5081         z_ngr(Z_R0, Rmask);
5082         z_brnz(UnrolledBreak);
5083       }
5084       z_stcmh(Z_R1, 5, 4, Rdst);
5085       z_stcm(Z_R1,  5, 6, Rdst);
5086 
5087       add2reg(Rsrc, min_cnt*2);
5088       add2reg(Rdst, min_cnt);
5089       z_brct(Rix, UnrolledLoop);
5090 
5091     z_lgfr(Z_R0, Rcnt);                    // # chars processed in total after unrolled loop.
5092     z_nilf(Z_R0, ~(min_cnt-1));
5093     z_tmll(Rcnt, min_cnt-1);
5094     z_brnaz(ScalarShortcut);               // if all bits zero, there is nothing left to do for scalar loop.
5095                                            // Rix == 0 in all cases.
5096     z_lgfr(result, Rcnt);                  // all characters processed.
5097     z_sgfr(Rdst, Rcnt);                    // restore ptr
5098     z_sgfr(Rsrc, Rcnt);                    // restore ptr, double the element count for Rsrc restore
5099     z_sgfr(Rsrc, Rcnt);
5100     z_bru(AllDone);
5101 
5102     bind(UnrolledBreak);
5103     z_lgfr(Z_R0, Rcnt);                    // # chars processed in total after unrolled loop
5104     z_nilf(Z_R0, ~(min_cnt-1));
5105     z_sll(Rix, log_min_cnt);               // # chars not yet processed by UnrolledLoop, incl. current iteration.
5106     z_sr(Z_R0, Rix);                       // correct # chars processed in total.
5107     if (!precise) {
5108       z_lgfr(result, Z_R0);
5109       z_aghi(result, min_cnt/2);           // min_cnt/2 characters have already been written
5110                                            // but ptrs were not updated yet.
5111       z_sgfr(Rdst, Z_R0);                  // restore ptr
5112       z_sgfr(Rsrc, Z_R0);                  // restore ptr, double the element count for Rsrc restore
5113       z_sgfr(Rsrc, Z_R0);
5114       z_bru(AllDone);
5115     }
       // In precise mode, fall through: the scalar loop re-processes the failing group char by char.
5116     bind(UnrolledDone);
5117   }
5118 
5119   {
5120     Label     ScalarLoop, ScalarDone, ScalarBreak;
5121 
5122     bind(ScalarShortcut);
5123     z_ltgfr(result, Rcnt);
5124     z_brz(AllDone);
5125 
5126 #if 0  // Sacrifice shortcuts for code compactness
5127     {
5128       //---<  Special treatment for very short strings (one or two characters)  >---
5129       //   For these strings, we are sure that the above code was skipped.
5130       //   Thus, no registers were modified, register restore is not required.
5131       Label     ScalarDoit, Scalar2Char;
5132       z_chi(Rcnt, 2);
5133       z_brh(ScalarDoit);
5134       z_llh(Z_R1,  0, Z_R0, Rsrc);
5135       z_bre(Scalar2Char);
5136       z_tmll(Z_R1, 0xff00);
5137       z_lghi(result, 0);                   // cnt == 1, first char invalid, no chars successfully processed
5138       z_brnaz(AllDone);
5139       z_stc(Z_R1,  0, Z_R0, Rdst);
5140       z_lghi(result, 1);
5141       z_bru(AllDone);
5142 
5143       bind(Scalar2Char);
5144       z_llh(Z_R0,  2, Z_R0, Rsrc);
5145       z_tmll(Z_R1, 0xff00);
5146       z_lghi(result, 0);                   // cnt == 2, first char invalid, no chars successfully processed
5147       z_brnaz(AllDone);
5148       z_stc(Z_R1,  0, Z_R0, Rdst);
5149       z_tmll(Z_R0, 0xff00);
5150       z_lghi(result, 1);                   // cnt == 2, second char invalid, one char successfully processed
5151       z_brnaz(AllDone);
5152       z_stc(Z_R0,  1, Z_R0, Rdst);
5153       z_lghi(result, 2);
5154       z_bru(AllDone);
5155 
5156       bind(ScalarDoit);
5157     }
5158 #endif
5159 
5160     if (VM_Version::has_DistinctOpnds()) {
5161       z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to compress in scalar loop
5162     } else {
5163       z_lr(Rix, Rcnt);
5164       z_sr(Rix, Z_R0);
5165     }
5166     z_lgfr(result, Rcnt);                  // # processed characters (if all runs ok).
5167     z_brz(ScalarDone);
5168 
5169     bind(ScalarLoop);
5170       z_llh(Z_R1, 0, Z_R0, Rsrc);
5171       z_tmll(Z_R1, 0xff00);
5172       z_brnaz(ScalarBreak);
5173       z_stc(Z_R1, 0, Z_R0, Rdst);
5174       add2reg(Rsrc, 2);
5175       add2reg(Rdst, 1);
5176       z_brct(Rix, ScalarLoop);
5177 
5178     z_bru(ScalarDone);
5179 
5180     bind(ScalarBreak);
5181     z_sr(result, Rix);                     // subtract the # chars not processed (incl. the failing one).
5182 
5183     bind(ScalarDone);
5184     z_sgfr(Rdst, result);                  // restore ptr
5185     z_sgfr(Rsrc, result);                  // restore ptr, double the element count for Rsrc restore
5186     z_sgfr(Rsrc, result);
5187   }
5188   bind(AllDone);
5189 
5190   if (precise) {
5191     BLOCK_COMMENT("} encode_iso_array");
5192   } else {
5193     BLOCK_COMMENT("} string_compress");
5194   }
5195   return offset() - block_start;
5196 }
5197 
5198 // Inflate byte[] to char[], implemented via translate_ot with a table address in Z_R1.
     //   dst/cnt must form an even/odd register pair (asserted below).
     //   Z_R0 (stop character) and Z_R1 (table address) are overwritten; tmp receives a copy of src.
     // The C++ return value is the size of the emitted code (offset() delta).
5199 unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
5200   int block_start = offset();
5201 
5202   BLOCK_COMMENT("string_inflate {");
5203 
5204   Register stop_char = Z_R0;
5205   Register table     = Z_R1;
5206   Register src_addr  = tmp;
5207 
5208   assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
5209   assert(dst->encoding()%2 == 0, "must be even reg");
5210   assert(cnt->encoding()%2 == 1, "must be odd reg");
5211   assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");
5212 
5213   StubRoutines::zarch::generate_load_trot_table_addr(this, table);  // kills Z_R0 (if ASSERT)
5214   clear_reg(stop_char);  // Stop character. Not used here, but initialized to have a defined value.
5215   lgr_if_needed(src_addr, src);
5216   z_llgfr(cnt, cnt);     // # src characters, must be a positive simm32.
5217 
5218   translate_ot(dst, src_addr, /* mask = */ 0x0001);
5219 
5220   BLOCK_COMMENT("} string_inflate");
5221 
5222   return offset() - block_start;
5223 }
5224 
5225 // Inflate byte[] to char[].
5226 //   Restores: src, dst
5227 //   Uses:     cnt
5228 //   Kills:    tmp, Z_R0, Z_R1.
     //             Z_V20..Z_V25 are written as well if VM_Version::has_VectorFacility().
     //   Returns:  offset() after code emission minus offset() before code emission
     //             (presumably the size of the generated code; unit is that of offset()).
5229 // Note:
5230 //   cnt is signed int. Do not rely on high word!
5231 //       counts # characters, not bytes.
     //   Structure of the emitted code:
     //     1. vector loop, 32 characters per iteration (only if the vector facility is available),
     //     2. unrolled scalar loop, 8 characters per iteration,
     //     3. computed branch into a table of single-character copies for the remaining 0..7 characters,
     //     4. fixup of src and dst if they were advanced by one of the loops.
5232 unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
5233   assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);
5234 
5235   BLOCK_COMMENT("string_inflate {");
5236   int block_start = offset();
5237 
5238   Register   Rcnt = cnt;   // # characters (src: bytes, dst: char (2-byte)), remaining after current loop.
5239   Register   Rix  = tmp;   // loop index
5240   Register   Rsrc = src;   // addr(src array)
5241   Register   Rdst = dst;   // addr(dst array)
5242   Label      ScalarShortcut, AllDone;
5243 
5244 #if 0  // Sacrifice shortcuts for code compactness
5245   {
5246     //---<  shortcuts for short strings (very frequent)   >---
5247     Label   skipShortcut, skip4Shortcut;
5248     z_ltr(Rcnt, Rcnt);                     // absolutely nothing to do for strings of len == 0.
5249     z_brz(AllDone);
5250     clear_reg(Z_R0);                       // make sure registers are properly initialized.
5251     clear_reg(Z_R1);
5252     z_chi(Rcnt, 4);
5253     z_brne(skip4Shortcut);                 // 4 characters are very frequent
5254       z_icm(Z_R0, 5,    0, Rsrc);          // Treat exactly 4 characters specially.
5255       z_icm(Z_R1, 5,    2, Rsrc);
5256       z_stm(Z_R0, Z_R1, 0, Rdst);
5257       z_bru(AllDone);
5258     bind(skip4Shortcut);
5259 
5260     z_chi(Rcnt, 8);
5261     z_brh(skipShortcut);                   // There's a lot to do...
5262     z_lgfr(Z_R0, Rcnt);                    // remaining #characters (<= 8). Precond for scalar loop.
5263                                            // This does not destroy the "register cleared" state of Z_R0.
5264     z_brl(ScalarShortcut);                 // Just a few characters
5265       z_icmh(Z_R0, 5, 0, Rsrc);            // Treat exactly 8 characters specially.
5266       z_icmh(Z_R1, 5, 4, Rsrc);
5267       z_icm(Z_R0,  5, 2, Rsrc);
5268       z_icm(Z_R1,  5, 6, Rsrc);
5269       z_stmg(Z_R0, Z_R1, 0, Rdst);
5270       z_bru(AllDone);
5271     bind(skipShortcut);
5272   }
5273 #endif
5274   clear_reg(Z_R0);                         // make sure register is properly initialized.
                                                // Z_R0 holds the # characters handled by the vector loop;
                                                // it stays 0 if the vector loop is not emitted or is skipped.
5275 
5276   if (VM_Version::has_VectorFacility()) {
5277     const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
5278                                            // Otherwise just do nothing in vector mode.
5279                                            // Must be multiple of vector register length (16 bytes = 128 bits).
5280     const int  log_min_vcnt = exact_log2(min_vcnt);
5281     Label      VectorLoop, VectorDone;
5282 
5283     assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
5284     z_srak(Rix, Rcnt, log_min_vcnt);       // calculate # vector loop iterations
5285     z_brz(VectorDone);                     // skip if none
5286 
5287     z_sllg(Z_R0, Rix, log_min_vcnt);       // remember #chars that will be processed by vector loop
5288 
5289     bind(VectorLoop);
5290       z_vlm(Z_V20, Z_V21, 0, Rsrc);        // get next 32 characters (single-byte)
5291       add2reg(Rsrc, min_vcnt);
5292 
5293       z_vuplhb(Z_V22, Z_V20);              // V22 <- (expand) V20(high)
5294       z_vupllb(Z_V23, Z_V20);              // V23 <- (expand) V20(low)
5295       z_vuplhb(Z_V24, Z_V21);              // V24 <- (expand) V21(high)
5296       z_vupllb(Z_V25, Z_V21);              // V25 <- (expand) V21(low)
5297       z_vstm(Z_V22, Z_V25, 0, Rdst);       // store next 32 characters (4 vector registers = 64 bytes)
5298       add2reg(Rdst, min_vcnt*2);
5299 
5300       z_brct(Rix, VectorLoop);
5301 
5302     bind(VectorDone);
5303   }
5304 
5305   const int  min_cnt     =  8;             // Minimum #characters required to use unrolled scalar loop.
5306                                            // Otherwise just do nothing in unrolled scalar mode.
5307                                            // Must be multiple of 8.
5308   {
5309     const int  log_min_cnt = exact_log2(min_cnt);
5310     Label      UnrolledLoop, UnrolledDone;
5311 
5312 
         // Rix = Rcnt - Z_R0 (32-bit arithmetic), i.e. total count minus what the vector loop handled.
5313     if (VM_Version::has_DistinctOpnds()) {
5314       z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to process in unrolled loop
5315     } else {
5316       z_lr(Rix, Rcnt);
5317       z_sr(Rix, Z_R0);
5318     }
5319     z_sra(Rix, log_min_cnt);               // unrolled loop count
5320     z_brz(UnrolledDone);
5321 

5322     clear_reg(Z_R0);                       // work registers must be zero: icm(h) with mask 5 only inserts
5323     clear_reg(Z_R1);                       // every other byte, the bytes in between have to stay zero.

5324 
5325     bind(UnrolledLoop);
5326       z_icmh(Z_R0, 5, 0, Rsrc);            // chars 0,1 -> high word of Z_R0
5327       z_icmh(Z_R1, 5, 4, Rsrc);            // chars 4,5 -> high word of Z_R1
5328       z_icm(Z_R0,  5, 2, Rsrc);            // chars 2,3 -> low  word of Z_R0
5329       z_icm(Z_R1,  5, 6, Rsrc);            // chars 6,7 -> low  word of Z_R1
5330       add2reg(Rsrc, min_cnt);
5331 
5332       z_stmg(Z_R0, Z_R1, 0, Rdst);         // store 8 inflated characters (2 registers = 16 bytes)
5333 
5334       add2reg(Rdst, min_cnt*2);
5335       z_brct(Rix, UnrolledLoop);
5336 
5337     bind(UnrolledDone);
5338     z_lgfr(Z_R0, Rcnt);                    // # chars left over after unrolled loop.
5339     z_nilf(Z_R0, min_cnt-1);
5340     z_brnz(ScalarShortcut);                // if zero, there is nothing left to do for scalar loop.
5341                                            // Rix == 0 in all cases.
5342     z_sgfr(Z_R0, Rcnt);                    // negative # characters the ptrs have been advanced previously.
5343     z_agr(Rdst, Z_R0);                     // restore ptr, double the element count for Rdst restore.
5344     z_agr(Rdst, Z_R0);
5345     z_agr(Rsrc, Z_R0);                     // restore ptr.
5346     z_bru(AllDone);
5347   }
5348 
5349   {
5350     bind(ScalarShortcut);
5351     // Z_R0 must contain remaining # characters as 64-bit signed int here.
5352     //      register contents is preserved over scalar processing (for register fixup).
5353 
5354 #if 0  // Sacrifice shortcuts for code compactness
5355     {
5356       Label      ScalarDefault;
5357       z_chi(Rcnt, 2);
5358       z_brh(ScalarDefault);
5359       z_llc(Z_R0,  0, Z_R0, Rsrc);     // 6 bytes
5360       z_sth(Z_R0,  0, Z_R0, Rdst);     // 4 bytes
5361       z_brl(AllDone);
5362       z_llc(Z_R0,  1, Z_R0, Rsrc);     // 6 bytes
5363       z_sth(Z_R0,  2, Z_R0, Rdst);     // 4 bytes
5364       z_bru(AllDone);
5365       bind(ScalarDefault);
5366     }
5367 #endif
5368 
5369     Label   CodeTable;
5370     // Some comments on Rix calculation:
5371     //  - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions).
5372     //  - high word of both Rix and Rcnt may contain garbage
5373     //  - the final lngfr takes care of that garbage, extending the sign to high word
         //  - the factor 10 is the size of one table entry below (llc: 6 bytes + sth: 4 bytes).
         //    The table entries must keep exactly that size, otherwise the computed branch misses.
5374     z_sllg(Rix, Z_R0, 2);                // calculate Rix = 10*Z_R0 = (4*Z_R0 + Z_R0)*2
5375     z_ar(Rix, Z_R0);
5376     z_larl(Z_R1, CodeTable);
5377     z_sll(Rix, 1);
5378     z_lngfr(Rix, Rix);      // ix range: [0..7], after inversion & mult: [-(7*10)..(0*10)].
5379     z_bc(Assembler::bcondAlways, 0, Rix, Z_R1);  // branch to CodeTable - 10*(remaining # characters)
5380 
         // Table of single-character copies, highest character index first.
         // Entering n entries before CodeTable copies characters n-1..0.
5381     z_llc(Z_R1,  6, Z_R0, Rsrc);  // 6 bytes
5382     z_sth(Z_R1, 12, Z_R0, Rdst);  // 4 bytes
5383 
5384     z_llc(Z_R1,  5, Z_R0, Rsrc);
5385     z_sth(Z_R1, 10, Z_R0, Rdst);
5386 
5387     z_llc(Z_R1,  4, Z_R0, Rsrc);
5388     z_sth(Z_R1,  8, Z_R0, Rdst);
5389 
5390     z_llc(Z_R1,  3, Z_R0, Rsrc);
5391     z_sth(Z_R1,  6, Z_R0, Rdst);
5392 
5393     z_llc(Z_R1,  2, Z_R0, Rsrc);
5394     z_sth(Z_R1,  4, Z_R0, Rdst);
5395 
5396     z_llc(Z_R1,  1, Z_R0, Rsrc);
5397     z_sth(Z_R1,  2, Z_R0, Rdst);
5398 
5399     z_llc(Z_R1,  0, Z_R0, Rsrc);
5400     z_sth(Z_R1,  0, Z_R0, Rdst);
5401     bind(CodeTable);
5402 
5403     z_chi(Rcnt, 8);                        // no fixup for small strings. Rdst, Rsrc were not modified.
5404     z_brl(AllDone);
5405 
5406     z_sgfr(Z_R0, Rcnt);                    // (negative) # characters the ptrs have been advanced previously.
5407     z_agr(Rdst, Z_R0);                     // restore ptr, double the element count for Rdst restore.
5408     z_agr(Rdst, Z_R0);
5409     z_agr(Rsrc, Z_R0);                     // restore ptr.
5410   }
5411   bind(AllDone);
5412 
5413   BLOCK_COMMENT("} string_inflate");
5414   return offset() - block_start;
5415 }
5416 
5417 // Inflate byte[] to char[], length known at compile time.
5418 //   Restores: src, dst
5419 //   Kills:    tmp, Z_R0, Z_R1.
     //             Z_V20..Z_V25 may be written as well if VM_Version::has_VectorFacility().
     //   Returns:  offset() after code emission minus offset() before code emission
     //             (presumably the size of the generated code; unit is that of offset()).
5420 // Note:
5421 //   len is signed int. Counts # characters, not bytes.
     //   Because len is known when the code is generated, short instruction sequences are emitted
     //   straight-line with constant displacements (src_off/dst_off) instead of advancing the
     //   pointer registers. Only the loops advance src and dst; this is undone at the end.
5422 unsigned int MacroAssembler::string_inflate_const(Register src, Register dst, Register tmp, int len) {
5423   assert_different_registers(Z_R0, Z_R1, src, dst, tmp);
5424 
5425   BLOCK_COMMENT("string_inflate_const {");
5426   int block_start = offset();
5427 
5428   Register   Rix  = tmp;   // loop index
5429   Register   Rsrc = src;   // addr(src array)
5430   Register   Rdst = dst;   // addr(dst array)
5431   Label      ScalarShortcut, AllDone;  // NOTE(review): neither label is bound or branched to in this function.
5432   int        nprocessed = 0;  // # characters for which code has been emitted so far.
5433   int        src_off    = 0;  // compensate for saved (optimized away) ptr advancement.
5434   int        dst_off    = 0;  // compensate for saved (optimized away) ptr advancement.
5435   bool       restore_inputs = false;  // set when a loop was emitted which advances Rsrc/Rdst.
5436   bool       workreg_clear  = false;  // set when code to clear Z_R0 and Z_R1 has already been emitted.
5437 
       //---<  32 characters at a time, vector instructions  >---
5438   if ((len >= 32) && VM_Version::has_VectorFacility()) {
5439     const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
5440                                            // Otherwise just do nothing in vector mode.
5441                                            // Must be multiple of vector register length (16 bytes = 128 bits).
5442     const int  log_min_vcnt = exact_log2(min_vcnt);
5443     const int  iterations   = (len - nprocessed) >> log_min_vcnt;
5444     nprocessed             += iterations << log_min_vcnt;
5445     Label      VectorLoop;
5446 
5447     if (iterations == 1) {
           // Single iteration: no loop, no pointer advancement. Account for it in src_off/dst_off.
5448       z_vlm(Z_V20, Z_V21, 0+src_off, Rsrc);  // get next 32 characters (single-byte)
5449       z_vuplhb(Z_V22, Z_V20);                // V22 <- (expand) V20(high)
5450       z_vupllb(Z_V23, Z_V20);                // V23 <- (expand) V20(low)
5451       z_vuplhb(Z_V24, Z_V21);                // V24 <- (expand) V21(high)
5452       z_vupllb(Z_V25, Z_V21);                // V25 <- (expand) V21(low)
5453       z_vstm(Z_V22, Z_V25, 0+dst_off, Rdst); // store next 32 characters (4 vector registers = 64 bytes)
5454 
5455       src_off += min_vcnt;
5456       dst_off += min_vcnt*2;
5457     } else {
5458       restore_inputs = true;
5459 
5460       z_lgfi(Rix, len>>log_min_vcnt);      // same value as iterations (nprocessed was 0 when it was calculated).
5461       bind(VectorLoop);
5462         z_vlm(Z_V20, Z_V21, 0, Rsrc);        // get next 32 characters (single-byte)
5463         add2reg(Rsrc, min_vcnt);
5464 
5465         z_vuplhb(Z_V22, Z_V20);              // V22 <- (expand) V20(high)
5466         z_vupllb(Z_V23, Z_V20);              // V23 <- (expand) V20(low)
5467         z_vuplhb(Z_V24, Z_V21);              // V24 <- (expand) V21(high)
5468         z_vupllb(Z_V25, Z_V21);              // V25 <- (expand) V21(low)
5469         z_vstm(Z_V22, Z_V25, 0, Rdst);       // store next 32 characters (4 vector registers = 64 bytes)
5470         add2reg(Rdst, min_vcnt*2);
5471 
5472         z_brct(Rix, VectorLoop);
5473     }
5474   }
5475 
       //---<  16 characters, vector instructions (at most once)  >---
5476   if (((len-nprocessed) >= 16) && VM_Version::has_VectorFacility()) {
5477     const int  min_vcnt     = 16;          // Minimum #characters required to use vector instructions.
5478                                            // Otherwise just do nothing in vector mode.
5479                                            // Must be multiple of vector register length (16 bytes = 128 bits).
5480     const int  log_min_vcnt = exact_log2(min_vcnt);
5481     const int  iterations   = (len - nprocessed) >> log_min_vcnt;
5482     nprocessed             += iterations << log_min_vcnt;
5483     assert(iterations == 1, "must be!");
5484 
5485     z_vl(Z_V20, 0+src_off, Z_R0, Rsrc);    // get next 16 characters (single-byte)
5486     z_vuplhb(Z_V22, Z_V20);                // V22 <- (expand) V20(high)
5487     z_vupllb(Z_V23, Z_V20);                // V23 <- (expand) V20(low)
5488     z_vstm(Z_V22, Z_V23, 0+dst_off, Rdst); // store next 16 characters (2 vector registers = 32 bytes)
5489 
5490     src_off += min_vcnt;
5491     dst_off += min_vcnt*2;
5492   }
5493 
       //---<  8 characters at a time, scalar instructions  >---
       // Exactly 8 remaining characters are not handled here, but by case 8 of the switch below.
5494   if ((len-nprocessed) > 8) {
5495     const int  min_cnt     =  8;           // Minimum #characters required to use unrolled scalar loop.
5496                                            // Otherwise just do nothing in unrolled scalar mode.
5497                                            // Must be multiple of 8.
5498     const int  log_min_cnt = exact_log2(min_cnt);
5499     const int  iterations  = (len - nprocessed) >> log_min_cnt;
5500     nprocessed     += iterations << log_min_cnt;
5501 
5502     //---<  avoid loop overhead/ptr increment for small # iterations  >---
5503     if (iterations <= 2) {
           // First (or only) group of 8 characters, emitted straight-line.
5504       clear_reg(Z_R0);
5505       clear_reg(Z_R1);
5506       workreg_clear = true;
5507 
5508       z_icmh(Z_R0, 5, 0+src_off, Rsrc);
5509       z_icmh(Z_R1, 5, 4+src_off, Rsrc);
5510       z_icm(Z_R0,  5, 2+src_off, Rsrc);
5511       z_icm(Z_R1,  5, 6+src_off, Rsrc);
5512       z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
5513 
5514       src_off += min_cnt;
5515       dst_off += min_cnt*2;
5516     }
5517 
5518     if (iterations == 2) {
           // Second group of 8 characters. The work registers are not cleared again.
5519       z_icmh(Z_R0, 5, 0+src_off, Rsrc);
5520       z_icmh(Z_R1, 5, 4+src_off, Rsrc);
5521       z_icm(Z_R0,  5, 2+src_off, Rsrc);
5522       z_icm(Z_R1,  5, 6+src_off, Rsrc);
5523       z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
5524 
5525       src_off += min_cnt;
5526       dst_off += min_cnt*2;
5527     }
5528 
5529     if (iterations > 2) {
5530       Label      UnrolledLoop;
5531       restore_inputs  = true;
5532 
5533       clear_reg(Z_R0);
5534       clear_reg(Z_R1);
5535       workreg_clear = true;
5536 
5537       z_lgfi(Rix, iterations);
5538       bind(UnrolledLoop);
5539         z_icmh(Z_R0, 5, 0, Rsrc);
5540         z_icmh(Z_R1, 5, 4, Rsrc);
5541         z_icm(Z_R0,  5, 2, Rsrc);
5542         z_icm(Z_R1,  5, 6, Rsrc);
5543         add2reg(Rsrc, min_cnt);
5544 
5545         z_stmg(Z_R0, Z_R1, 0, Rdst);
5546         add2reg(Rdst, min_cnt*2);
5547 
5548         z_brct(Rix, UnrolledLoop);
5549     }
5550   }
5551 
       //---<  remaining 1..8 characters, one specialized straight-line sequence per count  >---
5552   if ((len-nprocessed) > 0) {
5553     switch (len-nprocessed) {
5554       case 8:
5555         if (!workreg_clear) {
5556           clear_reg(Z_R0);
5557           clear_reg(Z_R1);
5558         }
5559         z_icmh(Z_R0, 5, 0+src_off, Rsrc);
5560         z_icmh(Z_R1, 5, 4+src_off, Rsrc);
5561         z_icm(Z_R0,  5, 2+src_off, Rsrc);
5562         z_icm(Z_R1,  5, 6+src_off, Rsrc);
5563         z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
5564         break;
5565       case 7:
5566         if (!workreg_clear) {
5567           clear_reg(Z_R0);
5568           clear_reg(Z_R1);
5569         }
5570         clear_reg(Rix);                        // Rix serves as third work register here.
5571         z_icm(Z_R0,  5, 0+src_off, Rsrc);
5572         z_icm(Z_R1,  5, 2+src_off, Rsrc);
5573         z_icm(Rix,   5, 4+src_off, Rsrc);
5574         z_stm(Z_R0,  Z_R1, 0+dst_off, Rdst);
5575         z_llc(Z_R0,  6+src_off, Z_R0, Rsrc);   // Z_R0 is free for reuse, its contents was stored by stm above.
5576         z_st(Rix,    8+dst_off, Z_R0, Rdst);
5577         z_sth(Z_R0, 12+dst_off, Z_R0, Rdst);
5578         break;
5579       case 6:
5580         if (!workreg_clear) {
5581           clear_reg(Z_R0);
5582           clear_reg(Z_R1);
5583         }
5584         clear_reg(Rix);                        // Rix serves as third work register here.
5585         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5586         z_icm(Z_R1, 5, 2+src_off, Rsrc);
5587         z_icm(Rix,  5, 4+src_off, Rsrc);
5588         z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
5589         z_st(Rix,   8+dst_off, Z_R0, Rdst);
5590         break;
5591       case 5:
5592         if (!workreg_clear) {
5593           clear_reg(Z_R0);
5594           clear_reg(Z_R1);
5595         }
5596         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5597         z_icm(Z_R1, 5, 2+src_off, Rsrc);
5598         z_llc(Rix,  4+src_off, Z_R0, Rsrc);    // fifth character; Rix is not cleared before the llc.
5599         z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
5600         z_sth(Rix,  8+dst_off, Z_R0, Rdst);
5601         break;
5602       case 4:
5603         if (!workreg_clear) {
5604           clear_reg(Z_R0);
5605           clear_reg(Z_R1);
5606         }
5607         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5608         z_icm(Z_R1, 5, 2+src_off, Rsrc);
5609         z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
5610         break;
5611       case 3:
             // Only Z_R0 is filled via icm and therefore needs to be clear. Z_R1 is loaded via llc.
5612         if (!workreg_clear) {
5613           clear_reg(Z_R0);
5614         }
5615         z_llc(Z_R1, 2+src_off, Z_R0, Rsrc);
5616         z_icm(Z_R0, 5, 0+src_off, Rsrc);
5617         z_sth(Z_R1, 4+dst_off, Z_R0, Rdst);
5618         z_st(Z_R0,  0+dst_off, Rdst);
5619         break;
5620       case 2:
             // No clear_reg emitted here: both registers are loaded via llc, only halfwords are stored.
5621         z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
5622         z_llc(Z_R1, 1+src_off, Z_R0, Rsrc);
5623         z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
5624         z_sth(Z_R1, 2+dst_off, Z_R0, Rdst);
5625         break;
5626       case 1:
             // No clear_reg emitted here: the register is loaded via llc, only a halfword is stored.
5627         z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
5628         z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
5629         break;
5630       default:
5631         guarantee(false, "Impossible");
5632         break;
5633     }
5634     src_off   +=  len-nprocessed;
5635     dst_off   += (len-nprocessed)*2;
5636     nprocessed = len;
5637   }
5638 
5639   //---< restore modified input registers  >---
       // (nprocessed - src_off) characters were handled by loops which advanced the pointers.
       // The remaining src_off characters were addressed via displacement only.
5640   if ((nprocessed > 0) && restore_inputs) {
5641     z_agfi(Rsrc, -(nprocessed-src_off));
5642     if (nprocessed < 1000000000) { // avoid int overflow
5643       z_agfi(Rdst, -(nprocessed*2-dst_off));
5644     } else {
           // Same total adjustment as above, split in two so that nprocessed*2 is never calculated.
5645       z_agfi(Rdst, -(nprocessed-dst_off));
5646       z_agfi(Rdst, -nprocessed);
5647     }
5648   }
5649 
5650   BLOCK_COMMENT("} string_inflate_const");
5651   return offset() - block_start;
5652 }
5653 
5654 // Kills src.
5655 unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt,
5656                                            Register odd_reg, Register even_reg, Register tmp) {
5657   int block_start = offset();
5658   Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
5659   const Register addr = src, mask = tmp;
5660 
5661   BLOCK_COMMENT("has_negatives {");
5662 
5663   z_llgfr(Z_R1, cnt);      // Number of bytes to read. (Must be a positive simm32.)
5664   z_llilf(mask, 0x80808080);
5665   z_lhi(result, 1);        // Assume true.
5666   // Last possible addr for fast loop.
5667   z_lay(odd_reg, -16, Z_R1, src);
5668   z_chi(cnt, 16);
5669   z_brl(Lslow);
5670 


< prev index next >