919 // Simply use distance from start of const section (should be patched in the end). 920 long disp = toc_distance(); 921 922 RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp); 923 relocate(rspec); 924 z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords. 925 } 926 927 // PCrelative TOC access. 928 // Load from anywhere pcrelative (with relocation of load instr) 929 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) { 930 address pc = this->pc(); 931 ptrdiff_t total_distance = dataLocation - pc; 932 RelocationHolder rspec = internal_word_Relocation::spec(dataLocation); 933 934 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 935 assert(total_distance != 0, "sanity"); 936 937 // Some extra safety net. 938 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 939 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away"); 940 } 941 942 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 943 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 944 } 945 946 947 // PCrelative TOC access. 948 // Load from anywhere pcrelative (with relocation of load instr) 949 // loaded addr has to be relocated when added to constant pool. 950 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { 951 address pc = this->pc(); 952 ptrdiff_t total_distance = addrLocation - pc; 953 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); 954 955 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 956 957 // Some extra safety net. 958 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 959 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away"); 960 } 961 962 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 963 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 964 } 965 966 // Generic operation: load a value from memory and test. 
967 // CondCode indicates the sign (<0, ==0, >0) of the loaded value. 968 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { 969 z_lb(dst, a); 970 z_ltr(dst, dst); 971 } 972 973 void MacroAssembler::load_and_test_short(Register dst, const Address &a) { 974 int64_t disp = a.disp20(); 975 if (Displacement::is_shortDisp(disp)) { 976 z_lh(dst, a); 977 } else if (Displacement::is_longDisp(disp)) { 978 z_lhy(dst, a); 979 } else { 1008 } else { 1009 ShouldNotReachHere(); 1010 } 1011 } 1012 1013 // Test a bit in a register. Result is reflected in CC. 1014 void MacroAssembler::testbit(Register r, unsigned int bitPos) { 1015 if (bitPos < 16) { 1016 z_tmll(r, 1U<<bitPos); 1017 } else if (bitPos < 32) { 1018 z_tmlh(r, 1U<<(bitPos-16)); 1019 } else if (bitPos < 48) { 1020 z_tmhl(r, 1U<<(bitPos-32)); 1021 } else if (bitPos < 64) { 1022 z_tmhh(r, 1U<<(bitPos-48)); 1023 } else { 1024 ShouldNotReachHere(); 1025 } 1026 } 1027 1028 // Clear a register, i.e. load const zero into reg. 1029 // Return len (in bytes) of generated instruction(s). 1030 // whole_reg: Clear 64 bits if true, 32 bits otherwise. 1031 // set_cc: Use instruction that sets the condition code, if true. 1032 int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) { 1033 unsigned int start_off = offset(); 1034 if (whole_reg) { 1035 set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0); 1036 } else { // Only 32bit register. 1037 set_cc ? 
z_xr(r, r) : z_lhi(r, 0); 1038 } 1039 return offset() - start_off; 1040 } 1041 1042 #ifdef ASSERT 1043 int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) { 1044 switch (pattern_len) { 1045 case 1: 1046 pattern = (pattern & 0x000000ff) | ((pattern & 0x000000ff)<<8); 1047 case 2: 4879 if (VM_Version::has_ExecuteExtensions()) { 4880 z_exrl(Z_R1, MVC_template); 4881 } else { 4882 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4883 } 4884 4885 bind(done); 4886 4887 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4888 4889 int block_end = offset(); 4890 return block_end - block_start; 4891 } 4892 4893 //------------------------------------------------------ 4894 // Special String Intrinsics. Implementation 4895 //------------------------------------------------------ 4896 4897 // Intrinsics for CompactStrings 4898 4899 // Compress char[] to byte[]. odd_reg contains cnt. Kills dst. Early clobber: result 4900 // The result is the number of characters copied before the first incompatible character was found. 4901 // If tmp2 is provided and the compression fails, the compression stops exactly at this point and the result is precise. 4902 // 4903 // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure: 4904 // - Different number of characters may have been written to dead array (if tmp2 not provided). 4905 // - Returns a number <cnt instead of 0. (Result gets compared with cnt.) 4906 unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register odd_reg, 4907 Register even_reg, Register tmp, Register tmp2) { 4908 int block_start = offset(); 4909 Label Lloop1, Lloop2, Lslow, Ldone; 4910 const Register addr2 = dst, ind1 = result, mask = tmp; 4911 const bool precise = (tmp2 != noreg); 4912 4913 BLOCK_COMMENT("string_compress {"); 4914 4915 z_sll(odd_reg, 1); // Number of bytes to read. (Must be a positive simm32.) 4916 clear_reg(ind1); // Index to read. 
4917 z_llilf(mask, 0xFF00FF00); 4918 z_ahi(odd_reg, -16); // Last possible index for fast loop. 4919 z_brl(Lslow); 4920 4921 // ind1: index, even_reg: index increment, odd_reg: index limit 4922 z_iihf(mask, 0xFF00FF00); 4923 z_lhi(even_reg, 16); 4924 4925 bind(Lloop1); // 8 Characters per iteration. 4926 z_lg(Z_R0, Address(src, ind1)); 4927 z_lg(Z_R1, Address(src, ind1, 8)); 4928 if (precise) { 4929 if (VM_Version::has_DistinctOpnds()) { 4930 z_ogrk(tmp2, Z_R0, Z_R1); 4931 } else { 4932 z_lgr(tmp2, Z_R0); 4933 z_ogr(tmp2, Z_R1); 4934 } 4935 z_ngr(tmp2, mask); 4936 z_brne(Lslow); // Failed fast case, retry slowly. 4937 } 4938 z_stcmh(Z_R0, 5, 0, addr2); 4939 z_stcm(Z_R0, 5, 2, addr2); 4940 if (!precise) { z_ogr(Z_R0, Z_R1); } 4941 z_stcmh(Z_R1, 5, 4, addr2); 4942 z_stcm(Z_R1, 5, 6, addr2); 4943 if (!precise) { 4944 z_ngr(Z_R0, mask); 4945 z_brne(Ldone); // Failed (more than needed was written). 4946 } 4947 z_aghi(addr2, 8); 4948 z_brxle(ind1, even_reg, Lloop1); 4949 4950 bind(Lslow); 4951 // Compute index limit and skip if negative. 4952 z_ahi(odd_reg, 16-2); // Last possible index for slow loop. 4953 z_lhi(even_reg, 2); 4954 z_cr(ind1, odd_reg); 4955 z_brh(Ldone); 4956 4957 bind(Lloop2); // 1 Character per iteration. 4958 z_llh(Z_R0, Address(src, ind1)); 4959 z_tmll(Z_R0, 0xFF00); 4960 z_brnaz(Ldone); // Failed slow case: Return number of written characters. 4961 z_stc(Z_R0, Address(addr2)); 4962 z_aghi(addr2, 1); 4963 z_brxle(ind1, even_reg, Lloop2); 4964 4965 bind(Ldone); // result = ind1 = 2*cnt 4966 z_srl(ind1, 1); 4967 4968 BLOCK_COMMENT("} string_compress"); 4969 4970 return offset() - block_start; 4971 } 4972 4973 // Inflate byte[] to char[]. 
4974 unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) { 4975 int block_start = offset(); 4976 4977 BLOCK_COMMENT("string_inflate {"); 4978 4979 Register stop_char = Z_R0; 4980 Register table = Z_R1; 4981 Register src_addr = tmp; 4982 4983 assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt); 4984 assert(dst->encoding()%2 == 0, "must be even reg"); 4985 assert(cnt->encoding()%2 == 1, "must be odd reg"); 4986 assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair"); 4987 4988 StubRoutines::zarch::generate_load_trot_table_addr(this, table); // kills Z_R0 (if ASSERT) 4989 clear_reg(stop_char); // Stop character. Not used here, but initialized to have a defined value. 4990 lgr_if_needed(src_addr, src); 4991 z_llgfr(cnt, cnt); // # src characters, must be a positive simm32. 4992 4993 translate_ot(dst, src_addr, /* mask = */ 0x0001); 4994 4995 BLOCK_COMMENT("} string_inflate"); 4996 4997 return offset() - block_start; 4998 } 4999 5000 // Inflate byte[] to char[]. odd_reg contains cnt. Kills src. 5001 unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register odd_reg, 5002 Register even_reg, Register tmp) { 5003 int block_start = offset(); 5004 5005 BLOCK_COMMENT("string_inflate {"); 5006 5007 Label Lloop1, Lloop2, Lslow, Ldone; 5008 const Register addr1 = src, ind2 = tmp; 5009 5010 z_sll(odd_reg, 1); // Number of bytes to write. (Must be a positive simm32.) 5011 clear_reg(ind2); // Index to write. 5012 z_ahi(odd_reg, -16); // Last possible index for fast loop. 5013 z_brl(Lslow); 5014 5015 // ind2: index, even_reg: index increment, odd_reg: index limit 5016 clear_reg(Z_R0); 5017 clear_reg(Z_R1); 5018 z_lhi(even_reg, 16); 5019 5020 bind(Lloop1); // 8 Characters per iteration. 
5021 z_icmh(Z_R0, 5, 0, addr1); 5022 z_icmh(Z_R1, 5, 4, addr1); 5023 z_icm(Z_R0, 5, 2, addr1); 5024 z_icm(Z_R1, 5, 6, addr1); 5025 z_aghi(addr1, 8); 5026 z_stg(Z_R0, Address(dst, ind2)); 5027 z_stg(Z_R1, Address(dst, ind2, 8)); 5028 z_brxle(ind2, even_reg, Lloop1); 5029 5030 bind(Lslow); 5031 // Compute index limit and skip if negative. 5032 z_ahi(odd_reg, 16-2); // Last possible index for slow loop. 5033 z_lhi(even_reg, 2); 5034 z_cr(ind2, odd_reg); 5035 z_brh(Ldone); 5036 5037 bind(Lloop2); // 1 Character per iteration. 5038 z_llc(Z_R0, Address(addr1)); 5039 z_sth(Z_R0, Address(dst, ind2)); 5040 z_aghi(addr1, 1); 5041 z_brxle(ind2, even_reg, Lloop2); 5042 5043 bind(Ldone); 5044 5045 BLOCK_COMMENT("} string_inflate"); 5046 5047 return offset() - block_start; 5048 } 5049 5050 // Kills src. 5051 unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt, 5052 Register odd_reg, Register even_reg, Register tmp) { 5053 int block_start = offset(); 5054 Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone; 5055 const Register addr = src, mask = tmp; 5056 5057 BLOCK_COMMENT("has_negatives {"); 5058 5059 z_llgfr(Z_R1, cnt); // Number of bytes to read. (Must be a positive simm32.) 5060 z_llilf(mask, 0x80808080); 5061 z_lhi(result, 1); // Assume true. 5062 // Last possible addr for fast loop. 5063 z_lay(odd_reg, -16, Z_R1, src); 5064 z_chi(cnt, 16); 5065 z_brl(Lslow); 5066 | 919 // Simply use distance from start of const section (should be patched in the end). 920 long disp = toc_distance(); 921 922 RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp); 923 relocate(rspec); 924 z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords. 925 } 926 927 // PCrelative TOC access. 
928 // Load from anywhere pcrelative (with relocation of load instr) 929 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) { 930 address pc = this->pc(); 931 ptrdiff_t total_distance = dataLocation - pc; 932 RelocationHolder rspec = internal_word_Relocation::spec(dataLocation); 933 934 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 935 assert(total_distance != 0, "sanity"); 936 937 // Some extra safety net. 938 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 939 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 940 } 941 942 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 943 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 944 } 945 946 947 // PCrelative TOC access. 948 // Load from anywhere pcrelative (with relocation of load instr) 949 // loaded addr has to be relocated when added to constant pool. 950 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { 951 address pc = this->pc(); 952 ptrdiff_t total_distance = addrLocation - pc; 953 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); 954 955 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); 956 957 // Some extra safety net. 958 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { 959 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); 960 } 961 962 (this)->relocate(rspec, relocInfo::pcrel_addr_format); 963 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); 964 } 965 966 // Generic operation: load a value from memory and test. 967 // CondCode indicates the sign (<0, ==0, >0) of the loaded value. 
968 void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { 969 z_lb(dst, a); 970 z_ltr(dst, dst); 971 } 972 973 void MacroAssembler::load_and_test_short(Register dst, const Address &a) { 974 int64_t disp = a.disp20(); 975 if (Displacement::is_shortDisp(disp)) { 976 z_lh(dst, a); 977 } else if (Displacement::is_longDisp(disp)) { 978 z_lhy(dst, a); 979 } else { 1008 } else { 1009 ShouldNotReachHere(); 1010 } 1011 } 1012 1013 // Test a bit in a register. Result is reflected in CC. 1014 void MacroAssembler::testbit(Register r, unsigned int bitPos) { 1015 if (bitPos < 16) { 1016 z_tmll(r, 1U<<bitPos); 1017 } else if (bitPos < 32) { 1018 z_tmlh(r, 1U<<(bitPos-16)); 1019 } else if (bitPos < 48) { 1020 z_tmhl(r, 1U<<(bitPos-32)); 1021 } else if (bitPos < 64) { 1022 z_tmhh(r, 1U<<(bitPos-48)); 1023 } else { 1024 ShouldNotReachHere(); 1025 } 1026 } 1027 1028 void MacroAssembler::prefetch_read(Address a) { 1029 z_pfd(1, a.disp20(), a.indexOrR0(), a.base()); 1030 } 1031 void MacroAssembler::prefetch_update(Address a) { 1032 z_pfd(2, a.disp20(), a.indexOrR0(), a.base()); 1033 } 1034 1035 // Clear a register, i.e. load const zero into reg. 1036 // Return len (in bytes) of generated instruction(s). 1037 // whole_reg: Clear 64 bits if true, 32 bits otherwise. 1038 // set_cc: Use instruction that sets the condition code, if true. 1039 int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) { 1040 unsigned int start_off = offset(); 1041 if (whole_reg) { 1042 set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0); 1043 } else { // Only 32bit register. 1044 set_cc ? 
z_xr(r, r) : z_lhi(r, 0); 1045 } 1046 return offset() - start_off; 1047 } 1048 1049 #ifdef ASSERT 1050 int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) { 1051 switch (pattern_len) { 1052 case 1: 1053 pattern = (pattern & 0x000000ff) | ((pattern & 0x000000ff)<<8); 1054 case 2: 4886 if (VM_Version::has_ExecuteExtensions()) { 4887 z_exrl(Z_R1, MVC_template); 4888 } else { 4889 z_ex(tmp1_reg, 0, Z_R0, Z_R1); 4890 } 4891 4892 bind(done); 4893 4894 BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); 4895 4896 int block_end = offset(); 4897 return block_end - block_start; 4898 } 4899 4900 //------------------------------------------------------ 4901 // Special String Intrinsics. Implementation 4902 //------------------------------------------------------ 4903 4904 // Intrinsics for CompactStrings 4905 4906 // Compress char[] to byte[]. 4907 // Restores: src, dst 4908 // Uses: cnt 4909 // Kills: tmp, Z_R0, Z_R1. 4910 // Early clobber: result. 4911 // Note: 4912 // cnt is signed int. Do not rely on high word! 4913 // counts # characters, not bytes. 4914 // The result is the number of characters copied before the first incompatible character was found. 4915 // If precise is true, the processing stops exactly at this point. Otherwise, the result may be off 4916 // by a few bytes. The result always indicates the number of copied characters. 4917 // 4918 // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure: 4919 // - Different number of characters may have been written to dead array (if precise is false). 4920 // - Returns a number <cnt instead of 0. (Result gets compared with cnt.) 
4921 unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt, 4922 Register tmp, bool precise) { 4923 assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp); 4924 4925 if (precise) { 4926 BLOCK_COMMENT("encode_iso_array {"); 4927 } else { 4928 BLOCK_COMMENT("string_compress {"); 4929 } 4930 int block_start = offset(); 4931 4932 Register Rsrc = src; 4933 Register Rdst = dst; 4934 Register Rix = tmp; 4935 Register Rcnt = cnt; 4936 Register Rmask = result; // holds incompatibility check mask until result value is stored. 4937 Label ScalarShortcut, AllDone; 4938 4939 z_iilf(Rmask, 0xFF00FF00); 4940 z_iihf(Rmask, 0xFF00FF00); 4941 4942 #if 0 // Sacrifice shortcuts for code compactness 4943 { 4944 //---< shortcuts for short strings (very frequent) >--- 4945 // Strings with 4 and 8 characters were fond to occur very frequently. 4946 // Therefore, we handle them right away with minimal overhead. 4947 Label skipShortcut, skip4Shortcut, skip8Shortcut; 4948 Register Rout = Z_R0; 4949 z_chi(Rcnt, 4); 4950 z_brne(skip4Shortcut); // 4 characters are very frequent 4951 z_lg(Z_R0, 0, Rsrc); // Treat exactly 4 characters specially. 4952 if (VM_Version::has_DistinctOpnds()) { 4953 Rout = Z_R0; 4954 z_ngrk(Rix, Z_R0, Rmask); 4955 } else { 4956 Rout = Rix; 4957 z_lgr(Rix, Z_R0); 4958 z_ngr(Z_R0, Rmask); 4959 } 4960 z_brnz(skipShortcut); 4961 z_stcmh(Rout, 5, 0, Rdst); 4962 z_stcm(Rout, 5, 2, Rdst); 4963 z_lgfr(result, Rcnt); 4964 z_bru(AllDone); 4965 bind(skip4Shortcut); 4966 4967 z_chi(Rcnt, 8); 4968 z_brne(skip8Shortcut); // There's more to do... 4969 z_lmg(Z_R0, Z_R1, 0, Rsrc); // Treat exactly 8 characters specially. 
4970 if (VM_Version::has_DistinctOpnds()) { 4971 Rout = Z_R0; 4972 z_ogrk(Rix, Z_R0, Z_R1); 4973 z_ngr(Rix, Rmask); 4974 } else { 4975 Rout = Rix; 4976 z_lgr(Rix, Z_R0); 4977 z_ogr(Z_R0, Z_R1); 4978 z_ngr(Z_R0, Rmask); 4979 } 4980 z_brnz(skipShortcut); 4981 z_stcmh(Rout, 5, 0, Rdst); 4982 z_stcm(Rout, 5, 2, Rdst); 4983 z_stcmh(Z_R1, 5, 4, Rdst); 4984 z_stcm(Z_R1, 5, 6, Rdst); 4985 z_lgfr(result, Rcnt); 4986 z_bru(AllDone); 4987 4988 bind(skip8Shortcut); 4989 clear_reg(Z_R0, true, false); // #characters already processed (none). Precond for scalar loop. 4990 z_brl(ScalarShortcut); // Just a few characters 4991 4992 bind(skipShortcut); 4993 } 4994 #endif 4995 clear_reg(Z_R0); // make sure register is properly initialized. 4996 4997 if (VM_Version::has_VectorFacility()) { 4998 const int min_vcnt = 32; // Minimum #characters required to use vector instructions. 4999 // Otherwise just do nothing in vector mode. 5000 // Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)). 
5001 const int log_min_vcnt = exact_log2(min_vcnt); 5002 Label VectorLoop, VectorDone, VectorBreak; 5003 5004 VectorRegister Vtmp1 = Z_V16; 5005 VectorRegister Vtmp2 = Z_V17; 5006 VectorRegister Vmask = Z_V18; 5007 VectorRegister Vzero = Z_V19; 5008 VectorRegister Vsrc_first = Z_V20; 5009 VectorRegister Vsrc_last = Z_V23; 5010 5011 assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error"); 5012 assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()"); 5013 z_srak(Rix, Rcnt, log_min_vcnt); // # vector loop iterations 5014 z_brz(VectorDone); // not enough data for vector loop 5015 5016 z_vzero(Vzero); // all zeroes 5017 z_vgmh(Vmask, 0, 7); // generate 0xff00 mask for all 2-byte elements 5018 z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop 5019 5020 bind(VectorLoop); 5021 z_vlm(Vsrc_first, Vsrc_last, 0, Rsrc); 5022 add2reg(Rsrc, min_vcnt*2); 5023 5024 //---< check for incompatible character >--- 5025 z_vo(Vtmp1, Z_V20, Z_V21); 5026 z_vo(Vtmp2, Z_V22, Z_V23); 5027 z_vo(Vtmp1, Vtmp1, Vtmp2); 5028 z_vn(Vtmp1, Vtmp1, Vmask); 5029 z_vceqhs(Vtmp1, Vtmp1, Vzero); // high half of all chars must be zero for successful compress. 5030 z_brne(VectorBreak); // break vector loop, incompatible character found. 5031 // re-process data from current iteration in break handler. 5032 5033 //---< pack & store characters >--- 5034 z_vpkh(Vtmp1, Z_V20, Z_V21); // pack (src1, src2) -> tmp1 5035 z_vpkh(Vtmp2, Z_V22, Z_V23); // pack (src3, src4) -> tmp2 5036 z_vstm(Vtmp1, Vtmp2, 0, Rdst); // store packed string 5037 add2reg(Rdst, min_vcnt); 5038 5039 z_brct(Rix, VectorLoop); 5040 5041 z_bru(VectorDone); 5042 5043 bind(VectorBreak); 5044 z_sll(Rix, log_min_vcnt); // # chars processed so far in VectorLoop, excl. current iteration. 5045 z_sr(Z_R0, Rix); // correct # chars processed in total. 
5046 5047 bind(VectorDone); 5048 } 5049 5050 { 5051 const int min_cnt = 8; // Minimum #characters required to use unrolled loop. 5052 // Otherwise just do nothing in unrolled loop. 5053 // Must be multiple of 8. 5054 const int log_min_cnt = exact_log2(min_cnt); 5055 Label UnrolledLoop, UnrolledDone, UnrolledBreak; 5056 5057 if (VM_Version::has_DistinctOpnds()) { 5058 z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop 5059 } else { 5060 z_lr(Rix, Rcnt); 5061 z_sr(Rix, Z_R0); 5062 } 5063 z_sra(Rix, log_min_cnt); // unrolled loop count 5064 z_brz(UnrolledDone); 5065 5066 bind(UnrolledLoop); 5067 z_lmg(Z_R0, Z_R1, 0, Rsrc); 5068 if (precise) { 5069 z_ogr(Z_R1, Z_R0); // check all 8 chars for incompatibility 5070 z_ngr(Z_R1, Rmask); 5071 z_brnz(UnrolledBreak); 5072 5073 z_lg(Z_R1, 8, Rsrc); // reload destroyed register 5074 z_stcmh(Z_R0, 5, 0, Rdst); 5075 z_stcm(Z_R0, 5, 2, Rdst); 5076 } else { 5077 z_stcmh(Z_R0, 5, 0, Rdst); 5078 z_stcm(Z_R0, 5, 2, Rdst); 5079 5080 z_ogr(Z_R0, Z_R1); 5081 z_ngr(Z_R0, Rmask); 5082 z_brnz(UnrolledBreak); 5083 } 5084 z_stcmh(Z_R1, 5, 4, Rdst); 5085 z_stcm(Z_R1, 5, 6, Rdst); 5086 5087 add2reg(Rsrc, min_cnt*2); 5088 add2reg(Rdst, min_cnt); 5089 z_brct(Rix, UnrolledLoop); 5090 5091 z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop. 5092 z_nilf(Z_R0, ~(min_cnt-1)); 5093 z_tmll(Rcnt, min_cnt-1); 5094 z_brnaz(ScalarShortcut); // if all bits zero, there is nothing left to do for scalar loop. 5095 // Rix == 0 in all cases. 5096 z_lgfr(result, Rcnt); // all characters processed. 5097 z_sgfr(Rdst, Rcnt); // restore ptr 5098 z_sgfr(Rsrc, Rcnt); // restore ptr, double the element count for Rsrc restore 5099 z_sgfr(Rsrc, Rcnt); 5100 z_bru(AllDone); 5101 5102 bind(UnrolledBreak); 5103 z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop 5104 z_nilf(Z_R0, ~(min_cnt-1)); 5105 z_sll(Rix, log_min_cnt); // # chars processed so far in UnrolledLoop, excl. current iteration. 
5106 z_sr(Z_R0, Rix); // correct # chars processed in total. 5107 if (!precise) { 5108 z_lgfr(result, Z_R0); 5109 z_aghi(result, min_cnt/2); // min_cnt/2 characters have already been written 5110 // but ptrs were not updated yet. 5111 z_sgfr(Rdst, Z_R0); // restore ptr 5112 z_sgfr(Rsrc, Z_R0); // restore ptr, double the element count for Rsrc restore 5113 z_sgfr(Rsrc, Z_R0); 5114 z_bru(AllDone); 5115 } 5116 bind(UnrolledDone); 5117 } 5118 5119 { 5120 Label ScalarLoop, ScalarDone, ScalarBreak; 5121 5122 bind(ScalarShortcut); 5123 z_ltgfr(result, Rcnt); 5124 z_brz(AllDone); 5125 5126 #if 0 // Sacrifice shortcuts for code compactness 5127 { 5128 //---< Special treatment for very short strings (one or two characters) >--- 5129 // For these strings, we are sure that the above code was skipped. 5130 // Thus, no registers were modified, register restore is not required. 5131 Label ScalarDoit, Scalar2Char; 5132 z_chi(Rcnt, 2); 5133 z_brh(ScalarDoit); 5134 z_llh(Z_R1, 0, Z_R0, Rsrc); 5135 z_bre(Scalar2Char); 5136 z_tmll(Z_R1, 0xff00); 5137 z_lghi(result, 0); // cnt == 1, first char invalid, no chars successfully processed 5138 z_brnaz(AllDone); 5139 z_stc(Z_R1, 0, Z_R0, Rdst); 5140 z_lghi(result, 1); 5141 z_bru(AllDone); 5142 5143 bind(Scalar2Char); 5144 z_llh(Z_R0, 2, Z_R0, Rsrc); 5145 z_tmll(Z_R1, 0xff00); 5146 z_lghi(result, 0); // cnt == 2, first char invalid, no chars successfully processed 5147 z_brnaz(AllDone); 5148 z_stc(Z_R1, 0, Z_R0, Rdst); 5149 z_tmll(Z_R0, 0xff00); 5150 z_lghi(result, 1); // cnt == 2, second char invalid, one char successfully processed 5151 z_brnaz(AllDone); 5152 z_stc(Z_R0, 1, Z_R0, Rdst); 5153 z_lghi(result, 2); 5154 z_bru(AllDone); 5155 5156 bind(ScalarDoit); 5157 } 5158 #endif 5159 5160 if (VM_Version::has_DistinctOpnds()) { 5161 z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop 5162 } else { 5163 z_lr(Rix, Rcnt); 5164 z_sr(Rix, Z_R0); 5165 } 5166 z_lgfr(result, Rcnt); // # processed characters (if all runs ok). 
5167 z_brz(ScalarDone); 5168 5169 bind(ScalarLoop); 5170 z_llh(Z_R1, 0, Z_R0, Rsrc); 5171 z_tmll(Z_R1, 0xff00); 5172 z_brnaz(ScalarBreak); 5173 z_stc(Z_R1, 0, Z_R0, Rdst); 5174 add2reg(Rsrc, 2); 5175 add2reg(Rdst, 1); 5176 z_brct(Rix, ScalarLoop); 5177 5178 z_bru(ScalarDone); 5179 5180 bind(ScalarBreak); 5181 z_sr(result, Rix); 5182 5183 bind(ScalarDone); 5184 z_sgfr(Rdst, result); // restore ptr 5185 z_sgfr(Rsrc, result); // restore ptr, double the element count for Rsrc restore 5186 z_sgfr(Rsrc, result); 5187 } 5188 bind(AllDone); 5189 5190 if (precise) { 5191 BLOCK_COMMENT("} encode_iso_array"); 5192 } else { 5193 BLOCK_COMMENT("} string_compress"); 5194 } 5195 return offset() - block_start; 5196 } 5197 5198 // Inflate byte[] to char[]. 5199 unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) { 5200 int block_start = offset(); 5201 5202 BLOCK_COMMENT("string_inflate {"); 5203 5204 Register stop_char = Z_R0; 5205 Register table = Z_R1; 5206 Register src_addr = tmp; 5207 5208 assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt); 5209 assert(dst->encoding()%2 == 0, "must be even reg"); 5210 assert(cnt->encoding()%2 == 1, "must be odd reg"); 5211 assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair"); 5212 5213 StubRoutines::zarch::generate_load_trot_table_addr(this, table); // kills Z_R0 (if ASSERT) 5214 clear_reg(stop_char); // Stop character. Not used here, but initialized to have a defined value. 5215 lgr_if_needed(src_addr, src); 5216 z_llgfr(cnt, cnt); // # src characters, must be a positive simm32. 5217 5218 translate_ot(dst, src_addr, /* mask = */ 0x0001); 5219 5220 BLOCK_COMMENT("} string_inflate"); 5221 5222 return offset() - block_start; 5223 } 5224 5225 // Inflate byte[] to char[]. 5226 // Restores: src, dst 5227 // Uses: cnt 5228 // Kills: tmp, Z_R0, Z_R1. 5229 // Note: 5230 // cnt is signed int. Do not rely on high word! 5231 // counts # characters, not bytes. 
5232 unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) { 5233 assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp); 5234 5235 BLOCK_COMMENT("string_inflate {"); 5236 int block_start = offset(); 5237 5238 Register Rcnt = cnt; // # characters (src: bytes, dst: char (2-byte)), remaining after current loop. 5239 Register Rix = tmp; // loop index 5240 Register Rsrc = src; // addr(src array) 5241 Register Rdst = dst; // addr(dst array) 5242 Label ScalarShortcut, AllDone; 5243 5244 #if 0 // Sacrifice shortcuts for code compactness 5245 { 5246 //---< shortcuts for short strings (very frequent) >--- 5247 Label skipShortcut, skip4Shortcut; 5248 z_ltr(Rcnt, Rcnt); // absolutely nothing to do for strings of len == 0. 5249 z_brz(AllDone); 5250 clear_reg(Z_R0); // make sure registers are properly initialized. 5251 clear_reg(Z_R1); 5252 z_chi(Rcnt, 4); 5253 z_brne(skip4Shortcut); // 4 characters are very frequent 5254 z_icm(Z_R0, 5, 0, Rsrc); // Treat exactly 4 characters specially. 5255 z_icm(Z_R1, 5, 2, Rsrc); 5256 z_stm(Z_R0, Z_R1, 0, Rdst); 5257 z_bru(AllDone); 5258 bind(skip4Shortcut); 5259 5260 z_chi(Rcnt, 8); 5261 z_brh(skipShortcut); // There's a lot to do... 5262 z_lgfr(Z_R0, Rcnt); // remaining #characters (<= 8). Precond for scalar loop. 5263 // This does not destroy the "register cleared" state of Z_R0. 5264 z_brl(ScalarShortcut); // Just a few characters 5265 z_icmh(Z_R0, 5, 0, Rsrc); // Treat exactly 8 characters specially. 5266 z_icmh(Z_R1, 5, 4, Rsrc); 5267 z_icm(Z_R0, 5, 2, Rsrc); 5268 z_icm(Z_R1, 5, 6, Rsrc); 5269 z_stmg(Z_R0, Z_R1, 0, Rdst); 5270 z_bru(AllDone); 5271 bind(skipShortcut); 5272 } 5273 #endif 5274 clear_reg(Z_R0); // make sure register is properly initialized. 5275 5276 if (VM_Version::has_VectorFacility()) { 5277 const int min_vcnt = 32; // Minimum #characters required to use vector instructions. 5278 // Otherwise just do nothing in vector mode. 
5279 // Must be multiple of vector register length (16 bytes = 128 bits). 5280 const int log_min_vcnt = exact_log2(min_vcnt); 5281 Label VectorLoop, VectorDone; 5282 5283 assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()"); 5284 z_srak(Rix, Rcnt, log_min_vcnt); // calculate # vector loop iterations 5285 z_brz(VectorDone); // skip if none 5286 5287 z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop 5288 5289 bind(VectorLoop); 5290 z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte) 5291 add2reg(Rsrc, min_vcnt); 5292 5293 z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high) 5294 z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low) 5295 z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high) 5296 z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low) 5297 z_vstm(Z_V22, Z_V25, 0, Rdst); // store next 32 bytes 5298 add2reg(Rdst, min_vcnt*2); 5299 5300 z_brct(Rix, VectorLoop); 5301 5302 bind(VectorDone); 5303 } 5304 5305 const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop. 5306 // Otherwise just do nothing in unrolled scalar mode. 5307 // Must be multiple of 8. 5308 { 5309 const int log_min_cnt = exact_log2(min_cnt); 5310 Label UnrolledLoop, UnrolledDone; 5311 5312 5313 if (VM_Version::has_DistinctOpnds()) { 5314 z_srk(Rix, Rcnt, Z_R0); // remaining # chars to process in unrolled loop 5315 } else { 5316 z_lr(Rix, Rcnt); 5317 z_sr(Rix, Z_R0); 5318 } 5319 z_sra(Rix, log_min_cnt); // unrolled loop count 5320 z_brz(UnrolledDone); 5321 5322 clear_reg(Z_R0); 5323 clear_reg(Z_R1); 5324 5325 bind(UnrolledLoop); 5326 z_icmh(Z_R0, 5, 0, Rsrc); 5327 z_icmh(Z_R1, 5, 4, Rsrc); 5328 z_icm(Z_R0, 5, 2, Rsrc); 5329 z_icm(Z_R1, 5, 6, Rsrc); 5330 add2reg(Rsrc, min_cnt); 5331 5332 z_stmg(Z_R0, Z_R1, 0, Rdst); 5333 5334 add2reg(Rdst, min_cnt*2); 5335 z_brct(Rix, UnrolledLoop); 5336 5337 bind(UnrolledDone); 5338 z_lgfr(Z_R0, Rcnt); // # chars left over after unrolled loop. 
5339 z_nilf(Z_R0, min_cnt-1); 5340 z_brnz(ScalarShortcut); // if zero, there is nothing left to do for scalar loop. 5341 // Rix == 0 in all cases. 5342 z_sgfr(Z_R0, Rcnt); // negative # characters the ptrs have been advanced previously. 5343 z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore. 5344 z_agr(Rdst, Z_R0); 5345 z_agr(Rsrc, Z_R0); // restore ptr. 5346 z_bru(AllDone); 5347 } 5348 5349 { 5350 bind(ScalarShortcut); 5351 // Z_R0 must contain remaining # characters as 64-bit signed int here. 5352 // register contents is preserved over scalar processing (for register fixup). 5353 5354 #if 0 // Sacrifice shortcuts for code compactness 5355 { 5356 Label ScalarDefault; 5357 z_chi(Rcnt, 2); 5358 z_brh(ScalarDefault); 5359 z_llc(Z_R0, 0, Z_R0, Rsrc); // 6 bytes 5360 z_sth(Z_R0, 0, Z_R0, Rdst); // 4 bytes 5361 z_brl(AllDone); 5362 z_llc(Z_R0, 1, Z_R0, Rsrc); // 6 bytes 5363 z_sth(Z_R0, 2, Z_R0, Rdst); // 4 bytes 5364 z_bru(AllDone); 5365 bind(ScalarDefault); 5366 } 5367 #endif 5368 5369 Label CodeTable; 5370 // Some comments on Rix calculation: 5371 // - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions). 5372 // - high word of both Rix and Rcnt may contain garbage 5373 // - the final lngfr takes care of that garbage, extending the sign to high word 5374 z_sllg(Rix, Z_R0, 2); // calculate 10*Rix = (4*Rix + Rix)*2 5375 z_ar(Rix, Z_R0); 5376 z_larl(Z_R1, CodeTable); 5377 z_sll(Rix, 1); 5378 z_lngfr(Rix, Rix); // ix range: [0..7], after inversion & mult: [-(7*12)..(0*12)]. 
5379 z_bc(Assembler::bcondAlways, 0, Rix, Z_R1); 5380 5381 z_llc(Z_R1, 6, Z_R0, Rsrc); // 6 bytes 5382 z_sth(Z_R1, 12, Z_R0, Rdst); // 4 bytes 5383 5384 z_llc(Z_R1, 5, Z_R0, Rsrc); 5385 z_sth(Z_R1, 10, Z_R0, Rdst); 5386 5387 z_llc(Z_R1, 4, Z_R0, Rsrc); 5388 z_sth(Z_R1, 8, Z_R0, Rdst); 5389 5390 z_llc(Z_R1, 3, Z_R0, Rsrc); 5391 z_sth(Z_R1, 6, Z_R0, Rdst); 5392 5393 z_llc(Z_R1, 2, Z_R0, Rsrc); 5394 z_sth(Z_R1, 4, Z_R0, Rdst); 5395 5396 z_llc(Z_R1, 1, Z_R0, Rsrc); 5397 z_sth(Z_R1, 2, Z_R0, Rdst); 5398 5399 z_llc(Z_R1, 0, Z_R0, Rsrc); 5400 z_sth(Z_R1, 0, Z_R0, Rdst); 5401 bind(CodeTable); 5402 5403 z_chi(Rcnt, 8); // no fixup for small strings. Rdst, Rsrc were not modified. 5404 z_brl(AllDone); 5405 5406 z_sgfr(Z_R0, Rcnt); // # characters the ptrs have been advanced previously. 5407 z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore. 5408 z_agr(Rdst, Z_R0); 5409 z_agr(Rsrc, Z_R0); // restore ptr. 5410 } 5411 bind(AllDone); 5412 5413 BLOCK_COMMENT("} string_inflate"); 5414 return offset() - block_start; 5415 } 5416 5417 // Inflate byte[] to char[], length known at compile time. 5418 // Restores: src, dst 5419 // Kills: tmp, Z_R0, Z_R1. 5420 // Note: 5421 // len is signed int. Counts # characters, not bytes. 5422 unsigned int MacroAssembler::string_inflate_const(Register src, Register dst, Register tmp, int len) { 5423 assert_different_registers(Z_R0, Z_R1, src, dst, tmp); 5424 5425 BLOCK_COMMENT("string_inflate_const {"); 5426 int block_start = offset(); 5427 5428 Register Rix = tmp; // loop index 5429 Register Rsrc = src; // addr(src array) 5430 Register Rdst = dst; // addr(dst array) 5431 Label ScalarShortcut, AllDone; 5432 int nprocessed = 0; 5433 int src_off = 0; // compensate for saved (optimized away) ptr advancement. 5434 int dst_off = 0; // compensate for saved (optimized away) ptr advancement. 
  bool restore_inputs = false;  // set when a loop advances Rsrc/Rdst (fixup needed at the end).
  bool workreg_clear  = false;  // set when Z_R0/Z_R1 are known zero (icm relies on cleared regs).

  //---< 32-characters-at-a-time vector section >---
  if ((len >= 32) && VM_Version::has_VectorFacility()) {
    const int  min_vcnt     = 32;  // Minimum #characters required to use vector instructions.
                                   // Otherwise just do nothing in vector mode.
                                   // Must be multiple of vector register length (16 bytes = 128 bits).
    const int  log_min_vcnt = exact_log2(min_vcnt);
    const int  iterations   = (len - nprocessed) >> log_min_vcnt;
    nprocessed             += iterations << log_min_vcnt;
    Label VectorLoop;

    if (iterations == 1) {
      // Single shot: keep Rsrc/Rdst unchanged, fold the advancement into src_off/dst_off.
      z_vlm(Z_V20, Z_V21, 0+src_off, Rsrc);   // get next 32 characters (single-byte)
      z_vuplhb(Z_V22, Z_V20);                 // V2 <- (expand) V0(high)
      z_vupllb(Z_V23, Z_V20);                 // V3 <- (expand) V0(low)
      z_vuplhb(Z_V24, Z_V21);                 // V4 <- (expand) V1(high)
      z_vupllb(Z_V25, Z_V21);                 // V5 <- (expand) V1(low)
      z_vstm(Z_V22, Z_V25, 0+dst_off, Rdst);  // store next 32 bytes

      src_off += min_vcnt;
      dst_off += min_vcnt*2;
    } else {
      restore_inputs = true;

      z_lgfi(Rix, len>>log_min_vcnt);         // == iterations here (nprocessed was 0).
      bind(VectorLoop);
        z_vlm(Z_V20, Z_V21, 0, Rsrc);         // get next 32 characters (single-byte)
        add2reg(Rsrc, min_vcnt);

        z_vuplhb(Z_V22, Z_V20);               // V2 <- (expand) V0(high)
        z_vupllb(Z_V23, Z_V20);               // V3 <- (expand) V0(low)
        z_vuplhb(Z_V24, Z_V21);               // V4 <- (expand) V1(high)
        z_vupllb(Z_V25, Z_V21);               // V5 <- (expand) V1(low)
        z_vstm(Z_V22, Z_V25, 0, Rdst);        // store next 32 bytes
        add2reg(Rdst, min_vcnt*2);

        z_brct(Rix, VectorLoop);
    }
  }

  //---< 16-character vector section (at most one iteration after the 32-char section) >---
  if (((len-nprocessed) >= 16) && VM_Version::has_VectorFacility()) {
    const int  min_vcnt     = 16;  // Minimum #characters required to use vector instructions.
                                   // Otherwise just do nothing in vector mode.
                                   // Must be multiple of vector register length (16 bytes = 128 bits).
    const int  log_min_vcnt = exact_log2(min_vcnt);
    const int  iterations   = (len - nprocessed) >> log_min_vcnt;
    nprocessed             += iterations << log_min_vcnt;
    assert(iterations == 1, "must be!");      // remainder after 32-char section is < 32.

    z_vl(Z_V20, 0+src_off, Z_R0, Rsrc);       // get next 16 characters (single-byte)
    z_vuplhb(Z_V22, Z_V20);                   // V2 <- (expand) V0(high)
    z_vupllb(Z_V23, Z_V20);                   // V3 <- (expand) V0(low)
    z_vstm(Z_V22, Z_V23, 0+dst_off, Rdst);    // store next 32 bytes

    src_off += min_vcnt;
    dst_off += min_vcnt*2;
  }

  //---< unrolled scalar section, 8 characters per iteration >---
  // icm/icmh with mask 5 (0b0101) insert each source byte into the low byte of a
  // halfword; with Z_R0/Z_R1 pre-cleared this zero-extends 8 bytes to 8 chars.
  if ((len-nprocessed) > 8) {
    const int  min_cnt     = 8;    // Minimum #characters required to use unrolled scalar loop.
                                   // Otherwise just do nothing in unrolled scalar mode.
                                   // Must be multiple of 8.
    const int  log_min_cnt = exact_log2(min_cnt);
    const int  iterations  = (len - nprocessed) >> log_min_cnt;
    nprocessed            += iterations << log_min_cnt;

    //---< avoid loop overhead/ptr increment for small # iterations >---
    if (iterations <= 2) {
      clear_reg(Z_R0);
      clear_reg(Z_R1);
      workreg_clear = true;

      z_icmh(Z_R0, 5, 0+src_off, Rsrc);
      z_icmh(Z_R1, 5, 4+src_off, Rsrc);
      z_icm(Z_R0,  5, 2+src_off, Rsrc);
      z_icm(Z_R1,  5, 6+src_off, Rsrc);
      z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);

      src_off += min_cnt;
      dst_off += min_cnt*2;
    }

    if (iterations == 2) {
      // No re-clear needed: the unmasked byte positions stayed zero after round 1.
      z_icmh(Z_R0, 5, 0+src_off, Rsrc);
      z_icmh(Z_R1, 5, 4+src_off, Rsrc);
      z_icm(Z_R0,  5, 2+src_off, Rsrc);
      z_icm(Z_R1,  5, 6+src_off, Rsrc);
      z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);

      src_off += min_cnt;
      dst_off += min_cnt*2;
    }

    if (iterations > 2) {
      // NOTE(review): offsets 0 (not src_off/dst_off) are safe here: after the vector
      // sections fewer than 16 chars remain, so iterations > 2 implies the vector
      // sections did not run and src_off/dst_off are still 0 — confirm.
      Label UnrolledLoop;
      restore_inputs = true;

      clear_reg(Z_R0);
      clear_reg(Z_R1);
      workreg_clear = true;

      z_lgfi(Rix, iterations);
      bind(UnrolledLoop);
        z_icmh(Z_R0, 5, 0, Rsrc);
        z_icmh(Z_R1, 5, 4, Rsrc);
        z_icm(Z_R0,  5, 2, Rsrc);
        z_icm(Z_R1,  5, 6, Rsrc);
        add2reg(Rsrc, min_cnt);

        z_stmg(Z_R0, Z_R1, 0, Rdst);
        add2reg(Rdst, min_cnt*2);

        z_brct(Rix, UnrolledLoop);
    }
  }

  //---< remainder (1..8 characters), fully straight-line code per case >---
  if ((len-nprocessed) > 0) {
    switch (len-nprocessed) {
      case 8:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icmh(Z_R0, 5, 0+src_off, Rsrc);
        z_icmh(Z_R1, 5, 4+src_off, Rsrc);
        z_icm(Z_R0,  5, 2+src_off, Rsrc);
        z_icm(Z_R1,  5, 6+src_off, Rsrc);
        z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
        break;
      case 7:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        clear_reg(Rix);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_icm(Rix,  5, 4+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_llc(Z_R0, 6+src_off, Z_R0, Rsrc);   // 7th char handled as a single byte.
        z_st(Rix,   8+dst_off, Z_R0, Rdst);
        z_sth(Z_R0, 12+dst_off, Z_R0, Rdst);
        break;
      case 6:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        clear_reg(Rix);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_icm(Rix,  5, 4+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_st(Rix,   8+dst_off, Z_R0, Rdst);
        break;
      case 5:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_llc(Rix,  4+src_off, Z_R0, Rsrc);   // 5th char handled as a single byte.
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_sth(Rix,  8+dst_off, Z_R0, Rdst);
        break;
      case 4:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        break;
      case 3:
        if (!workreg_clear) {
          clear_reg(Z_R0);
        }
        z_llc(Z_R1, 2+src_off, Z_R0, Rsrc);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_sth(Z_R1, 4+dst_off, Z_R0, Rdst);
        z_st(Z_R0,  0+dst_off, Rdst);
        break;
      case 2:
        // Single-byte loads/halfword stores; no cleared work registers needed.
        z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
        z_llc(Z_R1, 1+src_off, Z_R0, Rsrc);
        z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
        z_sth(Z_R1, 2+dst_off, Z_R0, Rdst);
        break;
      case 1:
        z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
        z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
        break;
      default:
        guarantee(false, "Impossible");
        break;
    }
    src_off   += len-nprocessed;
    dst_off   += (len-nprocessed)*2;
    nprocessed = len;
  }

  //---< restore modified input registers >---
  // src_off/dst_off is the part of the advancement that was folded into
  // displacements rather than applied to the registers; only the difference
  // must be subtracted.
  if ((nprocessed > 0) && restore_inputs) {
    z_agfi(Rsrc, -(nprocessed-src_off));
    if (nprocessed < 1000000000) {  // avoid int overflow (agfi immediate is signed 32 bit).
      z_agfi(Rdst, -(nprocessed*2-dst_off));
    } else {
      z_agfi(Rdst, -(nprocessed-dst_off));
      z_agfi(Rdst, -nprocessed);
    }
  }

  BLOCK_COMMENT("} string_inflate_const");
  return offset() - block_start;
}

// Kills src.
unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt,
                                           Register odd_reg, Register even_reg, Register tmp) {
  int block_start = offset();
  Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
  const Register addr = src, mask = tmp;

  BLOCK_COMMENT("has_negatives {");

  z_llgfr(Z_R1, cnt);        // Number of bytes to read. (Must be a positive simm32.)
  z_llilf(mask, 0x80808080); // Sign-bit mask, one bit per byte.
  z_lhi(result, 1);          // Assume true.
  // Last possible addr for fast loop.
  z_lay(odd_reg, -16, Z_R1, src);
  z_chi(cnt, 16);
  z_brl(Lslow);              // Fewer than 16 bytes: take the slow (byte-wise) path.