--- old/src/hotspot/cpu/s390/assembler_s390.hpp 2017-11-14 17:21:42.003212000 +0100 +++ new/src/hotspot/cpu/s390/assembler_s390.hpp 2017-11-14 17:21:41.624204000 +0100 @@ -582,7 +582,11 @@ #define LOC_ZOPC (unsigned long)(0xebL << 40 | 0xf2L) // z196 #define LOCG_ZOPC (unsigned long)(0xebL << 40 | 0xe2L) // z196 -#define LMG_ZOPC (unsigned long)(235L << 40 | 4L) + +// LOAD multiple registers at once +#define LM_ZOPC (unsigned int)(0x98 << 24) +#define LMY_ZOPC (unsigned long)(0xebL << 40 | 0x98L) +#define LMG_ZOPC (unsigned long)(0xebL << 40 | 0x04L) #define LE_ZOPC (unsigned int)(0x78 << 24) #define LEY_ZOPC (unsigned long)(237L << 40 | 100L) @@ -613,7 +617,10 @@ #define STOC_ZOPC (unsigned long)(0xebL << 40 | 0xf3L) // z196 #define STOCG_ZOPC (unsigned long)(0xebL << 40 | 0xe3L) // z196 -#define STMG_ZOPC (unsigned long)(235L << 40 | 36L) +// STORE multiple registers at once +#define STM_ZOPC (unsigned int)(0x90 << 24) +#define STMY_ZOPC (unsigned long)(0xebL << 40 | 0x90L) +#define STMG_ZOPC (unsigned long)(0xebL << 40 | 0x24L) #define STE_ZOPC (unsigned int)(0x70 << 24) #define STEY_ZOPC (unsigned long)(237L << 40 | 102L) @@ -874,15 +881,19 @@ // Shift // arithmetic -#define SLA_ZOPC (unsigned int)(139 << 24) -#define SLAG_ZOPC (unsigned long)(235L << 40 | 11L) -#define SRA_ZOPC (unsigned int)(138 << 24) -#define SRAG_ZOPC (unsigned long)(235L << 40 | 10L) +#define SLA_ZOPC (unsigned int)(0x8b << 24) +#define SLAK_ZOPC (unsigned long)(0xebL << 40 | 0xddL) +#define SLAG_ZOPC (unsigned long)(0xebL << 40 | 0x0bL) +#define SRA_ZOPC (unsigned int)(0x8a << 24) +#define SRAK_ZOPC (unsigned long)(0xebL << 40 | 0xdcL) +#define SRAG_ZOPC (unsigned long)(0xebL << 40 | 0x0aL) // logical -#define SLL_ZOPC (unsigned int)(137 << 24) -#define SLLG_ZOPC (unsigned long)(235L << 40 | 13L) -#define SRL_ZOPC (unsigned int)(136 << 24) -#define SRLG_ZOPC (unsigned long)(235L << 40 | 12L) +#define SLL_ZOPC (unsigned int)(0x89 << 24) +#define SLLK_ZOPC (unsigned long)(0xebL << 40 | 
0xdfL) +#define SLLG_ZOPC (unsigned long)(0xebL << 40 | 0x0dL) +#define SRL_ZOPC (unsigned int)(0x88 << 24) +#define SRLK_ZOPC (unsigned long)(0xebL << 40 | 0xdeL) +#define SRLG_ZOPC (unsigned long)(0xebL << 40 | 0x0cL) // Rotate, then AND/XOR/OR/insert // rotate @@ -2262,12 +2273,16 @@ // shift inline void z_sla( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved! + inline void z_slak(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved! inline void z_slag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, only 63 bits shifted, sign preserved! inline void z_sra( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, sign extended + inline void z_srak(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int32, sign extended inline void z_srag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, sign extended inline void z_sll( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, zeros added + inline void z_sllk(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int32, zeros added inline void z_sllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, zeros added inline void z_srl( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, zero extended + inline void z_srlk(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int32, zero extended inline void z_srlg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, zero extended // 
rotate @@ -3035,7 +3050,11 @@ inline void z_tam(); inline void z_stckf(int64_t d2, Register b2); + inline void z_stm( Register r1, Register r3, int64_t d2, Register b2); + inline void z_stmy(Register r1, Register r3, int64_t d2, Register b2); inline void z_stmg(Register r1, Register r3, int64_t d2, Register b2); + inline void z_lm( Register r1, Register r3, int64_t d2, Register b2); + inline void z_lmy(Register r1, Register r3, int64_t d2, Register b2); inline void z_lmg(Register r1, Register r3, int64_t d2, Register b2); inline void z_cs( Register r1, Register r3, int64_t d2, Register b2); --- old/src/hotspot/cpu/s390/assembler_s390.inline.hpp 2017-11-14 17:21:43.550231000 +0100 +++ new/src/hotspot/cpu/s390/assembler_s390.inline.hpp 2017-11-14 17:21:43.173226000 +0100 @@ -334,12 +334,16 @@ // SHIFT/RORATE OPERATIONS //----------------------------------- inline void Assembler::z_sla( Register r1, int64_t d2, Register b2) { emit_32( SLA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); } +inline void Assembler::z_slak(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } inline void Assembler::z_slag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } inline void Assembler::z_sra( Register r1, int64_t d2, Register b2) { emit_32( SRA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); } +inline void Assembler::z_srak(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } inline void Assembler::z_srag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } inline void Assembler::z_sll( Register r1, int64_t d2, Register b2) { emit_32( SLL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 
32)); } +inline void Assembler::z_sllk(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } inline void Assembler::z_sllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } inline void Assembler::z_srl( Register r1, int64_t d2, Register b2) { emit_32( SRL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); } +inline void Assembler::z_srlk(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLK_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } inline void Assembler::z_srlg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); } // rotate left @@ -690,10 +694,14 @@ inline void Assembler::z_tam() { emit_16( TAM_ZOPC); } inline void Assembler::z_stckf(int64_t d2, Register b2) { emit_32( STCKF_ZOPC | uimm12(d2, 20, 32) | regz(b2, 16, 32)); } -inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMG_ZOPC | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); } -inline void Assembler::z_lmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMG_ZOPC | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); } +inline void Assembler::z_stm( Register r1, Register r3, int64_t d2, Register b2) { emit_32( STM_ZOPC | reg(r1, 8, 32) | reg(r3,12,32)| reg(b2,16,32) | uimm12(d2, 20,32)); } +inline void Assembler::z_stmy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMY_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); } +inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMG_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); } +inline void Assembler::z_lm( Register r1, Register r3, int64_t d2, Register b2) { 
emit_32( LM_ZOPC | reg(r1, 8, 32) | reg(r3,12,32)| reg(b2,16,32) | uimm12(d2, 20,32)); } +inline void Assembler::z_lmy( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMY_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); } +inline void Assembler::z_lmg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMG_ZOPC | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) | simm20(d2) ); } -inline void Assembler::z_cs(Register r1, Register r3, int64_t d2, Register b2) { emit_32( CS_ZOPC | regt(r1, 8, 32) | reg(r3, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); } +inline void Assembler::z_cs( Register r1, Register r3, int64_t d2, Register b2) { emit_32( CS_ZOPC | regt(r1, 8, 32) | reg(r3, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); } inline void Assembler::z_csy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSY_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } inline void Assembler::z_csg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } inline void Assembler::z_cs( Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_cs( r1, r3, a.disp(), a.baseOrR0()); } --- old/src/hotspot/cpu/s390/macroAssembler_s390.cpp 2017-11-14 17:21:45.046243000 +0100 +++ new/src/hotspot/cpu/s390/macroAssembler_s390.cpp 2017-11-14 17:21:44.659256000 +0100 @@ -936,7 +936,7 @@ // Some extra safety net. if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { - guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away"); + guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); } (this)->relocate(rspec, relocInfo::pcrel_addr_format); @@ -956,7 +956,7 @@ // Some extra safety net. 
if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { - guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away"); + guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance); } (this)->relocate(rspec, relocInfo::pcrel_addr_format); @@ -1025,6 +1025,13 @@ } } +void MacroAssembler::prefetch_read(Address a) { + z_pfd(1, a.disp20(), a.indexOrR0(), a.base()); +} +void MacroAssembler::prefetch_update(Address a) { + z_pfd(2, a.disp20(), a.indexOrR0(), a.base()); +} + // Clear a register, i.e. load const zero into reg. // Return len (in bytes) of generated instruction(s). // whole_reg: Clear 64 bits if true, 32 bits otherwise. @@ -4896,77 +4903,295 @@ // Intrinsics for CompactStrings -// Compress char[] to byte[]. odd_reg contains cnt. Kills dst. Early clobber: result +// Compress char[] to byte[]. +// Restores: src, dst +// Uses: cnt +// Kills: tmp, Z_R0, Z_R1. +// Early clobber: result. +// Note: +// cnt is signed int. Do not rely on high word! +// counts # characters, not bytes. // The result is the number of characters copied before the first incompatible character was found. -// If tmp2 is provided and the compression fails, the compression stops exactly at this point and the result is precise. +// If precise is true, the processing stops exactly at this point. Otherwise, the result may be off +// by a few bytes. The result always indicates the number of copied characters. // // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure: -// - Different number of characters may have been written to dead array (if tmp2 not provided). +// - Different number of characters may have been written to dead array (if precise is false). // - Returns a number --- + // Strings with 4 and 8 characters were found to occur very frequently. + // Therefore, we handle them right away with minimal overhead. 
+ Label skipShortcut, skip4Shortcut, skip8Shortcut; + Register Rout = Z_R0; + z_chi(Rcnt, 4); + z_brne(skip4Shortcut); // 4 characters are very frequent + z_lg(Z_R0, 0, Rsrc); // Treat exactly 4 characters specially. + if (VM_Version::has_DistinctOpnds()) { + Rout = Z_R0; + z_ngrk(Rix, Z_R0, Rmask); + } else { + Rout = Rix; + z_lgr(Rix, Z_R0); + z_ngr(Z_R0, Rmask); + } + z_brnz(skipShortcut); + z_stcmh(Rout, 5, 0, Rdst); + z_stcm(Rout, 5, 2, Rdst); + z_lgfr(result, Rcnt); + z_bru(AllDone); + bind(skip4Shortcut); + + z_chi(Rcnt, 8); + z_brne(skip8Shortcut); // There's more to do... + z_lmg(Z_R0, Z_R1, 0, Rsrc); // Treat exactly 8 characters specially. + if (VM_Version::has_DistinctOpnds()) { + Rout = Z_R0; + z_ogrk(Rix, Z_R0, Z_R1); + z_ngr(Rix, Rmask); + } else { + Rout = Rix; + z_lgr(Rix, Z_R0); + z_ogr(Z_R0, Z_R1); + z_ngr(Z_R0, Rmask); + } + z_brnz(skipShortcut); + z_stcmh(Rout, 5, 0, Rdst); + z_stcm(Rout, 5, 2, Rdst); + z_stcmh(Z_R1, 5, 4, Rdst); + z_stcm(Z_R1, 5, 6, Rdst); + z_lgfr(result, Rcnt); + z_bru(AllDone); + + bind(skip8Shortcut); + clear_reg(Z_R0, true, false); // #characters already processed (none). Precond for scalar loop. + z_brl(ScalarShortcut); // Just a few characters + + bind(skipShortcut); + } +#endif + clear_reg(Z_R0); // make sure register is properly initialized. + + if (VM_Version::has_VectorFacility()) { + const int min_vcnt = 32; // Minimum #characters required to use vector instructions. + // Otherwise just do nothing in vector mode. + // Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)). 
+ const int log_min_vcnt = exact_log2(min_vcnt); + Label VectorLoop, VectorDone, VectorBreak; + + VectorRegister Vtmp1 = Z_V16; + VectorRegister Vtmp2 = Z_V17; + VectorRegister Vmask = Z_V18; + VectorRegister Vzero = Z_V19; + VectorRegister Vsrc_first = Z_V20; + VectorRegister Vsrc_last = Z_V23; + + assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error"); + assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()"); + z_srak(Rix, Rcnt, log_min_vcnt); // # vector loop iterations + z_brz(VectorDone); // not enough data for vector loop + + z_vzero(Vzero); // all zeroes + z_vgmh(Vmask, 0, 7); // generate 0xff00 mask for all 2-byte elements + z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop + + bind(VectorLoop); + z_vlm(Vsrc_first, Vsrc_last, 0, Rsrc); + add2reg(Rsrc, min_vcnt*2); + + //---< check for incompatible character >--- + z_vo(Vtmp1, Z_V20, Z_V21); + z_vo(Vtmp2, Z_V22, Z_V23); + z_vo(Vtmp1, Vtmp1, Vtmp2); + z_vn(Vtmp1, Vtmp1, Vmask); + z_vceqhs(Vtmp1, Vtmp1, Vzero); // high half of all chars must be zero for successful compress. + z_brne(VectorBreak); // break vector loop, incompatible character found. + // re-process data from current iteration in break handler. + + //---< pack & store characters >--- + z_vpkh(Vtmp1, Z_V20, Z_V21); // pack (src1, src2) -> tmp1 + z_vpkh(Vtmp2, Z_V22, Z_V23); // pack (src3, src4) -> tmp2 + z_vstm(Vtmp1, Vtmp2, 0, Rdst); // store packed string + add2reg(Rdst, min_vcnt); + + z_brct(Rix, VectorLoop); + + z_bru(VectorDone); + + bind(VectorBreak); + z_sll(Rix, log_min_vcnt); // # chars processed so far in VectorLoop, excl. current iteration. + z_sr(Z_R0, Rix); // correct # chars processed in total. + + bind(VectorDone); + } + + { + const int min_cnt = 8; // Minimum #characters required to use unrolled loop. + // Otherwise just do nothing in unrolled loop. + // Must be multiple of 8. 
+ const int log_min_cnt = exact_log2(min_cnt); + Label UnrolledLoop, UnrolledDone, UnrolledBreak; + if (VM_Version::has_DistinctOpnds()) { - z_ogrk(tmp2, Z_R0, Z_R1); + z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop } else { - z_lgr(tmp2, Z_R0); - z_ogr(tmp2, Z_R1); + z_lr(Rix, Rcnt); + z_sr(Rix, Z_R0); } - z_ngr(tmp2, mask); - z_brne(Lslow); // Failed fast case, retry slowly. - } - z_stcmh(Z_R0, 5, 0, addr2); - z_stcm(Z_R0, 5, 2, addr2); - if (!precise) { z_ogr(Z_R0, Z_R1); } - z_stcmh(Z_R1, 5, 4, addr2); - z_stcm(Z_R1, 5, 6, addr2); - if (!precise) { - z_ngr(Z_R0, mask); - z_brne(Ldone); // Failed (more than needed was written). - } - z_aghi(addr2, 8); - z_brxle(ind1, even_reg, Lloop1); - - bind(Lslow); - // Compute index limit and skip if negative. - z_ahi(odd_reg, 16-2); // Last possible index for slow loop. - z_lhi(even_reg, 2); - z_cr(ind1, odd_reg); - z_brh(Ldone); - - bind(Lloop2); // 1 Character per iteration. - z_llh(Z_R0, Address(src, ind1)); - z_tmll(Z_R0, 0xFF00); - z_brnaz(Ldone); // Failed slow case: Return number of written characters. - z_stc(Z_R0, Address(addr2)); - z_aghi(addr2, 1); - z_brxle(ind1, even_reg, Lloop2); + z_sra(Rix, log_min_cnt); // unrolled loop count + z_brz(UnrolledDone); - bind(Ldone); // result = ind1 = 2*cnt - z_srl(ind1, 1); + bind(UnrolledLoop); + z_lmg(Z_R0, Z_R1, 0, Rsrc); + if (precise) { + z_ogr(Z_R1, Z_R0); // check all 8 chars for incompatibility + z_ngr(Z_R1, Rmask); + z_brnz(UnrolledBreak); + + z_lg(Z_R1, 8, Rsrc); // reload destroyed register + z_stcmh(Z_R0, 5, 0, Rdst); + z_stcm(Z_R0, 5, 2, Rdst); + } else { + z_stcmh(Z_R0, 5, 0, Rdst); + z_stcm(Z_R0, 5, 2, Rdst); + + z_ogr(Z_R0, Z_R1); + z_ngr(Z_R0, Rmask); + z_brnz(UnrolledBreak); + } + z_stcmh(Z_R1, 5, 4, Rdst); + z_stcm(Z_R1, 5, 6, Rdst); + + add2reg(Rsrc, min_cnt*2); + add2reg(Rdst, min_cnt); + z_brct(Rix, UnrolledLoop); + + z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop. 
+ z_nilf(Z_R0, ~(min_cnt-1)); + z_tmll(Rcnt, min_cnt-1); + z_brnaz(ScalarShortcut); // if all bits zero, there is nothing left to do for scalar loop. + // Rix == 0 in all cases. + z_lgfr(result, Rcnt); // all characters processed. + z_sgfr(Rdst, Rcnt); // restore ptr + z_sgfr(Rsrc, Rcnt); // restore ptr, double the element count for Rsrc restore + z_sgfr(Rsrc, Rcnt); + z_bru(AllDone); + + bind(UnrolledBreak); + z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop + z_nilf(Z_R0, ~(min_cnt-1)); + z_sll(Rix, log_min_cnt); // # chars processed so far in UnrolledLoop, excl. current iteration. + z_sr(Z_R0, Rix); // correct # chars processed in total. + if (!precise) { + z_lgfr(result, Z_R0); + z_aghi(result, min_cnt/2); // min_cnt/2 characters have already been written + // but ptrs were not updated yet. + z_sgfr(Rdst, Z_R0); // restore ptr + z_sgfr(Rsrc, Z_R0); // restore ptr, double the element count for Rsrc restore + z_sgfr(Rsrc, Z_R0); + z_bru(AllDone); + } + bind(UnrolledDone); + } + + { + Label ScalarLoop, ScalarDone, ScalarBreak; + + bind(ScalarShortcut); + z_ltgfr(result, Rcnt); + z_brz(AllDone); + +#if 0 // Sacrifice shortcuts for code compactness + { + //---< Special treatment for very short strings (one or two characters) >--- + // For these strings, we are sure that the above code was skipped. + // Thus, no registers were modified, register restore is not required. 
+ Label ScalarDoit, Scalar2Char; + z_chi(Rcnt, 2); + z_brh(ScalarDoit); + z_llh(Z_R1, 0, Z_R0, Rsrc); + z_bre(Scalar2Char); + z_tmll(Z_R1, 0xff00); + z_lghi(result, 0); // cnt == 1, first char invalid, no chars successfully processed + z_brnaz(AllDone); + z_stc(Z_R1, 0, Z_R0, Rdst); + z_lghi(result, 1); + z_bru(AllDone); + + bind(Scalar2Char); + z_llh(Z_R0, 2, Z_R0, Rsrc); + z_tmll(Z_R1, 0xff00); + z_lghi(result, 0); // cnt == 2, first char invalid, no chars successfully processed + z_brnaz(AllDone); + z_stc(Z_R1, 0, Z_R0, Rdst); + z_tmll(Z_R0, 0xff00); + z_lghi(result, 1); // cnt == 2, second char invalid, one char successfully processed + z_brnaz(AllDone); + z_stc(Z_R0, 1, Z_R0, Rdst); + z_lghi(result, 2); + z_bru(AllDone); + + bind(ScalarDoit); + } +#endif + + if (VM_Version::has_DistinctOpnds()) { + z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in scalar loop + } else { + z_lr(Rix, Rcnt); + z_sr(Rix, Z_R0); + } + z_lgfr(result, Rcnt); // # processed characters (if all runs ok). + z_brz(ScalarDone); - BLOCK_COMMENT("} string_compress"); + bind(ScalarLoop); + z_llh(Z_R1, 0, Z_R0, Rsrc); + z_tmll(Z_R1, 0xff00); + z_brnaz(ScalarBreak); + z_stc(Z_R1, 0, Z_R0, Rdst); + add2reg(Rsrc, 2); + add2reg(Rdst, 1); + z_brct(Rix, ScalarLoop); + + z_bru(ScalarDone); + + bind(ScalarBreak); + z_sr(result, Rix); + + bind(ScalarDone); + z_sgfr(Rdst, result); // restore ptr + z_sgfr(Rsrc, result); // restore ptr, double the element count for Rsrc restore + z_sgfr(Rsrc, result); + } + bind(AllDone); + if (precise) { + BLOCK_COMMENT("} encode_iso_array"); + } else { + BLOCK_COMMENT("} string_compress"); + } return offset() - block_start; } @@ -4997,53 +5222,432 @@ return offset() - block_start; } -// Inflate byte[] to char[]. odd_reg contains cnt. Kills src. -unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register odd_reg, - Register even_reg, Register tmp) { - int block_start = offset(); +// Inflate byte[] to char[]. 
+// Restores: src, dst +// Uses: cnt +// Kills: tmp, Z_R0, Z_R1. +// Note: +// cnt is signed int. Do not rely on high word! +// counts # characters, not bytes. +unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) { + assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp); BLOCK_COMMENT("string_inflate {"); + int block_start = offset(); - Label Lloop1, Lloop2, Lslow, Ldone; - const Register addr1 = src, ind2 = tmp; + Register Rcnt = cnt; // # characters (src: bytes, dst: char (2-byte)), remaining after current loop. + Register Rix = tmp; // loop index + Register Rsrc = src; // addr(src array) + Register Rdst = dst; // addr(dst array) + Label ScalarShortcut, AllDone; + +#if 0 // Sacrifice shortcuts for code compactness + { + //---< shortcuts for short strings (very frequent) >--- + Label skipShortcut, skip4Shortcut; + z_ltr(Rcnt, Rcnt); // absolutely nothing to do for strings of len == 0. + z_brz(AllDone); + clear_reg(Z_R0); // make sure registers are properly initialized. + clear_reg(Z_R1); + z_chi(Rcnt, 4); + z_brne(skip4Shortcut); // 4 characters are very frequent + z_icm(Z_R0, 5, 0, Rsrc); // Treat exactly 4 characters specially. + z_icm(Z_R1, 5, 2, Rsrc); + z_stm(Z_R0, Z_R1, 0, Rdst); + z_bru(AllDone); + bind(skip4Shortcut); + + z_chi(Rcnt, 8); + z_brh(skipShortcut); // There's a lot to do... + z_lgfr(Z_R0, Rcnt); // remaining #characters (<= 8). Precond for scalar loop. + // This does not destroy the "register cleared" state of Z_R0. + z_brl(ScalarShortcut); // Just a few characters + z_icmh(Z_R0, 5, 0, Rsrc); // Treat exactly 8 characters specially. + z_icmh(Z_R1, 5, 4, Rsrc); + z_icm(Z_R0, 5, 2, Rsrc); + z_icm(Z_R1, 5, 6, Rsrc); + z_stmg(Z_R0, Z_R1, 0, Rdst); + z_bru(AllDone); + bind(skipShortcut); + } +#endif + clear_reg(Z_R0); // make sure register is properly initialized. - z_sll(odd_reg, 1); // Number of bytes to write. (Must be a positive simm32.) - clear_reg(ind2); // Index to write. 
- z_ahi(odd_reg, -16); // Last possible index for fast loop. - z_brl(Lslow); - - // ind2: index, even_reg: index increment, odd_reg: index limit - clear_reg(Z_R0); - clear_reg(Z_R1); - z_lhi(even_reg, 16); - - bind(Lloop1); // 8 Characters per iteration. - z_icmh(Z_R0, 5, 0, addr1); - z_icmh(Z_R1, 5, 4, addr1); - z_icm(Z_R0, 5, 2, addr1); - z_icm(Z_R1, 5, 6, addr1); - z_aghi(addr1, 8); - z_stg(Z_R0, Address(dst, ind2)); - z_stg(Z_R1, Address(dst, ind2, 8)); - z_brxle(ind2, even_reg, Lloop1); - - bind(Lslow); - // Compute index limit and skip if negative. - z_ahi(odd_reg, 16-2); // Last possible index for slow loop. - z_lhi(even_reg, 2); - z_cr(ind2, odd_reg); - z_brh(Ldone); - - bind(Lloop2); // 1 Character per iteration. - z_llc(Z_R0, Address(addr1)); - z_sth(Z_R0, Address(dst, ind2)); - z_aghi(addr1, 1); - z_brxle(ind2, even_reg, Lloop2); + if (VM_Version::has_VectorFacility()) { + const int min_vcnt = 32; // Minimum #characters required to use vector instructions. + // Otherwise just do nothing in vector mode. + // Must be multiple of vector register length (16 bytes = 128 bits). 
+ const int log_min_vcnt = exact_log2(min_vcnt); + Label VectorLoop, VectorDone; + + assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()"); + z_srak(Rix, Rcnt, log_min_vcnt); // calculate # vector loop iterations + z_brz(VectorDone); // skip if none + + z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop + + bind(VectorLoop); + z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte) + add2reg(Rsrc, min_vcnt); + + z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high) + z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low) + z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high) + z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low) + z_vstm(Z_V22, Z_V25, 0, Rdst); // store next 32 characters (64 bytes) + add2reg(Rdst, min_vcnt*2); + + z_brct(Rix, VectorLoop); + + bind(VectorDone); + } + + const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop. + // Otherwise just do nothing in unrolled scalar mode. + // Must be multiple of 8. + { + const int log_min_cnt = exact_log2(min_cnt); + Label UnrolledLoop, UnrolledDone; - bind(Ldone); + + if (VM_Version::has_DistinctOpnds()) { + z_srk(Rix, Rcnt, Z_R0); // remaining # chars to process in unrolled loop + } else { + z_lr(Rix, Rcnt); + z_sr(Rix, Z_R0); + } + z_sra(Rix, log_min_cnt); // unrolled loop count + z_brz(UnrolledDone); + + clear_reg(Z_R0); + clear_reg(Z_R1); + + bind(UnrolledLoop); + z_icmh(Z_R0, 5, 0, Rsrc); + z_icmh(Z_R1, 5, 4, Rsrc); + z_icm(Z_R0, 5, 2, Rsrc); + z_icm(Z_R1, 5, 6, Rsrc); + add2reg(Rsrc, min_cnt); + + z_stmg(Z_R0, Z_R1, 0, Rdst); + + add2reg(Rdst, min_cnt*2); + z_brct(Rix, UnrolledLoop); + + bind(UnrolledDone); + z_lgfr(Z_R0, Rcnt); // # chars left over after unrolled loop. + z_nilf(Z_R0, min_cnt-1); + z_brnz(ScalarShortcut); // if zero, there is nothing left to do for scalar loop. + // Rix == 0 in all cases. + z_sgfr(Z_R0, Rcnt); // negative # characters the ptrs have been advanced previously. 
+ z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore. + z_agr(Rdst, Z_R0); + z_agr(Rsrc, Z_R0); // restore ptr. + z_bru(AllDone); + } + + { + bind(ScalarShortcut); + // Z_R0 must contain remaining # characters as 64-bit signed int here. + // register contents is preserved over scalar processing (for register fixup). + +#if 0 // Sacrifice shortcuts for code compactness + { + Label ScalarDefault; + z_chi(Rcnt, 2); + z_brh(ScalarDefault); + z_llc(Z_R0, 0, Z_R0, Rsrc); // 6 bytes + z_sth(Z_R0, 0, Z_R0, Rdst); // 4 bytes + z_brl(AllDone); + z_llc(Z_R0, 1, Z_R0, Rsrc); // 6 bytes + z_sth(Z_R0, 2, Z_R0, Rdst); // 4 bytes + z_bru(AllDone); + bind(ScalarDefault); + } +#endif + + Label CodeTable; + // Some comments on Rix calculation: + // - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions). + // - high word of both Rix and Rcnt may contain garbage + // - the final lngfr takes care of that garbage, extending the sign to high word + z_sllg(Rix, Z_R0, 2); // calculate Rix = 10*Z_R0 = (4*Z_R0 + Z_R0)*2 + z_ar(Rix, Z_R0); + z_larl(Z_R1, CodeTable); + z_sll(Rix, 1); + z_lngfr(Rix, Rix); // ix range: [0..7], after inversion & mult: [-(7*10)..(0*10)]. + z_bc(Assembler::bcondAlways, 0, Rix, Z_R1); + + z_llc(Z_R1, 6, Z_R0, Rsrc); // 6 bytes + z_sth(Z_R1, 12, Z_R0, Rdst); // 4 bytes + + z_llc(Z_R1, 5, Z_R0, Rsrc); + z_sth(Z_R1, 10, Z_R0, Rdst); + + z_llc(Z_R1, 4, Z_R0, Rsrc); + z_sth(Z_R1, 8, Z_R0, Rdst); + + z_llc(Z_R1, 3, Z_R0, Rsrc); + z_sth(Z_R1, 6, Z_R0, Rdst); + + z_llc(Z_R1, 2, Z_R0, Rsrc); + z_sth(Z_R1, 4, Z_R0, Rdst); + + z_llc(Z_R1, 1, Z_R0, Rsrc); + z_sth(Z_R1, 2, Z_R0, Rdst); + + z_llc(Z_R1, 0, Z_R0, Rsrc); + z_sth(Z_R1, 0, Z_R0, Rdst); + bind(CodeTable); + + z_chi(Rcnt, 8); // no fixup for small strings. Rdst, Rsrc were not modified. + z_brl(AllDone); + + z_sgfr(Z_R0, Rcnt); // # characters the ptrs have been advanced previously. + z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore. 
+ z_agr(Rdst, Z_R0); + z_agr(Rsrc, Z_R0); // restore ptr. + } + bind(AllDone); BLOCK_COMMENT("} string_inflate"); + return offset() - block_start; +} + +// Inflate byte[] to char[], length known at compile time. +// Restores: src, dst +// Kills: tmp, Z_R0, Z_R1. +// Note: +// len is signed int. Counts # characters, not bytes. +unsigned int MacroAssembler::string_inflate_const(Register src, Register dst, Register tmp, int len) { + assert_different_registers(Z_R0, Z_R1, src, dst, tmp); + + BLOCK_COMMENT("string_inflate_const {"); + int block_start = offset(); + + Register Rix = tmp; // loop index + Register Rsrc = src; // addr(src array) + Register Rdst = dst; // addr(dst array) + Label ScalarShortcut, AllDone; + int nprocessed = 0; + int src_off = 0; // compensate for saved (optimized away) ptr advancement. + int dst_off = 0; // compensate for saved (optimized away) ptr advancement. + bool restore_inputs = false; + bool workreg_clear = false; + + if ((len >= 32) && VM_Version::has_VectorFacility()) { + const int min_vcnt = 32; // Minimum #characters required to use vector instructions. + // Otherwise just do nothing in vector mode. + // Must be multiple of vector register length (16 bytes = 128 bits). 
+ const int log_min_vcnt = exact_log2(min_vcnt); + const int iterations = (len - nprocessed) >> log_min_vcnt; + nprocessed += iterations << log_min_vcnt; + Label VectorLoop; + + if (iterations == 1) { + z_vlm(Z_V20, Z_V21, 0+src_off, Rsrc); // get next 32 characters (single-byte) + z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high) + z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low) + z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high) + z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low) + z_vstm(Z_V22, Z_V25, 0+dst_off, Rdst); // store next 32 characters (64 bytes) + + src_off += min_vcnt; + dst_off += min_vcnt*2; + } else { + restore_inputs = true; + + z_lgfi(Rix, len>>log_min_vcnt); + bind(VectorLoop); + z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte) + add2reg(Rsrc, min_vcnt); + + z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high) + z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low) + z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high) + z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low) + z_vstm(Z_V22, Z_V25, 0, Rdst); // store next 32 characters (64 bytes) + add2reg(Rdst, min_vcnt*2); + + z_brct(Rix, VectorLoop); + } + } + + if (((len-nprocessed) >= 16) && VM_Version::has_VectorFacility()) { + const int min_vcnt = 16; // Minimum #characters required to use vector instructions. + // Otherwise just do nothing in vector mode. + // Must be multiple of vector register length (16 bytes = 128 bits). 
+ const int log_min_vcnt = exact_log2(min_vcnt); + const int iterations = (len - nprocessed) >> log_min_vcnt; + nprocessed += iterations << log_min_vcnt; + assert(iterations == 1, "must be!"); + + z_vl(Z_V20, 0+src_off, Z_R0, Rsrc); // get next 16 characters (single-byte) + z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high) + z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low) + z_vstm(Z_V22, Z_V23, 0+dst_off, Rdst); // store next 32 bytes + + src_off += min_vcnt; + dst_off += min_vcnt*2; + } + + if ((len-nprocessed) > 8) { + const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop. + // Otherwise just do nothing in unrolled scalar mode. + // Must be multiple of 8. + const int log_min_cnt = exact_log2(min_cnt); + const int iterations = (len - nprocessed) >> log_min_cnt; + nprocessed += iterations << log_min_cnt; + + //---< avoid loop overhead/ptr increment for small # iterations >--- + if (iterations <= 2) { + clear_reg(Z_R0); + clear_reg(Z_R1); + workreg_clear = true; + + z_icmh(Z_R0, 5, 0+src_off, Rsrc); + z_icmh(Z_R1, 5, 4+src_off, Rsrc); + z_icm(Z_R0, 5, 2+src_off, Rsrc); + z_icm(Z_R1, 5, 6+src_off, Rsrc); + z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst); + + src_off += min_cnt; + dst_off += min_cnt*2; + } + + if (iterations == 2) { + z_icmh(Z_R0, 5, 0+src_off, Rsrc); + z_icmh(Z_R1, 5, 4+src_off, Rsrc); + z_icm(Z_R0, 5, 2+src_off, Rsrc); + z_icm(Z_R1, 5, 6+src_off, Rsrc); + z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst); + + src_off += min_cnt; + dst_off += min_cnt*2; + } + + if (iterations > 2) { + Label UnrolledLoop; + restore_inputs = true; + + clear_reg(Z_R0); + clear_reg(Z_R1); + workreg_clear = true; + + z_lgfi(Rix, iterations); + bind(UnrolledLoop); + z_icmh(Z_R0, 5, 0, Rsrc); + z_icmh(Z_R1, 5, 4, Rsrc); + z_icm(Z_R0, 5, 2, Rsrc); + z_icm(Z_R1, 5, 6, Rsrc); + add2reg(Rsrc, min_cnt); + + z_stmg(Z_R0, Z_R1, 0, Rdst); + add2reg(Rdst, min_cnt*2); + + z_brct(Rix, UnrolledLoop); + } + } + + if ((len-nprocessed) > 0) { + switch (len-nprocessed) { + case 8: + 
if (!workreg_clear) { + clear_reg(Z_R0); + clear_reg(Z_R1); + } + z_icmh(Z_R0, 5, 0+src_off, Rsrc); + z_icmh(Z_R1, 5, 4+src_off, Rsrc); + z_icm(Z_R0, 5, 2+src_off, Rsrc); + z_icm(Z_R1, 5, 6+src_off, Rsrc); + z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst); + break; + case 7: + if (!workreg_clear) { + clear_reg(Z_R0); + clear_reg(Z_R1); + } + clear_reg(Rix); + z_icm(Z_R0, 5, 0+src_off, Rsrc); + z_icm(Z_R1, 5, 2+src_off, Rsrc); + z_icm(Rix, 5, 4+src_off, Rsrc); + z_stm(Z_R0, Z_R1, 0+dst_off, Rdst); + z_llc(Z_R0, 6+src_off, Z_R0, Rsrc); + z_st(Rix, 8+dst_off, Z_R0, Rdst); + z_sth(Z_R0, 12+dst_off, Z_R0, Rdst); + break; + case 6: + if (!workreg_clear) { + clear_reg(Z_R0); + clear_reg(Z_R1); + } + clear_reg(Rix); + z_icm(Z_R0, 5, 0+src_off, Rsrc); + z_icm(Z_R1, 5, 2+src_off, Rsrc); + z_icm(Rix, 5, 4+src_off, Rsrc); + z_stm(Z_R0, Z_R1, 0+dst_off, Rdst); + z_st(Rix, 8+dst_off, Z_R0, Rdst); + break; + case 5: + if (!workreg_clear) { + clear_reg(Z_R0); + clear_reg(Z_R1); + } + z_icm(Z_R0, 5, 0+src_off, Rsrc); + z_icm(Z_R1, 5, 2+src_off, Rsrc); + z_llc(Rix, 4+src_off, Z_R0, Rsrc); + z_stm(Z_R0, Z_R1, 0+dst_off, Rdst); + z_sth(Rix, 8+dst_off, Z_R0, Rdst); + break; + case 4: + if (!workreg_clear) { + clear_reg(Z_R0); + clear_reg(Z_R1); + } + z_icm(Z_R0, 5, 0+src_off, Rsrc); + z_icm(Z_R1, 5, 2+src_off, Rsrc); + z_stm(Z_R0, Z_R1, 0+dst_off, Rdst); + break; + case 3: + if (!workreg_clear) { + clear_reg(Z_R0); + } + z_llc(Z_R1, 2+src_off, Z_R0, Rsrc); + z_icm(Z_R0, 5, 0+src_off, Rsrc); + z_sth(Z_R1, 4+dst_off, Z_R0, Rdst); + z_st(Z_R0, 0+dst_off, Rdst); + break; + case 2: + z_llc(Z_R0, 0+src_off, Z_R0, Rsrc); + z_llc(Z_R1, 1+src_off, Z_R0, Rsrc); + z_sth(Z_R0, 0+dst_off, Z_R0, Rdst); + z_sth(Z_R1, 2+dst_off, Z_R0, Rdst); + break; + case 1: + z_llc(Z_R0, 0+src_off, Z_R0, Rsrc); + z_sth(Z_R0, 0+dst_off, Z_R0, Rdst); + break; + default: + guarantee(false, "Impossible"); + break; + } + src_off += len-nprocessed; + dst_off += (len-nprocessed)*2; + nprocessed = len; + } + + //---< restore modified 
input registers >--- + if ((nprocessed > 0) && restore_inputs) { + z_agfi(Rsrc, -(nprocessed-src_off)); + if (nprocessed < 1000000000) { // avoid int overflow + z_agfi(Rdst, -(nprocessed*2-dst_off)); + } else { + z_agfi(Rdst, -(nprocessed-dst_off)); + z_agfi(Rdst, -nprocessed); + } + } + BLOCK_COMMENT("} string_inflate_const"); return offset() - block_start; } --- old/src/hotspot/cpu/s390/macroAssembler_s390.hpp 2017-11-14 17:21:46.666271000 +0100 +++ new/src/hotspot/cpu/s390/macroAssembler_s390.hpp 2017-11-14 17:21:46.284260000 +0100 @@ -198,6 +198,9 @@ // Test a bit in a register. Result is reflected in CC. void testbit(Register r, unsigned int bitPos); + void prefetch_read(Address a); + void prefetch_update(Address a); + // Clear a register, i.e. load const zero into reg. Return len (in bytes) of // generated instruction(s). // whole_reg: Clear 64 bits if true, 32 bits otherwise. @@ -836,7 +839,7 @@ void load_mirror(Register mirror, Register method); //-------------------------- - //--- perations on arrays. + //--- Operations on arrays. //-------------------------- unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len); unsigned int Clear_Array_Const(long cnt, Register base); @@ -849,20 +852,34 @@ // Special String Intrinsics Implementation. //------------------------------------------- // Intrinsics for CompactStrings - // Compress char[] to byte[]. odd_reg contains cnt. tmp3 is only needed for precise behavior in failure case. Kills dst. - unsigned int string_compress(Register result, Register src, Register dst, Register odd_reg, - Register even_reg, Register tmp, Register tmp2 = noreg); + // Restores: src, dst + // Uses: cnt + // Kills: tmp, Z_R0, Z_R1. + // Early clobber: result. + // Boolean precise controls accuracy of result value. + unsigned int string_compress(Register result, Register src, Register dst, Register cnt, + Register tmp, bool precise); + + // Inflate byte[] to char[]. 
+ unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp); + + // Inflate byte[] to char[]. + // Restores: src, dst + // Uses: cnt + // Kills: tmp, Z_R0, Z_R1. + unsigned int string_inflate(Register src, Register dst, Register cnt, Register tmp); + + // Inflate byte[] to char[], length known at compile time. + // Restores: src, dst + // Kills: tmp, Z_R0, Z_R1. + // Note: + // len is signed int. Counts # characters, not bytes. + unsigned int string_inflate_const(Register src, Register dst, Register tmp, int len); // Kills src. unsigned int has_negatives(Register result, Register src, Register cnt, Register odd_reg, Register even_reg, Register tmp); - // Inflate byte[] to char[]. - unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp); - // Odd_reg contains cnt. Kills src. - unsigned int string_inflate(Register src, Register dst, Register odd_reg, - Register even_reg, Register tmp); - unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register odd_reg, Register even_reg, Register result, int ae); --- old/src/hotspot/cpu/s390/s390.ad 2017-11-14 17:21:48.169279000 +0100 +++ new/src/hotspot/cpu/s390/s390.ad 2017-11-14 17:21:47.780281000 +0100 @@ -10267,14 +10267,14 @@ %} // char[] to byte[] compression -instruct string_compress(iRegP src, rarg5RegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{ +instruct string_compress(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tmp, flagsReg cr) %{ match(Set result (StrCompressedCopy src (Binary dst len))); - effect(TEMP_DEF result, USE_KILL dst, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too. + effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too. 
ins_cost(300); format %{ "String Compress $src->$dst($len) -> $result" %} ins_encode %{ __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register, - $evenReg$$Register, $tmp$$Register); + $tmp$$Register, false); %} ins_pipe(pipe_class_dummy); %} @@ -10293,13 +10293,25 @@ //%} // byte[] to char[] inflation -instruct string_inflate(Universe dummy, rarg5RegP src, iRegP dst, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{ +instruct string_inflate(Universe dummy, iRegP src, iRegP dst, iRegI len, iRegI tmp, flagsReg cr) %{ match(Set dummy (StrInflatedCopy src (Binary dst len))); - effect(USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too. + effect(TEMP tmp, KILL cr); // R0, R1 are killed, too. ins_cost(300); format %{ "String Inflate $src->$dst($len)" %} ins_encode %{ - __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $evenReg$$Register, $tmp$$Register); + __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); + %} + ins_pipe(pipe_class_dummy); +%} + +// byte[] to char[] inflation +instruct string_inflate_const(Universe dummy, iRegP src, iRegP dst, iRegI tmp, immI len, flagsReg cr) %{ + match(Set dummy (StrInflatedCopy src (Binary dst len))); + effect(TEMP tmp, KILL cr); // R0, R1 are killed, too. 
+ ins_cost(300); + format %{ "String Inflate (constLen) $src->$dst($len)" %} + ins_encode %{ + __ string_inflate_const($src$$Register, $dst$$Register, $tmp$$Register, $len$$constant); %} ins_pipe(pipe_class_dummy); %} @@ -10318,14 +10330,14 @@ %} // encode char[] to byte[] in ISO_8859_1 -instruct encode_iso_array(rarg5RegP src, iRegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, iRegI tmp2, flagsReg cr) %{ +instruct encode_iso_array(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tmp, flagsReg cr) %{ match(Set result (EncodeISOArray src (Binary dst len))); - effect(TEMP_DEF result, USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, TEMP tmp2, KILL cr); // R0, R1 are killed, too. + effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too. ins_cost(300); format %{ "Encode array $src->$dst($len) -> $result" %} ins_encode %{ __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register, - $evenReg$$Register, $tmp$$Register, $tmp2$$Register); + $tmp$$Register, true); %} ins_pipe(pipe_class_dummy); %}