--- old/src/hotspot/cpu/x86/assembler_x86.cpp 2018-06-21 22:31:39.196180600 -0700 +++ new/src/hotspot/cpu/x86/assembler_x86.cpp 2018-06-21 22:31:38.624123400 -0700 @@ -137,6 +137,8 @@ _index = noreg; _scale = no_scale; _disp = disp; + _xmmindex = xnoreg; + _isxmmindex = false; switch (rtype) { case relocInfo::external_word_type: _rspec = external_word_Relocation::spec(loc); @@ -176,6 +178,8 @@ _scale = no_scale; _disp = (intptr_t) loc; _rspec = spec; + _xmmindex = xnoreg; + _isxmmindex = false; } #endif // _LP64 @@ -608,6 +612,21 @@ emit_operand((Register)reg, base, index, scale, disp, rspec); } +void Assembler::emit_operand(XMMRegister reg, Register base, XMMRegister index, + Address::ScaleFactor scale, int disp, + RelocationHolder const& rspec) { + if (UseAVX > 2) { + int xreg_enc = reg->encoding(); + int xmmindex_enc = index->encoding(); + XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf); + XMMRegister new_index = as_XMMRegister(xmmindex_enc & 0xf); + emit_operand((Register)new_reg, base, (Register)new_index, scale, disp, rspec); + return; + } + emit_operand((Register)reg, base, (Register)index, scale, disp, rspec); +} + + // Secret local extension to Assembler::WhichOperand: #define end_pc_operand (_WhichOperand_limit) @@ -1108,8 +1127,12 @@ } void Assembler::emit_operand(XMMRegister reg, Address adr) { - emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, - adr._rspec); + if (adr.isxmmindex()) { + emit_operand(reg, adr._base, adr._xmmindex, adr._scale, adr._disp, adr._rspec); + } else { + emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, + adr._rspec); + } } // MMX operations @@ -3396,6 +3419,15 @@ emit_int8(imm8); } +void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x76); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::pause() { emit_int8((unsigned char)0xF3); @@ -3847,6 +3879,17 @@ emit_operand(dst, src); } +void Assembler::vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit? VM_Version::supports_avx() : + vector_len == AVX_256bit? VM_Version::supports_avx2() : + vector_len == AVX_512bit? 
VM_Version::supports_avx512bw() : 0, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x30); + emit_int8((unsigned char) (0xC0 | encode)); +} + + void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) { assert(is_vector_masking(), ""); assert(VM_Version::supports_avx512vlbw(), ""); @@ -3860,7 +3903,6 @@ emit_int8(0x30); emit_operand(dst, src); } - void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); assert(src != xnoreg, "sanity"); @@ -3888,6 +3930,28 @@ emit_operand(src, dst); } +void Assembler::evpmovdb(Address dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(src != xnoreg, "sanity"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_QVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_is_evex_instruction(); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x31); + emit_operand(src, dst); +} + +void Assembler::vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit? VM_Version::supports_avx() : + vector_len == AVX_256bit? VM_Version::supports_avx2() : + vector_len == AVX_512bit? VM_Version::supports_evex() : 0, " "); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x33); + emit_int8((unsigned char)(0xC0 | encode)); +} + // generic void Assembler::pop(Register dst) { int encode = prefix_and_encode(dst->encoding()); @@ -6048,6 +6112,24 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x10); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x12); + emit_int8((unsigned char)(0xC0 | encode)); +} + // Shift packed integers arithmetically right by specified number of bits. 
void Assembler::psraw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -6149,6 +6231,15 @@ emit_operand(dst, src); } +void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_int8((unsigned char)(0xC0 | encode)); +} + + void Assembler::pandn(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); @@ -6184,6 +6275,15 @@ emit_operand(dst, src); } +void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_int8((unsigned char)(0xC0 | encode)); +} + + void Assembler::pxor(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); @@ -6795,6 +6895,20 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(dst != xnoreg, "sanity"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x90); + emit_operand(dst, src); +} // Carry-Less Multiplication Quadword void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { @@ -7420,7 +7534,12 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) { bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0; bool vex_b = adr.base_needs_rex(); - bool vex_x = adr.index_needs_rex(); + bool vex_x; + if (adr.isxmmindex()) { + vex_x = adr.xmmindex_needs_rex(); + } else { + vex_x = adr.index_needs_rex(); + } set_attributes(attributes); attributes->set_current_assembler(this); @@ -7457,7 +7576,13 @@ if (UseAVX > 2 && !attributes->is_legacy_mode()) { bool evex_r = (xreg_enc >= 16); - bool evex_v = (nds_enc >= 16); + bool evex_v; + // EVEX.V' is set to true when VSIB is used as we may need to use higher order XMM registers (16-31) + if (adr.isxmmindex()) { + evex_v = ((adr._xmmindex->encoding() > 15) ? 
true : false); + } else { + evex_v = (nds_enc >= 16); + } attributes->set_is_evex_instruction(); evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc); } else { --- old/src/hotspot/cpu/x86/assembler_x86.hpp 2018-06-21 22:31:42.430504000 -0700 +++ new/src/hotspot/cpu/x86/assembler_x86.hpp 2018-06-21 22:31:41.859446900 -0700 @@ -184,8 +184,10 @@ private: Register _base; Register _index; + XMMRegister _xmmindex; ScaleFactor _scale; int _disp; + bool _isxmmindex; RelocationHolder _rspec; // Easily misused constructors make them private @@ -201,8 +203,10 @@ Address() : _base(noreg), _index(noreg), + _xmmindex(xnoreg), _scale(no_scale), - _disp(0) { + _disp(0), + _isxmmindex(false){ } // No default displacement otherwise Register can be implicitly @@ -211,15 +215,19 @@ Address(Register base, int disp) : _base(base), _index(noreg), + _xmmindex(xnoreg), _scale(no_scale), - _disp(disp) { + _disp(disp), + _isxmmindex(false){ } Address(Register base, Register index, ScaleFactor scale, int disp = 0) : _base (base), _index(index), + _xmmindex(xnoreg), _scale(scale), - _disp (disp) { + _disp (disp), + _isxmmindex(false) { assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); } @@ -227,13 +235,26 @@ Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0) : _base (base), _index(index.register_or_noreg()), + _xmmindex(xnoreg), _scale(scale), - _disp (disp + (index.constant_or_zero() * scale_size(scale))) { + _disp (disp + (index.constant_or_zero() * scale_size(scale))), + _isxmmindex(false){ if (!index.is_register()) scale = Address::no_scale; assert(!_index->is_valid() == (scale == Address::no_scale), "inconsistent address"); } + Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(noreg), + _xmmindex(index), + _scale(scale), + _disp(disp), + _isxmmindex(true) { + assert(!index->is_valid() == (scale == Address::no_scale), + "inconsistent address"); + } + Address plus_disp(int disp) const { Address a = (*this); a._disp += disp; @@ -269,24 +290,29 @@ Address(Register base, ByteSize disp) : _base(base), _index(noreg), + _xmmindex(xnoreg), _scale(no_scale), - _disp(in_bytes(disp)) { + _disp(in_bytes(disp)), + _isxmmindex(false){ } Address(Register base, Register index, ScaleFactor scale, ByteSize disp) : _base(base), _index(index), + _xmmindex(xnoreg), _scale(scale), - _disp(in_bytes(disp)) { + _disp(in_bytes(disp)), + _isxmmindex(false){ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); } - Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp) : _base (base), _index(index.register_or_noreg()), + _xmmindex(xnoreg), _scale(scale), - _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) { + _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))), + _isxmmindex(false) { if (!index.is_register()) scale = Address::no_scale; assert(!_index->is_valid() == (scale == Address::no_scale), "inconsistent address"); @@ -298,8 +324,10 @@ bool uses(Register reg) const { return _base == reg || _index == reg; } Register base() const { return _base; } Register index() const { return _index; } + XMMRegister xmmindex() const { return _xmmindex; } ScaleFactor scale() const { return _scale; } int disp() const { return _disp; } + bool isxmmindex() const { return _isxmmindex; } // Convert the raw encoding form into the form expected by the constructor for // Address. 
An index of 4 (rsp) corresponds to having no index, so convert @@ -317,6 +345,10 @@ return _index != noreg &&_index->encoding() >= 8; } + bool xmmindex_needs_rex() const { + return _xmmindex != xnoreg && _xmmindex->encoding() >= 8; + } + relocInfo::relocType reloc() const { return _rspec.type(); } friend class Assembler; @@ -683,6 +715,9 @@ RelocationHolder const& rspec, int rip_relative_correction = 0); + void emit_operand(XMMRegister reg, Register base, XMMRegister index, Address::ScaleFactor scale, + int disp, RelocationHolder const& rspec); + void emit_operand(Register reg, Address adr, int rip_relative_correction = 0); // operands that only take the original 32bit registers @@ -1551,6 +1586,7 @@ void vpermq(XMMRegister dst, XMMRegister src, int imm8); void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void pause(); @@ -1617,11 +1653,16 @@ void pmovzxbw(XMMRegister dst, Address src); void vpmovzxbw( XMMRegister dst, Address src, int vector_len); + void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len); void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len); void evpmovwb(Address dst, XMMRegister src, int vector_len); void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len); + void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len); + + void evpmovdb(Address dst, XMMRegister src, int vector_len); + #ifndef _LP64 // no 32bit push/pop on amd64 void popl(Address dst); #endif @@ -2021,6 +2062,8 @@ void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) void psraw(XMMRegister dst, int shift); @@ -2036,6 +2079,7 @@ void pand(XMMRegister dst, XMMRegister src); void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Andn packed integers void pandn(XMMRegister dst, XMMRegister src); @@ -2044,6 +2088,7 @@ void por(XMMRegister dst, XMMRegister src); void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Xor packed integers void pxor(XMMRegister dst, XMMRegister src); @@ -2111,6 +2156,8 @@ void evpbroadcastd(XMMRegister dst, Register src, int vector_len); void evpbroadcastq(XMMRegister dst, Register src, int vector_len); + void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len); + // Carry-Less Multiplication Quadword void pclmulqdq(XMMRegister dst, XMMRegister src, int mask); void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); --- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2018-06-21 22:31:45.358796800 -0700 +++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2018-06-21 22:31:44.787739700 -0700 @@ 
-3590,6 +3590,15 @@ } } +void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { + if (reachable(src)) { + Assembler::evmovdquq(dst, as_Address(src), vector_len); + } else { + lea(rscratch, src); + Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len); + } +} + void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { Assembler::movdqa(dst, as_Address(src)); --- old/src/hotspot/cpu/x86/macroAssembler_x86.hpp 2018-06-21 22:31:48.596120500 -0700 +++ new/src/hotspot/cpu/x86/macroAssembler_x86.hpp 2018-06-21 22:31:48.030063900 -0700 @@ -1094,6 +1094,10 @@ void vmovdqu(XMMRegister dst, Address src); void vmovdqu(XMMRegister dst, XMMRegister src); void vmovdqu(XMMRegister dst, AddressLiteral src); + void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } + void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } + void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } + void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch); // Move Aligned Double Quadword void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } @@ -1207,6 +1211,8 @@ void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpmovzxbw(XMMRegister dst, Address src, int vector_len); + void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); } + void vpmovmskb(Register dst, XMMRegister src); void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); --- old/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp 2018-06-21 22:31:51.497410600 -0700 +++ new/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp 2018-06-21 22:31:50.924353300 -0700 @@ -4229,6 +4229,406 @@ return start; } +#ifdef _MSC_VER +#define ALIGNED_(x) __declspec(align(x)) +#else +#define ALIGNED_(x) __attribute__ ((aligned(x))) +#endif + //base64 character set +ALIGNED_(64) address base64_charset_addr() { + StubCodeMark mark(this, "StubRoutines", "base64_charset"); + address start = __ pc(); + __ emit_data64(0x0000004200000041, relocInfo::none); + __ emit_data64(0x0000004400000043, relocInfo::none); + __ emit_data64(0x0000004600000045, relocInfo::none); + __ emit_data64(0x0000004800000047, relocInfo::none); + __ emit_data64(0x0000004a00000049, relocInfo::none); + __ emit_data64(0x0000004c0000004b, relocInfo::none); + __ emit_data64(0x0000004e0000004d, relocInfo::none); + __ emit_data64(0x000000500000004f, relocInfo::none); + __ emit_data64(0x0000005200000051, relocInfo::none); + __ emit_data64(0x0000005400000053, relocInfo::none); + __ emit_data64(0x0000005600000055, relocInfo::none); + __ emit_data64(0x0000005800000057, relocInfo::none); + __ emit_data64(0x0000005a00000059, relocInfo::none); + __ emit_data64(0x0000006200000061, relocInfo::none); + __ emit_data64(0x0000006400000063, relocInfo::none); + __ emit_data64(0x0000006600000065, relocInfo::none); + __ emit_data64(0x0000006800000067, relocInfo::none); + __ emit_data64(0x0000006a00000069, relocInfo::none); + __ emit_data64(0x0000006c0000006b, relocInfo::none); + __ emit_data64(0x0000006e0000006d, relocInfo::none); + __ emit_data64(0x000000700000006f, relocInfo::none); + __ emit_data64(0x0000007200000071, relocInfo::none); + __ emit_data64(0x0000007400000073, relocInfo::none); + __ emit_data64(0x0000007600000075, relocInfo::none); + 
__ emit_data64(0x0000007800000077, relocInfo::none); + __ emit_data64(0x0000007a00000079, relocInfo::none); + __ emit_data64(0x0000003100000030, relocInfo::none); + __ emit_data64(0x0000003300000032, relocInfo::none); + __ emit_data64(0x0000003500000034, relocInfo::none); + __ emit_data64(0x0000003700000036, relocInfo::none); + __ emit_data64(0x0000003900000038, relocInfo::none); + __ emit_data64(0x0000002f0000002b, relocInfo::none); + return start; + } + + //base64 url character set +ALIGNED_(64) address base64url_charset_addr() { + StubCodeMark mark(this, "StubRoutines", "base64url_charset"); + address start = __ pc(); + __ emit_data64(0x0000004200000041, relocInfo::none); + __ emit_data64(0x0000004400000043, relocInfo::none); + __ emit_data64(0x0000004600000045, relocInfo::none); + __ emit_data64(0x0000004800000047, relocInfo::none); + __ emit_data64(0x0000004a00000049, relocInfo::none); + __ emit_data64(0x0000004c0000004b, relocInfo::none); + __ emit_data64(0x0000004e0000004d, relocInfo::none); + __ emit_data64(0x000000500000004f, relocInfo::none); + __ emit_data64(0x0000005200000051, relocInfo::none); + __ emit_data64(0x0000005400000053, relocInfo::none); + __ emit_data64(0x0000005600000055, relocInfo::none); + __ emit_data64(0x0000005800000057, relocInfo::none); + __ emit_data64(0x0000005a00000059, relocInfo::none); + __ emit_data64(0x0000006200000061, relocInfo::none); + __ emit_data64(0x0000006400000063, relocInfo::none); + __ emit_data64(0x0000006600000065, relocInfo::none); + __ emit_data64(0x0000006800000067, relocInfo::none); + __ emit_data64(0x0000006a00000069, relocInfo::none); + __ emit_data64(0x0000006c0000006b, relocInfo::none); + __ emit_data64(0x0000006e0000006d, relocInfo::none); + __ emit_data64(0x000000700000006f, relocInfo::none); + __ emit_data64(0x0000007200000071, relocInfo::none); + __ emit_data64(0x0000007400000073, relocInfo::none); + __ emit_data64(0x0000007600000075, relocInfo::none); + __ emit_data64(0x0000007800000077, relocInfo::none); + __ emit_data64(0x0000007a00000079, relocInfo::none); + __ emit_data64(0x0000003100000030, relocInfo::none); + __ emit_data64(0x0000003300000032, relocInfo::none); + __ emit_data64(0x0000003500000034, relocInfo::none); + __ emit_data64(0x0000003700000036, relocInfo::none); + __ emit_data64(0x0000003900000038, relocInfo::none); + __ emit_data64(0x0000005f0000002d, relocInfo::none); + + return start; +} + +ALIGNED_(64) address base64_bswap_mask_addr() { + StubCodeMark mark(this, "StubRoutines", "bswap_mask_base64"); + address start = __ pc(); + __ emit_data64(0x0504038002010080, relocInfo::none); + __ emit_data64(0x0b0a098008070680, relocInfo::none); + __ emit_data64(0x0908078006050480, relocInfo::none); + __ emit_data64(0x0f0e0d800c0b0a80, relocInfo::none); + __ emit_data64(0x0605048003020180, relocInfo::none); + __ emit_data64(0x0c0b0a8009080780, relocInfo::none); + __ emit_data64(0x0504038002010080, relocInfo::none); + __ emit_data64(0x0b0a098008070680, relocInfo::none); + + return start; +} + +ALIGNED_(64) address base64_right_shift_mask_addr() { + StubCodeMark mark(this, "StubRoutines", "right_shift_mask"); + address start = __ pc(); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ 
emit_data64(0x0006000400020000, relocInfo::none); + + return start; + } + +ALIGNED_(64) address base64_left_shift_mask_addr() { + StubCodeMark mark(this, "StubRoutines", "left_shift_mask"); + address start = __ pc(); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + + return start; + } + +ALIGNED_(64) address base64_and_mask_addr() { + StubCodeMark mark(this, "StubRoutines", "and_mask"); + address start = __ pc(); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + return start; + } + +ALIGNED_(64) address base64_gather_mask_addr() { + StubCodeMark mark(this, "StubRoutines", "gather_mask"); + address start = __ pc(); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + return start; + } + +// Code for generating Base64 encoding. +// Intrinsic function prototype in Base64.java: +// private void implEncode(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) { + address generate_base64_implEncode() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "implEncode"); + address start = __ pc(); + __ enter(); + + // Save callee-saved registers before using them + __ push(r12); + __ push(r13); + __ push(r14); + __ push(r15); + __ push(rbx); + + // arguments + const Register source = c_rarg0; // Source Array + const Register start_offset = c_rarg1; // start offset + const Register end_offset = c_rarg2; // end offset + const Register dest = c_rarg3; // destination array + +#ifndef _WIN64 + const Register dp = c_rarg4; // Position for writing to dest array + const Register isURL = c_rarg5;// Base64 or URL character set +#else + const Address dp_mem(rbp, 6 * wordSize); // length is on stack on Win64 + const Address isURL_mem(rbp, 7 * wordSize); + const Register isURL = r10; // pick the volatile windows register + const Register dp = r12; + __ movl(dp, dp_mem); + __ movl(isURL, isURL_mem); +#endif + + const Register length = r14; + Label L_process80, L_process32, L_process3, L_exit, L_processdata; + + // calculate length from offsets + __ movl(length, end_offset); + __ subl(length, start_offset); + __ cmpl(length, 0); + __ jcc(Assembler::lessEqual, L_exit); + + // Save k1 value in rbx + __ kmovql(rbx, k1); + __ lea(r11, ExternalAddress(StubRoutines::x86::base64_charset_addr())); + // check if base64 charset(isURL=0) or base64 url charset(isURL=1) needs to be loaded + __ cmpl(isURL, 0); + __ jcc(Assembler::equal, L_processdata); + __ lea(r11, ExternalAddress(StubRoutines::x86::base64url_charset_addr())); + + // load masks required for encoding data + __ BIND(L_processdata); + __ movdqu(xmm16, ExternalAddress(StubRoutines::x86::base64_gather_mask_addr())); + // Set 64 bits of K register. 
+ __ evpcmpeqb(k1, xmm16, xmm16, Assembler::AVX_512bit); + __ evmovdquq(xmm12, ExternalAddress(StubRoutines::x86::base64_bswap_mask_addr()), Assembler::AVX_256bit, r13); + __ evmovdquq(xmm13, ExternalAddress(StubRoutines::x86::base64_right_shift_mask_addr()), Assembler::AVX_512bit, r13); + __ evmovdquq(xmm14, ExternalAddress(StubRoutines::x86::base64_left_shift_mask_addr()), Assembler::AVX_512bit, r13); + __ evmovdquq(xmm15, ExternalAddress(StubRoutines::x86::base64_and_mask_addr()), Assembler::AVX_512bit, r13); + + // Vector Base64 implementation, producing 96 bytes of encoded data + __ BIND(L_process80); + __ cmpl(length, 80); + __ jcc(Assembler::below, L_process32); + __ evmovdquq(xmm0, Address(source, start_offset, Address::times_1, 0), Assembler::AVX_256bit); + __ evmovdquq(xmm1, Address(source, start_offset, Address::times_1, 24), Assembler::AVX_256bit); + __ evmovdquq(xmm2, Address(source, start_offset, Address::times_1, 48), Assembler::AVX_256bit); + + //permute the input data in such a manner that we have continuity of the source + __ vpermq(xmm3, xmm0, 148, Assembler::AVX_256bit); + __ vpermq(xmm4, xmm1, 148, Assembler::AVX_256bit); + __ vpermq(xmm5, xmm2, 148, Assembler::AVX_256bit); + + //shuffle input and group 3 bytes of data and to it add 0 as the 4th byte. + //we can deal with 12 bytes at a time in a 128 bit register + __ vpshufb(xmm3, xmm3, xmm12, Assembler::AVX_256bit); + __ vpshufb(xmm4, xmm4, xmm12, Assembler::AVX_256bit); + __ vpshufb(xmm5, xmm5, xmm12, Assembler::AVX_256bit); + + //convert byte to word. Each 128 bit register will have 6 bytes for processing + __ vpmovzxbw(xmm3, xmm3, Assembler::AVX_512bit); + __ vpmovzxbw(xmm4, xmm4, Assembler::AVX_512bit); + __ vpmovzxbw(xmm5, xmm5, Assembler::AVX_512bit); + + // Extract bits in the following pattern 6, 4+2, 2+4, 6 to convert 3, 8 bit numbers to 4, 6 bit numbers + __ evpsrlvw(xmm0, xmm3, xmm13, Assembler::AVX_512bit); + __ evpsrlvw(xmm1, xmm4, xmm13, Assembler::AVX_512bit); + __ evpsrlvw(xmm2, xmm5, xmm13, Assembler::AVX_512bit); + + __ evpsllvw(xmm3, xmm3, xmm14, Assembler::AVX_512bit); + __ evpsllvw(xmm4, xmm4, xmm14, Assembler::AVX_512bit); + __ evpsllvw(xmm5, xmm5, xmm14, Assembler::AVX_512bit); + + __ vpsrlq(xmm0, xmm0, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit); + + __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit); + __ vpsllq(xmm4, xmm4, 8, Assembler::AVX_512bit); + __ vpsllq(xmm5, xmm5, 8, Assembler::AVX_512bit); + + __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit); + __ vpandq(xmm4, xmm4, xmm15, Assembler::AVX_512bit); + __ vpandq(xmm5, xmm5, xmm15, Assembler::AVX_512bit); + + // Get the final 4*6 bits base64 encoding + __ vporq(xmm3, xmm3, xmm0, Assembler::AVX_512bit); + __ vporq(xmm4, xmm4, xmm1, Assembler::AVX_512bit); + __ vporq(xmm5, xmm5, xmm2, Assembler::AVX_512bit); + + // Shift + __ vpsrlq(xmm3, xmm3, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm4, xmm4, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm5, xmm5, 8, Assembler::AVX_512bit); + + // look up 6 bits in the base64 character set to fetch the encoding + // we are converting word to dword as gather instructions need dword indices for looking up encoding + __ vextracti64x4(xmm6, xmm3, 0); + __ vpmovzxwd(xmm0, xmm6, Assembler::AVX_512bit); + __ vextracti64x4(xmm6, xmm3, 1); + __ vpmovzxwd(xmm1, xmm6, Assembler::AVX_512bit); + + __ vextracti64x4(xmm6, xmm4, 0); + __ vpmovzxwd(xmm2, xmm6, Assembler::AVX_512bit); + __ vextracti64x4(xmm6, xmm4, 1); + __ vpmovzxwd(xmm3, 
xmm6, Assembler::AVX_512bit); + + __ vextracti64x4(xmm4, xmm5, 0); + __ vpmovzxwd(xmm6, xmm4, Assembler::AVX_512bit); + + __ vextracti64x4(xmm4, xmm5, 1); + __ vpmovzxwd(xmm7, xmm4, Assembler::AVX_512bit); + + __ kmovql(k2, k1); + __ evpgatherdd(xmm4, k2, Address(r11, xmm0, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k1); + __ evpgatherdd(xmm5, k2, Address(r11, xmm1, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k1); + __ evpgatherdd(xmm8, k2, Address(r11, xmm2, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k1); + __ evpgatherdd(xmm9, k2, Address(r11, xmm3, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k1); + __ evpgatherdd(xmm10, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k1); + __ evpgatherdd(xmm11, k2, Address(r11, xmm7, Address::times_4, 0), Assembler::AVX_512bit); + + //Down convert dword to byte. Final output is 16*6 = 96 bytes long + __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm4, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm5, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 32), xmm8, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 48), xmm9, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 64), xmm10, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 80), xmm11, Assembler::AVX_512bit); + + __ addq(dest, 96); + __ addq(source, 72); + __ subq(length, 72); + __ jmp(L_process80); + + // Vector Base64 implementation generating 32 bytes of encoded data + __ BIND(L_process32); + __ cmpl(length, 32); + __ jcc(Assembler::below, L_process3); + __ evmovdquq(xmm0, Address(source, start_offset), Assembler::AVX_256bit); + __ vpermq(xmm0, xmm0, 148, Assembler::AVX_256bit); + __ vpshufb(xmm6, xmm0, xmm12, Assembler::AVX_256bit); + __ vpmovzxbw(xmm6, xmm6, Assembler::AVX_512bit); + __ evpsrlvw(xmm2, xmm6, xmm13, Assembler::AVX_512bit); + __ evpsllvw(xmm3, xmm6, xmm14, Assembler::AVX_512bit); + + __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit); + __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit); + __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit); + __ vporq(xmm1, xmm2, xmm3, Assembler::AVX_512bit); + __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit); + __ vextracti64x4(xmm9, xmm1, 0); + __ vpmovzxwd(xmm6, xmm9, Assembler::AVX_512bit); + __ vextracti64x4(xmm9, xmm1, 1); + __ vpmovzxwd(xmm5, xmm9, Assembler::AVX_512bit); + __ kmovql(k2, k1); + __ evpgatherdd(xmm8, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k1); + __ evpgatherdd(xmm10, k2, Address(r11, xmm5, Address::times_4, 0), Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm8, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm10, Assembler::AVX_512bit); + __ subq(length, 24); + __ addq(dest, 32); + __ addq(source, 24); + __ jmp(L_process32); + + // Scalar data processing takes 3 bytes at a time and produces 4 bytes of encoded data + /* This code corresponds to the scalar version of the following snippet in Base64.java + ** int bits = (src[sp0++] & 0xff) << 16 |(src[sp0++] & 0xff) << 8 |(src[sp0++] & 0xff); + ** dst[dp0++] = (byte)base64[(bits >>> 18) & 0x3f]; + ** dst[dp0++] = (byte)base64[(bits >>> 12) & 0x3f]; + ** dst[dp0++] = (byte)base64[(bits >>> 6) & 0x3f]; + ** dst[dp0++] = (byte)base64[bits & 0x3f];*/ + __ BIND(L_process3); + __ cmpl(length, 3); + __ jcc(Assembler::below, L_exit); 
+ // Read 1 byte at a time + __ movzbl(rax, Address(source, start_offset)); + __ shll(rax, 0x10); + __ movl(r15, rax); + __ movzbl(rax, Address(source, start_offset, Address::times_1, 1)); + __ shll(rax, 0x8); + __ movzwl(rax, rax); + __ orl(r15, rax); + __ movzbl(rax, Address(source, start_offset, Address::times_1, 2)); + __ orl(rax, r15); + // Save 3 bytes read in r15 + __ movl(r15, rax); + __ shrl(rax, 0x12); + __ andl(rax, 0x3f); + // rax contains the index, r11 contains base64 lookup table + __ movb(rax, Address(r11, rax, Address::times_4)); + // Write the encoded byte to destination + __ movb(Address(dest, dp, Address::times_1, 0), rax); + __ movl(rax, r15); + __ shrl(rax, 0xc); + __ andl(rax, 0x3f); + __ movb(rax, Address(r11, rax, Address::times_4)); + __ movb(Address(dest, dp, Address::times_1, 1), rax); + __ movl(rax, r15); + __ shrl(rax, 0x6); + __ andl(rax, 0x3f); + __ movb(rax, Address(r11, rax, Address::times_4)); + __ movb(Address(dest, dp, Address::times_1, 2), rax); + __ movl(rax, r15); + __ andl(rax, 0x3f); + __ movb(rax, Address(r11, rax, Address::times_4)); + __ movb(Address(dest, dp, Address::times_1, 3), rax); + __ subl(length, 3); + __ addq(dest, 4); + __ addq(source, 3); + __ jmp(L_process3); + __ BIND(L_exit); + // restore k1 register value + __ kmovql(k1, rbx); + __ pop(rbx); + __ pop(r15); + __ pop(r14); + __ pop(r13); + __ pop(r12); + __ leave(); + __ ret(0); + return start; + } + /** * Arguments: * @@ -5106,6 +5506,17 @@ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); } + if (UseBASE64Intrinsics) { + StubRoutines::x86::_and_mask = base64_and_mask_addr(); + StubRoutines::x86::_bswap_mask = base64_bswap_mask_addr(); + StubRoutines::x86::_base64_charset = base64_charset_addr(); + StubRoutines::x86::_url_charset = base64url_charset_addr(); + StubRoutines::x86::_gather_mask = base64_gather_mask_addr(); + StubRoutines::x86::_left_shift_mask = base64_left_shift_mask_addr(); + StubRoutines::x86::_right_shift_mask = base64_right_shift_mask_addr(); + StubRoutines::_base64_implEncode = generate_base64_implEncode(); + } + // Safefetch stubs. 
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, --- old/src/hotspot/cpu/x86/stubRoutines_x86.cpp 2018-06-21 22:31:54.566717500 -0700 +++ new/src/hotspot/cpu/x86/stubRoutines_x86.cpp 2018-06-21 22:31:53.995660400 -0700 @@ -50,6 +50,15 @@ address StubRoutines::x86::_k256_W_adr = NULL; address StubRoutines::x86::_k512_W_addr = NULL; address StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = NULL; +// Base64 masks +address StubRoutines::x86::_bswap_mask = NULL; +address StubRoutines::x86::_base64_charset = NULL; +address StubRoutines::x86::_gather_mask = NULL; +address StubRoutines::x86::_right_shift_mask = NULL; +address StubRoutines::x86::_left_shift_mask = NULL; +address StubRoutines::x86::_and_mask = NULL; +address StubRoutines::x86::_url_charset = NULL; + #endif address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL; --- old/src/hotspot/cpu/x86/stubRoutines_x86.hpp 2018-06-21 22:31:57.387999600 -0700 +++ new/src/hotspot/cpu/x86/stubRoutines_x86.hpp 2018-06-21 22:31:56.817942600 -0700 @@ -138,6 +138,14 @@ static address _k512_W_addr; // byte flip mask for sha512 static address _pshuffle_byte_flip_mask_addr_sha512; + // Masks for base64 + static address _base64_charset; + static address _bswap_mask; + static address _gather_mask; + static address _right_shift_mask; + static address _left_shift_mask; + static address _and_mask; + static address _url_charset; #endif // byte flip mask for sha256 static address _pshuffle_byte_flip_mask_addr; @@ -198,6 +206,13 @@ static address k256_W_addr() { return _k256_W_adr; } static address k512_W_addr() { return _k512_W_addr; } static address pshuffle_byte_flip_mask_addr_sha512() { return _pshuffle_byte_flip_mask_addr_sha512; } + static address base64_charset_addr() { return _base64_charset; } + static address base64url_charset_addr() { return _url_charset; } + static address base64_bswap_mask_addr() { return _bswap_mask; } + static address base64_gather_mask_addr() { return _gather_mask; } + static address base64_right_shift_mask_addr() { return _right_shift_mask; } + static address base64_left_shift_mask_addr() { return _left_shift_mask; } + static address base64_and_mask_addr() { return _and_mask; } #endif static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; } static void generate_CRC32C_table(bool is_pclmulqdq_supported); --- old/src/hotspot/cpu/x86/vm_version_x86.cpp 2018-06-21 22:32:00.211281900 -0700 +++ new/src/hotspot/cpu/x86/vm_version_x86.cpp 2018-06-21 22:31:59.634224200 -0700 @@ -855,6 +855,17 @@ FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); } + // Base64 Intrinsics (Check the condition for which the intrinsic will be active) + if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) { + if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { + UseBASE64Intrinsics = true; + } + } else if (UseBASE64Intrinsics) { + if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) + warning("Base64 intrinsic requires EVEX instructions on this CPU"); + FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); + } + if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions if (FLAG_IS_DEFAULT(UseFMA)) { UseFMA = true; --- old/src/hotspot/share/aot/aotCodeHeap.cpp 2018-06-21 22:32:03.097570500 -0700 +++ new/src/hotspot/share/aot/aotCodeHeap.cpp 2018-06-21 22:32:02.515512300 -0700 @@ -516,6 +516,7 @@ SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_counterMode_AESCrypt", address, StubRoutines::_counterMode_AESCrypt); 
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_ghash_processBlocks", address, StubRoutines::_ghash_processBlocks); + SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_base64_implEncode", address, StubRoutines::_base64_implEncode); SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_crc32c_table_addr", address, StubRoutines::_crc32c_table_addr); SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_updateBytesCRC32C", address, StubRoutines::_updateBytesCRC32C); SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_updateBytesAdler32", address, StubRoutines::_updateBytesAdler32); --- old/src/hotspot/share/classfile/vmSymbols.cpp 2018-06-21 22:32:05.948855600 -0700 +++ new/src/hotspot/share/classfile/vmSymbols.cpp 2018-06-21 22:32:05.376798400 -0700 @@ -735,6 +735,9 @@ case vmIntrinsics::_ghash_processBlocks: if (!UseGHASHIntrinsics) return true; break; + case vmIntrinsics::_base64_implEncode: + if (!UseBASE64Intrinsics) return true; + break; case vmIntrinsics::_updateBytesCRC32C: case vmIntrinsics::_updateDirectByteBufferCRC32C: if (!UseCRC32CIntrinsics) return true; --- old/src/hotspot/share/classfile/vmSymbols.hpp 2018-06-21 22:32:08.805141200 -0700 +++ new/src/hotspot/share/classfile/vmSymbols.hpp 2018-06-21 22:32:08.232083900 -0700 @@ -1015,6 +1015,12 @@ do_name( implCompressMB_name, "implCompressMultiBlock0") \ do_signature(implCompressMB_signature, "([BII)I") \ \ + /* support for java.util.Base64.Encoder*/ \ + do_class(java_util_Base64_Encoder, "java/util/Base64$Encoder") \ + do_intrinsic(_base64_implEncode, java_util_Base64_Encoder, implEncode_name, implEncode_signature, F_R) \ + do_name(implEncode_name, "implEncode") \ + do_signature(implEncode_signature, "([BII[BIZ)V") \ + \ /* support for com.sun.crypto.provider.GHASH */ \ do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH") \ do_intrinsic(_ghash_processBlocks, com_sun_crypto_provider_ghash, processBlocks_name, ghash_processBlocks_signature, F_S) \ --- old/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2018-06-21 22:32:11.696430300 -0700 +++ new/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2018-06-21 22:32:11.129373600 -0700 @@ -297,6 +297,7 @@ static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \ static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \ static_field(StubRoutines, _counterMode_AESCrypt, address) \ + static_field(StubRoutines, _base64_implEncode, address) \ static_field(StubRoutines, _ghash_processBlocks, address) \ static_field(StubRoutines, _sha1_implCompress, address) \ static_field(StubRoutines, _sha1_implCompressMB, address) \ --- old/src/hotspot/share/opto/c2compiler.cpp 2018-06-21 22:32:14.529713600 -0700 +++ new/src/hotspot/share/opto/c2compiler.cpp 2018-06-21 22:32:13.961656800 -0700 @@ -583,6 +583,7 @@ case vmIntrinsics::_montgomerySquare: case vmIntrinsics::_vectorizedMismatch: case vmIntrinsics::_ghash_processBlocks: + case vmIntrinsics::_base64_implEncode: case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: --- old/src/hotspot/share/opto/escape.cpp 2018-06-21 22:32:17.345995200 -0700 +++ new/src/hotspot/share/opto/escape.cpp 2018-06-21 22:32:16.775938200 -0700 @@ -990,6 +990,7 @@ strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 || strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 || strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 || + strcmp(call->as_CallLeaf()->_name, "implEncode") == 0 || strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") 
== 0 || strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 || strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 || --- old/src/hotspot/share/opto/library_call.cpp 2018-06-21 22:32:20.289289500 -0700 +++ new/src/hotspot/share/opto/library_call.cpp 2018-06-21 22:32:19.719232500 -0700 @@ -301,6 +301,7 @@ Node* get_key_start_from_aescrypt_object(Node* aescrypt_object); Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object); bool inline_ghash_processBlocks(); + bool inline_base64_implEncode(); bool inline_sha_implCompress(vmIntrinsics::ID id); bool inline_digestBase_implCompressMB(int predicate); bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA, @@ -836,6 +837,8 @@ case vmIntrinsics::_ghash_processBlocks: return inline_ghash_processBlocks(); + case vmIntrinsics::_base64_implEncode: + return inline_base64_implEncode(); case vmIntrinsics::_encodeISOArray: case vmIntrinsics::_encodeByteISOArray: @@ -6633,6 +6636,35 @@ return true; } +bool LibraryCallKit::inline_base64_implEncode() { + address stubAddr; + const char *stubName; + assert(UseBASE64Intrinsics, "need Base64 intrinsics support"); + assert(callee()->signature()->size() == 6, "base64_implEncode has 6 parameters"); + stubAddr = StubRoutines::base64_implEncode(); + stubName = "implEncode"; + + if (!stubAddr) return false; + Node* base64obj = argument(0); + Node* src = argument(1); + Node* offset = argument(2); + Node* len = argument(3); + Node* dest = argument(4); + Node* dp = argument(5); + Node* isURL = argument(6); + + Node* src_start = array_element_address(src, intcon(0), T_BYTE); + assert(src_start, "source array is NULL"); + Node* dest_start = array_element_address(dest, intcon(0), T_BYTE); + assert(dest_start, "destination array is NULL"); + + Node* base64 = make_runtime_call(RC_LEAF, + OptoRuntime::base64_implEncode_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, offset, len, dest_start, dp, isURL); + return true; +} + //------------------------------inline_sha_implCompress----------------------- // // Calculate SHA (i.e., SHA-1) for single-block byte[] array. 
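A minimal sketch (illustrative only, not part of the patch) of how the new intrinsic path can be exercised from Java once UseBASE64Intrinsics is enabled on a CPU with the required AVX512BW/AVX512VL support; the class name, input string, and warm-up iteration count are assumptions chosen only to make Encoder.implEncode hot enough for C2 to install the intrinsic shown in library_call.cpp above:

// Illustrative sketch: exercises java.util.Base64 so that C2 compiles the
// intrinsified Encoder.implEncode path (assumes -XX:+UseBASE64Intrinsics on
// supported hardware; class name and loop count are arbitrary).
import java.util.Base64;

public class Base64EncodeDemo {
    public static void main(String[] args) {
        byte[] src = "Many hands make light work.".getBytes();
        String encoded = null;
        for (int i = 0; i < 100_000; i++) {           // warm up so C2 kicks in
            encoded = Base64.getEncoder().encodeToString(src);
        }
        System.out.println(encoded);                  // TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsu
    }
}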
--- old/src/hotspot/share/opto/runtime.cpp 2018-06-21 22:32:23.403600900 -0700 +++ new/src/hotspot/share/opto/runtime.cpp 2018-06-21 22:32:22.822542800 -0700 @@ -1154,6 +1154,27 @@ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); return TypeFunc::make(domain, range); } +// Base64 encode function +const TypeFunc* OptoRuntime::base64_implEncode_Type() { + int argcnt = 6; + + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // src array + fields[argp++] = TypeInt::INT; // offset + fields[argp++] = TypeInt::INT; // length + fields[argp++] = TypePtr::NOTNULL; // dest array + fields[argp++] = TypeInt::INT; // dp + fields[argp++] = TypeInt::BOOL; // isURL + assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms + 0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} //------------- Interpreter state access for on stack replacement const TypeFunc* OptoRuntime::osr_end_Type() { --- old/src/hotspot/share/opto/runtime.hpp 2018-06-21 22:32:26.307891300 -0700 +++ new/src/hotspot/share/opto/runtime.hpp 2018-06-21 22:32:25.728833400 -0700 @@ -299,6 +299,7 @@ static const TypeFunc* vectorizedMismatch_Type(); static const TypeFunc* ghash_processBlocks_Type(); + static const TypeFunc* base64_implEncode_Type(); static const TypeFunc* updateBytesCRC32_Type(); static const TypeFunc* updateBytesCRC32C_Type(); --- old/src/hotspot/share/runtime/globals.hpp 2018-06-21 22:32:29.117172200 -0700 +++ new/src/hotspot/share/runtime/globals.hpp 2018-06-21 22:32:28.548115300 -0700 @@ -688,6 +688,9 @@ diagnostic(bool, UseGHASHIntrinsics, false, \ "Use intrinsics for GHASH versions of crypto") \ \ + product(bool, UseBASE64Intrinsics, false, \ + "Use intrinsics for java.util.Base64") \ + \ product(size_t, LargePageSizeInBytes, 0, \ "Large page size (0 to let VM choose the page size)") \ range(0, max_uintx) \ --- old/src/hotspot/share/runtime/stubRoutines.cpp 2018-06-21 22:32:32.060466500 -0700 +++ new/src/hotspot/share/runtime/stubRoutines.cpp 2018-06-21 22:32:31.489409400 -0700 @@ -128,6 +128,7 @@ address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL; address StubRoutines::_counterMode_AESCrypt = NULL; address StubRoutines::_ghash_processBlocks = NULL; +address StubRoutines::_base64_implEncode = NULL; address StubRoutines::_sha1_implCompress = NULL; address StubRoutines::_sha1_implCompressMB = NULL; --- old/src/hotspot/share/runtime/stubRoutines.hpp 2018-06-21 22:32:34.865747000 -0700 +++ new/src/hotspot/share/runtime/stubRoutines.hpp 2018-06-21 22:32:34.294689900 -0700 @@ -172,6 +172,7 @@ static address _cipherBlockChaining_decryptAESCrypt; static address _counterMode_AESCrypt; static address _ghash_processBlocks; + static address _base64_implEncode; static address _sha1_implCompress; static address _sha1_implCompressMB; @@ -347,7 +348,7 @@ static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; } static address counterMode_AESCrypt() { return _counterMode_AESCrypt; } static address ghash_processBlocks() { return _ghash_processBlocks; } - + static address base64_implEncode() { return _base64_implEncode; } static address sha1_implCompress() { return _sha1_implCompress; } static address sha1_implCompressMB() { return _sha1_implCompressMB; } 
static address sha256_implCompress() { return _sha256_implCompress; } --- old/src/hotspot/share/runtime/vmStructs.cpp 2018-06-21 22:32:37.683028700 -0700 +++ new/src/hotspot/share/runtime/vmStructs.cpp 2018-06-21 22:32:37.112971700 -0700 @@ -618,6 +618,7 @@ static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \ static_field(StubRoutines, _counterMode_AESCrypt, address) \ static_field(StubRoutines, _ghash_processBlocks, address) \ + static_field(StubRoutines, _base64_implEncode, address) \ static_field(StubRoutines, _updateBytesCRC32, address) \ static_field(StubRoutines, _crc_table_adr, address) \ static_field(StubRoutines, _crc32c_table_addr, address) \ --- old/src/java.base/share/classes/java/util/Base64.java 2018-06-21 22:32:40.624322800 -0700 +++ new/src/java.base/share/classes/java/util/Base64.java 2018-06-21 22:32:40.047265100 -0700 @@ -31,6 +31,7 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import jdk.internal.HotSpotIntrinsicCandidate; /** * This class consists exclusively of static methods for obtaining @@ -390,6 +391,26 @@ return new Encoder(isURL, newline, linemax, false); } + void generateImplEncode(byte[] src, int sp, int sl, byte[] dst, int dp) { + if (sp > sl) + throw new ArrayIndexOutOfBoundsException( + "Start offset " + sp + ", End offset " +sl); + implEncode(src, sp, sl, dst, dp, isURL); + } + + @HotSpotIntrinsicCandidate + private void implEncode(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) { + char[] base64 = isURL ? toBase64URL : toBase64; + for (int sp0 = sp, dp0 = dp ; sp0 < sl; ) { + int bits = (src[sp0++] & 0xff) << 16 | + (src[sp0++] & 0xff) << 8 | + (src[sp0++] & 0xff); + dst[dp0++] = (byte)base64[(bits >>> 18) & 0x3f]; + dst[dp0++] = (byte)base64[(bits >>> 12) & 0x3f]; + dst[dp0++] = (byte)base64[(bits >>> 6) & 0x3f]; + dst[dp0++] = (byte)base64[bits & 0x3f]; + } + } private int encode0(byte[] src, int off, int end, byte[] dst) { char[] base64 = isURL ? toBase64URL : toBase64; int sp = off; @@ -400,15 +421,7 @@ int dp = 0; while (sp < sl) { int sl0 = Math.min(sp + slen, sl); - for (int sp0 = sp, dp0 = dp ; sp0 < sl0; ) { - int bits = (src[sp0++] & 0xff) << 16 | - (src[sp0++] & 0xff) << 8 | - (src[sp0++] & 0xff); - dst[dp0++] = (byte)base64[(bits >>> 18) & 0x3f]; - dst[dp0++] = (byte)base64[(bits >>> 12) & 0x3f]; - dst[dp0++] = (byte)base64[(bits >>> 6) & 0x3f]; - dst[dp0++] = (byte)base64[bits & 0x3f]; - } + generateImplEncode(src, sp, sl0, dst, dp); int dlen = (sl0 - sp) / 3 * 4; dp += dlen; sp = sl0; --- old/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java 2018-06-21 22:32:43.494609800 -0700 +++ new/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java 2018-06-21 22:32:42.919552300 -0700 @@ -213,6 +213,7 @@ {"StubRoutines::_counterMode_AESCrypt", "_aot_stub_routines_counterMode_AESCrypt" }, {"StubRoutines::_ghash_processBlocks", "_aot_stub_routines_ghash_processBlocks" }, + {"StubRoutines::_base64_implEncode", "_aot_stub_routines_base64_implEncode" }, {"StubRoutines::_crc32c_table_addr", "_aot_stub_routines_crc32c_table_addr" }, {"StubRoutines::_updateBytesCRC32C", "_aot_stub_routines_updateBytesCRC32C" }, {"StubRoutines::_updateBytesAdler32", "_aot_stub_routines_updateBytesAdler32" },
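For reference, a small worked example (illustrative Java mirroring the implEncode loop added to Base64.java above; the class name and the local charset literal are assumptions made only for this sketch) of the 3-byte to 4-sextet split that both implEncode and the scalar tail of the stub perform:

// Worked example: three input bytes are packed into a 24-bit value and split
// into four 6-bit indices (shifts of 18, 12, 6 and 0, each masked with 0x3f),
// which are then looked up in the base64 character table.
public class Base64BitsDemo {
    private static final char[] BASE64 =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    public static void main(String[] args) {
        byte[] src = {'M', 'a', 'n'};                 // 0x4D 0x61 0x6E
        int bits = (src[0] & 0xff) << 16 |
                   (src[1] & 0xff) << 8 |
                   (src[2] & 0xff);                   // 0x4D616E
        char c0 = BASE64[(bits >>> 18) & 0x3f];       // 19 -> 'T'
        char c1 = BASE64[(bits >>> 12) & 0x3f];       // 22 -> 'W'
        char c2 = BASE64[(bits >>> 6) & 0x3f];        //  5 -> 'F'
        char c3 = BASE64[bits & 0x3f];                // 46 -> 'u'
        System.out.println("" + c0 + c1 + c2 + c3);   // prints TWFu
    }
}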