< prev index next >

src/cpu/x86/vm/stubGenerator_x86_32.cpp

Print this page

        

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.

@@ -2716,10 +2716,188 @@
     __ jmp(L_exit);
 
     return start;
   }
 
+  // byte swap x86 long
+  address generate_ghash_long_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+    address start = __ pc();
+    __ emit_data(0x0b0a0908, relocInfo::none, 0);
+    __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
+    __ emit_data(0x03020100, relocInfo::none, 0);
+    __ emit_data(0x07060504, relocInfo::none, 0);
+
+  return start;
+  }
+
+  // byte swap x86 byte array 
+  address generate_ghash_byte_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+    address start = __ pc();
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
+    __ emit_data(0x08090a0b, relocInfo::none, 0);
+    __ emit_data(0x04050607, relocInfo::none, 0);
+    __ emit_data(0x00010203, relocInfo::none, 0);
+  return start;
+  }
+
+  /* Single and multi-block ghash operations */
+  address generate_ghash_processBlocks() {
+    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    const Register state        = rdi;
+    const Register subkeyH      = rsi;
+    const Register data         = rdx;
+    const Register blocks       = rcx;
+
+    const Address  state_param(rbp, 8+0);
+    const Address  subkeyH_param(rbp, 8+4);
+    const Address  data_param(rbp, 8+8);
+    const Address  blocks_param(rbp, 8+12);
+
+#ifdef _WIN64
+    const int XMM_REG_LAST  = 7;
+#endif
+
+    const XMMRegister xmm_temp0 = xmm0;
+    const XMMRegister xmm_temp1 = xmm1;
+    const XMMRegister xmm_temp2 = xmm2;
+    const XMMRegister xmm_temp3 = xmm3;
+    const XMMRegister xmm_temp4 = xmm4;
+    const XMMRegister xmm_temp5 = xmm5;
+    const XMMRegister xmm_temp6 = xmm6;
+    const XMMRegister xmm_temp7 = xmm7;
+
+    __ enter();
+
+#ifdef _WIN64
+    // save the xmm registers which must be preserved 6-10
+    __ subptr(rsp, -rsp_after_call_off * wordSize);
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+    __ movptr(state, state_param);
+    __ movptr(subkeyH, subkeyH_param);
+    __ movptr(data, data_param);
+    __ movptr(blocks, blocks_param);
+
+    __ movdqu(xmm_temp0, Address(state, 0));
+    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+  
+    __ movdqu(xmm_temp1, Address(subkeyH, 0));
+    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ BIND(L_ghash_loop);
+    __ movdqu(xmm_temp2, Address(data, 0));
+    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+    __ pxor(xmm_temp0, xmm_temp2); 
+
+    //
+    // Multiply with the hash key
+    //
+    __ movdqu(xmm_temp3, xmm_temp0);
+    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
+    __ movdqu(xmm_temp4, xmm_temp0);
+    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
+
+    __ movdqu(xmm_temp5, xmm_temp0);
+    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
+    __ movdqu(xmm_temp6, xmm_temp0);
+    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
+
+    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
+
+    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
+    __ psrldq(xmm_temp4, 8);    // shift by xmm4 64 bits to the right
+    __ pslldq(xmm_temp5, 8);    // shift by xmm5 64 bits to the left
+    __ pxor(xmm_temp3, xmm_temp5);
+    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
+                                // of the carry-less multiplication of
+                                // xmm0 by xmm1.
+
+    // We shift the result of the multiplication by one bit position
+    // to the left to cope for the fact that the bits are reversed.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp6);
+    __ pslld (xmm_temp3, 1);
+    __ pslld(xmm_temp6, 1);
+    __ psrld(xmm_temp7, 31);
+    __ psrld(xmm_temp4, 31);
+    __ movdqu(xmm_temp5, xmm_temp7);
+    __ pslldq(xmm_temp4, 4);
+    __ pslldq(xmm_temp7, 4);
+    __ psrldq(xmm_temp5, 12);
+    __ por(xmm_temp3, xmm_temp7);
+    __ por(xmm_temp6, xmm_temp4);
+    __ por(xmm_temp6, xmm_temp5);
+
+    //
+    // First phase of the reduction
+    //
+    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
+    // independently.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ pslld(xmm_temp7, 31);    // packed right shift shifting << 31
+    __ pslld(xmm_temp4, 30);    // packed right shift shifting << 30
+    __ pslld(xmm_temp5, 25);    // packed right shift shifting << 25
+    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
+    __ pxor(xmm_temp7, xmm_temp5);
+    __ movdqu(xmm_temp4, xmm_temp7);
+    __ pslldq(xmm_temp7, 12);
+    __ psrldq(xmm_temp4, 4);
+    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
+
+    //
+    // Second phase of the reduction
+    //
+    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
+    // shift operations.
+    __ movdqu(xmm_temp2, xmm_temp3);
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ psrld(xmm_temp2, 1);     // packed left shifting >> 1
+    __ psrld(xmm_temp7, 2);     // packed left shifting >> 2
+    __ psrld(xmm_temp5, 7);     // packed left shifting >> 7
+    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
+    __ pxor(xmm_temp2, xmm_temp5);
+    __ pxor(xmm_temp2, xmm_temp4);
+    __ pxor(xmm_temp3, xmm_temp2);
+    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
+
+    __ decrement(blocks);
+    __ jcc(Assembler::zero, L_exit);
+    __ movdqu(xmm_temp0, xmm_temp6);
+    __ addptr(data, 16);
+    __ jmp(L_ghash_loop);
+
+    __ BIND(L_exit);
+       // Byte swap 16-byte result
+    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   /**
    *  Arguments:
    *
    * Inputs:
    *   rsp(4)   - int crc

@@ -3015,10 +3193,17 @@
       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
     }
 
+    // Generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                    &StubRoutines::_safefetch32_fault_pc,
                                                    &StubRoutines::_safefetch32_continuation_pc);
     StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
< prev index next >