src/cpu/x86/vm/macroAssembler_x86.cpp (Sdiff for 7088419)

Old file:

   1 /*
   2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


2777 }
2778 
2779 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
2780 void MacroAssembler::movptr(Register dst, intptr_t src) {
2781   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2782 }
2783 
2784 void MacroAssembler::movptr(Address dst, Register src) {
2785   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2786 }
2787 
2788 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
2789   if (reachable(src)) {
2790     Assembler::movdqu(dst, as_Address(src));
2791   } else {
2792     lea(rscratch1, src);
2793     Assembler::movdqu(dst, Address(rscratch1, 0));
2794   }
2795 }
2796 
2797 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2798   if (reachable(src)) {
2799     Assembler::movsd(dst, as_Address(src));
2800   } else {
2801     lea(rscratch1, src);
2802     Assembler::movsd(dst, Address(rscratch1, 0));
2803   }
2804 }
2805 
2806 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
2807   if (reachable(src)) {
2808     Assembler::movss(dst, as_Address(src));
2809   } else {
2810     lea(rscratch1, src);
2811     Assembler::movss(dst, Address(rscratch1, 0));
2812   }
2813 }
2814 
2815 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
2816   if (reachable(src)) {


6371     jccb(Assembler::lessEqual, L_copy_8_chars);
6372 
6373     bind(L_copy_8_chars_exit);
6374     subptr(len, 8);
6375     jccb(Assembler::zero, L_done);
6376   }
6377 
6378   bind(L_copy_1_char);
6379   load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
6380   testl(tmp5, 0xff00);      // check if Unicode char
6381   jccb(Assembler::notZero, L_copy_1_char_exit);
6382   movb(Address(dst, len, Address::times_1, 0), tmp5);
6383   addptr(len, 1);
6384   jccb(Assembler::less, L_copy_1_char);
6385 
6386   bind(L_copy_1_char_exit);
6387   addptr(result, len); // len is a negative count of unprocessed elements
6388   bind(L_done);
6389 }
6390 
6391 #undef BIND
6392 #undef BLOCK_COMMENT
6393 
6394 
6395 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
6396   switch (cond) {
6397     // Note some conditions are synonyms for others
6398     case Assembler::zero:         return Assembler::notZero;
6399     case Assembler::notZero:      return Assembler::zero;
6400     case Assembler::less:         return Assembler::greaterEqual;
6401     case Assembler::lessEqual:    return Assembler::greater;
6402     case Assembler::greater:      return Assembler::lessEqual;
6403     case Assembler::greaterEqual: return Assembler::less;
6404     case Assembler::below:        return Assembler::aboveEqual;
6405     case Assembler::belowEqual:   return Assembler::above;
6406     case Assembler::above:        return Assembler::belowEqual;
6407     case Assembler::aboveEqual:   return Assembler::below;
6408     case Assembler::overflow:     return Assembler::noOverflow;
6409     case Assembler::noOverflow:   return Assembler::overflow;
6410     case Assembler::negative:     return Assembler::positive;

New file:

   1 /*
   2  * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


2777 }
2778 
2779 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
2780 void MacroAssembler::movptr(Register dst, intptr_t src) {
2781   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2782 }
2783 
2784 void MacroAssembler::movptr(Address dst, Register src) {
2785   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2786 }
2787 
2788 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
2789   if (reachable(src)) {
2790     Assembler::movdqu(dst, as_Address(src));
2791   } else {
2792     lea(rscratch1, src);
2793     Assembler::movdqu(dst, Address(rscratch1, 0));
2794   }
2795 }
2796 
2797 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2798   if (reachable(src)) {
2799     Assembler::movdqa(dst, as_Address(src));
2800   } else {
2801     lea(rscratch1, src);
2802     Assembler::movdqa(dst, Address(rscratch1, 0));
2803   }
2804 }
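
These AddressLiteral overloads exist because a 64-bit VM cannot assume every constant is within RIP-relative reach: when reachable(src) holds, the memory operand is encoded directly, otherwise the address is first materialized in rscratch1 (on 32-bit x86 every literal is treated as reachable, so the lea path is never taken). A hedged usage sketch; the call site below is hypothetical, but ExternalAddress is an AddressLiteral subclass, and the CRC kernel later in this change loads its mask constants through the analogous movdqu overload in exactly this way:

    // hypothetical call site: load a 16-byte constant whether or not it lies
    // within +/-2GB of the current code; the wrapper picks the addressing mode
    movdqa(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));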
2805 
2806 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2807   if (reachable(src)) {
2808     Assembler::movsd(dst, as_Address(src));
2809   } else {
2810     lea(rscratch1, src);
2811     Assembler::movsd(dst, Address(rscratch1, 0));
2812   }
2813 }
2814 
2815 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
2816   if (reachable(src)) {
2817     Assembler::movss(dst, as_Address(src));
2818   } else {
2819     lea(rscratch1, src);
2820     Assembler::movss(dst, Address(rscratch1, 0));
2821   }
2822 }
2823 
2824 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
2825   if (reachable(src)) {


6380     jccb(Assembler::lessEqual, L_copy_8_chars);
6381 
6382     bind(L_copy_8_chars_exit);
6383     subptr(len, 8);
6384     jccb(Assembler::zero, L_done);
6385   }
6386 
6387   bind(L_copy_1_char);
6388   load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
6389   testl(tmp5, 0xff00);      // check if Unicode char
6390   jccb(Assembler::notZero, L_copy_1_char_exit);
6391   movb(Address(dst, len, Address::times_1, 0), tmp5);
6392   addptr(len, 1);
6393   jccb(Assembler::less, L_copy_1_char);
6394 
6395   bind(L_copy_1_char_exit);
6396   addptr(result, len); // len is a negative count of unprocessed elements
6397   bind(L_done);
6398 }
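
The tail above copies one char at a time while len, a negative index counting up toward zero, walks to the end of both arrays; a character outside Latin-1 aborts the loop so the caller can see how much was left undone. A rough C sketch of the same logic (hedged; src and dst are assumed to already point at the ends of their arrays, as the negative indexing implies):

    // len starts negative; src/dst are end-of-array pointers (assumption)
    while (len < 0) {
      jchar c = src[len];            // load_unsigned_short
      if (c & 0xFF00) break;         // not a Latin-1 char: stop early
      dst[len] = (jbyte)c;           // movb
      len++;
    }
    result += len;                   // len is the negative count not processed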
6399 
6400 /**
6401  * Emits code to update CRC-32 with a byte value according to constants in table
6402  *
6403  * @param [in,out]crc   Register containing the crc.
6404  * @param [in]val       Register containing the byte to fold into the CRC.
6405  * @param [in]table     Register containing the table of crc constants.
6406  *
6407  * uint32_t crc;
6408  * val = crc_table[(val ^ crc) & 0xFF];
6409  * crc = val ^ (crc >> 8);
6410  *
6411  */
6412 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
6413   xorl(val, crc);
6414   andl(val, 0xFF);
6415   shrl(crc, 8); // unsigned shift
6416   xorl(crc, Address(table, val, Address::times_4, 0));
6417 }
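
update_byte_crc32 is the classic table-driven byte step spelled out in the comment above; as plain C it reads roughly as follows (a sketch, assuming a standard 256-entry CRC-32 table):

    static inline uint32_t update_byte_crc32_c(uint32_t crc, uint8_t val,
                                               const uint32_t table[256]) {
      return table[(val ^ crc) & 0xFF] ^ (crc >> 8);  // xorl/andl + shrl + xorl
    }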
6418 
6419 /**
6420  * Fold 128-bit data chunk
6421  */
6422 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
6423   vpclmulhdq(xtmp, xK, xcrc); // [123:64]
6424   vpclmulldq(xcrc, xK, xcrc); // [63:0]
6425   vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
6426   pxor(xcrc, xtmp);
6427 }
6428 
6429 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
6430   vpclmulhdq(xtmp, xK, xcrc);
6431   vpclmulldq(xcrc, xK, xcrc);
6432   pxor(xcrc, xbuf);
6433   pxor(xcrc, xtmp);
6434 }
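
Both overloads fold the running 128-bit remainder into the next chunk with two carry-less multiplications: the high and low qwords of the remainder are each multiplied by a precomputed constant held in xK, and the two products are XORed with the new data. A hedged equivalent using compiler intrinsics (assuming vpclmulhdq/vpclmulldq expand to PCLMULQDQ with immediates 0x11 and 0x00, and a PCLMUL-capable target):

    #include <immintrin.h>

    // one 128-bit CRC fold step, sketched with intrinsics
    static inline __m128i fold_128bit_crc32_c(__m128i xcrc, __m128i xK, __m128i chunk) {
      __m128i hi = _mm_clmulepi64_si128(xK, xcrc, 0x11);  // high qword product
      __m128i lo = _mm_clmulepi64_si128(xK, xcrc, 0x00);  // low qword product
      return _mm_xor_si128(_mm_xor_si128(lo, chunk), hi);
    }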
6435 
6436 /**
6437  * 8-bit folds to compute 32-bit CRC
6438  *
6439  * uint64_t xcrc;
6440  * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
6441  */
6442 void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
6443   movdl(tmp, xcrc);
6444   andl(tmp, 0xFF);
6445   movdl(xtmp, Address(table, tmp, Address::times_4, 0));
6446   psrldq(xcrc, 1); // unsigned shift one byte
6447   pxor(xcrc, xtmp);
6448 }
6449 
6450 /**
6451  * uint32_t crc;
6452  * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
6453  */
6454 void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
6455   movl(tmp, crc);
6456   andl(tmp, 0xFF);
6457   shrl(crc, 8);
6458   xorl(crc, Address(table, tmp, Address::times_4, 0));
6459 }
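
The two fold_8bit_crc32 helpers perform the same one-byte reduction, once on an XMM value (shifting right a whole byte with psrldq) and once on a general register; the scalar form of the register variant is simply (sketch):

    static inline uint32_t fold_8bit_crc32_c(uint32_t crc, const uint32_t table[256]) {
      return table[crc & 0xFF] ^ (crc >> 8);
    }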
6460 
6461 /**
6462  * @param crc   register containing existing CRC (32-bit)
6463  * @param buf   register pointing to input byte buffer (byte*)
6464  * @param len   register containing number of bytes
6465  * @param table register that will contain address of CRC table
6466  * @param tmp   scratch register
6467  */
6468 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
6469   assert_different_registers(crc, buf, len, table, tmp, rax);
6470 
6471   Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
6472   Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
6473 
6474   lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
6475   notl(crc); // ~crc
6476   cmpl(len, 16);
6477   jcc(Assembler::less, L_tail);
6478 
6479   // Align buffer to 16 bytes
6480   movl(tmp, buf);
6481   andl(tmp, 0xF);
6482   jccb(Assembler::zero, L_aligned);
6483   subl(tmp,  16);
6484   addl(len, tmp);
6485 
6486   align(4);
6487   BIND(L_align_loop);
6488   movsbl(rax, Address(buf, 0)); // load byte with sign extension
6489   update_byte_crc32(crc, rax, table);
6490   increment(buf);
6491   incrementl(tmp);
6492   jccb(Assembler::less, L_align_loop);
6493 
6494   BIND(L_aligned);
6495   movl(tmp, len); // save
6496   shrl(len, 4);
6497   jcc(Assembler::zero, L_tail_restore);
6498 
6499   // Fold crc into first bytes of vector
6500   movdqa(xmm1, Address(buf, 0));
6501   movdl(rax, xmm1);
6502   xorl(crc, rax);
6503   pinsrd(xmm1, crc, 0);
6504   addptr(buf, 16);
6505   subl(len, 4); // len > 0
6506   jcc(Assembler::less, L_fold_tail);
6507 
6508   movdqa(xmm2, Address(buf,  0));
6509   movdqa(xmm3, Address(buf, 16));
6510   movdqa(xmm4, Address(buf, 32));
6511   addptr(buf, 48);
6512   subl(len, 3);
6513   jcc(Assembler::lessEqual, L_fold_512b);
6514 
6515   // Fold total 512 bits of polynomial on each iteration,
6516   // 128 bits per each of 4 parallel streams.
6517   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));
6518 
6519   align(32);
6520   BIND(L_fold_512b_loop);
6521   fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
6522   fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
6523   fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
6524   fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
6525   addptr(buf, 64);
6526   subl(len, 4);
6527   jcc(Assembler::greater, L_fold_512b_loop);
6528 
6529   // Fold 512 bits to 128 bits.
6530   BIND(L_fold_512b);
6531   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
6532   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
6533   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
6534   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);
6535 
6536   // Fold the rest of 128 bits data chunks
6537   BIND(L_fold_tail);
6538   addl(len, 3);
6539   jccb(Assembler::lessEqual, L_fold_128b);
6540   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
6541 
6542   BIND(L_fold_tail_loop);
6543   fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
6544   addptr(buf, 16);
6545   decrementl(len);
6546   jccb(Assembler::greater, L_fold_tail_loop);
6547 
6548   // Fold 128 bits in xmm1 down into 32 bits in crc register.
6549   BIND(L_fold_128b);
6550   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
6551   vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
6552   vpand(xmm3, xmm0, xmm2, false /* vector256 */);
6553   vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
6554   psrldq(xmm1, 8);
6555   psrldq(xmm2, 4);
6556   pxor(xmm0, xmm1);
6557   pxor(xmm0, xmm2);
6558 
6559   // 8 8-bit folds to compute 32-bit CRC.
6560   for (int j = 0; j < 4; j++) {
6561     fold_8bit_crc32(xmm0, table, xmm1, rax);
6562   }
6563   movdl(crc, xmm0); // mov 32 bits to general register
6564   for (int j = 0; j < 4; j++) {
6565     fold_8bit_crc32(crc, table, rax);
6566   }
6567 
6568   BIND(L_tail_restore);
6569   movl(len, tmp); // restore
6570   BIND(L_tail);
6571   andl(len, 0xf);
6572   jccb(Assembler::zero, L_exit);
6573 
6574   // Fold the rest of bytes
6575   align(4);
6576   BIND(L_tail_loop);
6577   movsbl(rax, Address(buf, 0)); // load byte with sign extension
6578   update_byte_crc32(crc, rax, table);
6579   increment(buf);
6580   decrementl(len);
6581   jccb(Assembler::greater, L_tail_loop);
6582 
6583   BIND(L_exit);
6584   notl(crc); // ~crc
6585 }
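
End to end, kernel_crc32 must produce the same value as the classic byte-at-a-time loop: invert the incoming CRC, fold each byte through the table, and invert again on exit. A hedged scalar reference (assuming the table at StubRoutines::crc_table_addr() is the standard reflected CRC-32 table):

    static uint32_t kernel_crc32_c(uint32_t crc, const uint8_t* buf, size_t len,
                                   const uint32_t table[256]) {
      crc = ~crc;                                         // notl(crc) on entry
      while (len--) {
        crc = table[(crc ^ *buf++) & 0xFF] ^ (crc >> 8);  // update_byte_crc32
      }
      return ~crc;                                        // notl(crc) on exit
    }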
6586 
6587 #undef BIND
6588 #undef BLOCK_COMMENT
6589 
6590 
6591 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
6592   switch (cond) {
6593     // Note some conditions are synonyms for others
6594     case Assembler::zero:         return Assembler::notZero;
6595     case Assembler::notZero:      return Assembler::zero;
6596     case Assembler::less:         return Assembler::greaterEqual;
6597     case Assembler::lessEqual:    return Assembler::greater;
6598     case Assembler::greater:      return Assembler::lessEqual;
6599     case Assembler::greaterEqual: return Assembler::less;
6600     case Assembler::below:        return Assembler::aboveEqual;
6601     case Assembler::belowEqual:   return Assembler::above;
6602     case Assembler::above:        return Assembler::belowEqual;
6603     case Assembler::aboveEqual:   return Assembler::below;
6604     case Assembler::overflow:     return Assembler::noOverflow;
6605     case Assembler::noOverflow:   return Assembler::overflow;
6606     case Assembler::negative:     return Assembler::positive;