src/cpu/x86/vm/macroAssembler_x86.cpp (8005544)

    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes; we know we are 4-byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1 << shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert(UseSSE >= 2, "supported cpu only");
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      movdl(xtmp, value);
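      // count is in elements of the fill type; one 4-byte store covers
      // 1 << shift elements (see the subl above), so 8 << shift elements
      // span a 32-byte chunk and 16 << shift elements a 64-byte chunk.
      // With AVX2, broadcast the 32-bit pattern into a 256-bit register
      // and fill 64 bytes per iteration with two ymm stores.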
      if (UseAVX >= 2 && UseUnalignedLoadStores) {
        // Fill 64-byte chunks
        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
        vpbroadcastd(xtmp, xtmp);

        subl(count, 16 << shift);
        jcc(Assembler::less, L_check_fill_32_bytes);
        align(16);

        BIND(L_fill_64_bytes_loop);
        vmovdqu(Address(to, 0), xtmp);
        vmovdqu(Address(to, 32), xtmp);
        addptr(to, 64);
        subl(count, 16 << shift);
        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
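        // The loop exit left count biased by -(16 << shift); adding back
        // 8 << shift checks whether one more 32-byte store still fits.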
        BIND(L_check_fill_32_bytes);
        addl(count, 8 << shift);
        jccb(Assembler::less, L_check_fill_8_bytes);
        vmovdqu(Address(to, 0), xtmp);
        addptr(to, 32);
        subl(count, 8 << shift);
      } else {
        // Fill 32-byte chunks
        pshufd(xtmp, xtmp, 0);

        subl(count, 8 << shift);
        jcc(Assembler::less, L_check_fill_8_bytes);
        align(16);

        BIND(L_fill_32_bytes_loop);

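        // Two unaligned 16-byte stores when the CPU handles them well;
        // otherwise four 8-byte stores keep every access aligned.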
        if (UseUnalignedLoadStores) {
          movdqu(Address(to, 0), xtmp);
          movdqu(Address(to, 16), xtmp);
        } else {
          movq(Address(to, 0), xtmp);
          movq(Address(to, 8), xtmp);
          movq(Address(to, 16), xtmp);
          movq(Address(to, 24), xtmp);
        }

        addptr(to, 32);
        subl(count, 8 << shift);
        jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      }
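      // On every path to this point count is biased by -(8 << shift);
      // restoring it yields the true remaining element count.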
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1 << shift);
  jccb(Assembler::zero, L_fill_2_bytes);
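
To make the control flow easier to follow, here is a stand-alone C++ sketch of the same fill strategy using AVX2 intrinsics. It is an illustrative model under assumptions of its own, not the generated stub: the name fill_words, the restriction to the int case (count already in 4-byte words, i.e. shift == 0), and the omission of the stub's 2-byte and 1-byte tails are all this sketch's inventions.

#include <immintrin.h>
#include <cstdint>
#include <cstddef>

// Illustrative model of the stub above (int fill, shift == 0).
// Assumes AVX2; count is the number of 4-byte words to fill.
static void fill_words(uint32_t* to, uint32_t value, ptrdiff_t count) {
  __m256i ymm = _mm256_set1_epi32((int32_t)value);   // vpbroadcastd

  count -= 16;                                       // subl(count, 16 << shift)
  while (count >= 0) {                               // L_fill_64_bytes_loop
    _mm256_storeu_si256((__m256i*)to,       ymm);    // vmovdqu [to],    ymm
    _mm256_storeu_si256((__m256i*)(to + 8), ymm);    // vmovdqu [to+32], ymm
    to    += 16;                                     // addptr(to, 64)
    count -= 16;
  }

  count += 8;                                        // L_check_fill_32_bytes
  if (count >= 0) {                                  // one 32-byte store fits
    _mm256_storeu_si256((__m256i*)to, ymm);
    to    += 8;
    count -= 8;
  }

  count += 8;                                        // L_check_fill_8_bytes
  __m128i xmm = _mm256_castsi256_si128(ymm);
  while (count >= 2) {                               // fill qwords
    _mm_storel_epi64((__m128i*)to, xmm);             // movq [to], xmm
    to    += 2;
    count -= 2;
  }
  if (count & 1) {                                   // fill trailing 4 bytes
    *to = value;
  }
}

For byte and short fills the stub runs the same loops; only the count bias scales through shift (8 << shift elements per 32 bytes), which the sketch hard-codes.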

