995 __ movb(rax, Address(from, 0));
996 __ movb(Address(from, to_from, Address::times_1, 0), rax);
997 __ BIND(L_exit);
998 } else {
999 __ BIND(L_copy_byte);
1000 }
1001 } else {
1002 __ BIND(L_copy_2_bytes);
1003 }
1004
1005 if (t == T_OBJECT) {
1006 __ movl(count, Address(rsp, 12+12)); // reread 'count'
1007 __ mov(to, saved_to); // restore 'to'
1008 gen_write_ref_array_post_barrier(to, count);
1009 __ BIND(L_0_count);
1010 }
1011 inc_copy_counter_np(t);
1012 __ pop(rdi);
1013 __ pop(rsi);
1014 __ leave(); // required for proper stackwalking of RuntimeStub frame
1015 __ xorptr(rax, rax); // return 0
1016 __ ret(0);
1017 return start;
1018 }
1019
1020
1021 address generate_fill(BasicType t, bool aligned, const char *name) {
1022 __ align(CodeEntryAlignment);
1023 StubCodeMark mark(this, "StubRoutines", name);
1024 address start = __ pc();
1025
1026 BLOCK_COMMENT("Entry:");
1027
1028 const Register to = rdi; // source array address
1029 const Register value = rdx; // value
1030 const Register count = rsi; // elements count
1031
1032 __ enter(); // required for proper stackwalking of RuntimeStub frame
1033 __ push(rsi);
1034 __ push(rdi);
1230 __ subptr(to, from); // to --> to_from
1231 if (VM_Version::supports_mmx()) {
1232 if (UseXMMForArrayCopy) {
1233 xmm_copy_forward(from, to_from, count);
1234 } else {
1235 mmx_copy_forward(from, to_from, count);
1236 }
1237 } else {
1238 __ jmpb(L_copy_8_bytes);
1239 __ align(OptoLoopAlignment);
1240 __ BIND(L_copy_8_bytes_loop);
1241 __ fild_d(Address(from, 0));
1242 __ fistp_d(Address(from, to_from, Address::times_1));
1243 __ addptr(from, 8);
1244 __ BIND(L_copy_8_bytes);
1245 __ decrement(count);
1246 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1247 }
1248 inc_copy_counter_np(T_LONG);
1249 __ leave(); // required for proper stackwalking of RuntimeStub frame
1250 __ xorptr(rax, rax); // return 0
1251 __ ret(0);
1252 return start;
1253 }
1254
1255 address generate_conjoint_long_copy(address nooverlap_target,
1256 address* entry, const char *name) {
1257 __ align(CodeEntryAlignment);
1258 StubCodeMark mark(this, "StubRoutines", name);
1259 address start = __ pc();
1260
1261 Label L_copy_8_bytes, L_copy_8_bytes_loop;
1262 const Register from = rax; // source array address
1263 const Register to = rdx; // destination array address
1264 const Register count = rcx; // elements count
1265 const Register end_from = rax; // source array end address
1266
1267 __ enter(); // required for proper stackwalking of RuntimeStub frame
1268 __ movptr(from , Address(rsp, 8+0)); // from
1269 __ movptr(to , Address(rsp, 8+4)); // to
3348 __ enter(); // required for proper stackwalking of RuntimeStub frame
3349 __ push(rsi);
3350 __ push(rdi);
3351 __ push(rbx);
3352
3353 Address crc_arg(rbp, 8 + 0);
3354 Address buf_arg(rbp, 8 + 4);
3355 Address len_arg(rbp, 8 + 8);
3356
3357 // Load up:
3358 __ movl(crc, crc_arg);
3359 __ movptr(buf, buf_arg);
3360 __ movl(len, len_arg);
3361
3362 __ kernel_crc32(crc, buf, len, table, tmp);
3363
3364 __ movl(rax, crc);
3365 __ pop(rbx);
3366 __ pop(rdi);
3367 __ pop(rsi);
3368 __ leave(); // required for proper stackwalking of RuntimeStub frame
3369 __ ret(0);
3370
3371 return start;
3372 }
3373
3374 /**
3375 * Arguments:
3376 *
3377 * Inputs:
3378 * rsp(4) - int crc
3379 * rsp(8) - byte* buf
3380 * rsp(12) - int length
3381 * rsp(16) - table_start - optional (present only when doing a library_call,
3382 * not used by x86 algorithm)
3383 *
3384 * Output:
3385 * rax - int crc result
3386 */
3387 address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
3405 Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 +
3406 // we need to add an additional 4 because __ enter
3407 // has just pushed ebp onto the stack
3408 Address buf_arg(rsp, 4 + 4 + 4);
3409 Address len_arg(rsp, 4 + 4 + 8);
3410 // Load up:
3411 __ movl(crc, crc_arg);
3412 __ movl(buf, buf_arg);
3413 __ movl(len, len_arg);
3414 __ push(d);
3415 __ push(g);
3416 __ push(h);
3417 __ crc32c_ipl_alg2_alt2(crc, buf, len,
3418 d, g, h,
3419 empty, empty, empty,
3420 xmm0, xmm1, xmm2,
3421 is_pclmulqdq_supported);
3422 __ pop(h);
3423 __ pop(g);
3424 __ pop(d);
3425 __ leave(); // required for proper stackwalking of RuntimeStub frame
3426 __ ret(0);
3427
3428 return start;
3429 }
3430
3431 address generate_libmExp() {
3432 address start = __ pc();
3433
3434 const XMMRegister x0 = xmm0;
3435 const XMMRegister x1 = xmm1;
3436 const XMMRegister x2 = xmm2;
3437 const XMMRegister x3 = xmm3;
3438
3439 const XMMRegister x4 = xmm4;
3440 const XMMRegister x5 = xmm5;
3441 const XMMRegister x6 = xmm6;
3442 const XMMRegister x7 = xmm7;
3443
3444 const Register tmp = rbx;
|
995 __ movb(rax, Address(from, 0));
996 __ movb(Address(from, to_from, Address::times_1, 0), rax);
997 __ BIND(L_exit);
998 } else {
999 __ BIND(L_copy_byte);
1000 }
1001 } else {
1002 __ BIND(L_copy_2_bytes);
1003 }
1004
1005 if (t == T_OBJECT) {
1006 __ movl(count, Address(rsp, 12+12)); // reread 'count'
1007 __ mov(to, saved_to); // restore 'to'
1008 gen_write_ref_array_post_barrier(to, count);
1009 __ BIND(L_0_count);
1010 }
1011 inc_copy_counter_np(t);
1012 __ pop(rdi);
1013 __ pop(rsi);
1014 __ leave(); // required for proper stackwalking of RuntimeStub frame
1015 __ vzeroupper();
1016 __ xorptr(rax, rax); // return 0
1017 __ ret(0);
1018 return start;
1019 }
1020
1021
1022 address generate_fill(BasicType t, bool aligned, const char *name) {
1023 __ align(CodeEntryAlignment);
1024 StubCodeMark mark(this, "StubRoutines", name);
1025 address start = __ pc();
1026
1027 BLOCK_COMMENT("Entry:");
1028
1029 const Register to = rdi; // source array address
1030 const Register value = rdx; // value
1031 const Register count = rsi; // elements count
1032
1033 __ enter(); // required for proper stackwalking of RuntimeStub frame
1034 __ push(rsi);
1035 __ push(rdi);
1231 __ subptr(to, from); // to --> to_from
1232 if (VM_Version::supports_mmx()) {
1233 if (UseXMMForArrayCopy) {
1234 xmm_copy_forward(from, to_from, count);
1235 } else {
1236 mmx_copy_forward(from, to_from, count);
1237 }
1238 } else {
1239 __ jmpb(L_copy_8_bytes);
1240 __ align(OptoLoopAlignment);
1241 __ BIND(L_copy_8_bytes_loop);
1242 __ fild_d(Address(from, 0));
1243 __ fistp_d(Address(from, to_from, Address::times_1));
1244 __ addptr(from, 8);
1245 __ BIND(L_copy_8_bytes);
1246 __ decrement(count);
1247 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
1248 }
1249 inc_copy_counter_np(T_LONG);
1250 __ leave(); // required for proper stackwalking of RuntimeStub frame
1251 __ vzeroupper();
1252 __ xorptr(rax, rax); // return 0
1253 __ ret(0);
1254 return start;
1255 }
1256
1257 address generate_conjoint_long_copy(address nooverlap_target,
1258 address* entry, const char *name) {
1259 __ align(CodeEntryAlignment);
1260 StubCodeMark mark(this, "StubRoutines", name);
1261 address start = __ pc();
1262
1263 Label L_copy_8_bytes, L_copy_8_bytes_loop;
1264 const Register from = rax; // source array address
1265 const Register to = rdx; // destination array address
1266 const Register count = rcx; // elements count
1267 const Register end_from = rax; // source array end address
1268
1269 __ enter(); // required for proper stackwalking of RuntimeStub frame
1270 __ movptr(from , Address(rsp, 8+0)); // from
1271 __ movptr(to , Address(rsp, 8+4)); // to
3350 __ enter(); // required for proper stackwalking of RuntimeStub frame
3351 __ push(rsi);
3352 __ push(rdi);
3353 __ push(rbx);
3354
3355 Address crc_arg(rbp, 8 + 0);
3356 Address buf_arg(rbp, 8 + 4);
3357 Address len_arg(rbp, 8 + 8);
3358
3359 // Load up:
3360 __ movl(crc, crc_arg);
3361 __ movptr(buf, buf_arg);
3362 __ movl(len, len_arg);
3363
3364 __ kernel_crc32(crc, buf, len, table, tmp);
3365
3366 __ movl(rax, crc);
3367 __ pop(rbx);
3368 __ pop(rdi);
3369 __ pop(rsi);
3370 __ vzeroupper();
3371 __ leave(); // required for proper stackwalking of RuntimeStub frame
3372 __ ret(0);
3373
3374 return start;
3375 }
3376
3377 /**
3378 * Arguments:
3379 *
3380 * Inputs:
3381 * rsp(4) - int crc
3382 * rsp(8) - byte* buf
3383 * rsp(12) - int length
3384 * rsp(16) - table_start - optional (present only when doing a library_call,
3385 * not used by x86 algorithm)
3386 *
3387 * Output:
3388 * rax - int crc result
3389 */
3390 address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
3408 Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 +
3409 // we need to add an additional 4 because __ enter
3410 // has just pushed ebp onto the stack
3411 Address buf_arg(rsp, 4 + 4 + 4);
3412 Address len_arg(rsp, 4 + 4 + 8);
3413 // Load up:
3414 __ movl(crc, crc_arg);
3415 __ movl(buf, buf_arg);
3416 __ movl(len, len_arg);
3417 __ push(d);
3418 __ push(g);
3419 __ push(h);
3420 __ crc32c_ipl_alg2_alt2(crc, buf, len,
3421 d, g, h,
3422 empty, empty, empty,
3423 xmm0, xmm1, xmm2,
3424 is_pclmulqdq_supported);
3425 __ pop(h);
3426 __ pop(g);
3427 __ pop(d);
3428 __ vzeroupper();
3429 __ leave(); // required for proper stackwalking of RuntimeStub frame
3430 __ ret(0);
3431
3432 return start;
3433 }
3434
3435 address generate_libmExp() {
3436 address start = __ pc();
3437
3438 const XMMRegister x0 = xmm0;
3439 const XMMRegister x1 = xmm1;
3440 const XMMRegister x2 = xmm2;
3441 const XMMRegister x3 = xmm3;
3442
3443 const XMMRegister x4 = xmm4;
3444 const XMMRegister x5 = xmm5;
3445 const XMMRegister x6 = xmm6;
3446 const XMMRegister x7 = xmm7;
3447
3448 const Register tmp = rbx;
|