Print this page
rev 1838 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/assembler_x86.cpp
+++ new/src/cpu/x86/vm/assembler_x86.cpp
1 1 /*
2 2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "incls/_precompiled.incl"
26 26 #include "incls/_assembler_x86.cpp.incl"
27 27
28 28 // Implementation of AddressLiteral
29 29
// Build an AddressLiteral for an absolute code/data address, attaching the
// relocation record that matches the given relocation type so the address
// can be patched when the code moves.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    // No relocation record; _rspec stays default-constructed.
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
66 66
67 67 // Implementation of Address
68 68
69 69 #ifdef _LP64
70 70
// 64-bit: array addressing via an absolute base is not encodable (no
// abs32 base + index mode), so this must never be reached here.
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}
77 77
78 78 // exceedingly dangerous constructor
// exceedingly dangerous constructor
// Builds a bare [disp] address with a relocation derived from rtype;
// `loc` is only used as the relocation target, not as the displacement.
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
105 105 #else // LP64
106 106
// 32-bit: fold an ArrayAddress (absolute base + scaled-index Address)
// into a single Address whose disp32 is the absolute base, carrying the
// base's relocation so the disp stays patchable.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}
115 115
116 116 // exceedingly dangerous constructor
// exceedingly dangerous constructor
// 32-bit only: treats the raw code/data address itself as the disp32 and
// attaches the caller-supplied relocation unchecked.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}
124 124
125 125 #endif // _LP64
126 126
127 127
128 128
129 129 // Convert the raw encoding form into the form expected by the constructor for
130 130 // Address. An index of 4 (rsp) corresponds to having no index, so convert
131 131 // that to noreg for the Address constructor.
132 132 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
133 133 RelocationHolder rspec;
134 134 if (disp_is_oop) {
135 135 rspec = Relocation::spec_simple(relocInfo::oop_type);
136 136 }
137 137 bool valid_index = index != rsp->encoding();
138 138 if (valid_index) {
139 139 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
140 140 madr._rspec = rspec;
141 141 return madr;
142 142 } else {
143 143 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
144 144 madr._rspec = rspec;
145 145 return madr;
146 146 }
147 147 }
148 148
149 149 // Implementation of Assembler
150 150
151 151 int AbstractAssembler::code_fill_byte() {
152 152 return (u_char)'\xF4'; // hlt
153 153 }
154 154
155 155 // make this go away someday
156 156 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
157 157 if (rtype == relocInfo::none)
158 158 emit_long(data);
159 159 else emit_data(data, Relocation::spec_simple(rtype), format);
160 160 }
161 161
// Emit a 32-bit datum with its relocation. The relocation is registered
// at the enclosing instruction's start (inst_mark), not at the data word,
// so it must be called inside an InstructionMark scope.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words. Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  // The data word itself is emitted after the relocation is recorded.
  emit_long(data);
}
180 180
181 181 static int encode(Register r) {
182 182 int enc = r->encoding();
183 183 if (enc >= 8) {
184 184 enc -= 8;
185 185 }
186 186 return enc;
187 187 }
188 188
189 189 static int encode(XMMRegister r) {
190 190 int enc = r->encoding();
191 191 if (enc >= 8) {
192 192 enc -= 8;
193 193 }
194 194 return enc;
195 195 }
196 196
// 8-bit register-immediate arithmetic: [op1][op2|reg][imm8].
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


// 32-bit register-immediate arithmetic. When the immediate fits in a
// signed byte, the shorter sign-extended-imm8 opcode form (op1 | 0x02)
// is emitted instead of the full imm32 form.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);  // 1 = trailing imm8 size, for RIP-rel correction
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);  // 4 = trailing imm32 size, for RIP-rel correction
    emit_long(imm32);
  }
}

// 32-bit-only variant where the immediate is an oop: the oop is embedded
// as the imm32 with an oop_type relocation so it can be found and patched.
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());  // oops never fit in a 32-bit immediate on 64-bit
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);  // emit_data relocates against inst_mark()
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}
248 248
249 249
250 250 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
251 251 assert(isByte(op1) && isByte(op2), "wrong opcode");
252 252 emit_byte(op1);
253 253 emit_byte(op2 | encode(dst) << 3 | encode(src));
254 254 }
255 255
256 256
// Emit the ModRM (and, as needed, SIB / displacement) bytes for a memory
// operand [base + index*scale + disp]. `reg` fills the ModRM reg field.
// Any relocation in `rspec` forces the disp32 form so the displacement
// stays patchable; `rip_relative_correction` accounts for immediate bytes
// emitted after the displacement when computing the RIP-relative offset.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // rbp/r13 as base cannot use the no-disp mod=00 form (that encoding
      // means disp32-only), hence the extra base checks below.
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      // rsp/r12 as base requires a SIB byte (base encoding 100 selects SIB).
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
374 374
// XMM variant: ModRM field encoding is identical for GPRs and XMM
// registers, so delegate via a Register cast of the encoding.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
380 380
// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

// Decode the instruction at `inst` far enough to find the requested
// 32-bit (or 64-bit immediate) operand word, or — with end_pc_operand —
// the address just past the whole instruction. Only instructions this
// assembler actually emits are recognized; anything else asserts.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  // Phase 1: consume prefixes and the opcode byte(s); cases that have no
  // ModRM-encoded operand return directly from inside the switch.
  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;  // REX.W widens a trailing immediate to 8 bytes (see 0xB8)
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr a
      // 64bit side says it these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // deliberate fall through: skip the opcode byte after the 0x0F as well
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
    // NOTE(review): REP4 is left defined here while REP8/REP16 are undef'd
    // -- presumably an oversight; confirm before relying on it elsewhere.
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // Phase 2: parse the output of emit_operand -- the ModRM byte, an
  // optional SIB byte, and the displacement (0, 1, or 4 bytes).
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    // r/m == 100 with mod != 11 means a SIB byte follows.
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg 100][ss index base]
    // [00 reg 100][00 100 esp]
    // [00 reg base]
    // [00 reg 100][ss index 101][disp32]
    // [00 reg 101] [disp32]

    if (base == b101) {
      // mod=00 with base 101 is the disp32-only encoding.
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg 100][ss index base][disp8]
    // [01 reg 100][00 100 esp][disp8]
    // [01 reg base] [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg 100][ss index base][disp32]
    // [10 reg 100][00 100 esp][disp32]
    // [10 reg base] [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base] (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;      // tail_size covers any trailing immediate
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
736 736
// Return the address of the instruction following `inst`, by asking the
// operand decoder for the secret end_pc_operand pseudo-operand.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
741 741
742 742
#ifdef ASSERT
// Debug-only sanity check run by emit_data: re-decode the instruction at
// inst_mark() and verify the relocation's operand sits exactly at the
// current pc, i.e. where the relocatable word is about to be emitted.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT
766 766
// Convenience wrappers over the 7-argument emit_operand, unpacking the
// Address fields. The "32" / MMX variants additionally assert that no
// REX-extended registers are involved.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
797 797
798 798
799 799 void Assembler::emit_farith(int b1, int b2, int i) {
800 800 assert(isByte(b1) && isByte(b2), "wrong opcode");
801 801 assert(0 <= i && i < 8, "illegal stack offset");
802 802 emit_byte(b1);
803 803 emit_byte(b2 + i);
804 804 }
805 805
806 806
807 807 // Now the Assembler instruction (identical for 32/64 bits)
808 808
// adcl: 32-bit add-with-carry. 0x81 /2 is the imm form, 0x13 the
// reg <- mem/reg form (emit_arith picks the short imm8 encoding when it fits).
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);   // memory form may carry a relocation on the disp
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

// addl: 32-bit add, in memory-destination, register, and immediate forms.
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);  // rax encodes /0 in the ModRM reg field
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
855 855
// Multi-byte no-ops built from the 0F 1F (NOP r/m32) encoding with padded
// addressing modes, giving exact instruction lengths for code alignment.
void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
889 889
// addsd: scalar double add (F2 0F 58); SSE2 required on 32-bit.
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);   // mod=11: register-register form
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}

// addss: scalar single add (F3 0F 58); SSE required on 32-bit.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
927 927
// AND r32, imm32 (81 /4 id; emit_arith may shorten for 8-bit immediates).
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

// AND r32, m32 (23 /r).
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

// AND r32, r32 (23 /r).
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

// ANDPD xmm, m128 — bitwise AND of packed doubles (66 0F 54 /r).
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x54);
  emit_operand(dst, src);
}
954 954
// BSF r32, r32 — bit scan forward (0F BC /r).
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// BSR r32, r32 (0F BD /r). With an F3 prefix this same opcode decodes as
// LZCNT on capable CPUs, hence the guard below.
void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// BSWAP r32 (0F C8+rd) — byte-swap a 32-bit register.
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
975 975
// CALL rel32 to a label (E8 cd). A bound label is emitted with its final
// (backward, hence offs <= 0) displacement; an unbound one records a patch
// site and emits a zero displacement to be fixed up at bind time.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    // displacement is relative to the end of the 5-byte instruction
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}
997 997
// CALL r64/r32 — indirect call through a register (FF /2).
void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();  // kept only for the commented-out sanity check below
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


// CALL m — indirect call through memory (FF /2; rdx supplies the /2 reg field).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

// CALL rel32 to an absolute address with relocation info. The target must be
// reachable with a signed 32-bit displacement from the end of the instruction.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1036 1036
// CDQ — sign-extend eax into edx:eax (99).
void Assembler::cdql() {
  emit_byte(0x99);
}

// CMOVcc r32, r32 (0F 40+cc /r). CMOV is optional pre-P6, hence the guard.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


// CMOVcc r32, m32 (0F 40+cc /r).
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
1057 1057
// CMP m8, imm8 (80 /7 ib).
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);  // rdi supplies the /7 reg field
  emit_byte(imm8);
}

// CMP m32, imm32 (81 /7 id).
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// CMP r32, imm32 (81 /7; emit_arith may shorten for 8-bit immediates).
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

// CMP r32, r32 (3B /r).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


// CMP r32, m32 (3B /r).
void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// CMP m16, imm16 (66 81 /7 iw). No REX prefix is emitted here, so the
// address may not use extended (r8-r15) registers — enforced by the assert.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
1100 1100
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
// With the (Atomics & 2) diagnostic flag set, a non-atomic multi-instruction
// equivalent is emitted instead of the real CMPXCHG (0F B1 /r).
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware. The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L ;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}
1126 1126
// COMISD xmm, m64 — ordered double compare setting EFLAGS (66 0F 2F /r),
// emitted as a 66 prefix in front of the comiss encoding.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangly ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}

// COMISS xmm, m32 — ordered float compare setting EFLAGS (0F 2F /r).
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}
1144 1144
// CVTDQ2PD xmm, xmm — packed int32 -> double (F3 0F E6 /r).
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

// CVTDQ2PS xmm, xmm — packed int32 -> float (0F 5B /r, no mandatory prefix).
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

// CVTSD2SS xmm, xmm — scalar double -> float (F2 0F 5A /r).
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SD xmm, r32 — int32 -> scalar double (F2 0F 2A /r).
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SS xmm, r32 — int32 -> scalar float (F3 0F 2A /r).
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSS2SD xmm, xmm — scalar float -> double (F3 0F 5A /r).
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTTSD2SI r32, xmm — scalar double -> int32 with truncation (F2 0F 2C /r).
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// CVTTSS2SI r32, xmm — scalar float -> int32 with truncation (F3 0F 2C /r).
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1215 1215
// DEC m32 (FF /1; rcx supplies the /1 reg field).
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
1223 1223
// DIVSD xmm, m64 — scalar double divide (F2 0F 5E /r).
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSD xmm, xmm.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// DIVSS xmm, m32 — scalar float divide (F3 0F 5E /r).
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSS xmm, xmm.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}
1261 1261
// EMMS — empty MMX state (0F 77).
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

// HLT (F4) — used to pad/trap; faults in user mode.
void Assembler::hlt() {
  emit_byte(0xF4);
}

// IDIV r32 — signed divide of edx:eax by src (F7 /7).
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// DIV r32 — unsigned divide of edx:eax by src (F7 /6).
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}
1283 1283
// IMUL r32, r32 — two-operand signed multiply (0F AF /r).
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


// IMUL r32, r32, imm — three-operand form; uses the short 6B /r ib encoding
// when the immediate fits in a signed byte, otherwise 69 /r id.
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
1304 1304
// INC m32 (FF /0; rax supplies the /0 reg field).
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
1312 1312
// Conditional jump to a label. For a bound label the short 2-byte form
// (70+cc cb) is used when the displacement fits in 8 bits and no relocation
// is needed; otherwise the 6-byte form (0F 80+cc cd). Unbound labels always
// get the long form plus a patch record.
void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    // displacement is relative to the end of the chosen encoding
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    // is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    // an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}
1347 1347
1348 1348 void Assembler::jccb(Condition cc, Label& L) {
1349 1349 if (L.is_bound()) {
1350 1350 const int short_size = 2;
1351 1351 address entry = target(L);
1352 1352 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1353 1353 "Dispacement too large for a short jmp");
1354 1354 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1355 1355 // 0111 tttn #8-bit disp
1356 1356 emit_byte(0x70 | cc);
1357 1357 emit_byte((offs - short_size) & 0xFF);
1358 1358 } else {
1359 1359 InstructionMark im(this);
1360 1360 L.add_patch_at(code(), locator());
1361 1361 emit_byte(0x70 | cc);
1362 1362 emit_byte(0);
1363 1363 }
1364 1364 }
1365 1365
// JMP m — indirect jump through memory (FF /4; rsp supplies the /4 reg field).
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}
1372 1372
// Unconditional jump to a label: short form (EB cb) when bound, in range and
// unrelocated; otherwise near form (E9 cd). Unbound labels always get the
// near form plus a patch record.
void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    // displacement is relative to the end of the chosen encoding
    intptr_t offs = entry - _code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound. If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    relocate(rtype);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}
1400 1400
// JMP r — indirect jump through a register (FF /4).
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

// JMP rel32 to an absolute address with relocation info; the target must be
// reachable with a signed 32-bit displacement from the end of the instruction.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
1415 1415
1416 1416 void Assembler::jmpb(Label& L) {
1417 1417 if (L.is_bound()) {
1418 1418 const int short_size = 2;
1419 1419 address entry = target(L);
1420 1420 assert(is8bit((entry - _code_pos) + short_size),
1421 1421 "Dispacement too large for a short jmp");
1422 1422 assert(entry != NULL, "jmp most probably wrong");
1423 1423 intptr_t offs = entry - _code_pos;
1424 1424 emit_byte(0xEB);
1425 1425 emit_byte((offs - short_size) & 0xFF);
1426 1426 } else {
1427 1427 InstructionMark im(this);
1428 1428 L.add_patch_at(code(), locator());
1429 1429 emit_byte(0xEB);
1430 1430 emit_byte(0);
1431 1431 }
1432 1432 }
1433 1433
// LDMXCSR m32 — load the SSE control/status register (0F AE /2).
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);  // register 2 supplies the /2 reg field
}

// LEA r32, m (8D /r). On 64-bit an addr32 (0x67) prefix is emitted so the
// effective address is computed with 32-bit wraparound semantics.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}
1452 1452
// Emit the LOCK prefix (0xF0) for the following instruction. With the
// (Atomics & 1) diagnostic flag set, a NOP (0x90) is emitted instead,
// deliberately removing atomicity for testing.
void Assembler::lock() {
  if (Atomics & 1) {
    // Emit either nothing, a NOP, or a NOP: prefix
    emit_byte(0x90) ;
  } else {
    emit_byte(0xF0);
  }
}
1461 1461
// LZCNT r32, r32 (F3 0F BD /r). Without LZCNT support the CPU would decode
// this as BSR (the F3 prefix is ignored), hence the guard.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction (0F AE F0) — full memory fence; requires SSE2.
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}
1478 1478
// Pointer-sized register move: movq on 64-bit, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
1482 1482
// MOVAPD xmm, xmm (66 0F 28 /r). The REX prefix is computed by hand here
// because the mandatory 66 prefix must precede REX, so the shared
// prefix_and_encode helper cannot be used directly.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  emit_byte(0x66);
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);
}

// MOVAPS xmm, xmm (0F 28 /r) — same hand-rolled REX logic, no 66 prefix.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);
}
1529 1529
// MOV r8, m8 (8A /r). The byte_instr flag to prefix() forces a REX prefix
// where needed so spl/bpl/sil/dil are addressable on 64-bit.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}


// MOV m8, imm8 (C6 /0 ib).
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


// MOV m8, r8 (88 /r).
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}
1555 1555
// MOVD xmm, r32 (66 0F 6E /r).
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// MOVD r32, xmm (66 0F 7E /r). In this encoding the xmm register occupies
// the ModRM reg field, so the encode arguments are swapped.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefix_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
1574 1574
// MOVDQA xmm, m128 — aligned 128-bit load (66 0F 6F /r).
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVDQA xmm, xmm.
// NOTE(review): this uses prefixq_and_encode (REX.W) unlike the memory forms;
// presumably the W bit is ignored by this opcode and only costs an extra
// prefix byte — confirm against the SDM before changing.
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// MOVDQA m128, xmm — aligned 128-bit store (66 0F 7F /r).
void Assembler::movdqa(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// MOVDQU xmm, m128 — unaligned 128-bit load (F3 0F 6F /r).
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVDQU xmm, xmm. Same NOTE(review) as movdqa(XMMRegister, XMMRegister)
// regarding the REX.W prefix.
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// MOVDQU m128, xmm — unaligned 128-bit store (F3 0F 7F /r).
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1632 1632
// Uses zero extension on 64bit

// MOV r32, imm32 (B8+rd id).
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}

// MOV r32, r32 (8B /r).
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// MOV r32, m32 (8B /r).
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV m32, imm32 (C7 /0 id).
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// MOV m32, r32 (89 /r).
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1668 1668
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
// MOVLPD xmm, m64 (66 0F 12 /r) — loads the low quadword, upper half unchanged.
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}
1681 1681
// MOVQ mm, m64 — MMX 64-bit load (0F 6F /r).
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVQ m64, mm — MMX 64-bit store (0F 7F /r).
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// MOVQ xmm, m64 — SSE2 64-bit load, upper half zeroed (F3 0F 7E /r).
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// MOVQ m64, xmm — SSE2 64-bit store (66 0F D6 /r).
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
1721 1721
// MOVSX r32, m8 — sign-extending byte load (0F BE /r).
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

// MOVSX r32, r8 (0F BE /r); byte_instr flag forces REX where needed.
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
1737 1737
// MOVSD xmm, xmm — scalar double move (F2 0F 10 /r).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// MOVSD xmm, m64 — scalar double load (F2 0F 10 /r).
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// MOVSD m64, xmm — scalar double store (F2 0F 11 /r).
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// MOVSS xmm, xmm — scalar float move (F3 0F 10 /r).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// MOVSS xmm, m32 — scalar float load (F3 0F 10 /r).
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// MOVSS m32, xmm — scalar float store (F3 0F 11 /r).
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1795 1795
// MOVSX r32, m16 — sign-extending word load (0F BF /r).
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

// MOVSX r32, r16 (0F BF /r).
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
1810 1810
// MOV m16, imm16 (66 C7 /0 iw) — 66 is the operand-size override prefix.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}

// MOV r16, m16 (66 8B /r).
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV m16, r16 (66 89 /r).
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1836 1836
// MOVZX — zero-extend a byte (0F B6) or word (0F B7) into a 32-bit register.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  // 'true' requests byte-register encoding (REX on 64-bit for spl/bpl/sil/dil)
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
1867 1867
// MUL — unsigned multiply of EAX by the operand (F7 /4); result in EDX:EAX.
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);          // rsp encodes the /4 opcode extension, not a register operand
}

void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);        // 0xE0 = ModRM with reg field /4
}
1880 1880
// MULSD — scalar double multiply (F2 0F 59); requires SSE2.
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);                 // scalar-double mandatory prefix
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// MULSS — scalar single multiply (F3 0F 59); requires SSE.
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}
1918 1918
// NEG — two's-complement negate (F7 /3).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);        // 0xD8 = ModRM with reg field /3
}
1924 1924
// Emit exactly i bytes of padding.  Where supported, multi-byte "address
// nops" (0F 1F /0 with a dummy memory operand) are used so the padding
// decodes as few instructions as possible.  The case cascades below fall
// through intentionally: each 0x66 size prefix extends the following nop
// by one byte.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive addess nops (mix with regular nops)
      // Each iteration emits exactly 15 bytes: 3 + 8 + 3 + 1.
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
        // fall through
      case 13:
        emit_byte(0x66); // size prefix
        // fall through
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
        // fall through
      case 10:
        emit_byte(0x66); // size prefix
        // fall through
      case 9:
        emit_byte(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
        // fall through
      case 2:
        emit_byte(0x66); // size prefix
        // fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;           // each iteration emits an 11-byte prefixed address nop
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
        // fall through
      case 10:
        emit_byte(0x66); // size prefix
        // fall through
      case 9:
        emit_byte(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
        // fall through
      case 2:
        emit_byte(0x66); // size prefix
        // fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Fallback for CPUs without address-nop support.
  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66);
      // fall through
    case 3:
      emit_byte(0x66);
      // fall through
    case 2:
      emit_byte(0x66);
      // fall through
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2172 2172
// NOT — one's-complement negate (F7 /2).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );       // 0xD0 = ModRM with reg field /2
}
2178 2178
// OR — bitwise inclusive or, 32-bit forms.
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);       // rcx encodes the /1 opcode extension; 4 = imm bytes following
  emit_long(imm32);
}

void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);  // emit_arith picks the short sign-extended 0x83 form when possible
}


void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}


void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
2205 2205
// PCMPESTRI — packed compare explicit-length strings, return index in ECX
// (66 0F 3A 61 /r ib); SSE4.2.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_operand(dst, src);
  emit_byte(imm8);                 // comparison-mode control byte
}

void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  emit_byte(0x66);
  // NOTE(review): prefixq_and_encode emits REX.W here (the Address form above
  // uses the plain prefix).  Per the Intel SDM, REX.W makes PCMPESTRI read its
  // implicit lengths from the full RAX/RDX rather than EAX/EDX — confirm the
  // callers always keep the upper halves zero, or whether prefix_and_encode
  // was intended.
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_byte(0xC0 | encode);
  emit_byte(imm8);
}
2230 2230
// generic
// POP r64/r32 — one-byte opcode 0x58+reg.
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}

// POPCNT — population count (F3 0F B8); requires the POPCNT feature bit.
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}

void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}

// POPF — pop flags register from the stack.
void Assembler::popf() {
  emit_byte(0x9D);
}

#ifndef _LP64 // no 32bit push/pop on amd64
// POP m32 (8F /0).
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);          // rax encodes the /0 opcode extension
}
#endif
2269 2269
// Shared prefix emission for all prefetch forms (address prefix + 0F escape).
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}

// PREFETCHNTA — prefetch into non-temporal cache structure (0F 18 /0).
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}

// PREFETCH (3DNow!) — read hint (0F 0D /0).
void Assembler::prefetchr(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}

// PREFETCHT0/T1/T2 — prefetch into successive cache levels (0F 18 /1../3).
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}

void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}

void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}

// PREFETCHW (3DNow!) — write-intent hint (0F 0D /1).
void Assembler::prefetchw(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}
2322 2322
// Emit a raw instruction prefix byte (e.g. lock, REX, segment override).
void Assembler::prefix(Prefix p) {
  a_byte(p);
}
2326 2326
// PSHUFD — shuffle packed doublewords (66 0F 70 /r ib); SSE2.
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);          // shuffle-control immediate

}

void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}

// PSHUFLW — shuffle packed low words (F2 0F 70 /r ib); SSE2.
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);
}

void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst); // QQ new
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2377 2377
2378 2378 void Assembler::psrlq(XMMRegister dst, int shift) {
2379 2379 // HMM Table D-1 says sse2 or mmx
2380 2380 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2381 2381
2382 2382 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2383 2383 emit_byte(0x66);
2384 2384 emit_byte(0x0F);
2385 2385 emit_byte(0x73);
2386 2386 emit_byte(0xC0 | encode);
2387 2387 emit_byte(shift);
2388 2388 }
2389 2389
// PTEST — logical compare setting ZF/CF (66 0F 38 17 /r); SSE4.1.
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_operand(dst, src);
}

void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");

  emit_byte(0x66);
  // NOTE(review): prefixq_and_encode sets REX.W, which PTEST does not use —
  // the Address overload above emits the plain prefix.  Presumably harmless
  // (just a wider prefix byte); confirm whether prefix_and_encode was meant.
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_byte(0xC0 | encode);
}
2412 2412
// PUNPCKLBW — interleave low-order bytes (66 0F 60 /r); SSE2.
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x60);
  emit_byte(0xC0 | encode);
}
2421 2421
// PUSH imm32 (0x68).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}

// PUSH r — one-byte opcode 0x50+reg.
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}

// PUSHF — push flags register onto the stack.
void Assembler::pushf() {
  emit_byte(0x9C);
}

#ifndef _LP64 // no 32bit push/pop on amd64
// PUSH m32 (FF /6).
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);          // rsi encodes the /6 opcode extension
}
#endif
2448 2448
// PXOR xmm, m128 — bitwise xor (66 0F EF /r); SSE2.
void Assembler::pxor(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xEF);
  emit_operand(dst, src);
}
2458 2458
2459 2459 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2460 2460 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2461 2461 InstructionMark im(this);
2462 2462 emit_byte(0x66);
2463 2463 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2464 2464 emit_byte(0x0F);
2465 2465 emit_byte(0xEF);
2466 2466 emit_byte(0xC0 | encode);
2467 2467 }
2468 2468
// RCL — rotate left through carry by immediate (D1 /2 for count 1, C1 /2 ib otherwise).
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);               // one-byte shift-by-1 form
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
2481 2481
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
void Assembler::rep_mov() {
  emit_byte(0xF3);                 // REP prefix
  // MOVSQ
  LP64_ONLY(prefix(REX_W));        // widen to 8-byte elements on 64-bit
  emit_byte(0xA5);
}

// sets rcx pointer sized words with rax, value at [edi]
// generic
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer sized words at [edi] for occurance of rax,
// generic
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);               // REPNE prefix
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}

#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurance of rax,
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);
  // SCASL (no REX.W: 4-byte element scan even on 64-bit)
  emit_byte(0xAF);
}
#endif
2518 2518
2519 2519 void Assembler::ret(int imm16) {
2520 2520 if (imm16 == 0) {
2521 2521 emit_byte(0xC3);
2522 2522 } else {
2523 2523 emit_byte(0xC2);
2524 2524 emit_word(imm16);
2525 2525 }
2526 2526 }
2527 2527
// SAHF — store AH into flags.  Guarded out on 64-bit builds where this
// encoder is not supported.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}
2535 2535
// SAR — arithmetic right shift by immediate (D1 /7 for count 1, C1 /7 ib otherwise).
void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_byte(0xD1);               // short shift-by-1 form
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// SAR by CL (D3 /7).
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
2554 2554
// SBB — subtract with borrow, 32-bit forms.
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);   // rbx encodes the /3 opcode extension
}

void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
2578 2578
// SETcc — set byte register on condition (0F 90+cc /r).
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);  // true: byte-register encoding
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}
2586 2586
// SHL — logical left shift by immediate (D1 /4 for count 1, C1 /4 ib otherwise).
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_byte(0xD1);               // short shift-by-1 form
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// SHL by CL (D3 /4).
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
2605 2605
2606 2606 void Assembler::shrl(Register dst, int imm8) {
2607 2607 assert(isShiftCount(imm8), "illegal shift count");
2608 2608 int encode = prefix_and_encode(dst->encoding());
2609 2609 emit_byte(0xC1);
2610 2610 emit_byte(0xE8 | encode);
2611 2611 emit_byte(imm8);
2612 2612 }
2613 2613
// SHR by CL (D3 /5).
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
2619 2619
// copies a single word from [esi] to [edi]
// MOVS (A5) without a REP prefix.
void Assembler::smovl() {
  emit_byte(0xA5);
}
2624 2624
// SQRTSD — scalar double-precision square root (F2 0F 51); SSE2.
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  // HMM Table D-1 says sse2
  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_byte(0xC0 | encode);
}

// SQRTSD xmm, m64 (memory-source form).
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}
2645 +
2646 +void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2647 + // HMM Table D-1 says sse2
2648 + // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2649 + NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2650 + emit_byte(0xF3);
2651 + int encode = prefix_and_encode(dst->encoding(), src->encoding());
2652 + emit_byte(0x0F);
2653 + emit_byte(0x51);
2654 + emit_byte(0xC0 | encode);
2655 +}
2656 +
2657 +void Assembler::sqrtss(XMMRegister dst, Address src) {
2658 + NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2659 + InstructionMark im(this);
2660 + emit_byte(0xF3);
2661 + prefix(src, dst);
2662 + emit_byte(0x0F);
2663 + emit_byte(0x51);
2664 + emit_operand(dst, src);
2665 +}
2666 +
// STMXCSR — store the MXCSR control/status register to memory (0F AE /3).
void Assembler::stmxcsr( Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(3), dst);   // as_Register(3) (== rbx) supplies the /3 opcode extension
}
2644 2675
// SUB — integer subtract, 32-bit forms.
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  if (is8bit(imm32)) {
    emit_byte(0x83);               // sign-extended imm8 short form
    emit_operand(rbp, dst, 1);     // rbp encodes the /5 opcode extension; 1 = imm bytes following
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(0x81);
    emit_operand(rbp, dst, 4);
    emit_long(imm32);
  }
}

void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
2682 2713
// SUBSD — scalar double subtract (F2 0F 5C); SSE2.
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}

// SUBSS — scalar single subtract (F3 0F 5C); SSE.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}
2720 2751
// TEST — logical compare (AND without writing the destination).
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);   // true: byte-register encoding
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_byte(0xA9);               // short TEST EAX, imm32 form
  } else {
    encode = prefix_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}
2753 2784
// UCOMISD — unordered scalar double compare (66 0F 2E); delegates to ucomiss
// after emitting the 0x66 mandatory prefix.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // NOTE(review): the 0x66 prefix is emitted before ucomiss() takes its
  // InstructionMark, so the recorded instruction start excludes this byte —
  // confirm no relocatable Address is ever passed here, or hoist the mark.
  emit_byte(0x66);
  ucomiss(dst, src);
}

void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

// UCOMISS — unordered scalar single compare (0F 2E); SSE.
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_operand(dst, src);
}

void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_byte(0xC0 | encode);
}
2783 2814
2784 2815
// xaddl: 0F C1 /r — exchange src with [dst] and add (no LOCK emitted here;
// callers add it if atomicity is required).
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// xchgl reg,mem: 87 /r — implicitly locked by the processor.
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}
2805 2836
// xorl reg,imm32: 81 /6 id.
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

// xorl reg,mem: 33 /r (load direction).
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
2822 2853
// Bitwise XOR of packed FP values.  xorpd is xorps with a 0x66 prefix; the
// 0x66 is emitted before xorps runs, so the mandatory prefix precedes any
// REX byte xorps emits.
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  xorps(dst, src);
}

void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}


// xorps: 0F 57 /r.
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_byte(0xC0 | encode);
}

void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}
2856 2887
2857 2888 #ifndef _LP64
2858 2889 // 32bit only pieces of the assembler
2859 2890
// 32-bit-only emitters (inside #ifndef _LP64): no REX prefixes ever needed.
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding()); // 81 /7: cmp r32, imm32
  emit_data(imm32, rspec, 0);         // imm32 carries a relocation (e.g. an oop literal)
}

void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);            // rdi encodes the /7 opcode extension, not an operand
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);             // 0F C7 /1: cmpxchg8b m64
}

void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_byte(0x48 | dst->encoding());  // one-byte 48+rd form (only valid in 32-bit mode)
}
2890 2921
2891 2922 #endif // _LP64
2892 2923
2893 2924 // 64bit typically doesn't use the x87 but needs to for the trig funcs
2894 2925
// x87 FPU instruction emitters.  For the memory forms, the Register passed to
// emit_operand32() is not a real operand: its encoding supplies the /digit
// opcode-extension field of the ModRM byte (Intel SDM "/digit" notation).
// Suffix conventions: _s = 32-bit, _d = 64-bit, _x = 80-bit extended.
void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}

void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}

void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);   // DC /0: fadd m64
}

void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);   // D8 /0: fadd m32
}

void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i); // fadd st(i), st(0)
}

void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}

void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}

void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}

void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}

void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);   // DC /3: fcomp m64
}

void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);   // D8 /3: fcomp m32
}

void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}

void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}

void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}

void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}

void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);   // DC /6: fdiv m64
}

void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);   // D8 /6: fdiv m32
}

void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}

// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);   // DC /7: fdivr m64
}

void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);   // D8 /7: fdivr m32
}

void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);   // DF /5: fild m64int
}

void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);   // DB /0: fild m32int
}

void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

void Assembler::finit() {
  emit_byte(0x9B);            // wait prefix: this is the checking FINIT form
  emit_byte(0xDB);
  emit_byte(0xE3);
}

void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);   // DB /2: fist m32int
}

void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);   // DF /7: fistp m64int
}

void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);   // DB /3: fistp m32int
}

void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);   // DD /0: fld m64
}

void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);   // D9 /0: fld m32
}


void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index); // fld st(index)
}

void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);   // DB /5: fld m80 (extended precision)
}

void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);   // D9 /5: fldcw m2byte
}

void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);   // D9 /4: fldenv
}

void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// Natural log via fyl2x: ln(x) = ln(2) * log2(x).  fxch arranges the
// stack so ST(1)=ln(2), ST(0)=x before fyl2x computes ST(1)*log2(ST(0)).
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}
3120 3151
// Base-10 log via fyl2x: log10(x) = log10(2) * log2(x).
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);   // DC /1: fmul m64
}

void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);   // D8 /1: fmul m32
}

void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);   // DD /6: fnsave (no wait prefix)
}

void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);            // NOTE(review): 9B D9 /7 is the waiting FSTCW form,
  emit_byte(0xD9);            // despite the "fnstcw" name — confirm intent before renaming
  emit_operand32(rdi, src);
}

void Assembler::fnstsw_ax() {
  emit_byte(0xdF);            // DF E0: fnstsw ax
  emit_byte(0xE0);
}

void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);   // DD /4: frstor
}

void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);   // DD /2: fst m64
}

void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);   // D9 /2: fst m32
}

void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);   // DD /3: fstp m64
}

void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index); // fstp st(index)
}

void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);   // D9 /3: fstp m32
}

void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);   // DB /7: fstp m80
}

void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);   // DC /4: fsub m64
}

void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);   // D8 /4: fsub m32
}

void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);   // DC /5: fsubr m64
}

void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);   // D8 /5: fsubr m32
}

void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// fptan pushes tan(x) then 1.0; the trailing fstp st(0) pops the 1.0
// so only the tangent remains on the stack.
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

void Assembler::fwait() {
  emit_byte(0x9B);
}

void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}
3313 3344
3314 3345
3315 3346 #ifndef _LP64
3316 3347
// 32-bit only: one-byte 40+rd inc (this encoding is a REX prefix in 64-bit mode).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());
}
3321 3352
// On 32-bit, pointer-sized lea is simply leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}
3325 3356
// Store a relocatable 32-bit literal (e.g. an oop) to memory: C7 /0 id.
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);            // rax encodes the /0 extension, not an operand
  emit_data((int)imm32, rspec, 0);   // imm32 carries the relocation
}

// Load a relocatable 32-bit literal into a register: B8+rd id.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}
3338 3369 }
3339 3370
// popa/pusha (32-bit only): pop/push all eight general registers.
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

// Push a relocatable 32-bit literal: 68 id.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// setnz dst8: 0F 95 /0 (32-bit form, no REX handling).
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}
3358 3389 }
3359 3390
// shld dst, src, cl: 0F A5 /r (double-precision shift left by CL).
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// shrd dst, src, cl: 0F AD /r (double-precision shift right by CL).
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
3371 3402
3372 3403 #else // LP64
3373 3404
// setnz dst8 (64-bit form): byteinst=true forces a REX when needed so
// encodings 4-7 address spl/bpl/sil/dil rather than ah/ch/dh/bh.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}
3380 3411
3381 3412 // 64bit only pieces of the assembler
3382 3413 // This should only be used by 64bit instructions that can use rip-relative
3383 3414 // it cannot be used by instructions that want an immediate value.
3384 3415
// Returns true if adr can be reached with a rip-relative (signed 32-bit)
// displacement from anywhere this code might end up in the code cache;
// false forces callers to materialize a full 64-bit literal instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type && // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.

  // Worst case: check reachability from both ends of the code cache,
  // since this code may be emitted into a temp buffer and copied anywhere.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
3451 3482
// Emit a 64-bit datum with an optional relocation described by a bare reloc type.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a relocated 64-bit datum; the relocation is attached to the enclosing
// instruction (inst_mark), not to the embedded word itself.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words. Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}
3476 3507
// Single-register REX helpers: emit any needed REX prefix and return the
// low three bits of the register encoding for the opcode/ModRM byte.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    // Byte insts on encodings 4-7 need a bare REX so they address
    // spl/bpl/sil/dil instead of ah/ch/dh/bh.
    prefix(REX);
  }
  return reg_enc;
}

// 64-bit operand size: always emits REX.W (with REX.B for r8-r15).
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}
3496 3527
// Two-register REX helpers: dst lands in ModRM.reg (REX.R extends it),
// src in ModRM.rm (REX.B extends it).  Returns the packed ModRM low bits.
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// Same as above with REX.W set for 64-bit operand size.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
3536 3567
// Emit REX.B if the register is one of r8-r15 (extends the opcode reg field).
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// Emit the REX prefix (if any) required by an address's base (REX.B) and
// index (REX.X) registers; no prefix at all if neither needs extension.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// 64-bit operand size: always emits some REX with W set.
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}
3572 3603
3573 3604
// REX for an address + register operand pair: reg extends via REX.R,
// base via REX.B, index via REX.X.  For byte-capable GPRs, encodings 4-7
// force a bare REX so spl/bpl/sil/dil are addressed, not ah/ch/dh/bh.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// Same pairing with REX.W always set (64-bit operand size).
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// XMM variant: like the Register form but without the byte-register case
// (no bare-REX needed since XMM registers have no ah/ch/dh/bh aliasing).
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
3667 3698
// adcq reg,imm32: 81 /2 id with REX.W (imm is sign-extended to 64 bits).
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// adcq reg,mem: 13 /r with REX.W.
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
3679 3710
3680 3711 void Assembler::adcq(Register dst, Register src) {
3681 3712 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3682 3713 emit_arith(0x13, 0xC0, dst, src);
3683 3714 }
3684 3715
// addq mem,imm32: 81 /0 id (or 83 /0 ib via emit_arith_operand) with REX.W.
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);  // rax encodes the /0 extension
}

// addq mem,reg: 01 /r (store direction) with REX.W.
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

// addq reg,imm32: 81 /0 id with REX.W.
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// addq reg,mem: 03 /r (load direction) with REX.W.
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
3714 3745
// andq reg,imm32: 81 /4 id with REX.W (imm sign-extended to 64 bits).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// andq reg,mem: 23 /r with REX.W.
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
3726 3757
3727 3758 void Assembler::andq(Register dst, Register src) {
3728 3759 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3729 3760 emit_arith(0x23, 0xC0, dst, src);
3730 3761 }
3731 3762
// bsfq: 0F BC /r — bit scan forward, 64-bit.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// bsrq: 0F BD /r — with an F3 prefix this encoding becomes LZCNT on
// supporting CPUs, hence the assert.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// bswapq: 0F C8+rd with REX.W.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// cqo (sign-extend rax into rdx:rax): REX.W + 99.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}
3757 3788
// clflush: 0F AE /7 — flush the cache line containing adr.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);   // rdi encodes the /7 extension, not an operand
}

// cmovq: 0F 40+cc /r — conditional move, 64-bit.
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}

void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
3779 3810
// cmpq mem,imm32: 81 /7 id with REX.W; the extra "4" tells emit_operand
// that four immediate bytes follow (for rip-relative displacement fixup).
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);  // rdi encodes the /7 extension
  emit_long(imm32);
}

// cmpq reg,imm32: 81 /7 id with REX.W.
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
3792 3823
// NOTE(review): 0x3B is CMP r64, r/m64, so with emit_operand(src, dst) this
// emits "cmp src, [dst]" — the operands are reversed relative to the
// (dst, src) parameter order; 0x39 (CMP r/m64, r64) would match the names.
// ZF is unaffected but carry/sign-based conditions are mirrored. Confirm
// whether callers rely on the current direction before changing.
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x3B);
  emit_operand(src, dst);
}
3799 3830
// cmpq reg,reg: 3B /r with REX.W.
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

// cmpq reg,mem: 3B /r (load direction) with REX.W.
void Assembler::cmpq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
3811 3842
// cmpxchgq: 0F B1 /r with REX.W — compare rax with [adr], store reg on match.
// No LOCK emitted here; callers add it when atomicity is required.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
3819 3850
// Scalar int64<->FP conversions.  The mandatory F2/F3 prefix is emitted
// before prefixq_and_encode so it precedes the REX.W byte, as required.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);            // F2 REX.W 0F 2A /r: cvtsi2sd xmm, r64
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);            // F3 REX.W 0F 2A /r: cvtsi2ss xmm, r64
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);            // F2 REX.W 0F 2C /r: cvttsd2si r64, xmm (truncating)
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);            // F3 REX.W 0F 2C /r: cvttss2si r64, xmm (truncating)
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
3855 3886
3856 3887 void Assembler::decl(Register dst) {
3857 3888 // Don't use it directly. Use MacroAssembler::decrementl() instead.
3858 3889 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3859 3890 int encode = prefix_and_encode(dst->encoding());
3860 3891 emit_byte(0xFF);
3861 3892 emit_byte(0xC8 | encode);
3862 3893 }
3863 3894
3864 3895 void Assembler::decq(Register dst) {
3865 3896 // Don't use it directly. Use MacroAssembler::decrementq() instead.
3866 3897 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
// Emits FF /1 (DEC r/m64): REX.W prefix, opcode 0xFF, ModRM 0xC8|reg.
3867 3898 int encode = prefixq_and_encode(dst->encoding());
3868 3899 emit_byte(0xFF);
3869 3900 emit_byte(0xC8 | encode);
3870 3901 }
3871 3902
3872 3903 void Assembler::decq(Address dst) {
3873 3904 // Don't use it directly. Use MacroAssembler::decrementq() instead.
3874 3905 InstructionMark im(this);
3875 3906 prefixq(dst);
3876 3907 emit_byte(0xFF);
3877 3908 emit_operand(rcx, dst);
3878 3909 }
3879 3910
// Restore x87/MMX/SSE state from the 512-byte area at 'src'.
// 0F AE /1 (as_Register(1) supplies the /1 ModRM reg field); prefixq emits
// REX.W, selecting the 64-bit FXRSTOR64 image format.
3880 3911 void Assembler::fxrstor(Address src) {
3881 3912 prefixq(src);
3882 3913 emit_byte(0x0F);
3883 3914 emit_byte(0xAE);
3884 3915 emit_operand(as_Register(1), src);
3885 3916 }
3886 3917
// Save x87/MMX/SSE state into the 512-byte area at 'dst'.
// 0F AE /0 (as_Register(0) supplies the /0 ModRM reg field); prefixq emits
// REX.W, selecting the 64-bit FXSAVE64 image format.
3887 3918 void Assembler::fxsave(Address dst) {
3888 3919 prefixq(dst);
3889 3920 emit_byte(0x0F);
3890 3921 emit_byte(0xAE);
3891 3922 emit_operand(as_Register(0), dst);
3892 3923 }
3893 3924
3894 3925 void Assembler::idivq(Register src) {
3895 3926 int encode = prefixq_and_encode(src->encoding());
3896 3927 emit_byte(0xF7);
3897 3928 emit_byte(0xF8 | encode);
3898 3929 }
3899 3930
3900 3931 void Assembler::imulq(Register dst, Register src) {
3901 3932 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3902 3933 emit_byte(0x0F);
3903 3934 emit_byte(0xAF);
3904 3935 emit_byte(0xC0 | encode);
3905 3936 }
3906 3937
3907 3938 void Assembler::imulq(Register dst, Register src, int value) {
3908 3939 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3909 3940 if (is8bit(value)) {
3910 3941 emit_byte(0x6B);
3911 3942 emit_byte(0xC0 | encode);
3912 3943 emit_byte(value & 0xFF);
3913 3944 } else {
3914 3945 emit_byte(0x69);
3915 3946 emit_byte(0xC0 | encode);
3916 3947 emit_long(value);
3917 3948 }
3918 3949 }
3919 3950
3920 3951 void Assembler::incl(Register dst) {
3921 3952 // Don't use it directly. Use MacroAssembler::incrementl() instead.
3922 3953 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
// Emits FF /0 (INC r/m32): opcode 0xFF, ModRM 0xC0|reg; no REX.W (32-bit op).
3923 3954 int encode = prefix_and_encode(dst->encoding());
3924 3955 emit_byte(0xFF);
3925 3956 emit_byte(0xC0 | encode);
3926 3957 }
3927 3958
3928 3959 void Assembler::incq(Register dst) {
3929 3960 // Don't use it directly. Use MacroAssembler::incrementq() instead.
3930 3961 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
// Emits FF /0 (INC r/m64): REX.W prefix, opcode 0xFF, ModRM 0xC0|reg.
3931 3962 int encode = prefixq_and_encode(dst->encoding());
3932 3963 emit_byte(0xFF);
3933 3964 emit_byte(0xC0 | encode);
3934 3965 }
3935 3966
3936 3967 void Assembler::incq(Address dst) {
3937 3968 // Don't use it directly. Use MacroAssembler::incrementq() instead.
3938 3969 InstructionMark im(this);
3939 3970 prefixq(dst);
3940 3971 emit_byte(0xFF);
3941 3972 emit_operand(rax, dst);
3942 3973 }
3943 3974
3944 3975 void Assembler::lea(Register dst, Address src) {
3945 3976 leaq(dst, src);
3946 3977 }
3947 3978
3948 3979 void Assembler::leaq(Register dst, Address src) {
3949 3980 InstructionMark im(this);
3950 3981 prefixq(src, dst);
3951 3982 emit_byte(0x8D);
3952 3983 emit_operand(dst, src);
3953 3984 }
3954 3985
// Load a full 64-bit immediate into 'dst': REX.W + B8+rd imm64
// (10-byte movabs form). No relocation is recorded — use mov_literal64
// for immediates that must be patchable (oops, addresses).
3955 3986 void Assembler::mov64(Register dst, int64_t imm64) {
3956 3987 InstructionMark im(this);
3957 3988 int encode = prefixq_and_encode(dst->encoding());
3958 3989 emit_byte(0xB8 | encode);
3959 3990 emit_long64(imm64);
3960 3991 }
3961 3992
// Same encoding as mov64 (REX.W + B8+rd imm64), but the 64-bit immediate is
// emitted through emit_data64 so 'rspec' records a relocation at the
// immediate — required when the value is an oop or code address that the
// runtime may later patch.
3962 3993 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
3963 3994 InstructionMark im(this);
3964 3995 int encode = prefixq_and_encode(dst->encoding());
3965 3996 emit_byte(0xB8 | encode);
3966 3997 emit_data64(imm64, rspec);
3967 3998 }
3968 3999
3969 4000 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
3970 4001 InstructionMark im(this);
3971 4002 int encode = prefix_and_encode(dst->encoding());
3972 4003 emit_byte(0xB8 | encode);
3973 4004 emit_data((int)imm32, rspec, narrow_oop_operand);
3974 4005 }
3975 4006
3976 4007 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
3977 4008 InstructionMark im(this);
3978 4009 prefix(dst);
3979 4010 emit_byte(0xC7);
3980 4011 emit_operand(rax, dst, 4);
3981 4012 emit_data((int)imm32, rspec, narrow_oop_operand);
3982 4013 }
3983 4014
3984 4015 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3985 4016 InstructionMark im(this);
3986 4017 int encode = prefix_and_encode(src1->encoding());
3987 4018 emit_byte(0x81);
3988 4019 emit_byte(0xF8 | encode);
3989 4020 emit_data((int)imm32, rspec, narrow_oop_operand);
3990 4021 }
3991 4022
3992 4023 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3993 4024 InstructionMark im(this);
3994 4025 prefix(src1);
3995 4026 emit_byte(0x81);
3996 4027 emit_operand(rax, src1, 4);
3997 4028 emit_data((int)imm32, rspec, narrow_oop_operand);
3998 4029 }
3999 4030
4000 4031 void Assembler::lzcntq(Register dst, Register src) {
4001 4032 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
4002 4033 emit_byte(0xF3);
4003 4034 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4004 4035 emit_byte(0x0F);
4005 4036 emit_byte(0xBD);
4006 4037 emit_byte(0xC0 | encode);
4007 4038 }
4008 4039
4009 4040 void Assembler::movdq(XMMRegister dst, Register src) {
4010 4041 // table D-1 says MMX/SSE2
4011 4042 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
4012 4043 emit_byte(0x66);
4013 4044 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4014 4045 emit_byte(0x0F);
4015 4046 emit_byte(0x6E);
4016 4047 emit_byte(0xC0 | encode);
4017 4048 }
4018 4049
4019 4050 void Assembler::movdq(Register dst, XMMRegister src) {
4020 4051 // table D-1 says MMX/SSE2
4021 4052 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
4022 4053 emit_byte(0x66);
4023 4054 // swap src/dst to get correct prefix
4024 4055 int encode = prefixq_and_encode(src->encoding(), dst->encoding());
4025 4056 emit_byte(0x0F);
4026 4057 emit_byte(0x7E);
4027 4058 emit_byte(0xC0 | encode);
4028 4059 }
4029 4060
4030 4061 void Assembler::movq(Register dst, Register src) {
4031 4062 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4032 4063 emit_byte(0x8B);
4033 4064 emit_byte(0xC0 | encode);
4034 4065 }
4035 4066
4036 4067 void Assembler::movq(Register dst, Address src) {
4037 4068 InstructionMark im(this);
4038 4069 prefixq(src, dst);
4039 4070 emit_byte(0x8B);
4040 4071 emit_operand(dst, src);
4041 4072 }
4042 4073
4043 4074 void Assembler::movq(Address dst, Register src) {
4044 4075 InstructionMark im(this);
4045 4076 prefixq(dst, src);
4046 4077 emit_byte(0x89);
4047 4078 emit_operand(src, dst);
4048 4079 }
4049 4080
4050 4081 void Assembler::movsbq(Register dst, Address src) {
4051 4082 InstructionMark im(this);
4052 4083 prefixq(src, dst);
4053 4084 emit_byte(0x0F);
4054 4085 emit_byte(0xBE);
4055 4086 emit_operand(dst, src);
4056 4087 }
4057 4088
4058 4089 void Assembler::movsbq(Register dst, Register src) {
4059 4090 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4060 4091 emit_byte(0x0F);
4061 4092 emit_byte(0xBE);
4062 4093 emit_byte(0xC0 | encode);
4063 4094 }
4064 4095
// BROKEN — intentionally unreachable. The encoding below ORs the register
// number into the 0xC7 opcode byte itself instead of emitting a separate
// ModRM byte (0xC0 | encode), producing the garbage instructions the dbx
// disassembly in the comment shows. Guarded by ShouldNotReachHere() so no
// caller can rely on it; left as-is rather than "fixed" dead code.
4065 4096 void Assembler::movslq(Register dst, int32_t imm32) {
4066 4097 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
4067 4098 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
4068 4099 // as a result we shouldn't use until tested at runtime...
4069 4100 ShouldNotReachHere();
4070 4101 InstructionMark im(this);
4071 4102 int encode = prefixq_and_encode(dst->encoding());
4072 4103 emit_byte(0xC7 | encode);
4073 4104 emit_long(imm32);
4074 4105 }
4075 4106
4076 4107 void Assembler::movslq(Address dst, int32_t imm32) {
4077 4108 assert(is_simm32(imm32), "lost bits");
4078 4109 InstructionMark im(this);
4079 4110 prefixq(dst);
4080 4111 emit_byte(0xC7);
4081 4112 emit_operand(rax, dst, 4);
4082 4113 emit_long(imm32);
4083 4114 }
4084 4115
4085 4116 void Assembler::movslq(Register dst, Address src) {
4086 4117 InstructionMark im(this);
4087 4118 prefixq(src, dst);
4088 4119 emit_byte(0x63);
4089 4120 emit_operand(dst, src);
4090 4121 }
4091 4122
4092 4123 void Assembler::movslq(Register dst, Register src) {
4093 4124 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4094 4125 emit_byte(0x63);
4095 4126 emit_byte(0xC0 | encode);
4096 4127 }
4097 4128
4098 4129 void Assembler::movswq(Register dst, Address src) {
4099 4130 InstructionMark im(this);
4100 4131 prefixq(src, dst);
4101 4132 emit_byte(0x0F);
4102 4133 emit_byte(0xBF);
4103 4134 emit_operand(dst, src);
4104 4135 }
4105 4136
4106 4137 void Assembler::movswq(Register dst, Register src) {
4107 4138 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4108 4139 emit_byte(0x0F);
4109 4140 emit_byte(0xBF);
4110 4141 emit_byte(0xC0 | encode);
4111 4142 }
4112 4143
4113 4144 void Assembler::movzbq(Register dst, Address src) {
4114 4145 InstructionMark im(this);
4115 4146 prefixq(src, dst);
4116 4147 emit_byte(0x0F);
4117 4148 emit_byte(0xB6);
4118 4149 emit_operand(dst, src);
4119 4150 }
4120 4151
4121 4152 void Assembler::movzbq(Register dst, Register src) {
4122 4153 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4123 4154 emit_byte(0x0F);
4124 4155 emit_byte(0xB6);
4125 4156 emit_byte(0xC0 | encode);
4126 4157 }
4127 4158
4128 4159 void Assembler::movzwq(Register dst, Address src) {
4129 4160 InstructionMark im(this);
4130 4161 prefixq(src, dst);
4131 4162 emit_byte(0x0F);
4132 4163 emit_byte(0xB7);
4133 4164 emit_operand(dst, src);
4134 4165 }
4135 4166
4136 4167 void Assembler::movzwq(Register dst, Register src) {
4137 4168 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4138 4169 emit_byte(0x0F);
4139 4170 emit_byte(0xB7);
4140 4171 emit_byte(0xC0 | encode);
4141 4172 }
4142 4173
4143 4174 void Assembler::negq(Register dst) {
4144 4175 int encode = prefixq_and_encode(dst->encoding());
4145 4176 emit_byte(0xF7);
4146 4177 emit_byte(0xD8 | encode);
4147 4178 }
4148 4179
4149 4180 void Assembler::notq(Register dst) {
4150 4181 int encode = prefixq_and_encode(dst->encoding());
4151 4182 emit_byte(0xF7);
4152 4183 emit_byte(0xD0 | encode);
4153 4184 }
4154 4185
4155 4186 void Assembler::orq(Address dst, int32_t imm32) {
4156 4187 InstructionMark im(this);
4157 4188 prefixq(dst);
4158 4189 emit_byte(0x81);
4159 4190 emit_operand(rcx, dst, 4);
4160 4191 emit_long(imm32);
4161 4192 }
4162 4193
4163 4194 void Assembler::orq(Register dst, int32_t imm32) {
4164 4195 (void) prefixq_and_encode(dst->encoding());
4165 4196 emit_arith(0x81, 0xC8, dst, imm32);
4166 4197 }
4167 4198
4168 4199 void Assembler::orq(Register dst, Address src) {
4169 4200 InstructionMark im(this);
4170 4201 prefixq(src, dst);
4171 4202 emit_byte(0x0B);
4172 4203 emit_operand(dst, src);
4173 4204 }
4174 4205
4175 4206 void Assembler::orq(Register dst, Register src) {
4176 4207 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4177 4208 emit_arith(0x0B, 0xC0, dst, src);
4178 4209 }
4179 4210
// 64-bit popa: restore all 15 GPRs (everything except rsp) from the
// 16-word frame laid out by pusha() below, then release the frame.
// Slot 11 — where pusha stashed the original rsp — is deliberately skipped;
// rsp is recovered implicitly by the final addq.
4180 4211 void Assembler::popa() { // 64bit
4181 4212 movq(r15, Address(rsp, 0));
4182 4213 movq(r14, Address(rsp, wordSize));
4183 4214 movq(r13, Address(rsp, 2 * wordSize));
4184 4215 movq(r12, Address(rsp, 3 * wordSize));
4185 4216 movq(r11, Address(rsp, 4 * wordSize));
4186 4217 movq(r10, Address(rsp, 5 * wordSize));
4187 4218 movq(r9, Address(rsp, 6 * wordSize));
4188 4219 movq(r8, Address(rsp, 7 * wordSize));
4189 4220 movq(rdi, Address(rsp, 8 * wordSize));
4190 4221 movq(rsi, Address(rsp, 9 * wordSize));
4191 4222 movq(rbp, Address(rsp, 10 * wordSize));
4192 4223 // skip rsp
4193 4224 movq(rbx, Address(rsp, 12 * wordSize));
4194 4225 movq(rdx, Address(rsp, 13 * wordSize));
4195 4226 movq(rcx, Address(rsp, 14 * wordSize));
4196 4227 movq(rax, Address(rsp, 15 * wordSize));
4197 4228
4198 4229 addq(rsp, 16 * wordSize);
4199 4230 }
4200 4231
// POPCNT r64, m64: count set bits of the memory operand into 'dst'.
// Encoding F3 REX.W 0F B8 /r — note the mandatory F3 prefix is emitted
// BEFORE prefixq, as required (legacy prefixes must precede REX).
4201 4232 void Assembler::popcntq(Register dst, Address src) {
4202 4233 assert(VM_Version::supports_popcnt(), "must support");
4203 4234 InstructionMark im(this);
4204 4235 emit_byte(0xF3);
4205 4236 prefixq(src, dst);
4206 4237 emit_byte(0x0F);
4207 4238 emit_byte(0xB8);
4208 4239 emit_operand(dst, src);
4209 4240 }
4210 4241
4211 4242 void Assembler::popcntq(Register dst, Register src) {
4212 4243 assert(VM_Version::supports_popcnt(), "must support");
4213 4244 emit_byte(0xF3);
4214 4245 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4215 4246 emit_byte(0x0F);
4216 4247 emit_byte(0xB8);
4217 4248 emit_byte(0xC0 | encode);
4218 4249 }
4219 4250
4220 4251 void Assembler::popq(Address dst) {
4221 4252 InstructionMark im(this);
4222 4253 prefixq(dst);
4223 4254 emit_byte(0x8F);
4224 4255 emit_operand(rax, dst);
4225 4256 }
4226 4257
// 64-bit pusha: build a 16-word frame holding all 16 GPRs, matching the
// layout popa() reads back (rax in slot 15 down to r15 in slot 0).
4227 4258 void Assembler::pusha() { // 64bit
4228 4259 // we have to store original rsp. ABI says that 128 bytes
4229 4260 // below rsp are local scratch.
// -5*wordSize relative to the pre-sub rsp is the same location as
// slot 11 of the frame after the 16-word subq below — i.e. the slot
// popa() marks "skip rsp". Writing it here uses the red zone, so it is
// safe even before rsp is adjusted.
4230 4261 movq(Address(rsp, -5 * wordSize), rsp);
4231 4262
4232 4263 subq(rsp, 16 * wordSize);
4233 4264
4234 4265 movq(Address(rsp, 15 * wordSize), rax);
4235 4266 movq(Address(rsp, 14 * wordSize), rcx);
4236 4267 movq(Address(rsp, 13 * wordSize), rdx);
4237 4268 movq(Address(rsp, 12 * wordSize), rbx);
4238 4269 // skip rsp
4239 4270 movq(Address(rsp, 10 * wordSize), rbp);
4240 4271 movq(Address(rsp, 9 * wordSize), rsi);
4241 4272 movq(Address(rsp, 8 * wordSize), rdi);
4242 4273 movq(Address(rsp, 7 * wordSize), r8);
4243 4274 movq(Address(rsp, 6 * wordSize), r9);
4244 4275 movq(Address(rsp, 5 * wordSize), r10);
4245 4276 movq(Address(rsp, 4 * wordSize), r11);
4246 4277 movq(Address(rsp, 3 * wordSize), r12);
4247 4278 movq(Address(rsp, 2 * wordSize), r13);
4248 4279 movq(Address(rsp, wordSize), r14);
4249 4280 movq(Address(rsp, 0), r15);
4250 4281 }
4251 4282
4252 4283 void Assembler::pushq(Address src) {
4253 4284 InstructionMark im(this);
4254 4285 prefixq(src);
4255 4286 emit_byte(0xFF);
4256 4287 emit_operand(rsi, src);
4257 4288 }
4258 4289
4259 4290 void Assembler::rclq(Register dst, int imm8) {
4260 4291 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4261 4292 int encode = prefixq_and_encode(dst->encoding());
4262 4293 if (imm8 == 1) {
4263 4294 emit_byte(0xD1);
4264 4295 emit_byte(0xD0 | encode);
4265 4296 } else {
4266 4297 emit_byte(0xC1);
4267 4298 emit_byte(0xD0 | encode);
4268 4299 emit_byte(imm8);
4269 4300 }
4270 4301 }
4271 4302 void Assembler::sarq(Register dst, int imm8) {
4272 4303 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4273 4304 int encode = prefixq_and_encode(dst->encoding());
4274 4305 if (imm8 == 1) {
4275 4306 emit_byte(0xD1);
4276 4307 emit_byte(0xF8 | encode);
4277 4308 } else {
4278 4309 emit_byte(0xC1);
4279 4310 emit_byte(0xF8 | encode);
4280 4311 emit_byte(imm8);
4281 4312 }
4282 4313 }
4283 4314
4284 4315 void Assembler::sarq(Register dst) {
4285 4316 int encode = prefixq_and_encode(dst->encoding());
4286 4317 emit_byte(0xD3);
4287 4318 emit_byte(0xF8 | encode);
4288 4319 }
4289 4320 void Assembler::sbbq(Address dst, int32_t imm32) {
4290 4321 InstructionMark im(this);
4291 4322 prefixq(dst);
4292 4323 emit_arith_operand(0x81, rbx, dst, imm32);
4293 4324 }
4294 4325
4295 4326 void Assembler::sbbq(Register dst, int32_t imm32) {
4296 4327 (void) prefixq_and_encode(dst->encoding());
4297 4328 emit_arith(0x81, 0xD8, dst, imm32);
4298 4329 }
4299 4330
4300 4331 void Assembler::sbbq(Register dst, Address src) {
4301 4332 InstructionMark im(this);
4302 4333 prefixq(src, dst);
4303 4334 emit_byte(0x1B);
4304 4335 emit_operand(dst, src);
4305 4336 }
4306 4337
4307 4338 void Assembler::sbbq(Register dst, Register src) {
4308 4339 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4309 4340 emit_arith(0x1B, 0xC0, dst, src);
4310 4341 }
4311 4342
4312 4343 void Assembler::shlq(Register dst, int imm8) {
4313 4344 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4314 4345 int encode = prefixq_and_encode(dst->encoding());
4315 4346 if (imm8 == 1) {
4316 4347 emit_byte(0xD1);
4317 4348 emit_byte(0xE0 | encode);
4318 4349 } else {
4319 4350 emit_byte(0xC1);
4320 4351 emit_byte(0xE0 | encode);
4321 4352 emit_byte(imm8);
4322 4353 }
4323 4354 }
4324 4355
4325 4356 void Assembler::shlq(Register dst) {
4326 4357 int encode = prefixq_and_encode(dst->encoding());
4327 4358 emit_byte(0xD3);
4328 4359 emit_byte(0xE0 | encode);
4329 4360 }
4330 4361
4331 4362 void Assembler::shrq(Register dst, int imm8) {
4332 4363 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4333 4364 int encode = prefixq_and_encode(dst->encoding());
4334 4365 emit_byte(0xC1);
↓ open down ↓ |
1689 lines elided |
↑ open up ↑ |
4335 4366 emit_byte(0xE8 | encode);
4336 4367 emit_byte(imm8);
4337 4368 }
4338 4369
4339 4370 void Assembler::shrq(Register dst) {
4340 4371 int encode = prefixq_and_encode(dst->encoding());
4341 4372 emit_byte(0xD3);
4342 4373 emit_byte(0xE8 | encode);
4343 4374 }
4344 4375
4345 -void Assembler::sqrtsd(XMMRegister dst, Address src) {
4346 - NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4347 - InstructionMark im(this);
4348 - emit_byte(0xF2);
4349 - prefix(src, dst);
4350 - emit_byte(0x0F);
4351 - emit_byte(0x51);
4352 - emit_operand(dst, src);
4353 -}
4354 -
4355 4376 void Assembler::subq(Address dst, int32_t imm32) {
4356 4377 InstructionMark im(this);
4357 4378 prefixq(dst);
4358 4379 if (is8bit(imm32)) {
4359 4380 emit_byte(0x83);
4360 4381 emit_operand(rbp, dst, 1);
4361 4382 emit_byte(imm32 & 0xFF);
4362 4383 } else {
4363 4384 emit_byte(0x81);
4364 4385 emit_operand(rbp, dst, 4);
4365 4386 emit_long(imm32);
4366 4387 }
4367 4388 }
4368 4389
4369 4390 void Assembler::subq(Register dst, int32_t imm32) {
4370 4391 (void) prefixq_and_encode(dst->encoding());
4371 4392 emit_arith(0x81, 0xE8, dst, imm32);
4372 4393 }
4373 4394
4374 4395 void Assembler::subq(Address dst, Register src) {
4375 4396 InstructionMark im(this);
4376 4397 prefixq(dst, src);
4377 4398 emit_byte(0x29);
4378 4399 emit_operand(src, dst);
4379 4400 }
4380 4401
4381 4402 void Assembler::subq(Register dst, Address src) {
4382 4403 InstructionMark im(this);
4383 4404 prefixq(src, dst);
4384 4405 emit_byte(0x2B);
4385 4406 emit_operand(dst, src);
4386 4407 }
4387 4408
4388 4409 void Assembler::subq(Register dst, Register src) {
4389 4410 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4390 4411 emit_arith(0x2B, 0xC0, dst, src);
4391 4412 }
4392 4413
// TEST r64, imm32 (immediate sign-extended to 64 bits; flags only).
4393 4414 void Assembler::testq(Register dst, int32_t imm32) {
4394 4415 // not using emit_arith because test
4395 4416 // doesn't support sign-extension of
4396 4417 // 8bit operands
4397 4418 int encode = dst->encoding();
// rax (encoding 0) gets the one-byte-shorter accumulator form
// REX.W A9 imm32; other registers use F7 /0 with a ModRM byte.
4398 4419 if (encode == 0) {
4399 4420 prefix(REX_W);
4400 4421 emit_byte(0xA9);
4401 4422 } else {
4402 4423 encode = prefixq_and_encode(encode);
4403 4424 emit_byte(0xF7);
4404 4425 emit_byte(0xC0 | encode);
4405 4426 }
4406 4427 emit_long(imm32);
4407 4428 }
4408 4429
4409 4430 void Assembler::testq(Register dst, Register src) {
4410 4431 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4411 4432 emit_arith(0x85, 0xC0, dst, src);
4412 4433 }
4413 4434
4414 4435 void Assembler::xaddq(Address dst, Register src) {
4415 4436 InstructionMark im(this);
4416 4437 prefixq(dst, src);
4417 4438 emit_byte(0x0F);
4418 4439 emit_byte(0xC1);
4419 4440 emit_operand(src, dst);
4420 4441 }
4421 4442
4422 4443 void Assembler::xchgq(Register dst, Address src) {
4423 4444 InstructionMark im(this);
4424 4445 prefixq(src, dst);
4425 4446 emit_byte(0x87);
4426 4447 emit_operand(dst, src);
4427 4448 }
4428 4449
4429 4450 void Assembler::xchgq(Register dst, Register src) {
4430 4451 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4431 4452 emit_byte(0x87);
4432 4453 emit_byte(0xc0 | encode);
4433 4454 }
4434 4455
4435 4456 void Assembler::xorq(Register dst, Register src) {
4436 4457 (void) prefixq_and_encode(dst->encoding(), src->encoding());
4437 4458 emit_arith(0x33, 0xC0, dst, src);
4438 4459 }
4439 4460
4440 4461 void Assembler::xorq(Register dst, Address src) {
4441 4462 InstructionMark im(this);
4442 4463 prefixq(src, dst);
4443 4464 emit_byte(0x33);
4444 4465 emit_operand(dst, src);
4445 4466 }
4446 4467
4447 4468 #endif // !LP64
4448 4469
// Negation table for condition codes: reverse[cc] is the condition that is
// true exactly when 'cc' is false, indexed by the numeric value of
// Assembler::Condition (x86 condition-code nibble 0x0..0xF).
4449 4470 static Assembler::Condition reverse[] = {
4450 4471 Assembler::noOverflow /* overflow = 0x0 */ ,
4451 4472 Assembler::overflow /* noOverflow = 0x1 */ ,
4452 4473 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
4453 4474 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
4454 4475 Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
4455 4476 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
4456 4477 Assembler::above /* belowEqual = 0x6 */ ,
4457 4478 Assembler::belowEqual /* above = 0x7 */ ,
4458 4479 Assembler::positive /* negative = 0x8 */ ,
4459 4480 Assembler::negative /* positive = 0x9 */ ,
4460 4481 Assembler::noParity /* parity = 0xa */ ,
4461 4482 Assembler::parity /* noParity = 0xb */ ,
4462 4483 Assembler::greaterEqual /* less = 0xc */ ,
4463 4484 Assembler::less /* greaterEqual = 0xd */ ,
4464 4485 Assembler::greater /* lessEqual = 0xe */ ,
4465 4486 Assembler::lessEqual /* greater = 0xf, */
4466 4487
4467 4488 };
4468 4489
4469 4490
4470 4491 // Implementation of MacroAssembler
4471 4492
4472 4493 // First all the versions that have distinct versions depending on 32/64 bit
4473 4494 // Unless the difference is trivial (1 line or so).
4474 4495
4475 4496 #ifndef _LP64
4476 4497
4477 4498 // 32bit versions
4478 4499
4479 4500 Address MacroAssembler::as_Address(AddressLiteral adr) {
4480 4501 return Address(adr.target(), adr.rspec());
4481 4502 }
4482 4503
4483 4504 Address MacroAssembler::as_Address(ArrayAddress adr) {
4484 4505 return Address::make_array(adr);
4485 4506 }
4486 4507
4487 4508 int MacroAssembler::biased_locking_enter(Register lock_reg,
4488 4509 Register obj_reg,
4489 4510 Register swap_reg,
4490 4511 Register tmp_reg,
4491 4512 bool swap_reg_contains_mark,
4492 4513 Label& done,
4493 4514 Label* slow_case,
4494 4515 BiasedLockingCounters* counters) {
4495 4516 assert(UseBiasedLocking, "why call this otherwise?");
4496 4517 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4497 4518 assert_different_registers(lock_reg, obj_reg, swap_reg);
4498 4519
4499 4520 if (PrintBiasedLockingStatistics && counters == NULL)
4500 4521 counters = BiasedLocking::counters();
4501 4522
4502 4523 bool need_tmp_reg = false;
4503 4524 if (tmp_reg == noreg) {
4504 4525 need_tmp_reg = true;
4505 4526 tmp_reg = lock_reg;
4506 4527 } else {
4507 4528 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4508 4529 }
4509 4530 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4510 4531 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
4511 4532 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
4512 4533 Address saved_mark_addr(lock_reg, 0);
4513 4534
4514 4535 // Biased locking
4515 4536 // See whether the lock is currently biased toward our thread and
4516 4537 // whether the epoch is still valid
4517 4538 // Note that the runtime guarantees sufficient alignment of JavaThread
4518 4539 // pointers to allow age to be placed into low bits
4519 4540 // First check to see whether biasing is even enabled for this object
4520 4541 Label cas_label;
4521 4542 int null_check_offset = -1;
4522 4543 if (!swap_reg_contains_mark) {
4523 4544 null_check_offset = offset();
4524 4545 movl(swap_reg, mark_addr);
4525 4546 }
4526 4547 if (need_tmp_reg) {
4527 4548 push(tmp_reg);
4528 4549 }
4529 4550 movl(tmp_reg, swap_reg);
4530 4551 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4531 4552 cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4532 4553 if (need_tmp_reg) {
4533 4554 pop(tmp_reg);
4534 4555 }
4535 4556 jcc(Assembler::notEqual, cas_label);
4536 4557 // The bias pattern is present in the object's header. Need to check
4537 4558 // whether the bias owner and the epoch are both still current.
4538 4559 // Note that because there is no current thread register on x86 we
4539 4560 // need to store off the mark word we read out of the object to
4540 4561 // avoid reloading it and needing to recheck invariants below. This
4541 4562 // store is unfortunate but it makes the overall code shorter and
4542 4563 // simpler.
4543 4564 movl(saved_mark_addr, swap_reg);
4544 4565 if (need_tmp_reg) {
4545 4566 push(tmp_reg);
4546 4567 }
4547 4568 get_thread(tmp_reg);
4548 4569 xorl(swap_reg, tmp_reg);
4549 4570 if (swap_reg_contains_mark) {
4550 4571 null_check_offset = offset();
4551 4572 }
4552 4573 movl(tmp_reg, klass_addr);
4553 4574 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4554 4575 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4555 4576 if (need_tmp_reg) {
4556 4577 pop(tmp_reg);
4557 4578 }
4558 4579 if (counters != NULL) {
4559 4580 cond_inc32(Assembler::zero,
4560 4581 ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4561 4582 }
4562 4583 jcc(Assembler::equal, done);
4563 4584
4564 4585 Label try_revoke_bias;
4565 4586 Label try_rebias;
4566 4587
4567 4588 // At this point we know that the header has the bias pattern and
4568 4589 // that we are not the bias owner in the current epoch. We need to
4569 4590 // figure out more details about the state of the header in order to
4570 4591 // know what operations can be legally performed on the object's
4571 4592 // header.
4572 4593
4573 4594 // If the low three bits in the xor result aren't clear, that means
4574 4595 // the prototype header is no longer biased and we have to revoke
4575 4596 // the bias on this object.
4576 4597 testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4577 4598 jcc(Assembler::notZero, try_revoke_bias);
4578 4599
4579 4600 // Biasing is still enabled for this data type. See whether the
4580 4601 // epoch of the current bias is still valid, meaning that the epoch
4581 4602 // bits of the mark word are equal to the epoch bits of the
4582 4603 // prototype header. (Note that the prototype header's epoch bits
4583 4604 // only change at a safepoint.) If not, attempt to rebias the object
4584 4605 // toward the current thread. Note that we must be absolutely sure
4585 4606 // that the current epoch is invalid in order to do this because
4586 4607 // otherwise the manipulations it performs on the mark word are
4587 4608 // illegal.
4588 4609 testl(swap_reg, markOopDesc::epoch_mask_in_place);
4589 4610 jcc(Assembler::notZero, try_rebias);
4590 4611
4591 4612 // The epoch of the current bias is still valid but we know nothing
4592 4613 // about the owner; it might be set or it might be clear. Try to
4593 4614 // acquire the bias of the object using an atomic operation. If this
4594 4615 // fails we will go in to the runtime to revoke the object's bias.
4595 4616 // Note that we first construct the presumed unbiased header so we
4596 4617 // don't accidentally blow away another thread's valid bias.
4597 4618 movl(swap_reg, saved_mark_addr);
4598 4619 andl(swap_reg,
4599 4620 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4600 4621 if (need_tmp_reg) {
4601 4622 push(tmp_reg);
4602 4623 }
4603 4624 get_thread(tmp_reg);
4604 4625 orl(tmp_reg, swap_reg);
4605 4626 if (os::is_MP()) {
4606 4627 lock();
4607 4628 }
4608 4629 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4609 4630 if (need_tmp_reg) {
4610 4631 pop(tmp_reg);
4611 4632 }
4612 4633 // If the biasing toward our thread failed, this means that
4613 4634 // another thread succeeded in biasing it toward itself and we
4614 4635 // need to revoke that bias. The revocation will occur in the
4615 4636 // interpreter runtime in the slow case.
4616 4637 if (counters != NULL) {
4617 4638 cond_inc32(Assembler::zero,
4618 4639 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4619 4640 }
4620 4641 if (slow_case != NULL) {
4621 4642 jcc(Assembler::notZero, *slow_case);
4622 4643 }
4623 4644 jmp(done);
4624 4645
4625 4646 bind(try_rebias);
4626 4647 // At this point we know the epoch has expired, meaning that the
4627 4648 // current "bias owner", if any, is actually invalid. Under these
4628 4649 // circumstances _only_, we are allowed to use the current header's
4629 4650 // value as the comparison value when doing the cas to acquire the
4630 4651 // bias in the current epoch. In other words, we allow transfer of
4631 4652 // the bias from one thread to another directly in this situation.
4632 4653 //
4633 4654 // FIXME: due to a lack of registers we currently blow away the age
4634 4655 // bits in this situation. Should attempt to preserve them.
4635 4656 if (need_tmp_reg) {
4636 4657 push(tmp_reg);
4637 4658 }
4638 4659 get_thread(tmp_reg);
4639 4660 movl(swap_reg, klass_addr);
4640 4661 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4641 4662 movl(swap_reg, saved_mark_addr);
4642 4663 if (os::is_MP()) {
4643 4664 lock();
4644 4665 }
4645 4666 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4646 4667 if (need_tmp_reg) {
4647 4668 pop(tmp_reg);
4648 4669 }
4649 4670 // If the biasing toward our thread failed, then another thread
4650 4671 // succeeded in biasing it toward itself and we need to revoke that
4651 4672 // bias. The revocation will occur in the runtime in the slow case.
4652 4673 if (counters != NULL) {
4653 4674 cond_inc32(Assembler::zero,
4654 4675 ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4655 4676 }
4656 4677 if (slow_case != NULL) {
4657 4678 jcc(Assembler::notZero, *slow_case);
4658 4679 }
4659 4680 jmp(done);
4660 4681
4661 4682 bind(try_revoke_bias);
4662 4683 // The prototype mark in the klass doesn't have the bias bit set any
4663 4684 // more, indicating that objects of this data type are not supposed
4664 4685 // to be biased any more. We are going to try to reset the mark of
4665 4686 // this object to the prototype value and fall through to the
4666 4687 // CAS-based locking scheme. Note that if our CAS fails, it means
4667 4688 // that another thread raced us for the privilege of revoking the
4668 4689 // bias of this particular object, so it's okay to continue in the
4669 4690 // normal locking code.
4670 4691 //
4671 4692 // FIXME: due to a lack of registers we currently blow away the age
4672 4693 // bits in this situation. Should attempt to preserve them.
4673 4694 movl(swap_reg, saved_mark_addr);
4674 4695 if (need_tmp_reg) {
4675 4696 push(tmp_reg);
4676 4697 }
4677 4698 movl(tmp_reg, klass_addr);
4678 4699 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4679 4700 if (os::is_MP()) {
4680 4701 lock();
4681 4702 }
4682 4703 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4683 4704 if (need_tmp_reg) {
4684 4705 pop(tmp_reg);
4685 4706 }
4686 4707 // Fall through to the normal CAS-based lock, because no matter what
4687 4708 // the result of the above CAS, some thread must have succeeded in
4688 4709 // removing the bias bit from the object's header.
4689 4710 if (counters != NULL) {
4690 4711 cond_inc32(Assembler::zero,
4691 4712 ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4692 4713 }
4693 4714
4694 4715 bind(cas_label);
4695 4716
4696 4717 return null_check_offset;
4697 4718 }
4698 4719 void MacroAssembler::call_VM_leaf_base(address entry_point,
4699 4720 int number_of_arguments) {
4700 4721 call(RuntimeAddress(entry_point));
4701 4722 increment(rsp, number_of_arguments * wordSize);
4702 4723 }
4703 4724
4704 4725 void MacroAssembler::cmpoop(Address src1, jobject obj) {
4705 4726 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4706 4727 }
4707 4728
4708 4729 void MacroAssembler::cmpoop(Register src1, jobject obj) {
4709 4730 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
4710 4731 }
4711 4732
4712 4733 void MacroAssembler::extend_sign(Register hi, Register lo) {
4713 4734 // According to Intel Doc. AP-526, "Integer Divide", p.18.
4714 4735 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4715 4736 cdql();
4716 4737 } else {
4717 4738 movl(hi, lo);
4718 4739 sarl(hi, 31);
4719 4740 }
4720 4741 }
4721 4742
4722 4743 void MacroAssembler::fat_nop() {
4723 4744 // A 5 byte nop that is safe for patching (see patch_verified_entry)
4724 4745 emit_byte(0x26); // es:
4725 4746 emit_byte(0x2e); // cs:
4726 4747 emit_byte(0x64); // fs:
4727 4748 emit_byte(0x65); // gs:
4728 4749 emit_byte(0x90);
4729 4750 }
4730 4751
4731 4752 void MacroAssembler::jC2(Register tmp, Label& L) {
4732 4753 // set parity bit if FPU flag C2 is set (via rax)
4733 4754 save_rax(tmp);
4734 4755 fwait(); fnstsw_ax();
4735 4756 sahf();
4736 4757 restore_rax(tmp);
4737 4758 // branch
4738 4759 jcc(Assembler::parity, L);
4739 4760 }
4740 4761
4741 4762 void MacroAssembler::jnC2(Register tmp, Label& L) {
4742 4763 // set parity bit if FPU flag C2 is set (via rax)
4743 4764 save_rax(tmp);
4744 4765 fwait(); fnstsw_ax();
4745 4766 sahf();
4746 4767 restore_rax(tmp);
4747 4768 // branch
4748 4769 jcc(Assembler::noParity, L);
4749 4770 }
4750 4771
4751 4772 // 32bit can do a case table jump in one instruction but we no longer allow the base
4752 4773 // to be installed in the Address class
4753 4774 void MacroAssembler::jump(ArrayAddress entry) {
4754 4775 jmp(as_Address(entry));
4755 4776 }
4756 4777
4757 4778 // Note: y_lo will be destroyed
4758 4779 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
4759 4780 // Long compare for Java (semantics as described in JVM spec.)
4760 4781 Label high, low, done;
4761 4782
4762 4783 cmpl(x_hi, y_hi);
4763 4784 jcc(Assembler::less, low);
4764 4785 jcc(Assembler::greater, high);
4765 4786 // x_hi is the return register
4766 4787 xorl(x_hi, x_hi);
4767 4788 cmpl(x_lo, y_lo);
4768 4789 jcc(Assembler::below, low);
4769 4790 jcc(Assembler::equal, done);
4770 4791
4771 4792 bind(high);
4772 4793 xorl(x_hi, x_hi);
4773 4794 increment(x_hi);
4774 4795 jmp(done);
4775 4796
4776 4797 bind(low);
4777 4798 xorl(x_hi, x_hi);
4778 4799 decrementl(x_hi);
4779 4800
4780 4801 bind(done);
4781 4802 }
4782 4803
4783 4804 void MacroAssembler::lea(Register dst, AddressLiteral src) {
4784 4805 mov_literal32(dst, (int32_t)src.target(), src.rspec());
4785 4806 }
4786 4807
4787 4808 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
4788 4809 // leal(dst, as_Address(adr));
4789 4810 // see note in movl as to why we must use a move
4790 4811 mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
4791 4812 }
4792 4813
4793 4814 void MacroAssembler::leave() {
4794 4815 mov(rsp, rbp);
4795 4816 pop(rbp);
4796 4817 }
4797 4818
4798 4819 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
4799 4820 // Multiplication of two Java long values stored on the stack
4800 4821 // as illustrated below. Result is in rdx:rax.
4801 4822 //
4802 4823 // rsp ---> [ ?? ] \ \
4803 4824 // .... | y_rsp_offset |
4804 4825 // [ y_lo ] / (in bytes) | x_rsp_offset
4805 4826 // [ y_hi ] | (in bytes)
4806 4827 // .... |
4807 4828 // [ x_lo ] /
4808 4829 // [ x_hi ]
4809 4830 // ....
4810 4831 //
4811 4832 // Basic idea: lo(result) = lo(x_lo * y_lo)
4812 4833 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
4813 4834 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
4814 4835 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
4815 4836 Label quick;
4816 4837 // load x_hi, y_hi and check if quick
4817 4838 // multiplication is possible
4818 4839 movl(rbx, x_hi);
4819 4840 movl(rcx, y_hi);
4820 4841 movl(rax, rbx);
4821 4842 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0
4822 4843 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply
4823 4844 // do full multiplication
4824 4845 // 1st step
4825 4846 mull(y_lo); // x_hi * y_lo
4826 4847 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx,
4827 4848 // 2nd step
4828 4849 movl(rax, x_lo);
4829 4850 mull(rcx); // x_lo * y_hi
4830 4851 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx,
4831 4852 // 3rd step
4832 4853 bind(quick); // note: rbx, = 0 if quick multiply!
4833 4854 movl(rax, x_lo);
4834 4855 mull(y_lo); // x_lo * y_lo
4835 4856 addl(rdx, rbx); // correct hi(x_lo * y_lo)
4836 4857 }
4837 4858
4838 4859 void MacroAssembler::lneg(Register hi, Register lo) {
4839 4860 negl(lo);
4840 4861 adcl(hi, 0);
4841 4862 negl(hi);
4842 4863 }
4843 4864
4844 4865 void MacroAssembler::lshl(Register hi, Register lo) {
4845 4866 // Java shift left long support (semantics as described in JVM spec., p.305)
4846 4867 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
4847 4868 // shift value is in rcx !
4848 4869 assert(hi != rcx, "must not use rcx");
4849 4870 assert(lo != rcx, "must not use rcx");
4850 4871 const Register s = rcx; // shift count
4851 4872 const int n = BitsPerWord;
4852 4873 Label L;
4853 4874 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
4854 4875 cmpl(s, n); // if (s < n)
4855 4876 jcc(Assembler::less, L); // else (s >= n)
4856 4877 movl(hi, lo); // x := x << n
4857 4878 xorl(lo, lo);
4858 4879 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4859 4880 bind(L); // s (mod n) < n
4860 4881 shldl(hi, lo); // x := x << s
4861 4882 shll(lo);
4862 4883 }
4863 4884
4864 4885
4865 4886 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
4866 4887 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
4867 4888 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
4868 4889 assert(hi != rcx, "must not use rcx");
4869 4890 assert(lo != rcx, "must not use rcx");
4870 4891 const Register s = rcx; // shift count
4871 4892 const int n = BitsPerWord;
4872 4893 Label L;
4873 4894 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
4874 4895 cmpl(s, n); // if (s < n)
4875 4896 jcc(Assembler::less, L); // else (s >= n)
4876 4897 movl(lo, hi); // x := x >> n
4877 4898 if (sign_extension) sarl(hi, 31);
4878 4899 else xorl(hi, hi);
4879 4900 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4880 4901 bind(L); // s (mod n) < n
4881 4902 shrdl(lo, hi); // x := x >> s
4882 4903 if (sign_extension) sarl(hi);
4883 4904 else shrl(hi);
4884 4905 }
4885 4906
4886 4907 void MacroAssembler::movoop(Register dst, jobject obj) {
4887 4908 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4888 4909 }
4889 4910
4890 4911 void MacroAssembler::movoop(Address dst, jobject obj) {
4891 4912 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
4892 4913 }
4893 4914
4894 4915 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4895 4916 if (src.is_lval()) {
4896 4917 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4897 4918 } else {
4898 4919 movl(dst, as_Address(src));
4899 4920 }
4900 4921 }
4901 4922
4902 4923 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
4903 4924 movl(as_Address(dst), src);
4904 4925 }
4905 4926
↓ open down ↓ |
541 lines elided |
↑ open up ↑ |
4906 4927 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
4907 4928 movl(dst, as_Address(src));
4908 4929 }
4909 4930
4910 4931 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
4911 4932 void MacroAssembler::movptr(Address dst, intptr_t src) {
4912 4933 movl(dst, src);
4913 4934 }
4914 4935
4915 4936
4916 -void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
4917 - movsd(dst, as_Address(src));
4918 -}
4919 -
4920 4937 void MacroAssembler::pop_callee_saved_registers() {
4921 4938 pop(rcx);
4922 4939 pop(rdx);
4923 4940 pop(rdi);
4924 4941 pop(rsi);
4925 4942 }
4926 4943
4927 4944 void MacroAssembler::pop_fTOS() {
4928 4945 fld_d(Address(rsp, 0));
4929 4946 addl(rsp, 2 * wordSize);
4930 4947 }
4931 4948
4932 4949 void MacroAssembler::push_callee_saved_registers() {
4933 4950 push(rsi);
4934 4951 push(rdi);
4935 4952 push(rdx);
4936 4953 push(rcx);
4937 4954 }
4938 4955
4939 4956 void MacroAssembler::push_fTOS() {
4940 4957 subl(rsp, 2 * wordSize);
4941 4958 fstp_d(Address(rsp, 0));
4942 4959 }
4943 4960
4944 4961
4945 4962 void MacroAssembler::pushoop(jobject obj) {
4946 4963 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
4947 4964 }
4948 4965
4949 4966
4950 4967 void MacroAssembler::pushptr(AddressLiteral src) {
4951 4968 if (src.is_lval()) {
4952 4969 push_literal32((int32_t)src.target(), src.rspec());
4953 4970 } else {
4954 4971 pushl(as_Address(src));
4955 4972 }
4956 4973 }
4957 4974
4958 4975 void MacroAssembler::set_word_if_not_zero(Register dst) {
4959 4976 xorl(dst, dst);
4960 4977 set_byte_if_not_zero(dst);
4961 4978 }
4962 4979
4963 4980 static void pass_arg0(MacroAssembler* masm, Register arg) {
4964 4981 masm->push(arg);
4965 4982 }
4966 4983
4967 4984 static void pass_arg1(MacroAssembler* masm, Register arg) {
4968 4985 masm->push(arg);
4969 4986 }
4970 4987
4971 4988 static void pass_arg2(MacroAssembler* masm, Register arg) {
4972 4989 masm->push(arg);
4973 4990 }
4974 4991
4975 4992 static void pass_arg3(MacroAssembler* masm, Register arg) {
4976 4993 masm->push(arg);
4977 4994 }
4978 4995
4979 4996 #ifndef PRODUCT
4980 4997 extern "C" void findpc(intptr_t x);
4981 4998 #endif
4982 4999
4983 5000 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4984 5001   // In order to get locks to work, we need to fake an in_VM state
4985 5002 JavaThread* thread = JavaThread::current();
4986 5003 JavaThreadState saved_state = thread->thread_state();
4987 5004 thread->set_thread_state(_thread_in_vm);
4988 5005 if (ShowMessageBoxOnError) {
4989 5006 JavaThread* thread = JavaThread::current();
4990 5007 JavaThreadState saved_state = thread->thread_state();
4991 5008 thread->set_thread_state(_thread_in_vm);
4992 5009 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4993 5010 ttyLocker ttyl;
4994 5011 BytecodeCounter::print();
4995 5012 }
4996 5013 // To see where a verify_oop failed, get $ebx+40/X for this frame.
4997 5014 // This is the value of eip which points to where verify_oop will return.
4998 5015 if (os::message_box(msg, "Execution stopped, print registers?")) {
4999 5016 ttyLocker ttyl;
5000 5017 tty->print_cr("eip = 0x%08x", eip);
5001 5018 #ifndef PRODUCT
5002 5019 if ((WizardMode || Verbose) && PrintMiscellaneous) {
5003 5020 tty->cr();
5004 5021 findpc(eip);
5005 5022 tty->cr();
5006 5023 }
5007 5024 #endif
5008 5025 tty->print_cr("rax = 0x%08x", rax);
5009 5026 tty->print_cr("rbx = 0x%08x", rbx);
5010 5027 tty->print_cr("rcx = 0x%08x", rcx);
5011 5028 tty->print_cr("rdx = 0x%08x", rdx);
5012 5029 tty->print_cr("rdi = 0x%08x", rdi);
5013 5030 tty->print_cr("rsi = 0x%08x", rsi);
5014 5031 tty->print_cr("rbp = 0x%08x", rbp);
5015 5032 tty->print_cr("rsp = 0x%08x", rsp);
5016 5033 BREAKPOINT;
5017 5034 assert(false, "start up GDB");
5018 5035 }
5019 5036 } else {
5020 5037 ttyLocker ttyl;
5021 5038 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5022 5039 assert(false, "DEBUG MESSAGE");
5023 5040 }
5024 5041 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5025 5042 }
5026 5043
5027 5044 void MacroAssembler::stop(const char* msg) {
5028 5045 ExternalAddress message((address)msg);
5029 5046 // push address of message
5030 5047 pushptr(message.addr());
5031 5048 { Label L; call(L, relocInfo::none); bind(L); } // push eip
5032 5049 pusha(); // push registers
5033 5050 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
5034 5051 hlt();
5035 5052 }
5036 5053
5037 5054 void MacroAssembler::warn(const char* msg) {
5038 5055 push_CPU_state();
5039 5056
5040 5057 ExternalAddress message((address) msg);
5041 5058 // push address of message
5042 5059 pushptr(message.addr());
5043 5060
5044 5061 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
5045 5062 addl(rsp, wordSize); // discard argument
5046 5063 pop_CPU_state();
5047 5064 }
5048 5065
5049 5066 #else // _LP64
5050 5067
5051 5068 // 64 bit versions
5052 5069
5053 5070 Address MacroAssembler::as_Address(AddressLiteral adr) {
5054 5071 // amd64 always does this as a pc-rel
5055 5072 // we can be absolute or disp based on the instruction type
5056 5073 // jmp/call are displacements others are absolute
5057 5074 assert(!adr.is_lval(), "must be rval");
5058 5075 assert(reachable(adr), "must be");
5059 5076 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
5060 5077
5061 5078 }
5062 5079
5063 5080 Address MacroAssembler::as_Address(ArrayAddress adr) {
5064 5081 AddressLiteral base = adr.base();
5065 5082 lea(rscratch1, base);
5066 5083 Address index = adr.index();
5067 5084 assert(index._disp == 0, "must not have disp"); // maybe it can?
5068 5085 Address array(rscratch1, index._index, index._scale, index._disp);
5069 5086 return array;
5070 5087 }
5071 5088
5072 5089 int MacroAssembler::biased_locking_enter(Register lock_reg,
5073 5090 Register obj_reg,
5074 5091 Register swap_reg,
5075 5092 Register tmp_reg,
5076 5093 bool swap_reg_contains_mark,
5077 5094 Label& done,
5078 5095 Label* slow_case,
5079 5096 BiasedLockingCounters* counters) {
5080 5097 assert(UseBiasedLocking, "why call this otherwise?");
5081 5098 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
5082 5099 assert(tmp_reg != noreg, "tmp_reg must be supplied");
5083 5100 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
5084 5101 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
5085 5102 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
5086 5103 Address saved_mark_addr(lock_reg, 0);
5087 5104
5088 5105 if (PrintBiasedLockingStatistics && counters == NULL)
5089 5106 counters = BiasedLocking::counters();
5090 5107
5091 5108 // Biased locking
5092 5109 // See whether the lock is currently biased toward our thread and
5093 5110 // whether the epoch is still valid
5094 5111 // Note that the runtime guarantees sufficient alignment of JavaThread
5095 5112 // pointers to allow age to be placed into low bits
5096 5113 // First check to see whether biasing is even enabled for this object
5097 5114 Label cas_label;
5098 5115 int null_check_offset = -1;
5099 5116 if (!swap_reg_contains_mark) {
5100 5117 null_check_offset = offset();
5101 5118 movq(swap_reg, mark_addr);
5102 5119 }
5103 5120 movq(tmp_reg, swap_reg);
5104 5121 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5105 5122 cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5106 5123 jcc(Assembler::notEqual, cas_label);
5107 5124 // The bias pattern is present in the object's header. Need to check
5108 5125 // whether the bias owner and the epoch are both still current.
5109 5126 load_prototype_header(tmp_reg, obj_reg);
5110 5127 orq(tmp_reg, r15_thread);
5111 5128 xorq(tmp_reg, swap_reg);
5112 5129 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5113 5130 if (counters != NULL) {
5114 5131 cond_inc32(Assembler::zero,
5115 5132 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5116 5133 }
5117 5134 jcc(Assembler::equal, done);
5118 5135
5119 5136 Label try_revoke_bias;
5120 5137 Label try_rebias;
5121 5138
5122 5139 // At this point we know that the header has the bias pattern and
5123 5140 // that we are not the bias owner in the current epoch. We need to
5124 5141 // figure out more details about the state of the header in order to
5125 5142 // know what operations can be legally performed on the object's
5126 5143 // header.
5127 5144
5128 5145 // If the low three bits in the xor result aren't clear, that means
5129 5146 // the prototype header is no longer biased and we have to revoke
5130 5147 // the bias on this object.
5131 5148 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5132 5149 jcc(Assembler::notZero, try_revoke_bias);
5133 5150
5134 5151 // Biasing is still enabled for this data type. See whether the
5135 5152 // epoch of the current bias is still valid, meaning that the epoch
5136 5153 // bits of the mark word are equal to the epoch bits of the
5137 5154 // prototype header. (Note that the prototype header's epoch bits
5138 5155 // only change at a safepoint.) If not, attempt to rebias the object
5139 5156 // toward the current thread. Note that we must be absolutely sure
5140 5157 // that the current epoch is invalid in order to do this because
5141 5158 // otherwise the manipulations it performs on the mark word are
5142 5159 // illegal.
5143 5160 testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5144 5161 jcc(Assembler::notZero, try_rebias);
5145 5162
5146 5163 // The epoch of the current bias is still valid but we know nothing
5147 5164 // about the owner; it might be set or it might be clear. Try to
5148 5165 // acquire the bias of the object using an atomic operation. If this
5149 5166 // fails we will go in to the runtime to revoke the object's bias.
5150 5167 // Note that we first construct the presumed unbiased header so we
5151 5168 // don't accidentally blow away another thread's valid bias.
5152 5169 andq(swap_reg,
5153 5170 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5154 5171 movq(tmp_reg, swap_reg);
5155 5172 orq(tmp_reg, r15_thread);
5156 5173 if (os::is_MP()) {
5157 5174 lock();
5158 5175 }
5159 5176 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5160 5177 // If the biasing toward our thread failed, this means that
5161 5178 // another thread succeeded in biasing it toward itself and we
5162 5179 // need to revoke that bias. The revocation will occur in the
5163 5180 // interpreter runtime in the slow case.
5164 5181 if (counters != NULL) {
5165 5182 cond_inc32(Assembler::zero,
5166 5183 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5167 5184 }
5168 5185 if (slow_case != NULL) {
5169 5186 jcc(Assembler::notZero, *slow_case);
5170 5187 }
5171 5188 jmp(done);
5172 5189
5173 5190 bind(try_rebias);
5174 5191 // At this point we know the epoch has expired, meaning that the
5175 5192 // current "bias owner", if any, is actually invalid. Under these
5176 5193 // circumstances _only_, we are allowed to use the current header's
5177 5194 // value as the comparison value when doing the cas to acquire the
5178 5195 // bias in the current epoch. In other words, we allow transfer of
5179 5196 // the bias from one thread to another directly in this situation.
5180 5197 //
5181 5198 // FIXME: due to a lack of registers we currently blow away the age
5182 5199 // bits in this situation. Should attempt to preserve them.
5183 5200 load_prototype_header(tmp_reg, obj_reg);
5184 5201 orq(tmp_reg, r15_thread);
5185 5202 if (os::is_MP()) {
5186 5203 lock();
5187 5204 }
5188 5205 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5189 5206 // If the biasing toward our thread failed, then another thread
5190 5207 // succeeded in biasing it toward itself and we need to revoke that
5191 5208 // bias. The revocation will occur in the runtime in the slow case.
5192 5209 if (counters != NULL) {
5193 5210 cond_inc32(Assembler::zero,
5194 5211 ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5195 5212 }
5196 5213 if (slow_case != NULL) {
5197 5214 jcc(Assembler::notZero, *slow_case);
5198 5215 }
5199 5216 jmp(done);
5200 5217
5201 5218 bind(try_revoke_bias);
5202 5219 // The prototype mark in the klass doesn't have the bias bit set any
5203 5220 // more, indicating that objects of this data type are not supposed
5204 5221 // to be biased any more. We are going to try to reset the mark of
5205 5222 // this object to the prototype value and fall through to the
5206 5223 // CAS-based locking scheme. Note that if our CAS fails, it means
5207 5224 // that another thread raced us for the privilege of revoking the
5208 5225 // bias of this particular object, so it's okay to continue in the
5209 5226 // normal locking code.
5210 5227 //
5211 5228 // FIXME: due to a lack of registers we currently blow away the age
5212 5229 // bits in this situation. Should attempt to preserve them.
5213 5230 load_prototype_header(tmp_reg, obj_reg);
5214 5231 if (os::is_MP()) {
5215 5232 lock();
5216 5233 }
5217 5234 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5218 5235 // Fall through to the normal CAS-based lock, because no matter what
5219 5236 // the result of the above CAS, some thread must have succeeded in
5220 5237 // removing the bias bit from the object's header.
5221 5238 if (counters != NULL) {
5222 5239 cond_inc32(Assembler::zero,
5223 5240 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5224 5241 }
5225 5242
5226 5243 bind(cas_label);
5227 5244
5228 5245 return null_check_offset;
5229 5246 }
5230 5247
5231 5248 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
5232 5249 Label L, E;
5233 5250
5234 5251 #ifdef _WIN64
5235 5252 // Windows always allocates space for it's register args
5236 5253 assert(num_args <= 4, "only register arguments supported");
5237 5254 subq(rsp, frame::arg_reg_save_area_bytes);
5238 5255 #endif
5239 5256
5240 5257 // Align stack if necessary
5241 5258 testl(rsp, 15);
5242 5259 jcc(Assembler::zero, L);
5243 5260
5244 5261 subq(rsp, 8);
5245 5262 {
5246 5263 call(RuntimeAddress(entry_point));
5247 5264 }
5248 5265 addq(rsp, 8);
5249 5266 jmp(E);
5250 5267
5251 5268 bind(L);
5252 5269 {
5253 5270 call(RuntimeAddress(entry_point));
5254 5271 }
5255 5272
5256 5273 bind(E);
5257 5274
5258 5275 #ifdef _WIN64
5259 5276 // restore stack pointer
5260 5277 addq(rsp, frame::arg_reg_save_area_bytes);
5261 5278 #endif
5262 5279
5263 5280 }
5264 5281
5265 5282 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
5266 5283 assert(!src2.is_lval(), "should use cmpptr");
5267 5284
5268 5285 if (reachable(src2)) {
5269 5286 cmpq(src1, as_Address(src2));
5270 5287 } else {
5271 5288 lea(rscratch1, src2);
5272 5289 Assembler::cmpq(src1, Address(rscratch1, 0));
5273 5290 }
5274 5291 }
5275 5292
5276 5293 int MacroAssembler::corrected_idivq(Register reg) {
5277 5294 // Full implementation of Java ldiv and lrem; checks for special
5278 5295 // case as described in JVM spec., p.243 & p.271. The function
5279 5296 // returns the (pc) offset of the idivl instruction - may be needed
5280 5297 // for implicit exceptions.
5281 5298 //
5282 5299 // normal case special case
5283 5300 //
5284 5301 // input : rax: dividend min_long
5285 5302 // reg: divisor (may not be eax/edx) -1
5286 5303 //
5287 5304 // output: rax: quotient (= rax idiv reg) min_long
5288 5305 // rdx: remainder (= rax irem reg) 0
5289 5306 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
5290 5307 static const int64_t min_long = 0x8000000000000000;
5291 5308 Label normal_case, special_case;
5292 5309
5293 5310 // check for special case
5294 5311 cmp64(rax, ExternalAddress((address) &min_long));
5295 5312 jcc(Assembler::notEqual, normal_case);
5296 5313 xorl(rdx, rdx); // prepare rdx for possible special case (where
5297 5314 // remainder = 0)
5298 5315 cmpq(reg, -1);
5299 5316 jcc(Assembler::equal, special_case);
5300 5317
5301 5318 // handle normal case
5302 5319 bind(normal_case);
5303 5320 cdqq();
5304 5321 int idivq_offset = offset();
5305 5322 idivq(reg);
5306 5323
5307 5324 // normal and special case exit
5308 5325 bind(special_case);
5309 5326
5310 5327 return idivq_offset;
5311 5328 }
5312 5329
5313 5330 void MacroAssembler::decrementq(Register reg, int value) {
5314 5331 if (value == min_jint) { subq(reg, value); return; }
5315 5332 if (value < 0) { incrementq(reg, -value); return; }
5316 5333 if (value == 0) { ; return; }
5317 5334 if (value == 1 && UseIncDec) { decq(reg) ; return; }
5318 5335 /* else */ { subq(reg, value) ; return; }
5319 5336 }
5320 5337
5321 5338 void MacroAssembler::decrementq(Address dst, int value) {
5322 5339 if (value == min_jint) { subq(dst, value); return; }
5323 5340 if (value < 0) { incrementq(dst, -value); return; }
5324 5341 if (value == 0) { ; return; }
5325 5342 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5326 5343 /* else */ { subq(dst, value) ; return; }
5327 5344 }
5328 5345
5329 5346 void MacroAssembler::fat_nop() {
5330 5347 // A 5 byte nop that is safe for patching (see patch_verified_entry)
5331 5348   // Recommended sequence from 'Software Optimization Guide for the AMD
5332 5349 // Hammer Processor'
5333 5350 emit_byte(0x66);
5334 5351 emit_byte(0x66);
5335 5352 emit_byte(0x90);
5336 5353 emit_byte(0x66);
5337 5354 emit_byte(0x90);
5338 5355 }
5339 5356
5340 5357 void MacroAssembler::incrementq(Register reg, int value) {
5341 5358 if (value == min_jint) { addq(reg, value); return; }
5342 5359 if (value < 0) { decrementq(reg, -value); return; }
5343 5360 if (value == 0) { ; return; }
5344 5361 if (value == 1 && UseIncDec) { incq(reg) ; return; }
5345 5362 /* else */ { addq(reg, value) ; return; }
5346 5363 }
5347 5364
5348 5365 void MacroAssembler::incrementq(Address dst, int value) {
5349 5366 if (value == min_jint) { addq(dst, value); return; }
5350 5367 if (value < 0) { decrementq(dst, -value); return; }
5351 5368 if (value == 0) { ; return; }
5352 5369 if (value == 1 && UseIncDec) { incq(dst) ; return; }
5353 5370 /* else */ { addq(dst, value) ; return; }
5354 5371 }
5355 5372
5356 5373 // 32bit can do a case table jump in one instruction but we no longer allow the base
5357 5374 // to be installed in the Address class
5358 5375 void MacroAssembler::jump(ArrayAddress entry) {
5359 5376 lea(rscratch1, entry.base());
5360 5377 Address dispatch = entry.index();
5361 5378 assert(dispatch._base == noreg, "must be");
5362 5379 dispatch._base = rscratch1;
5363 5380 jmp(dispatch);
5364 5381 }
5365 5382
5366 5383 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
5367 5384 ShouldNotReachHere(); // 64bit doesn't use two regs
5368 5385 cmpq(x_lo, y_lo);
5369 5386 }
5370 5387
5371 5388 void MacroAssembler::lea(Register dst, AddressLiteral src) {
5372 5389 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5373 5390 }
5374 5391
5375 5392 void MacroAssembler::lea(Address dst, AddressLiteral adr) {
5376 5393 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
5377 5394 movptr(dst, rscratch1);
5378 5395 }
5379 5396
5380 5397 void MacroAssembler::leave() {
5381 5398 // %%% is this really better? Why not on 32bit too?
5382 5399 emit_byte(0xC9); // LEAVE
5383 5400 }
5384 5401
5385 5402 void MacroAssembler::lneg(Register hi, Register lo) {
5386 5403 ShouldNotReachHere(); // 64bit doesn't use two regs
5387 5404 negq(lo);
5388 5405 }
5389 5406
5390 5407 void MacroAssembler::movoop(Register dst, jobject obj) {
5391 5408 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5392 5409 }
5393 5410
5394 5411 void MacroAssembler::movoop(Address dst, jobject obj) {
5395 5412 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
5396 5413 movq(dst, rscratch1);
5397 5414 }
5398 5415
5399 5416 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5400 5417 if (src.is_lval()) {
5401 5418 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5402 5419 } else {
5403 5420 if (reachable(src)) {
5404 5421 movq(dst, as_Address(src));
5405 5422 } else {
5406 5423 lea(rscratch1, src);
5407 5424 movq(dst, Address(rscratch1,0));
5408 5425 }
5409 5426 }
5410 5427 }
5411 5428
5412 5429 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
5413 5430 movq(as_Address(dst), src);
5414 5431 }
5415 5432
5416 5433 void MacroAssembler::movptr(Register dst, ArrayAddress src) {
5417 5434 movq(dst, as_Address(src));
5418 5435 }
5419 5436
5420 5437 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
5421 5438 void MacroAssembler::movptr(Address dst, intptr_t src) {
5422 5439 mov64(rscratch1, src);
5423 5440 movq(dst, rscratch1);
5424 5441 }
5425 5442
5426 5443 // These are mostly for initializing NULL
5427 5444 void MacroAssembler::movptr(Address dst, int32_t src) {
5428 5445 movslq(dst, src);
5429 5446 }
5430 5447
5431 5448 void MacroAssembler::movptr(Register dst, int32_t src) {
5432 5449 mov64(dst, (intptr_t)src);
5433 5450 }
5434 5451
5435 5452 void MacroAssembler::pushoop(jobject obj) {
5436 5453 movoop(rscratch1, obj);
5437 5454 push(rscratch1);
5438 5455 }
5439 5456
5440 5457 void MacroAssembler::pushptr(AddressLiteral src) {
5441 5458 lea(rscratch1, src);
5442 5459 if (src.is_lval()) {
5443 5460 push(rscratch1);
5444 5461 } else {
5445 5462 pushq(Address(rscratch1, 0));
5446 5463 }
5447 5464 }
5448 5465
5449 5466 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
5450 5467 bool clear_pc) {
5451 5468 // we must set sp to zero to clear frame
5452 5469 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
5453 5470 // must clear fp, so that compiled frames are not confused; it is
5454 5471 // possible that we need it only for debugging
5455 5472 if (clear_fp) {
5456 5473 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
5457 5474 }
5458 5475
5459 5476 if (clear_pc) {
5460 5477 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
5461 5478 }
5462 5479 }
5463 5480
// Record the current Java frame in the thread's frame anchor so the VM
// can walk the stack across the upcoming native/VM transition.
// last_java_fp and last_java_pc are optional; sp is written last.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    // materialize the pc as an internal address so it relocates with the code
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  // store sp last: a non-zero last_Java_sp marks the anchor as set
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
5488 5505
// File-local helpers: move an argument into the n-th C calling-convention
// register (c_rarg0..c_rarg3), skipping the move when it is already there.

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}
5512 5529
// Emit a fatal-stop sequence: saves all registers, then calls debug64
// with (msg, stop-site pc, pointer to the saved register array) and halts.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();    // capture the stop site before emitting the prologue
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();    // should not return
}

// Emit a non-fatal warning call; preserves all CPU state and continues.
// r12 is used to remember the pre-alignment stack pointer.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);     // restore original stack pointer
  pop(r12);
}
5537 5554
5538 5555 #ifndef PRODUCT
5539 5556 extern "C" void findpc(intptr_t x);
5540 5557 #endif
5541 5558
// Runtime (not emitted) helper called from the stop() sequence.
// msg:  the stop message; pc: the stop site; regs: the register array
// pushed by pusha() in stop() (indices reflect pusha's push order,
// last-pushed first — presumably r15 at regs[0] .. rax at regs[15];
// verify against the pusha implementation if these mappings change).
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
5590 5607
5591 5608 #endif // _LP64
5592 5609
5593 5610 // Now versions that are common to 32/64 bit
5594 5611
// Pointer-sized add: 64-bit addq on LP64, 32-bit addl otherwise.
void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// Pad the code stream with nops so the next instruction starts at a
// multiple of 'modulus' bytes.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}
5612 5629
// andpd with a literal operand: use it directly when RIP-relative
// addressing can reach it, otherwise go through rscratch1.
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    andpd(dst, Address(rscratch1, 0));
  }
}

// Pointer-sized bitwise and.
void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increment a 32-bit counter in memory.  Flags are
// preserved via pushf/popf; lock prefix only on multiprocessors.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}
5633 5650
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  // touch one word in the page just below tmp, then step down a page
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this is can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
5656 5673
// If the object's mark word shows the biased-lock pattern, unlocking is
// a no-op: jump to 'done'.  Clobbers temp_reg.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

// Normalize a C-style boolean in x to exactly 0 or 1.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  // since C-style booleans are stored in one byte
  // only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}
5680 5697
// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

// Indirect call through a register.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call an address literal: direct rel32 call when reachable, otherwise
// materialize the target in rscratch1 and call indirectly.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}
5698 5715
// Implementation of call_VM versions
//
// Each overload emits a tiny local trampoline: an internal call(C) pushes
// a return address that call_VM_helper later uses to derive last_Java_pc,
// then execution jumps over the out-of-line helper body to E.

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // arg_1 is moved after arg_2; it must not live in the register arg_2 lands in
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // args are placed last-to-first; earlier args must not occupy the
  // destination registers of later ones
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}
5777 5794
// call_VM overloads that take an explicit last_java_sp.  Arguments are
// marshalled into c_rarg1..c_rarg3 (last-to-first to avoid clobbering),
// then delegated to call_VM_base.

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5824 5841
// Common tail of all call_VM variants: sets the last-Java-frame anchor,
// performs the leaf call into the VM with the thread as first argument,
// restores the thread register, clears the anchor, checks for pending
// exceptions, and fetches an oop result from the thread if requested.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // verify our belief that the thread register survived the call
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
5919 5936
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finsihed with it. This allows
  // use to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

// call_VM_leaf overloads: VM calls that need no last-Java-frame bookkeeping.
// Arguments are marshalled into c_rarg0..c_rarg2 last-to-first.

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  // arg_0 is moved last; it must not occupy arg_1's destination register
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
5969 5986
// Intentionally empty hooks: the template interpreter overrides/extends
// popframe and early-return handling elsewhere; here they are no-ops.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
5975 5992
// 32-bit compares.  AddressLiteral forms fall back to rscratch1 when the
// literal is not RIP-reachable.

void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
6002 6019
// Compare two doubles and materialize -1/0/+1 in dst (Java fcmp semantics).
// unordered_is_less selects whether a NaN comparison yields -1 or +1.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);   // unordered (NaN) -> -1
    jcc(Assembler::below , L);   // opr1 < opr2     -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // opr1 == opr2    ->  0
    increment(dst);              // opr1 > opr2     -> +1
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);   // unordered (NaN) -> +1
    jcc(Assembler::above , L);   // opr1 > opr2     -> +1
    movl(dst, 0);
    jcc(Assembler::equal , L);   // opr1 == opr2    ->  0
    decrementl(dst);             // opr1 < opr2     -> -1
  }
  bind(L);
}

// Single-precision variant of cmpsd2int.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
6046 6063
6047 6064
// Byte compare against an immediate; indirect through rscratch1 when the
// literal is not RIP-reachable.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}
6056 6073
// Pointer-sized compares against an address literal.  An lval literal
// means "compare against the address itself", otherwise the literal's
// contents are compared.

void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    // compare against the 64-bit address value itself
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}
6087 6104
// Atomic pointer-sized compare-and-exchange at an address literal.
// Lock prefix only on multiprocessors; uniprocessor cmpxchg is already atomic.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

// Pointer-width cmpxchg (rax is the implicit compare value).
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
6104 6121
// Ordered FP compares with a literal memory operand; indirect through
// rscratch1 when the literal is out of RIP-relative range.

void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comiss(dst, Address(rscratch1, 0));
  }
}


// Conditionally (atomically) increment a 32-bit counter: skip the
// increment by branching on the negated condition.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}
6131 6148
int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case: min_int / -1 would trap in hardware,
  // so produce the JVM-specified result (min_int, remainder 0) directly
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();    // sign-extend rax into rdx:rax for idiv
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}
6167 6184
6168 6185
6169 6186
// Subtract a constant from a 32-bit operand, choosing the shortest
// encoding: dec for 1 (when UseIncDec), nothing for 0, subl otherwise.
// min_jint must use subl because -min_jint overflows.

void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}
6185 6202
// Signed division by a power of two via arithmetic shift.  Negative
// dividends are biased by (2^shift - 1) first so the shift rounds
// toward zero like Java integer division.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}
6202 6219
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Discard all x87 FPU stack slots: emms resets the whole stack in one
// instruction when MMX is available, otherwise free each slot.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2
6213 6230
6214 6231
// Defines obj, preserves var_size_in_bytes
// Lock-free eden (shared heap top) allocation via CAS retry loop.
// Falls through to slow_case when inline contig alloc is unsupported,
// on address-space wrap, or when the heap end would be exceeded.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    // compute new top = obj + size (constant or register size)
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}
6248 6265
// Standard frame prologue: save caller's rbp and establish a new frame.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// Compare ST(0) against ST(1), popping both (see full fcmp below).
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// x87 compare of ST(0) with ST(index), transferring the FPU condition
// codes into EFLAGS.  On CMOV-capable CPUs fucomi[p] sets EFLAGS
// directly; otherwise the status word is routed through rax/sahf
// (tmp preserves the caller's rax).
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
6293 6310
// x87 compare producing -1/0/+1 in dst; pops both operands.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// x87 compare producing -1/0/+1 in dst (Java fcmpl/fcmpg semantics);
// unordered_is_less selects the NaN result, same scheme as cmpsd2int.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
6318 6335
// x87 loads from address literals (double, single, extended precision)
// and FPU control-word load.  These assume the literal is reachable.

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

// Pop ST(0) without storing it: free the slot and advance the stack top.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}
6339 6356
// IEEE remainder of ST(0) / ST(1) via fprem, which may be partial:
// loop until the FPU status C2 bit (0x400) reports completion.
// tmp preserves rax across the status-word read.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    // test C2 directly in ax
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    // route status flags through EFLAGS; C2 maps to PF after sahf
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}
6361 6378
6362 6379
// Add a constant to a 32-bit operand, choosing the shortest encoding:
// inc for 1 (when UseIncDec), nothing for 0, addl otherwise.
// min_jint must use addl because -min_jint overflows.

void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}
6391 6408
// Unconditional jump to an AddressLiteral: a direct pc-relative jmp when
// reachable, otherwise an indirect jump through rscratch1.
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to an AddressLiteral. When reachable, a direct jcc is
// emitted by hand: the short 2-byte form when there is no relocation and
// the displacement fits in 8 bits, else the 6-byte long form. When not
// reachable, the condition is reversed around an indirect jump through
// rscratch1 (a jcc cannot take a register operand).
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    // Displacement from the current code position to the target.
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}
6429 6446
// Load the SSE control/status register (MXCSR) from an AddressLiteral,
// going through rscratch1 when the literal is not directly reachable.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}
6438 6455
// Load a sign-extended byte into dst. On 64-bit (always) and P6+ 32-bit
// CPUs this is a single movsbl; pre-P6 CPUs fall back to a zero-extending
// load followed by a shift pair. Returns the code offset of the actual
// load instruction (used by callers that record implicit-exception PCs).
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.

// Load a sign-extended 16-bit value into dst; same P6 fallback scheme and
// return-offset contract as load_signed_byte above.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}
6471 6488
// Load a zero-extended byte into dst. Uses movzbl on 64-bit/P6+, or when
// src uses dst (the xorl pre-clear below would clobber the address).
// Pre-P6 fallback clears dst then does a byte move, avoiding a partial-
// register stall. Returns the code offset of the load instruction.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
// Load a zero-extended 16-bit value into dst; same scheme and offset
// contract as load_unsigned_byte above.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}
6502 6519
// Load a value of the given byte size from src into dst, sign- or
// zero-extending sub-word sizes as requested.
void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
#ifndef _LP64
  // For case 8, caller is responsible for manually loading
  // the second word into another register.
  case 8: movl(dst, src); break;
#else
  case 8: movq(dst, src); break;
#endif
  case 4: movl(dst, src); break;
  case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default: ShouldNotReachHere();
  }
}
6519 6536
// Store a 32-bit register to an AddressLiteral destination, going through
// rscratch1 when the literal is not directly reachable.
void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

// Load a 32-bit value from an AddressLiteral source into a register,
// going through rscratch1 when the literal is not directly reachable.
void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}
6537 6554
6538 6555 // C++ bool manipulation
6539 6556
6540 6557 void MacroAssembler::movbool(Register dst, Address src) {
6541 6558 if(sizeof(bool) == 1)
6542 6559 movb(dst, src);
6543 6560 else if(sizeof(bool) == 2)
6544 6561 movw(dst, src);
6545 6562 else if(sizeof(bool) == 4)
6546 6563 movl(dst, src);
6547 6564 else
6548 6565 // unsupported
6549 6566 ShouldNotReachHere();
6550 6567 }
6551 6568
6552 6569 void MacroAssembler::movbool(Address dst, bool boolconst) {
6553 6570 if(sizeof(bool) == 1)
6554 6571 movb(dst, (int) boolconst);
6555 6572 else if(sizeof(bool) == 2)
6556 6573 movw(dst, (int) boolconst);
6557 6574 else if(sizeof(bool) == 4)
6558 6575 movl(dst, (int) boolconst);
6559 6576 else
6560 6577 // unsupported
6561 6578 ShouldNotReachHere();
6562 6579 }
6563 6580
6564 6581 void MacroAssembler::movbool(Address dst, Register src) {
6565 6582 if(sizeof(bool) == 1)
6566 6583 movb(dst, src);
6567 6584 else if(sizeof(bool) == 2)
6568 6585 movw(dst, src);
6569 6586 else if(sizeof(bool) == 4)
6570 6587 movl(dst, src);
6571 6588 else
6572 6589 // unsupported
6573 6590 ShouldNotReachHere();
6574 6591 }
6575 6592
// Store an immediate byte to an ArrayAddress (base literal + index).
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}
6579 6596
// Load a double from an AddressLiteral into an XMM register. Prefers
// movsd when UseXmmLoadAndClearUpper (clears the upper half of the XMM
// register), else movlpd (leaves the upper half untouched). Unreachable
// literals are addressed through rscratch1.
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

// Load a float from an AddressLiteral into an XMM register, through
// rscratch1 when the literal is not directly reachable.
void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}
6605 6622
6606 6623 void MacroAssembler::movptr(Register dst, Register src) {
6607 6624 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6608 6625 }
6609 6626
6610 6627 void MacroAssembler::movptr(Register dst, Address src) {
6611 6628 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6612 6629 }
6613 6630
6614 6631 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
6615 6632 void MacroAssembler::movptr(Register dst, intptr_t src) {
6616 6633 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6617 6634 }
6618 6635
6619 6636 void MacroAssembler::movptr(Address dst, Register src) {
6620 6637 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6621 6638 }
6622 6639
// Load a scalar single-precision value from an AddressLiteral into an XMM
// register, through rscratch1 when the literal is not directly reachable.
void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

// Emit an explicit null check for reg when the eventual field access at
// 'offset' would fall outside the OS-protected page and therefore would
// not fault on its own (needs_explicit_null_check). Otherwise emit
// nothing and rely on the later access to raise the fault.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    // may be shorter code (however, this version of
    // testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
6646 6663
// Emit a call into os::breakpoint() rather than a raw int3.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

// Restore FPU then integer state, the reverse of push_CPU_state().
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}

// Restore FPU state from the stack (frstor on 32-bit, fxrstor on 64-bit)
// and release the save area.
void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}

// Restore general-purpose registers and flags. The extra 8-byte pop on
// 64-bit undoes the alignment padding added by push_IU_state().
void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}
6669 6686
// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}

// Save FPU state into a stack-allocated area (fnsave on 32-bit with an
// fwait to complete it, fxsave on 64-bit).
void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}

// Save flags and general-purpose registers; see pop_IU_state() for the
// matching restore sequence.
void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}
6694 6711
// Clear the thread's last-Java-frame anchor. last_Java_sp is always
// zeroed; the fp and pc slots are cleared only when requested. Passing
// an invalid java_thread lets this routine fetch the thread into rdi.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}

// Restore rax from tmp, or pop it from the stack if tmp == noreg.
// Mirror of save_rax() below.
void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}

// Round reg up to the next multiple of 'modulus' (must be a power of two,
// since the rounding is done with an and-mask).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}

// Save rax into tmp, or push it on the stack if tmp == noreg.
void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}
6726 6743
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // Derive a per-thread, int-aligned offset into the serialization page.
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}
6742 6759
// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  // last_java_sp is written last — presumably so the anchor only appears
  // valid once fully populated (matches reset_last_Java_frame, which
  // clears sp first). TODO confirm against JavaFrameAnchor docs.
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
6778 6795
6779 6796 void MacroAssembler::shlptr(Register dst, int imm8) {
6780 6797 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6781 6798 }
6782 6799
6783 6800 void MacroAssembler::shrptr(Register dst, int imm8) {
6784 6801 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6785 6802 }
6786 6803
// Sign-extend the low byte of reg in place. Single movsbl on 64-bit and
// on P6+ when the register has a byte-addressable form; otherwise use a
// shift pair.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// Sign-extend the low 16 bits of reg in place; same scheme as above.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}
6804 6821
6805 6822 //////////////////////////////////////////////////////////////////////////////////
6806 6823 #ifndef SERIALGC
6807 6824
// G1 SATB pre-write barrier: when concurrent marking is active, record
// the previous value of the field at (obj, 0) in the thread-local SATB
// mark queue, calling into the runtime (g1_wb_pre) when the queue is
// full. tmp and tmp2 are scratch; tosca_live indicates rax holds a live
// value and must be preserved across the runtime call. On 32-bit the
// thread register is passed in explicitly; on 64-bit it is r15_thread.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
  // Load the pre-value of the field into tmp2 (compressed-oop aware on 64-bit).
#ifdef _LP64
  load_heap_oop(tmp2, Address(obj, 0));
#else
  movptr(tmp2, Address(obj, 0));
#endif
  cmpptr(tmp2, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // The queue index counts down; zero means the buffer is full.

#ifdef _LP64
  movslq(tmp, index);
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);
  // Bump the index down one word and store the pre-value at buffer+index.
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}
6882 6899
// G1 post-write barrier: after storing new_val at store_addr, if the
// store crosses heap regions and is non-NULL, mark the corresponding
// card and enqueue its address in the thread-local dirty-card queue,
// calling into the runtime (g1_wb_post) when the queue is full.
// tmp and tmp2 are scratch (tmp ends up holding the card address).
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?
  // (xor of the two addresses has bits above the region size iff they
  // are in different regions)

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  // 0 is the dirty-card value here — presumably CardTableModRefBS's
  // dirty_card; confirm against the card-table definitions.
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  // Enqueue the card if there is room (index counts down to 0 == full).
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}
6976 6993
6977 6994 #endif // SERIALGC
6978 6995 //////////////////////////////////////////////////////////////////////////////////
6979 6996
6980 6997
// Card-table post-store barrier, split into two parts so callers can
// schedule other instructions between them.
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

// Address-taking overload; the address itself is unused since card
// marking only needs the object base.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: turn the object address into a card index (clobbers obj).
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}
6999 7016
// Part 2: mark the card at index 'obj' (produced by part 1) as dirty (0).
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}
7031 7048
7032 7049 void MacroAssembler::subptr(Register dst, int32_t imm32) {
7033 7050 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
7034 7051 }
7035 7052
7036 7053 void MacroAssembler::subptr(Register dst, Register src) {
7037 7054 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
7038 7055 }
7039 7056
// 32-bit test of src1 against the value at an AddressLiteral, going
// through rscratch1 when the literal is not directly reachable.
void MacroAssembler::test32(Register src1, AddressLiteral src2) {
  // src2 must be rval

  if (reachable(src2)) {
    testl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    testl(src1, Address(rscratch1, 0));
  }
}
7050 7067
7051 7068 // C++ bool manipulation
7052 7069 void MacroAssembler::testbool(Register dst) {
7053 7070 if(sizeof(bool) == 1)
7054 7071 testb(dst, 0xff);
7055 7072 else if(sizeof(bool) == 2) {
7056 7073 // testw implementation needed for two byte bools
7057 7074 ShouldNotReachHere();
7058 7075 } else if(sizeof(bool) == 4)
7059 7076 testl(dst, dst);
7060 7077 else
7061 7078 // unsupported
7062 7079 ShouldNotReachHere();
7063 7080 }
7064 7081
7065 7082 void MacroAssembler::testptr(Register dst, Register src) {
7066 7083 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
7067 7084 }
7068 7085
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation from the thread-local allocation buffer: sets
// obj to the old TLAB top and advances top by the requested size
// (constant con_size_in_bytes when var_size_in_bytes == noreg, else the
// register value). Branches to slow_case when the TLAB has no room.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
7103 7120
// Preserves rbx, and rdx.
// Refill the thread's TLAB from eden. If the space left in the current
// TLAB exceeds the refill-waste limit, the TLAB is retained and control
// goes to try_eden (allocate the object directly in shared eden).
// Otherwise the TLAB remainder is filled with a dummy int[] (so heap
// walkers see a parseable heap), a new TLAB is allocated from eden, and
// control jumps back to retry. slow_case is taken when inline contiguous
// allocation is unavailable or eden allocation fails.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr (top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB bounds: start/top at the eden block, end backed
  // off by the alignment reserve kept for the fill array above.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}
7204 7221
// pi/4: range-check constant used by trigfunc() below.
static const double pi_4 = 0.7853981633974483;
7206 7223
// Emits code computing sin ('s'), cos ('c') or tan ('t') of the value in
// F-TOS.  The fast path uses the fsin/fcos/ftan instructions, taken only
// when |x| <= pi/4; otherwise a runtime call to the matching SharedRuntime
// routine is made.  num_fpu_regs_in_use is the number of live FPU stack
// slots (including the argument) that must be preserved across the call.
// The result replaces the argument in F-TOS.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }
  // NOTE: if pi4_adr was not reachable, the fast path is not emitted at
  // all and control always reaches the slow case below.

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    // Reload the argument, which is now deepest in the spilled area.
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  // 64-bit ABI passes the double argument in xmm0, not on the FPU stack.
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  // Move the xmm0 result back onto the FPU stack via the scratch slot.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}
7327 7344
7328 7345
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
// Note: recv_klass and scan_temp are clobbered in the process.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  // Load the vtable length (an int) and scale it past the vtable entries.
  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The loop body is emitted twice (peel == 1 is the peeled first
  // iteration) so the common hit-on-first-entry case falls straight
  // through to found_method with only a short branch.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}
7402 7419
7403 7420
7404 7421 void MacroAssembler::check_klass_subtype(Register sub_klass,
7405 7422 Register super_klass,
7406 7423 Register temp_reg,
7407 7424 Label& L_success) {
7408 7425 Label L_failure;
7409 7426 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
7410 7427 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
7411 7428 bind(L_failure);
7412 7429 }
7413 7430
7414 7431
// Fast part of the subtype check: decides most cases without consulting
// the secondary-supers array.  Any of L_success / L_failure / L_slow_path
// may be NULL, meaning "fall through"; at most one may be NULL.  If
// super_check_offset is not supplied (constant -1), it is loaded from
// super_klass into temp_reg.  Cases that cannot be decided here branch to
// L_slow_path for check_klass_subtype_slow_path.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  // constant_or_zero() == -1 means the caller did not supply an offset.
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset known only at run time: decide primary vs. secondary now.
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}
7518 7535
7519 7536
// Slow part of the subtype check: linear scan of sub_klass's
// secondary-supers array looking for super_klass, caching a hit in the
// secondary-super cache.  Branches (or falls through) to L_success on a
// hit and to L_failure on a miss; either may be NULL = fall through, at
// most one.  If set_cond_codes, Z/NZ indicate success/failure on exit and
// rdi is guaranteed non-zero (used by the AD files).
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Count slow-path entries for profiling (non-product builds only).
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne.  Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax); // Changes flags.
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here.  Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    testl(rax,rax); // Set Z = 0
    repne_scanl();
  } else
#endif // _LP64
  {
    testptr(rax,rax); // Set Z = 0
    repne_scan();
  }
  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}
7627 7644
7628 7645
7629 7646 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7630 7647 ucomisd(dst, as_Address(src));
7631 7648 }
7632 7649
7633 7650 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7634 7651 ucomiss(dst, as_Address(src));
7635 7652 }
7636 7653
7637 7654 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7638 7655 if (reachable(src)) {
7639 7656 xorpd(dst, as_Address(src));
7640 7657 } else {
7641 7658 lea(rscratch1, src);
7642 7659 xorpd(dst, Address(rscratch1, 0));
7643 7660 }
7644 7661 }
7645 7662
7646 7663 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7647 7664 if (reachable(src)) {
7648 7665 xorps(dst, as_Address(src));
7649 7666 } else {
7650 7667 lea(rscratch1, src);
7651 7668 xorps(dst, Address(rscratch1, 0));
7652 7669 }
7653 7670 }
7654 7671
// Emits a call to the verify-oop stub for the oop in 'reg'; a no-op
// unless VerifyOops is set.  Contract: nothing is modified -- the stub
// pops both arguments and restores rax (and r10 on LP64).
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  // NOTE(review): 'b' is never freed; the message must remain valid for
  // as long as the generated code can run -- presumably intentional.
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
}
7676 7693
7677 7694
// Returns *delayed_value_addr plus 'offset' as a RegisterOrConstant.
// If the value is already known (non-zero) it is returned as a constant;
// otherwise code is emitted to load it into 'tmp' at run time, solving
// the stub-generation ordering problem where the value has not yet been
// computed at code-generation time.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  // Check that the delayed value has been filled in by the time the
  // generated code actually runs; halt (or stop with a message in
  // WizardMode) if it is still zero.
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      stop(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}
7709 7726
7710 7727
7711 7728 // registers on entry:
7712 7729 // - rax ('check' register): required MethodType
7713 7730 // - rcx: method handle
7714 7731 // - rdx, rsi, or ?: killable temp
7715 7732 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
7716 7733 Register temp_reg,
7717 7734 Label& wrong_method_type) {
7718 7735 Address type_addr(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg));
7719 7736 // compare method type against that of the receiver
7720 7737 if (UseCompressedOops) {
7721 7738 load_heap_oop(temp_reg, type_addr);
7722 7739 cmpptr(mtype_reg, temp_reg);
7723 7740 } else {
7724 7741 cmpptr(mtype_reg, type_addr);
7725 7742 }
7726 7743 jcc(Assembler::notEqual, wrong_method_type);
7727 7744 }
7728 7745
7729 7746
7730 7747 // A method handle has a "vmslots" field which gives the size of its
7731 7748 // argument list in JVM stack slots. This field is either located directly
7732 7749 // in every method handle, or else is indirectly accessed through the
7733 7750 // method handle's MethodType. This macro hides the distinction.
7734 7751 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
7735 7752 Register temp_reg) {
7736 7753 assert_different_registers(vmslots_reg, mh_reg, temp_reg);
7737 7754 // load mh.type.form.vmslots
7738 7755 if (java_dyn_MethodHandle::vmslots_offset_in_bytes() != 0) {
7739 7756 // hoist vmslots into every mh to avoid dependent load chain
7740 7757 movl(vmslots_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
7741 7758 } else {
7742 7759 Register temp2_reg = vmslots_reg;
7743 7760 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
7744 7761 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg)));
7745 7762 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_dyn_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
7746 7763 }
7747 7764 }
7748 7765
7749 7766
// registers on entry:
//  - rcx: method handle
//  - rdx: killable temp (interpreted only)
//  - rax: killable temp (compiled only)
// Transfers control to the from-interpreted entry of the method handle's
// vmentry; does not return here.
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  movptr(temp_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}
7768 7785
7769 7786
7770 7787 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
7771 7788 int extra_slot_offset) {
7772 7789 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
7773 7790 int stackElementSize = Interpreter::stackElementSize;
7774 7791 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
7775 7792 #ifdef ASSERT
7776 7793 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
7777 7794 assert(offset1 - offset == stackElementSize, "correct arithmetic");
7778 7795 #endif
7779 7796 Register scale_reg = noreg;
7780 7797 Address::ScaleFactor scale_factor = Address::no_scale;
7781 7798 if (arg_slot.is_constant()) {
7782 7799 offset += arg_slot.as_constant() * stackElementSize;
7783 7800 } else {
7784 7801 scale_reg = arg_slot.as_register();
7785 7802 scale_factor = Address::times(stackElementSize);
7786 7803 }
7787 7804 offset += wordSize; // return PC is on stack
7788 7805 return Address(rsp, scale_reg, scale_factor, offset);
7789 7806 }
7790 7807
7791 7808
// Emits a call to the verify-oop stub for the oop stored at 'addr'; a
// no-op unless VerifyOops is set.  Contract: nothing is modified -- the
// stub pops both arguments and restores rax (and r10 on LP64).
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);
  // NOTE(review): 'b' is never freed; the message must remain valid for
  // as long as the generated code can run -- presumably intentional.

#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    // Compensate for the push(rax) above: the wanted slot is now one
    // word further from rsp than when 'addr' was formed.
    lea(rax, addr);
    pushptr(Address(rax, BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}
7827 7844
// Debug-only consistency check of the current thread's TLAB: asserts
// start <= top <= end.  Compiled away unless ASSERT; a no-op at run time
// unless both UseTLAB and VerifyOops are enabled.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    // LP64 keeps the thread in r15; 32-bit must fetch it into rbx below.
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    // Check tlab_top >= tlab_start.
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    // Check tlab_end >= tlab_top.
    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}
7858 7875
// Debug-printing view of the x87 FPU control word.
class ControlWord {
 public:
  int32_t _value;

  // Test one of the low-order exception-mask bits.
  bool mask_bit(int pos) const { return ((_value >> pos) & 1) != 0; }

  int rounding_control()  const { return (_value >> 10) & 3; }
  int precision_control() const { return (_value >>  8) & 3; }
  bool precision()    const { return mask_bit(5); }
  bool underflow()    const { return mask_bit(4); }
  bool overflow()     const { return mask_bit(3); }
  bool zero_divide()  const { return mask_bit(2); }
  bool denormalized() const { return mask_bit(1); }
  bool invalid()      const { return mask_bit(0); }

  void print() const {
    // rounding control
    const char* rc = NULL;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up  "; break;
      case 3: rc = "chop      "; break;
    };
    // precision control
    const char* pc = NULL;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags: upper-case letter = mask bit set, lower-case = clear
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = precision()    ? 'P' : 'p';
    f[3] = underflow()    ? 'U' : 'u';
    f[4] = overflow()     ? 'O' : 'o';
    f[5] = zero_divide()  ? 'Z' : 'z';
    f[6] = denormalized() ? 'D' : 'd';
    f[7] = invalid()      ? 'I' : 'i';
    f[8] = '\0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
7905 7922
// Debug-printing view of the x87 FPU status word.
class StatusWord {
 public:
  int32_t _value;

  // Test a single status bit.
  bool bit(int pos) const { return ((_value >> pos) & 1) != 0; }

  bool busy()         const { return bit(15); }
  bool C3()           const { return bit(14); }
  bool C2()           const { return bit(10); }
  bool C1()           const { return bit( 9); }
  bool C0()           const { return bit( 8); }
  int  top()          const { return (_value >> 11) & 7; }
  bool error_status() const { return bit( 7); }
  bool stack_fault()  const { return bit( 6); }
  bool precision()    const { return bit( 5); }
  bool underflow()    const { return bit( 4); }
  bool overflow()     const { return bit( 3); }
  bool zero_divide()  const { return bit( 2); }
  bool denormalized() const { return bit( 1); }
  bool invalid()      const { return bit( 0); }

  void print() const {
    // condition codes: digit when set, '-' when clear
    char c[5];
    c[0] = C3() ? '3' : '-';
    c[1] = C2() ? '2' : '-';
    c[2] = C1() ? '1' : '-';
    c[3] = C0() ? '0' : '-';
    c[4] = '\0';
    // flags: letter when set, '-' when clear
    char f[9];
    f[0] = error_status() ? 'E' : '-';
    f[1] = stack_fault()  ? 'S' : '-';
    f[2] = precision()    ? 'P' : '-';
    f[3] = underflow()    ? 'U' : '-';
    f[4] = overflow()     ? 'O' : '-';
    f[5] = zero_divide()  ? 'Z' : '-';
    f[6] = denormalized() ? 'D' : '-';
    f[7] = invalid()      ? 'I' : '-';
    f[8] = '\0';
    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};
7949 7966
// Debug-printing view of the x87 FPU tag word: one 2-bit tag per
// physical register.
class TagWord {
 public:
  int32_t _value;

  // Tag bits for physical register i (0..7).
  int tag_at(int i) const { return (_value >> (2 * i)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
7961 7978
// One 80-bit x87 register as stored in memory: 64-bit mantissa split into
// two 32-bit halves plus a 16-bit sign/exponent word.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the x87 "indefinite" QNaN bit pattern.
  bool is_indefinite() const {
    if (_ex != -1) return false;
    return _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  };

};
7979 7996
// Layout of the FPU environment as saved to memory: control, status and
// tag words, last-error instruction/operand pointers, and the eight
// 10-byte data registers.
class FPU_State {
public:
  enum {
    register_size       = 10,  // bytes per saved x87 register (80-bit)
    number_of_registers = 8,
    register_mask       = 7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Tag for stack-relative register ST(i); the tag word is indexed by
  // physical register number, hence the top-of-stack adjustment.
  int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  // NOTE(review): st(i) applies no top adjustment while tag_for_st does,
  // which implies the saved registers are laid out in stack order
  // (ST(0) first) — confirm against the FPU save format used.
  FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }

  // Human-readable name for a 2-bit x87 tag value.
  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  // Dump all eight registers (physical order, '*' marks top of stack)
  // followed by the control, status and tag words.
  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        // i is the physical register number, j its stack position ST(j)
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word .print(); printf("\n");
  }

};
8029 8046
// Snapshot of the integer-unit EFLAGS register with accessors for the
// individual condition bits (bit positions per the IA-32 EFLAGS layout).
class Flag_Register {
 public:
  int32_t _value;

  bool overflow()        const { return (_value & (1 << 11)) != 0; }
  bool direction()       const { return (_value & (1 << 10)) != 0; }
  bool sign()            const { return (_value & (1 <<  7)) != 0; }
  bool zero()            const { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const { return (_value & (1 <<  4)) != 0; }
  bool parity()          const { return (_value & (1 <<  2)) != 0; }
  bool carry()           const { return (_value & (1 <<  0)) != 0; }

  // Print the raw value plus a one-letter-per-flag summary ('-' = clear).
  void print() const {
    char buf[8];
    int k = 0;
    buf[k++] = overflow()        ? 'O' : '-';
    buf[k++] = direction()       ? 'D' : '-';
    buf[k++] = sign()            ? 'S' : '-';
    buf[k++] = zero()            ? 'Z' : '-';
    buf[k++] = auxiliary_carry() ? 'A' : '-';
    buf[k++] = parity()          ? 'P' : '-';
    buf[k++] = carry()           ? 'C' : '-';
    buf[k]   = '\0';
    printf("%08x flags = %s", _value, buf);
  }

};
8058 8075
// Snapshot of one 32-bit integer-unit register.
class IU_Register {
public:
  int32_t _value;

  // Print the value in hex and (right-aligned) signed decimal.
  void print() const {
    printf("%08x %11d", _value, _value);
  }

};
8068 8085
// Snapshot of the integer-unit state: flags plus the eight general
// registers. Field order matches the stack layout produced by the
// save sequence in push_CPU_state (presumably pushf + pusha-style
// ordering — TODO confirm against push_CPU_state).
class IU_State {
public:
  Flag_Register _eflags;
  IU_Register _rdi;
  IU_Register _rsi;
  IU_Register _rbp;
  IU_Register _rsp;
  IU_Register _rbx;
  IU_Register _rdx;
  IU_Register _rcx;
  IU_Register _rax;

  // Dump all registers, then the flags. The historical "rax,"/"rbx,"/
  // "rbp," labels (with trailing commas) are deliberate legacy register
  // names and part of the expected output format.
  void print() const {
    // computation registers
    printf("rax, = "); _rax.print(); printf("\n");
    printf("rbx, = "); _rbx.print(); printf("\n");
    printf("rcx = "); _rcx.print(); printf("\n");
    printf("rdx = "); _rdx.print(); printf("\n");
    printf("rdi = "); _rdi.print(); printf("\n");
    printf("rsi = "); _rsi.print(); printf("\n");
    printf("rbp, = "); _rbp.print(); printf("\n");
    printf("rsp = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};
8096 8113
8097 8114
// Complete CPU snapshot (FPU state followed by integer-unit state) as
// laid out on the stack by push_CPU_state; consumed by the debugging
// helpers below (_print_CPU_state, _verify_FPU).
class CPU_State {
public:
  FPU_State _fpu_state;
  IU_State _iu_state;

  // Dump integer registers then FPU registers between separator rules.
  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};
8112 8129
8113 8130
// C-callable helper invoked from generated code (see print_CPU_state):
// receives the address of the register-save area and prints it.
static void _print_CPU_state(CPU_State* state) {
  state->print();
};
8117 8134
8118 8135
// Emit code that dumps all registers at runtime: save the CPU state on
// the stack, call the C helper _print_CPU_state with the save area's
// address, then restore everything.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}
8126 8143
8127 8144
8128 8145 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
8129 8146 static int counter = 0;
8130 8147 FPU_State* fs = &state->_fpu_state;
8131 8148 counter++;
8132 8149 // For leaf calls, only verify that the top few elements remain empty.
8133 8150 // We only need 1 empty at the top for C2 code.
8134 8151 if( stack_depth < 0 ) {
8135 8152 if( fs->tag_for_st(7) != 3 ) {
8136 8153 printf("FPR7 not empty\n");
8137 8154 state->print();
8138 8155 assert(false, "error");
8139 8156 return false;
8140 8157 }
8141 8158 return true; // All other stack states do not matter
8142 8159 }
8143 8160
8144 8161 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
8145 8162 "bad FPU control word");
8146 8163
8147 8164 // compute stack depth
8148 8165 int i = 0;
8149 8166 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
8150 8167 int d = i;
8151 8168 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
8152 8169 // verify findings
8153 8170 if (i != FPU_State::number_of_registers) {
8154 8171 // stack not contiguous
8155 8172 printf("%s: stack not contiguous at ST%d\n", s, i);
8156 8173 state->print();
8157 8174 assert(false, "error");
8158 8175 return false;
8159 8176 }
8160 8177 // check if computed stack depth corresponds to expected stack depth
8161 8178 if (stack_depth < 0) {
8162 8179 // expected stack depth is -stack_depth or less
8163 8180 if (d > -stack_depth) {
8164 8181 // too many elements on the stack
8165 8182 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
8166 8183 state->print();
8167 8184 assert(false, "error");
8168 8185 return false;
8169 8186 }
8170 8187 } else {
8171 8188 // expected stack depth is stack_depth
8172 8189 if (d != stack_depth) {
8173 8190 // wrong stack depth
8174 8191 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
8175 8192 state->print();
8176 8193 assert(false, "error");
8177 8194 return false;
8178 8195 }
8179 8196 }
8180 8197 // everything is cool
8181 8198 return true;
8182 8199 }
8183 8200
8184 8201
// Emit code that calls _verify_FPU to check the FPU stack has the
// expected depth (no-op unless VerifyFPU is set). On failure the
// generated code breaks into the debugger with int3.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                 // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);         // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize); // discard arguments
  // check for error (rax holds _verify_FPU's boolean result)
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
8204 8221
// Load the klass pointer of the object in src into dst. With compressed
// oops (LP64 only) the klass field is a 32-bit narrow oop that must be
// decoded after the load.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
8214 8231
// Load the prototype mark word of src's klass into dst. In the
// compressed-oops case the narrow-klass decode (shift + heap-base add)
// is folded into the addressing mode of the second load where possible.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        // Shift folds directly into a times_8 scaled address.
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      } else {
        // OK to use shift since we don't need to preserve flags.
        shlq(dst, LogMinObjAlignmentInBytes);
        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      }
    } else {
      // Zero shift: the narrow value is the address (zero-based heap).
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}
8239 8256
// Store the klass pointer in src into the klass field of the object in
// dst. With compressed oops, src is encoded in place first (src is
// clobbered in that case).
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
8249 8266
// Load a (possibly null) heap oop from src into dst, decoding it when
// compressed oops are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}
8259 8276
// Store the heap oop in src to memory at dst. With compressed oops, src
// is encoded in place (clobbered), so dst's address computation must not
// involve src — hence the assert.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}
8270 8287
// Used for storing NULLs.
// Write a null oop to dst: a 32-bit store suffices for a narrow oop,
// while the uncompressed 64-bit case uses movslq (sign-extended 32-bit
// immediate) to zero the full word.
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}
8283 8300
8284 8301 #ifdef _LP64
// Fill the 32-bit gap after a narrow klass field in the object in dst
// with src; a no-op when oops are not compressed (no gap exists then).
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}
8291 8308
8292 8309 #ifdef ASSERT
// Debug-only (ASSERT) check: emit code verifying that r12 still holds
// the current narrow-oop base, stopping with msg if it does not.
// Guarded by CheckCompressedOops.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop(msg);
    bind(ok);
    pop(rscratch1);
  }
}
8306 8323 #endif
8307 8324
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Encode the (possibly null) oop in r in place. With a zero base only a
// shift is needed. Otherwise null is mapped to the heap base via cmov
// first, so that after the subtract+shift a null oop encodes to 0.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // r == NULL ? r = heapbase : r  — makes (r - heapbase) zero for null
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
8326 8343
// Encode the known-non-null oop in r in place (no null check needed, so
// no cmov): subtract the heap base (if any), then shift.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    // Debug check that the caller's non-null promise holds.
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}
8347 8364
// Two-register variant: encode the known-non-null oop in src into dst,
// leaving src untouched when dst != src.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    // Debug check that the caller's non-null promise holds.
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}
8371 8388
// Decode the (possibly null) narrow oop in r in place. With a non-zero
// base, the heap base is added only when the shifted value is non-zero,
// so a null narrow oop decodes back to null.
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    shlq(r, LogMinObjAlignmentInBytes);
    // shlq set ZF for a zero result: skip the base add so null stays null
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}
8390 8407
// Decode the known-non-null narrow oop in r in place: shift then add the
// heap base unconditionally (no null special case needed).
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    // Zero shift implies a zero base (zero-based compressed oops).
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}
8408 8425
// Two-register variant: decode the known-non-null narrow oop in src into
// dst. When the shift is 3, the whole decode folds into a single leaq
// with a scaled address.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      // base + (src << 3) in one instruction
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    // Zero shift implies a zero base (zero-based compressed oops).
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}
8436 8453
8437 8454 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
8438 8455 assert (UseCompressedOops, "should only be used for compressed headers");
8439 8456 assert (Universe::heap() != NULL, "java heap should be initialized");
8440 8457 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8441 8458 int oop_index = oop_recorder()->find_index(obj);
8442 8459 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8443 8460 mov_narrow_oop(dst, oop_index, rspec);
8444 8461 }
8445 8462
8446 8463 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
8447 8464 assert (UseCompressedOops, "should only be used for compressed headers");
8448 8465 assert (Universe::heap() != NULL, "java heap should be initialized");
8449 8466 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8450 8467 int oop_index = oop_recorder()->find_index(obj);
8451 8468 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8452 8469 mov_narrow_oop(dst, oop_index, rspec);
8453 8470 }
8454 8471
8455 8472 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
8456 8473 assert (UseCompressedOops, "should only be used for compressed headers");
8457 8474 assert (Universe::heap() != NULL, "java heap should be initialized");
8458 8475 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8459 8476 int oop_index = oop_recorder()->find_index(obj);
8460 8477 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8461 8478 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8462 8479 }
8463 8480
8464 8481 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
8465 8482 assert (UseCompressedOops, "should only be used for compressed headers");
8466 8483 assert (Universe::heap() != NULL, "java heap should be initialized");
8467 8484 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
8468 8485 int oop_index = oop_recorder()->find_index(obj);
8469 8486 RelocationHolder rspec = oop_Relocation::spec(oop_index);
8470 8487 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
8471 8488 }
8472 8489
8473 8490 void MacroAssembler::reinit_heapbase() {
8474 8491 if (UseCompressedOops) {
8475 8492 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
8476 8493 }
8477 8494 }
8478 8495 #endif // _LP64
8479 8496
// IndexOf substring.
// Emit an SSE4.2 (pcmpestri-based) search for str2 (cnt2 chars) within
// str1 (cnt1 chars), leaving the character index in result, or -1 if not
// found. Strings are UTF-16 (2 bytes per element). Register constraints:
// cnt1 must be rdx, cnt2 rax, and tmp rcx because pcmpestri implicitly
// uses rax/rdx as lengths and writes the match index to rcx (see assert).
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;

  // Save originals so a partial match can rescan from scratch.
  push(str1); // string addr
  push(str2); // substr addr
  push(cnt2); // substr count
  jmpb(PREP_FOR_SCAN);

  // Substr count saved at sp
  // Substr saved at sp+1*wordSize
  // String saved at sp+2*wordSize

  // Reload substr for rescan
  bind(RELOAD_SUBSTR);
  movl(cnt2, Address(rsp, 0));
  movptr(str2, Address(rsp, wordSize));
  // We came here after the beginning of the substring was
  // matched but the rest of it was not so we need to search
  // again. Start from the next element after the previous match.
  subptr(str1, result); // Restore counter
  shrl(str1, 1);
  addl(cnt1, str1);
  decrementl(cnt1);
  lea(str1, Address(result, 2)); // Reload string

  // Load substr
  bind(PREP_FOR_SCAN);
  movdqu(vec, Address(str2, 0));
  addl(cnt1, 8); // prime the loop
  subptr(str1, 16);

  // Scan string for substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  subl(cnt1, 8);
  addptr(str1, 16);

  // pcmpestri
  // inputs:
  // xmm - substring
  // rax - substring length (elements count)
  // mem - scanned string
  // rdx - string length (elements count)
  // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  // outputs:
  // rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  pcmpestri(vec, Address(str1, 0), 0x0d);
  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0: no match, more data
  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0: no match, string exhausted

  // Fallthrough: found a potential substr

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);
  // Compute start addr of substr
  lea(str1, Address(str1, tmp, Address::times_2));
  movptr(result, str1); // save

  // Compare potential substr
  addl(cnt1, 8); // prime the loop
  addl(cnt2, 8);
  subptr(str1, 16);
  subptr(str2, 16);

  // Scan 16-byte vectors of string and substr
  bind(SCAN_SUBSTR);
  subl(cnt1, 8);
  subl(cnt2, 8);
  addptr(str1, 16);
  addptr(str2, 16);
  movdqu(vec, Address(str2, 0));
  pcmpestri(vec, Address(str1, 0), 0x0d);
  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0: mismatch — rescan
  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0: more substr to check

  // Compute substr offset
  subptr(result, Address(rsp, 2*wordSize));
  shrl(result, 1); // index (bytes -> chars)
  jmpb(CLEANUP);

  bind(RET_NOT_FOUND);
  movl(result, -1);

  bind(CLEANUP);
  addptr(rsp, 3*wordSize); // pop the three saved words
}
8575 8592
// Compare strings.
// Emit a lexicographic comparison of two UTF-16 strings: str1/cnt1 vs
// str2/cnt2. result receives the difference of the first differing
// characters, or the length difference if one string is a prefix of the
// other. Uses 16-byte SSE4.2 vector compares when available, falling back
// to a scalar per-character loop for the tail (and for pre-SSE4.2 CPUs).
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1, XMMRegister vec2) {
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);        // length difference kept on the stack until the end
  if (VM_Version::supports_cmov()) {
    cmovl(Assembler::lessEqual, cnt2, result);
  } else {
    // No cmov: branchy equivalent of min(cnt1, cnt2) into cnt2
    Label GT_LABEL;
    jccb(Assembler::greater, GT_LABEL);
    movl(cnt2, result);
    bind(GT_LABEL);
  }

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  // Advance to next character
  addptr(str1, 2);
  addptr(str2, 2);

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    // Setup to compare 16-byte vectors
    movl(cnt1, cnt2);
    andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
    andl(cnt1, 0x00000007); // cnt1 holds the tail count
    testl(cnt2, cnt2);
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vectorized region and iterate with a negative index.
    lea(str2, Address(str2, cnt2, Address::times_2));
    lea(str1, Address(str1, cnt2, Address::times_2));
    negptr(cnt2);

    bind(COMPARE_VECTORS);
    movdqu(vec1, Address(str1, cnt2, Address::times_2));
    movdqu(vec2, Address(str2, cnt2, Address::times_2));
    pxor(vec1, vec2);        // zero iff the 16 bytes are equal
    ptest(vec1, vec1);
    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    addptr(cnt2, 8);
    jcc(Assembler::notZero, COMPARE_VECTORS);
    jmpb(COMPARE_TAIL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    // Rewind to the mismatching vector and rescan it scalar-wise below.
    lea(str1, Address(str1, cnt2, Address::times_2));
    lea(str2, Address(str2, cnt2, Address::times_2));
    movl(cnt1, 8);

    // Compare tail (< 8 chars), or rescan last vectors to
    // find 1st mismatched characters
    bind(COMPARE_TAIL);
    testl(cnt1, cnt1);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    movl(cnt2, cnt1);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, Address::times_2, 0));
  lea(str2, Address(str2, cnt2, Address::times_2, 0));
  negptr(cnt2);

  // Compare the rest of the characters
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jcc(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  addptr(rsp, wordSize);

  // That's it
  bind(DONE_LABEL);
}
8695 8712
// Compare char[] arrays aligned to 4 bytes or substrings.
// Emit an equality test over two UTF-16 sequences. For is_array_equ the
// inputs are array oops (null/length checks performed, data addresses
// computed from the array base); otherwise ary1/ary2 already point at the
// characters and limit holds the element count. result is set to 1 on
// equality, 0 otherwise. Compares 16-byte vectors (SSE4.2), then 4-byte
// words, then a final trailing char.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args: identical references are trivially equal.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0: empty sequences are equal
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0 (chars -> bytes)
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
    // Compare 16-byte vectors
    andl(result, 0x0000000e); // tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vectorized region and iterate with a negative index.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);        // zero iff the 16 bytes are equal
    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}
8795 8812
// BLOCK_COMMENT emits human-readable markers into the code buffer for
// disassembly in debug builds; it compiles away entirely in PRODUCT.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

// BIND binds a label and leaves a block comment naming it.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
8803 8820 void MacroAssembler::generate_fill(BasicType t, bool aligned,
8804 8821 Register to, Register value, Register count,
8805 8822 Register rtmp, XMMRegister xtmp) {
8806 8823 assert_different_registers(to, value, count, rtmp);
8807 8824 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
8808 8825 Label L_fill_2_bytes, L_fill_4_bytes;
8809 8826
8810 8827 int shift = -1;
8811 8828 switch (t) {
8812 8829 case T_BYTE:
8813 8830 shift = 2;
8814 8831 break;
8815 8832 case T_SHORT:
8816 8833 shift = 1;
8817 8834 break;
8818 8835 case T_INT:
8819 8836 shift = 0;
8820 8837 break;
8821 8838 default: ShouldNotReachHere();
8822 8839 }
8823 8840
8824 8841 if (t == T_BYTE) {
8825 8842 andl(value, 0xff);
8826 8843 movl(rtmp, value);
8827 8844 shll(rtmp, 8);
8828 8845 orl(value, rtmp);
8829 8846 }
8830 8847 if (t == T_SHORT) {
8831 8848 andl(value, 0xffff);
8832 8849 }
8833 8850 if (t == T_BYTE || t == T_SHORT) {
8834 8851 movl(rtmp, value);
8835 8852 shll(rtmp, 16);
8836 8853 orl(value, rtmp);
8837 8854 }
8838 8855
8839 8856 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
8840 8857 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
8841 8858 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
8842 8859 // align source address at 4 bytes address boundary
8843 8860 if (t == T_BYTE) {
8844 8861 // One byte misalignment happens only for byte arrays
8845 8862 testptr(to, 1);
8846 8863 jccb(Assembler::zero, L_skip_align1);
8847 8864 movb(Address(to, 0), value);
8848 8865 increment(to);
8849 8866 decrement(count);
8850 8867 BIND(L_skip_align1);
8851 8868 }
8852 8869 // Two bytes misalignment happens only for byte and short (char) arrays
8853 8870 testptr(to, 2);
8854 8871 jccb(Assembler::zero, L_skip_align2);
8855 8872 movw(Address(to, 0), value);
8856 8873 addptr(to, 2);
8857 8874 subl(count, 1<<(shift-1));
8858 8875 BIND(L_skip_align2);
8859 8876 }
8860 8877 if (UseSSE < 2) {
8861 8878 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
8862 8879 // Fill 32-byte chunks
8863 8880 subl(count, 8 << shift);
8864 8881 jcc(Assembler::less, L_check_fill_8_bytes);
8865 8882 align(16);
8866 8883
8867 8884 BIND(L_fill_32_bytes_loop);
8868 8885
8869 8886 for (int i = 0; i < 32; i += 4) {
8870 8887 movl(Address(to, i), value);
8871 8888 }
8872 8889
8873 8890 addptr(to, 32);
8874 8891 subl(count, 8 << shift);
8875 8892 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
8876 8893 BIND(L_check_fill_8_bytes);
8877 8894 addl(count, 8 << shift);
8878 8895 jccb(Assembler::zero, L_exit);
8879 8896 jmpb(L_fill_8_bytes);
8880 8897
8881 8898 //
8882 8899 // length is too short, just fill qwords
8883 8900 //
8884 8901 BIND(L_fill_8_bytes_loop);
8885 8902 movl(Address(to, 0), value);
8886 8903 movl(Address(to, 4), value);
8887 8904 addptr(to, 8);
8888 8905 BIND(L_fill_8_bytes);
8889 8906 subl(count, 1 << (shift + 1));
8890 8907 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
8891 8908 // fall through to fill 4 bytes
8892 8909 } else {
8893 8910 Label L_fill_32_bytes;
8894 8911 if (!UseUnalignedLoadStores) {
8895 8912 // align to 8 bytes, we know we are 4 byte aligned to start
8896 8913 testptr(to, 4);
8897 8914 jccb(Assembler::zero, L_fill_32_bytes);
8898 8915 movl(Address(to, 0), value);
8899 8916 addptr(to, 4);
8900 8917 subl(count, 1<<shift);
8901 8918 }
8902 8919 BIND(L_fill_32_bytes);
8903 8920 {
8904 8921 assert( UseSSE >= 2, "supported cpu only" );
8905 8922 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
8906 8923 // Fill 32-byte chunks
8907 8924 movdl(xtmp, value);
8908 8925 pshufd(xtmp, xtmp, 0);
8909 8926
8910 8927 subl(count, 8 << shift);
8911 8928 jcc(Assembler::less, L_check_fill_8_bytes);
8912 8929 align(16);
8913 8930
8914 8931 BIND(L_fill_32_bytes_loop);
8915 8932
8916 8933 if (UseUnalignedLoadStores) {
8917 8934 movdqu(Address(to, 0), xtmp);
8918 8935 movdqu(Address(to, 16), xtmp);
8919 8936 } else {
8920 8937 movq(Address(to, 0), xtmp);
8921 8938 movq(Address(to, 8), xtmp);
8922 8939 movq(Address(to, 16), xtmp);
8923 8940 movq(Address(to, 24), xtmp);
8924 8941 }
8925 8942
8926 8943 addptr(to, 32);
8927 8944 subl(count, 8 << shift);
8928 8945 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
8929 8946 BIND(L_check_fill_8_bytes);
8930 8947 addl(count, 8 << shift);
8931 8948 jccb(Assembler::zero, L_exit);
8932 8949 jmpb(L_fill_8_bytes);
8933 8950
8934 8951 //
8935 8952 // length is too short, just fill qwords
8936 8953 //
8937 8954 BIND(L_fill_8_bytes_loop);
8938 8955 movq(Address(to, 0), xtmp);
8939 8956 addptr(to, 8);
8940 8957 BIND(L_fill_8_bytes);
8941 8958 subl(count, 1 << (shift + 1));
8942 8959 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
8943 8960 }
8944 8961 }
8945 8962 // fill trailing 4 bytes
8946 8963 BIND(L_fill_4_bytes);
8947 8964 testl(count, 1<<shift);
8948 8965 jccb(Assembler::zero, L_fill_2_bytes);
8949 8966 movl(Address(to, 0), value);
8950 8967 if (t == T_BYTE || t == T_SHORT) {
8951 8968 addptr(to, 4);
8952 8969 BIND(L_fill_2_bytes);
8953 8970 // fill trailing 2 bytes
8954 8971 testl(count, 1<<(shift-1));
8955 8972 jccb(Assembler::zero, L_fill_byte);
8956 8973 movw(Address(to, 0), value);
8957 8974 if (t == T_BYTE) {
8958 8975 addptr(to, 2);
8959 8976 BIND(L_fill_byte);
8960 8977 // fill trailing byte
8961 8978 testl(count, 1);
8962 8979 jccb(Assembler::zero, L_exit);
8963 8980 movb(Address(to, 0), value);
8964 8981 } else {
8965 8982 BIND(L_fill_byte);
8966 8983 }
8967 8984 } else {
8968 8985 BIND(L_fill_2_bytes);
8969 8986 }
8970 8987 BIND(L_exit);
8971 8988 }
8972 8989 #undef BIND
8973 8990 #undef BLOCK_COMMENT
8974 8991
8975 8992
8976 8993 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8977 8994 switch (cond) {
8978 8995 // Note some conditions are synonyms for others
8979 8996 case Assembler::zero: return Assembler::notZero;
8980 8997 case Assembler::notZero: return Assembler::zero;
8981 8998 case Assembler::less: return Assembler::greaterEqual;
8982 8999 case Assembler::lessEqual: return Assembler::greater;
8983 9000 case Assembler::greater: return Assembler::lessEqual;
8984 9001 case Assembler::greaterEqual: return Assembler::less;
8985 9002 case Assembler::below: return Assembler::aboveEqual;
8986 9003 case Assembler::belowEqual: return Assembler::above;
8987 9004 case Assembler::above: return Assembler::belowEqual;
8988 9005 case Assembler::aboveEqual: return Assembler::below;
8989 9006 case Assembler::overflow: return Assembler::noOverflow;
8990 9007 case Assembler::noOverflow: return Assembler::overflow;
8991 9008 case Assembler::negative: return Assembler::positive;
8992 9009 case Assembler::positive: return Assembler::negative;
8993 9010 case Assembler::parity: return Assembler::noParity;
8994 9011 case Assembler::noParity: return Assembler::parity;
8995 9012 }
8996 9013 ShouldNotReachHere(); return Assembler::overflow;
8997 9014 }
8998 9015
8999 9016 SkipIfEqual::SkipIfEqual(
9000 9017 MacroAssembler* masm, const bool* flag_addr, bool value) {
9001 9018 _masm = masm;
9002 9019 _masm->cmp8(ExternalAddress((address)flag_addr), value);
9003 9020 _masm->jcc(Assembler::equal, _label);
9004 9021 }
9005 9022
// Bind the target of the conditional jump emitted by the constructor,
// closing the code region that is skipped when the flag equals the value.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
↓ open down ↓ |
4079 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX