Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/assembler_x86.cpp
+++ new/src/cpu/x86/vm/assembler_x86.cpp
1 1 /*
2 - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
2 + * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 21 * have any questions.
22 22 *
23 23 */
24 24
25 25 #include "incls/_precompiled.incl"
26 26 #include "incls/_assembler_x86.cpp.incl"
27 27
28 28 // Implementation of AddressLiteral
29 29
// Build an AddressLiteral for 'target', deriving the relocation record
// (_rspec) from the requested relocation type. Types with no explicit
// case below (oop_type, none) leave _rspec default-constructed.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
66 66
67 67 // Implementation of Address
68 68
69 69 #ifdef _LP64
70 70
// 64-bit: array addressing via an absolute base is not expressible, so
// this must never be reached; callers handle the 64-bit case earlier.
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}
77 77
78 78 // exceedingly dangerous constructor
// exceedingly dangerous constructor
// Builds a no-base, no-index Address whose displacement carries a
// relocation derived from 'loc' and 'rtype'. Caller must guarantee the
// relocated displacement is meaningful at the use site.
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // NOTE(review): runtime_call relocation on a data displacement looks
      // suspect (original comment was just "HMM") — kept as-is; confirm callers.
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
105 105 #else // LP64
106 106
// 32-bit: fold an ArrayAddress (absolute base + register index) into a
// single Address whose displacement is the base's absolute target and
// whose relocation is inherited from the base literal.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}
115 115
116 116 // exceedingly dangerous constructor
// exceedingly dangerous constructor
// 32-bit only: treat an absolute code/data address as the displacement,
// attaching the caller-supplied relocation unchanged.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}
124 124
125 125 #endif // _LP64
126 126
127 127
128 128
129 129 // Convert the raw encoding form into the form expected by the constructor for
130 130 // Address. An index of 4 (rsp) corresponds to having no index, so convert
131 131 // that to noreg for the Address constructor.
132 132 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
133 133 RelocationHolder rspec;
134 134 if (disp_is_oop) {
135 135 rspec = Relocation::spec_simple(relocInfo::oop_type);
136 136 }
137 137 bool valid_index = index != rsp->encoding();
138 138 if (valid_index) {
139 139 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
140 140 madr._rspec = rspec;
141 141 return madr;
142 142 } else {
143 143 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
144 144 madr._rspec = rspec;
145 145 return madr;
146 146 }
147 147 }
148 148
149 149 // Implementation of Assembler
150 150
// Filler byte for code-buffer padding: 0xF4 is the x86 'hlt' opcode, so
// stray execution of padding traps instead of running garbage.
int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}
154 154
155 155 // make this go away someday
156 156 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
157 157 if (rtype == relocInfo::none)
158 158 emit_long(data);
159 159 else emit_data(data, Relocation::spec_simple(rtype), format);
160 160 }
161 161
// Emit a 32-bit data word with an accompanying relocation record.
// The relocation is registered at the enclosing instruction's start
// (inst_mark), not at the word itself, so the reloc machinery can
// re-decode the whole instruction later.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}
180 180
181 181 static int encode(Register r) {
182 182 int enc = r->encoding();
183 183 if (enc >= 8) {
184 184 enc -= 8;
185 185 }
186 186 return enc;
187 187 }
188 188
189 189 static int encode(XMMRegister r) {
190 190 int enc = r->encoding();
191 191 if (enc >= 8) {
192 192 enc -= 8;
193 193 }
194 194 return enc;
195 195 }
196 196
// Byte-sized register/immediate arithmetic: emits opcode, ModRM
// (op2 | register), then the 8-bit immediate, in that exact order.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}
206 206
207 207
// Register/immediate arithmetic (add, adc, and, ...). When the immediate
// fits in a signed byte, uses the shorter sign-extended imm8 encoding
// (opcode with bit 1 set); otherwise emits the full 32-bit immediate.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}
222 222
223 223 // immediate-to-memory forms
// Immediate-to-memory arithmetic. Same imm8/imm32 choice as the
// register form; the emit_operand size argument (1 or 4) tells the
// operand encoder how many immediate bytes follow the address bytes.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}
237 237
// 32-bit-only: register/oop-immediate arithmetic. The oop is emitted as
// an embedded 32-bit immediate with an oop_type relocation so the GC
// can find and patch it. Never reached on 64-bit.
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}
248 248
249 249
// Register/register arithmetic: opcode byte followed by a ModRM byte
// with dst in the reg field (bits 5..3) and src in the r/m field.
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}
255 255
256 256
// Emit the ModRM byte (plus SIB and displacement bytes as required) that
// encodes the memory operand [base + index*scale + disp], with 'reg' in
// the ModRM reg field. Chooses the shortest encoding: no disp, disp8, or
// disp32 (a relocated disp always forces disp32 so it stays patchable).
// The bracketed comments on each branch show the emitted byte layout.
// rip_relative_correction adjusts the 64-bit RIP-relative displacement
// for trailing immediate bytes the caller will emit after this operand.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // rsp (and r12, which shares its low 3 encoding bits) always needs
      // a SIB byte, because its r/m encoding (100) is the SIB escape.
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      // rbp/r13 cannot use the no-displacement form (their encoding means
      // "disp32 only" in mod=00), so they fall to the disp8/disp32 cases.
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
374 374
// XMM overload: the ModRM reg-field encoding is the same for XMM and
// general registers, so delegate via a cast to the Register version.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
380 380
381 381 // Secret local extension to Assembler::WhichOperand:
382 382 #define end_pc_operand (_WhichOperand_limit)
383 383
// Decode the instruction starting at 'inst' just far enough to locate the
// requested 32/64-bit operand (or, for end_pc_operand, the instruction's
// end). Structure: (1) skip/classify prefixes and the opcode via the big
// switch, recording whether a memory operand (disp32) and/or trailing
// immediate bytes exist; (2) parse the ModRM/SIB bytes emitted by
// emit_operand to step over or return the displacement.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    // REX.W widens any trailing immediate to 64 bits (see REP8(0xB8)).
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr a
      // 64bit side says these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // no break: after a REX prefix the extra ip++ above plus the
      // default's ip++ skips both the prefix and the opcode byte
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    // mod != 11 and r/m == 100: a SIB byte follows the ModRM byte
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
736 736
// Return the address of the instruction following 'inst' by asking
// locate_operand for the (private) end_pc_operand pseudo-operand.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
741 741
742 742
743 743 #ifdef ASSERT
// Debug-only sanity check: re-decode the instruction at inst_mark() and
// verify that the operand the relocation refers to sits exactly at the
// current pc (i.e. the bytes about to be emitted).
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
765 765 #endif // ASSERT
766 766
// Emit a memory operand restricted to the 32-bit register set: asserts
// that neither reg nor the address components need a REX prefix.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
773 773
// Unpack an Address into the field-level emit_operand, forwarding the
// RIP-relative correction for any trailing immediate bytes.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}
780 780
// XMM convenience overload: unpack the Address and delegate.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
785 785
786 786 // MMX operations
// MMX operations
// MMX registers share the 3-bit ModRM encoding, so cast to Register.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
791 791
792 792 // work around gcc (3.2.1-7a) bug
// work around gcc (3.2.1-7a) bug
// Argument-swapped variant of the MMX overload above; identical emission.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
797 797
798 798
// x87 floating-point arithmetic on FPU stack slot st(i): emits the
// two-byte opcode with the stack offset added into the second byte.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}
805 805
806 806
807 807 // Now the Assembler instruction (identical for 32/64 bits)
808 808
// adcl dst, imm32 — add-with-carry of an immediate into a register.
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}
813 813
// adcl dst, src — add-with-carry of a memory operand into a register.
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
820 820
// adcl dst, src — register-to-register add-with-carry.
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
825 825
// addl dst, imm32 — add an immediate to a memory operand.
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}
831 831
// addl dst, src — add a register into a memory operand.
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}
838 838
// addl dst, imm32 — add an immediate to a register.
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}
843 843
// addl dst, src — add a memory operand into a register.
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}
850 850
// addl dst, src — register-to-register add.
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
855 855
// Multi-byte no-op used for code alignment padding (4-byte form).
void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
863 863
// Multi-byte no-op used for code alignment padding (5-byte form).
void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}
872 872
// Multi-byte no-op used for code alignment padding (7-byte form).
void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
880 880
// Multi-byte no-op used for code alignment padding (8-byte form).
void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}
889 889
// addsd dst, src — scalar double add, register form (F2 0F 58).
// SSE2 is assumed present on 64-bit, hence the NOT_LP64-guarded check.
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}
898 898
// addsd dst, src — scalar double add, memory form (F2 0F 58).
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
908 908
// addss dst, src — scalar float add, register form (F3 0F 58).
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}
917 917
void Assembler::addss(XMMRegister dst, Address src) {
  // ADDSS xmm, m32: F3 0F 58 /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
927 927
void Assembler::andl(Register dst, int32_t imm32) {
  // AND r32, imm32: 81 /4 id (emit_arith may shorten to 83 /4 ib).
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}
932 932
void Assembler::andl(Register dst, Address src) {
  // AND r32, m32: 23 /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
939 939
void Assembler::andl(Register dst, Register src) {
  // AND r32, r32: 23 /r.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
944 944
void Assembler::andpd(XMMRegister dst, Address src) {
  // ANDPD xmm, m128: 66 0F 54 /r (packed-double bitwise AND).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x54);
  emit_operand(dst, src);
}
954 954
void Assembler::bsfl(Register dst, Register src) {
  // BSF r32, r32: 0F BC /r (bit scan forward).
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}
961 961
void Assembler::bsrl(Register dst, Register src) {
  // BSR r32, r32: 0F BD /r. On CPUs with LZCNT, an F3-prefixed form of
  // this opcode decodes as LZCNT instead — hence the guard below.
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
969 969
void Assembler::bswapl(Register reg) { // bswap
  // BSWAP r32: 0F C8+rd (register encoded in the opcode byte).
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
975 975
// Emits a relative CALL (E8 rel32) to a label. For a bound label the
// displacement is computed now (and must be backwards); for an unbound
// label a zero placeholder is emitted and patched when the label binds.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    // displacement is relative to the end of the 5-byte instruction
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}
997 997
// Indirect CALL through a register: FF /2. In 64-bit mode the default
// operand size is 64 bits, so no REX.W is needed; a REX prefix is only
// emitted for the extended registers (r8-r15).
void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}
1015 1015
1016 1016
void Assembler::call(Address adr) {
  // Indirect CALL through memory: FF /2 (rdx supplies the /2 reg field
  // of the ModRM byte; it is not an operand).
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}
1023 1023
// Direct CALL (E8 rel32) to an absolute address, with relocation info.
// The target must be within a 32-bit displacement of the next
// instruction.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  // displacement is relative to the end of the instruction
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1036 1036
void Assembler::cdql() {
  // CDQ: 99 (sign-extend EAX into EDX:EAX).
  emit_byte(0x99);
}
1040 1040
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  // CMOVcc r32, r32: 0F 40+cc /r (condition encoded in the opcode).
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}
1048 1048
1049 1049
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  // CMOVcc r32, m32: 0F 40+cc /r.
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
1057 1057
void Assembler::cmpb(Address dst, int imm8) {
  // CMP m8, imm8: 80 /7 ib (rdi supplies the /7 reg field of ModRM).
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}
1065 1065
void Assembler::cmpl(Address dst, int32_t imm32) {
  // CMP m32, imm32: 81 /7 id.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}
1073 1073
void Assembler::cmpl(Register dst, int32_t imm32) {
  // CMP r32, imm32: 81 /7 id (emit_arith may shorten to 83 /7 ib).
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}
1078 1078
void Assembler::cmpl(Register dst, Register src) {
  // CMP r32, r32: 3B /r.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
1083 1083
1084 1084
void Assembler::cmpl(Register dst, Address src) {
  // CMP r32, m32: 3B /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
1091 1091
void Assembler::cmpw(Address dst, int imm16) {
  // CMP m16, imm16: 66 81 /7 iw. No REX prefix is emitted, so the
  // address may not use extended registers (hence the assert).
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
1100 1100
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  // When bit 1 of the Atomics flag is set, a non-atomic cmp/mov
  // sequence is substituted for the real CMPXCHG (0F B1 /r) —
  // diagnostic use only.
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware. The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L ;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}
1126 1126
void Assembler::comisd(XMMRegister dst, Address src) {
  // COMISD xmm, m64: 66 0F 2F /r — i.e. COMISS with a 66 prefix,
  // so delegate to comiss after emitting the prefix.
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangly ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}
1134 1134
void Assembler::comiss(XMMRegister dst, Address src) {
  // COMISS xmm, m32: 0F 2F /r (ordered scalar-single compare, sets
  // EFLAGS). Also used by comisd() which prepends a 66 prefix.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}
1144 1144
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  // CVTDQ2PD xmm, xmm: F3 0F E6 /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}
1153 1153
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  // CVTDQ2PS xmm, xmm: 0F 5B /r (no mandatory prefix).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}
1161 1161
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  // CVTSD2SS xmm, xmm: F2 0F 5A /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}
1170 1170
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  // CVTSI2SD xmm, r32: F2 0F 2A /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
1179 1179
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  // CVTSI2SS xmm, r32: F3 0F 2A /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
1188 1188
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  // CVTSS2SD xmm, xmm: F3 0F 5A /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}
1197 1197
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  // CVTTSD2SI r32, xmm: F2 0F 2C /r (truncating conversion).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1206 1206
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  // CVTTSS2SI r32, xmm: F3 0F 2C /r (truncating conversion).
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1215 1215
void Assembler::decl(Address dst) {
  // DEC m32: FF /1 (rcx supplies the /1 reg field of ModRM).
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
1223 1223
void Assembler::divsd(XMMRegister dst, Address src) {
  // DIVSD xmm, m64: F2 0F 5E /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}
1233 1233
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  // DIVSD xmm, xmm: F2 0F 5E /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}
1242 1242
void Assembler::divss(XMMRegister dst, Address src) {
  // DIVSS xmm, m32: F3 0F 5E /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}
1252 1252
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  // DIVSS xmm, xmm: F3 0F 5E /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}
1261 1261
void Assembler::emms() {
  // EMMS: 0F 77 (empty MMX state, restoring x87 tag word).
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}
1267 1267
void Assembler::hlt() {
  // HLT: F4.
  emit_byte(0xF4);
}
1271 1271
void Assembler::idivl(Register src) {
  // IDIV r32: F7 /7 (signed divide EDX:EAX by src).
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}
1277 1277
void Assembler::imull(Register dst, Register src) {
  // IMUL r32, r32: 0F AF /r (two-operand signed multiply).
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}
1284 1284
1285 1285
1286 1286 void Assembler::imull(Register dst, Register src, int value) {
1287 1287 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1288 1288 if (is8bit(value)) {
1289 1289 emit_byte(0x6B);
1290 1290 emit_byte(0xC0 | encode);
1291 1291 emit_byte(value);
1292 1292 } else {
1293 1293 emit_byte(0x69);
1294 1294 emit_byte(0xC0 | encode);
1295 1295 emit_long(value);
1296 1296 }
1297 1297 }
1298 1298
void Assembler::incl(Address dst) {
  // INC m32: FF /0 (rax supplies the /0 reg field of ModRM).
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
1306 1306
// Conditional jump to a label. A bound label with no relocation gets the
// short form (70+cc rel8) when in range, otherwise the long form
// (0F 80+cc rel32). An unbound label always gets the long form with a
// zero placeholder, patched when the label binds.
void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    // displacements are relative to the end of the emitted instruction
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    // is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    // an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}
1341 1341
// Short conditional jump (70+cc rel8). The caller guarantees the target
// is within 8-bit displacement range; this is only asserted, not
// checked, in product builds.
void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
           "Dispacement too large for a short jmp");
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);  // placeholder, patched when the label binds
  }
}
1359 1359
void Assembler::jmp(Address adr) {
  // Indirect JMP through memory: FF /4 (rsp supplies the /4 reg field
  // of the ModRM byte; it is not an operand).
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}
1366 1366
// Unconditional jump to a label: short form EB rel8 when bound, in
// range, and unrelocated; otherwise E9 rel32. Unbound labels always
// get the 32-bit form with a zero placeholder.
void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    // displacements are relative to the end of the emitted instruction
    intptr_t offs = entry - _code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound. If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    relocate(rtype);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}
1394 1394
void Assembler::jmp(Register entry) {
  // Indirect JMP through a register: FF /4.
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}
1400 1400
// Direct JMP (E9 rel32) to an absolute address, with relocation info.
// The target must be within a 32-bit displacement of the next
// instruction.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  // displacement is relative to the end of the instruction
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
1409 1409
1410 1410 void Assembler::jmpb(Label& L) {
1411 1411 if (L.is_bound()) {
1412 1412 const int short_size = 2;
1413 1413 address entry = target(L);
1414 1414 assert(is8bit((entry - _code_pos) + short_size),
1415 1415 "Dispacement too large for a short jmp");
1416 1416 assert(entry != NULL, "jmp most probably wrong");
1417 1417 intptr_t offs = entry - _code_pos;
1418 1418 emit_byte(0xEB);
1419 1419 emit_byte((offs - short_size) & 0xFF);
1420 1420 } else {
1421 1421 InstructionMark im(this);
1422 1422 L.add_patch_at(code(), locator());
1423 1423 emit_byte(0xEB);
1424 1424 emit_byte(0);
1425 1425 }
1426 1426 }
1427 1427
void Assembler::ldmxcsr( Address src) {
  // LDMXCSR m32: 0F AE /2 (load the MXCSR control/status register;
  // as_Register(2) supplies the /2 reg field of ModRM).
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);
}
1436 1436
void Assembler::leal(Register dst, Address src) {
  // LEA r32, m: 8D /r. On 64-bit an addr32 (0x67) prefix forces 32-bit
  // address arithmetic so the result matches the 32-bit semantics.
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}
1446 1446
void Assembler::lock() {
  // LOCK prefix: F0. When bit 0 of the Atomics flag is set, a NOP
  // (0x90) is substituted — diagnostic use only, the following
  // instruction is then NOT atomic.
  if (Atomics & 1) {
    // Emit either nothing, a NOP, or a NOP: prefix
    emit_byte(0x90) ;
  } else {
    emit_byte(0xF0);
  }
}
1455 1455
void Assembler::lzcntl(Register dst, Register src) {
  // LZCNT r32, r32: F3 0F BD /r. On CPUs without LZCNT this byte
  // sequence decodes as BSR — hence the guard below.
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
1464 1464
// Emit mfence instruction (0F AE F0 — full memory fence, SSE2).
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}
1472 1472
void Assembler::mov(Register dst, Register src) {
  // Pointer-width register move: movq on 64-bit, movl on 32-bit.
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
1476 1476
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  // MOVAPD xmm, xmm: 66 0F 28 /r. The REX prefix (REX_B / REX_R /
  // REX_RB) is hand-selected here because the 66 prefix must come
  // first and only extended registers (enc >= 8) need REX bits.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  emit_byte(0x66);
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc); // ModRM: reg=dst, rm=src
}
1500 1500
1501 1501 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1502 1502 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1503 1503 int dstenc = dst->encoding();
1504 1504 int srcenc = src->encoding();
1505 1505 if (dstenc < 8) {
1506 1506 if (srcenc >= 8) {
1507 1507 prefix(REX_B);
1508 1508 srcenc -= 8;
1509 1509 }
1510 1510 } else {
1511 1511 if (srcenc < 8) {
1512 1512 prefix(REX_R);
1513 1513 } else {
1514 1514 prefix(REX_RB);
1515 1515 srcenc -= 8;
1516 1516 }
1517 1517 dstenc -= 8;
1518 1518 }
1519 1519 emit_byte(0x0F);
1520 1520 emit_byte(0x28);
1521 1521 emit_byte(0xC0 | dstenc << 3 | srcenc);
1522 1522 }
1523 1523
void Assembler::movb(Register dst, Address src) {
  // MOV r8, m8: 8A /r (the 'true' flag asks prefix() for a byte-reg
  // aware REX).
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}
1531 1531
1532 1532
void Assembler::movb(Address dst, int imm8) {
  // MOV m8, imm8: C6 /0 ib.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}
1540 1540
1541 1541
void Assembler::movb(Address dst, Register src) {
  // MOV m8, r8: 88 /r.
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}
1549 1549
void Assembler::movdl(XMMRegister dst, Register src) {
  // MOVD xmm, r32: 66 0F 6E /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}
1558 1558
void Assembler::movdl(Register dst, XMMRegister src) {
  // MOVD r32, xmm: 66 0F 7E /r. Here the xmm register occupies the
  // ModRM reg field, so the prefix arguments are swapped.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefix_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
1568 1568
void Assembler::movdqa(XMMRegister dst, Address src) {
  // MOVDQA xmm, m128: 66 0F 6F /r (aligned 128-bit load).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1578 1578
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  // MOVDQA xmm, xmm: 66 0F 6F /r.
  // NOTE(review): this uses prefixq_and_encode (REX.W form) where the
  // memory variants use the plain prefix; REX.W looks redundant for
  // this instruction — confirm before changing, since it alters the
  // emitted byte sequence.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}
1587 1587
void Assembler::movdqa(Address dst, XMMRegister src) {
  // MOVDQA m128, xmm: 66 0F 7F /r (aligned 128-bit store).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1597 1597
void Assembler::movdqu(XMMRegister dst, Address src) {
  // MOVDQU xmm, m128: F3 0F 6F /r (unaligned 128-bit load).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1607 1607
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  // MOVDQU xmm, xmm: F3 0F 6F /r.
  // NOTE(review): uses prefixq_and_encode (REX.W form) like the
  // reg-reg movdqa above, while the memory variants use the plain
  // prefix — presumably REX.W is redundant here; verify before
  // changing the emitted bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}
1616 1616
void Assembler::movdqu(Address dst, XMMRegister src) {
  // MOVDQU m128, xmm: F3 0F 7F /r (unaligned 128-bit store).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1626 1626
1627 1627 // Uses zero extension on 64bit
1628 1628
void Assembler::movl(Register dst, int32_t imm32) {
  // MOV r32, imm32: B8+rd id (register encoded in the opcode byte;
  // zero-extends into the full register on 64-bit).
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}
1634 1634
void Assembler::movl(Register dst, Register src) {
  // MOV r32, r32: 8B /r.
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}
1640 1640
void Assembler::movl(Register dst, Address src) {
  // MOV r32, m32: 8B /r.
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1647 1647
void Assembler::movl(Address dst, int32_t imm32) {
  // MOV m32, imm32: C7 /0 id.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}
1655 1655
void Assembler::movl(Address dst, Register src) {
  // MOV m32, r32: 89 /r.
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1662 1662
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  // MOVLPD xmm, m64: 66 0F 12 /r (loads the low quadword only, leaving
  // the high half of dst unchanged).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}
1675 1675
void Assembler::movq( MMXRegister dst, Address src ) {
  // MOVQ mm, m64: 0F 6F /r (MMX form; no REX handling — MMX registers
  // have no extended encodings, and this path is guarded by
  // supports_mmx()).
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}
1682 1682
void Assembler::movq( Address dst, MMXRegister src ) {
  // MOVQ m64, mm: 0F 7F /r (MMX form).
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}
1695 1695
void Assembler::movq(XMMRegister dst, Address src) {
  // MOVQ xmm, m64: F3 0F 7E /r (zero-extending 64-bit load).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}
1705 1705
void Assembler::movq(Address dst, XMMRegister src) {
  // MOVQ m64, xmm: 66 0F D6 /r (64-bit store of the low quadword).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
1715 1715
void Assembler::movsbl(Register dst, Address src) { // movsxb
  // MOVSX r32, m8: 0F BE /r (sign-extending byte load).
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
1723 1723
void Assembler::movsbl(Register dst, Register src) { // movsxb
  // MOVSX r32, r8: 0F BE /r (the 'true' flag selects byte-register
  // aware prefixing).
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
1731 1731
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  // MOVSD xmm, xmm: F2 0F 10 /r.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}
1740 1740
void Assembler::movsd(XMMRegister dst, Address src) {
  // MOVSD xmm, m64: F2 0F 10 /r (scalar double load).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}
1750 1750
void Assembler::movsd(Address dst, XMMRegister src) {
  // MOVSD m64, xmm: F2 0F 11 /r (scalar double store).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1760 1760
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  // MOVSS xmm, xmm: F3 0F 10 /r.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}
1769 1769
void Assembler::movss(XMMRegister dst, Address src) {
  // MOVSS xmm, m32: F3 0F 10 /r (scalar single load).
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}
1779 1779
void Assembler::movss(Address dst, XMMRegister src) {
  // MOVSS m32, xmm: F3 0F 11 /r (scalar single store).
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1789 1789
void Assembler::movswl(Register dst, Address src) { // movsxw
  // MOVSX r32, m16: 0F BF /r (sign-extending word load).
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
1797 1797
void Assembler::movswl(Register dst, Register src) { // movsxw
  // MOVSX r32, r16: 0F BF /r.
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
1804 1804
void Assembler::movw(Address dst, int imm16) {
  // MOV m16, imm16: 66 C7 /0 iw.
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}
1814 1814
void Assembler::movw(Register dst, Address src) {
  // MOV r16, m16: 66 8B /r.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
1822 1822
void Assembler::movw(Address dst, Register src) {
  // MOV m16, r16: 66 89 /r.
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1830 1830
void Assembler::movzbl(Register dst, Address src) { // movzxb
  // MOVZX r32, m8: 0F B6 /r (zero-extending byte load).
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
1838 1838
// Zero-extend byte register into 32-bit register: movzx r32, r8 (0F B6 /r).
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  // third arg 'true' = byte-register operand; on 64-bit this forces a REX
  // prefix where needed so spl/bpl/sil/dil encode correctly.
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}
1846 1846
1847 1847 void Assembler::movzwl(Register dst, Address src) { // movzxw
1848 1848 InstructionMark im(this);
1849 1849 prefix(src, dst);
1850 1850 emit_byte(0x0F);
1851 1851 emit_byte(0xB7);
1852 1852 emit_operand(dst, src);
1853 1853 }
1854 1854
1855 1855 void Assembler::movzwl(Register dst, Register src) { // movzxw
1856 1856 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1857 1857 emit_byte(0x0F);
1858 1858 emit_byte(0xB7);
1859 1859 emit_byte(0xC0 | encode);
1860 1860 }
1861 1861
// Unsigned multiply eax by a memory operand: mul m32 (F7 /4); result in edx:eax.
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);  // rsp encodes the /4 opcode extension (MUL), not a register operand
}
1868 1868
// Unsigned multiply eax by a register: mul r32 (F7 /4); result in edx:eax.
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);  // 0xE0 = ModRM 11 100 reg, i.e. the /4 (MUL) extension
}
1874 1874
1875 1875 void Assembler::mulsd(XMMRegister dst, Address src) {
1876 1876 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1877 1877 InstructionMark im(this);
1878 1878 emit_byte(0xF2);
1879 1879 prefix(src, dst);
1880 1880 emit_byte(0x0F);
1881 1881 emit_byte(0x59);
1882 1882 emit_operand(dst, src);
1883 1883 }
1884 1884
1885 1885 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1886 1886 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1887 1887 emit_byte(0xF2);
1888 1888 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1889 1889 emit_byte(0x0F);
1890 1890 emit_byte(0x59);
1891 1891 emit_byte(0xC0 | encode);
1892 1892 }
1893 1893
1894 1894 void Assembler::mulss(XMMRegister dst, Address src) {
1895 1895 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1896 1896 InstructionMark im(this);
1897 1897 emit_byte(0xF3);
1898 1898 prefix(src, dst);
1899 1899 emit_byte(0x0F);
1900 1900 emit_byte(0x59);
1901 1901 emit_operand(dst, src);
1902 1902 }
1903 1903
1904 1904 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1905 1905 NOT_LP64(assert(VM_Version::supports_sse(), ""));
1906 1906 emit_byte(0xF3);
1907 1907 int encode = prefix_and_encode(dst->encoding(), src->encoding());
1908 1908 emit_byte(0x0F);
1909 1909 emit_byte(0x59);
1910 1910 emit_byte(0xC0 | encode);
1911 1911 }
1912 1912
// Two's-complement negate: neg r32 (F7 /3).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);  // 0xD8 = ModRM 11 011 reg, i.e. the /3 (NEG) extension
}
1918 1918
1919 1919 void Assembler::nop(int i) {
1920 1920 #ifdef ASSERT
1921 1921 assert(i > 0, " ");
1922 1922 // The fancy nops aren't currently recognized by debuggers making it a
1923 1923 // pain to disassemble code while debugging. If asserts are on clearly
1924 1924 // speed is not an issue so simply use the single byte traditional nop
1925 1925 // to do alignment.
1926 1926
1927 1927 for (; i > 0 ; i--) emit_byte(0x90);
1928 1928 return;
1929 1929
1930 1930 #endif // ASSERT
1931 1931
1932 1932 if (UseAddressNop && VM_Version::is_intel()) {
1933 1933 //
1934 1934 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
1935 1935 // 1: 0x90
1936 1936 // 2: 0x66 0x90
1937 1937 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1938 1938 // 4: 0x0F 0x1F 0x40 0x00
1939 1939 // 5: 0x0F 0x1F 0x44 0x00 0x00
1940 1940 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1941 1941 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1942 1942 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1943 1943 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1944 1944 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1945 1945 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1946 1946
1947 1947 // The rest coding is Intel specific - don't use consecutive address nops
1948 1948
1949 1949 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1950 1950 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1951 1951 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1952 1952 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
1953 1953
1954 1954 while(i >= 15) {
1955 1955 // For Intel don't generate consecutive addess nops (mix with regular nops)
1956 1956 i -= 15;
1957 1957 emit_byte(0x66); // size prefix
1958 1958 emit_byte(0x66); // size prefix
1959 1959 emit_byte(0x66); // size prefix
1960 1960 addr_nop_8();
1961 1961 emit_byte(0x66); // size prefix
1962 1962 emit_byte(0x66); // size prefix
1963 1963 emit_byte(0x66); // size prefix
1964 1964 emit_byte(0x90); // nop
1965 1965 }
1966 1966 switch (i) {
1967 1967 case 14:
1968 1968 emit_byte(0x66); // size prefix
1969 1969 case 13:
1970 1970 emit_byte(0x66); // size prefix
1971 1971 case 12:
1972 1972 addr_nop_8();
1973 1973 emit_byte(0x66); // size prefix
1974 1974 emit_byte(0x66); // size prefix
1975 1975 emit_byte(0x66); // size prefix
1976 1976 emit_byte(0x90); // nop
1977 1977 break;
1978 1978 case 11:
1979 1979 emit_byte(0x66); // size prefix
1980 1980 case 10:
1981 1981 emit_byte(0x66); // size prefix
1982 1982 case 9:
1983 1983 emit_byte(0x66); // size prefix
1984 1984 case 8:
1985 1985 addr_nop_8();
1986 1986 break;
1987 1987 case 7:
1988 1988 addr_nop_7();
1989 1989 break;
1990 1990 case 6:
1991 1991 emit_byte(0x66); // size prefix
1992 1992 case 5:
1993 1993 addr_nop_5();
1994 1994 break;
1995 1995 case 4:
1996 1996 addr_nop_4();
1997 1997 break;
1998 1998 case 3:
1999 1999 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2000 2000 emit_byte(0x66); // size prefix
2001 2001 case 2:
2002 2002 emit_byte(0x66); // size prefix
2003 2003 case 1:
2004 2004 emit_byte(0x90); // nop
2005 2005 break;
2006 2006 default:
2007 2007 assert(i == 0, " ");
2008 2008 }
2009 2009 return;
2010 2010 }
2011 2011 if (UseAddressNop && VM_Version::is_amd()) {
2012 2012 //
2013 2013 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
2014 2014 // 1: 0x90
2015 2015 // 2: 0x66 0x90
2016 2016 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2017 2017 // 4: 0x0F 0x1F 0x40 0x00
2018 2018 // 5: 0x0F 0x1F 0x44 0x00 0x00
2019 2019 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2020 2020 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2021 2021 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2022 2022 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2023 2023 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2024 2024 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2025 2025
2026 2026 // The rest coding is AMD specific - use consecutive address nops
2027 2027
2028 2028 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2029 2029 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2030 2030 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2031 2031 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2032 2032 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2033 2033 // Size prefixes (0x66) are added for larger sizes
2034 2034
2035 2035 while(i >= 22) {
2036 2036 i -= 11;
2037 2037 emit_byte(0x66); // size prefix
2038 2038 emit_byte(0x66); // size prefix
2039 2039 emit_byte(0x66); // size prefix
2040 2040 addr_nop_8();
2041 2041 }
2042 2042 // Generate first nop for size between 21-12
2043 2043 switch (i) {
2044 2044 case 21:
2045 2045 i -= 1;
2046 2046 emit_byte(0x66); // size prefix
2047 2047 case 20:
2048 2048 case 19:
2049 2049 i -= 1;
2050 2050 emit_byte(0x66); // size prefix
2051 2051 case 18:
2052 2052 case 17:
2053 2053 i -= 1;
2054 2054 emit_byte(0x66); // size prefix
2055 2055 case 16:
2056 2056 case 15:
2057 2057 i -= 8;
2058 2058 addr_nop_8();
2059 2059 break;
2060 2060 case 14:
2061 2061 case 13:
2062 2062 i -= 7;
2063 2063 addr_nop_7();
2064 2064 break;
2065 2065 case 12:
2066 2066 i -= 6;
2067 2067 emit_byte(0x66); // size prefix
2068 2068 addr_nop_5();
2069 2069 break;
2070 2070 default:
2071 2071 assert(i < 12, " ");
2072 2072 }
2073 2073
2074 2074 // Generate second nop for size between 11-1
2075 2075 switch (i) {
2076 2076 case 11:
2077 2077 emit_byte(0x66); // size prefix
2078 2078 case 10:
2079 2079 emit_byte(0x66); // size prefix
2080 2080 case 9:
2081 2081 emit_byte(0x66); // size prefix
2082 2082 case 8:
2083 2083 addr_nop_8();
2084 2084 break;
2085 2085 case 7:
2086 2086 addr_nop_7();
2087 2087 break;
2088 2088 case 6:
2089 2089 emit_byte(0x66); // size prefix
2090 2090 case 5:
2091 2091 addr_nop_5();
2092 2092 break;
2093 2093 case 4:
2094 2094 addr_nop_4();
2095 2095 break;
2096 2096 case 3:
2097 2097 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2098 2098 emit_byte(0x66); // size prefix
2099 2099 case 2:
2100 2100 emit_byte(0x66); // size prefix
2101 2101 case 1:
2102 2102 emit_byte(0x90); // nop
2103 2103 break;
2104 2104 default:
2105 2105 assert(i == 0, " ");
2106 2106 }
2107 2107 return;
2108 2108 }
2109 2109
2110 2110 // Using nops with size prefixes "0x66 0x90".
2111 2111 // From AMD Optimization Guide:
2112 2112 // 1: 0x90
2113 2113 // 2: 0x66 0x90
2114 2114 // 3: 0x66 0x66 0x90
2115 2115 // 4: 0x66 0x66 0x66 0x90
2116 2116 // 5: 0x66 0x66 0x90 0x66 0x90
2117 2117 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2118 2118 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2119 2119 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2120 2120 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2121 2121 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2122 2122 //
2123 2123 while(i > 12) {
2124 2124 i -= 4;
2125 2125 emit_byte(0x66); // size prefix
2126 2126 emit_byte(0x66);
2127 2127 emit_byte(0x66);
2128 2128 emit_byte(0x90); // nop
2129 2129 }
2130 2130 // 1 - 12 nops
2131 2131 if(i > 8) {
2132 2132 if(i > 9) {
2133 2133 i -= 1;
2134 2134 emit_byte(0x66);
2135 2135 }
2136 2136 i -= 3;
2137 2137 emit_byte(0x66);
2138 2138 emit_byte(0x66);
2139 2139 emit_byte(0x90);
2140 2140 }
2141 2141 // 1 - 8 nops
2142 2142 if(i > 4) {
2143 2143 if(i > 6) {
2144 2144 i -= 1;
2145 2145 emit_byte(0x66);
2146 2146 }
2147 2147 i -= 3;
2148 2148 emit_byte(0x66);
2149 2149 emit_byte(0x66);
2150 2150 emit_byte(0x90);
2151 2151 }
2152 2152 switch (i) {
2153 2153 case 4:
2154 2154 emit_byte(0x66);
2155 2155 case 3:
2156 2156 emit_byte(0x66);
2157 2157 case 2:
2158 2158 emit_byte(0x66);
2159 2159 case 1:
2160 2160 emit_byte(0x90);
2161 2161 break;
2162 2162 default:
2163 2163 assert(i == 0, " ");
2164 2164 }
2165 2165 }
2166 2166
// One's-complement negate: not r32 (F7 /2).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );  // 0xD0 = ModRM 11 010 reg, i.e. the /2 (NOT) extension
}
2172 2172
// or m32, imm32 (81 /1 id).
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  // rcx encodes the /1 (OR) opcode extension; trailing 4 = immediate bytes
  // that follow the operand, for RIP-relative displacement fixup.
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}
2180 2180
2181 2181 void Assembler::orl(Register dst, int32_t imm32) {
2182 2182 prefix(dst);
2183 2183 emit_arith(0x81, 0xC8, dst, imm32);
2184 2184 }
2185 2185
2186 2186
2187 2187 void Assembler::orl(Register dst, Address src) {
2188 2188 InstructionMark im(this);
2189 2189 prefix(src, dst);
2190 2190 emit_byte(0x0B);
2191 2191 emit_operand(dst, src);
2192 2192 }
2193 2193
2194 2194
2195 2195 void Assembler::orl(Register dst, Register src) {
2196 2196 (void) prefix_and_encode(dst->encoding(), src->encoding());
2197 2197 emit_arith(0x0B, 0xC0, dst, src);
2198 2198 }
2199 2199
// SSE4.2 packed-compare explicit-length strings, memory form:
// pcmpestri xmm, m128, imm8 (66 0F 3A 61 /r ib); index result in ecx.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  InstructionMark im(this);
  emit_byte(0x66);   // mandatory operand-size prefix, before any REX from prefix()
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_operand(dst, src);
  emit_byte(imm8);   // comparison-mode control byte
}
2212 2212
// SSE4.2 packed-compare explicit-length strings, register form:
// pcmpestri xmm, xmm, imm8 (66 [REX.W] 0F 3A 61 /r ib); index result in ecx.
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  emit_byte(0x66);
  // NOTE(review): this form uses prefixq_and_encode (sets REX.W) while the
  // memory form above uses plain prefix(). Per the Intel SDM, REX.W widens the
  // implicit length registers from eax/edx to rax/rdx — confirm callers always
  // have clean upper bits, or whether plain prefix_and_encode was intended.
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_byte(0xC0 | encode);
  emit_byte(imm8);   // comparison-mode control byte
}
2224 2224
2225 2225 // generic
2226 2226 void Assembler::pop(Register dst) {
2227 2227 int encode = prefix_and_encode(dst->encoding());
2228 2228 emit_byte(0x58 | encode);
2229 2229 }
2230 2230
// Population count of a memory operand: popcnt r32, m32 (F3 0F B8 /r).
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);   // mandatory prefix; must precede any REX emitted by prefix()
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
2240 2240
// Population count, register form: popcnt r32, r32 (F3 0F B8 /r).
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);   // mandatory prefix; must precede any REX emitted below
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}
2249 2249
2250 2250 void Assembler::popf() {
2251 2251 emit_byte(0x9D);
2252 2252 }
2253 2253
2254 2254 #ifndef _LP64 // no 32bit push/pop on amd64
2255 2255 void Assembler::popl(Address dst) {
2256 2256 // NOTE: this will adjust stack by 8byte on 64bits
2257 2257 InstructionMark im(this);
2258 2258 prefix(dst);
2259 2259 emit_byte(0x8F);
2260 2260 emit_operand(rax, dst);
2261 2261 }
2262 2262 #endif
2263 2263
// Common prelude for the prefetch family: address-size/REX prefix plus the
// 0x0F escape; callers then emit the specific opcode and hint (ModRM reg field).
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}
2268 2268
// prefetchnta m8 (0F 18 /0) - prefetch with non-temporal hint.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src  -- rax encodes hint /0 (NTA)
}
2276 2276
2277 2277 void Assembler::prefetchr(Address src) {
2278 2278 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2279 2279 InstructionMark im(this);
2280 2280 prefetch_prefix(src);
2281 2281 emit_byte(0x0D);
2282 2282 emit_operand(rax, src); // 0, src
2283 2283 }
2284 2284
2285 2285 void Assembler::prefetcht0(Address src) {
2286 2286 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2287 2287 InstructionMark im(this);
2288 2288 prefetch_prefix(src);
2289 2289 emit_byte(0x18);
2290 2290 emit_operand(rcx, src); // 1, src
2291 2291 }
2292 2292
2293 2293 void Assembler::prefetcht1(Address src) {
2294 2294 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2295 2295 InstructionMark im(this);
2296 2296 prefetch_prefix(src);
2297 2297 emit_byte(0x18);
2298 2298 emit_operand(rdx, src); // 2, src
2299 2299 }
2300 2300
2301 2301 void Assembler::prefetcht2(Address src) {
2302 2302 NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2303 2303 InstructionMark im(this);
2304 2304 prefetch_prefix(src);
2305 2305 emit_byte(0x18);
2306 2306 emit_operand(rbx, src); // 3, src
2307 2307 }
2308 2308
2309 2309 void Assembler::prefetchw(Address src) {
2310 2310 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
2311 2311 InstructionMark im(this);
2312 2312 prefetch_prefix(src);
2313 2313 emit_byte(0x0D);
2314 2314 emit_operand(rcx, src); // 1, src
2315 2315 }
2316 2316
2317 2317 void Assembler::prefix(Prefix p) {
2318 2318 a_byte(p);
2319 2319 }
2320 2320
2321 2321 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2322 2322 assert(isByte(mode), "invalid value");
2323 2323 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2324 2324
2325 2325 emit_byte(0x66);
2326 2326 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2327 2327 emit_byte(0x0F);
2328 2328 emit_byte(0x70);
2329 2329 emit_byte(0xC0 | encode);
2330 2330 emit_byte(mode & 0xFF);
2331 2331
2332 2332 }
2333 2333
2334 2334 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2335 2335 assert(isByte(mode), "invalid value");
2336 2336 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2337 2337
2338 2338 InstructionMark im(this);
2339 2339 emit_byte(0x66);
2340 2340 prefix(src, dst);
2341 2341 emit_byte(0x0F);
2342 2342 emit_byte(0x70);
2343 2343 emit_operand(dst, src);
2344 2344 emit_byte(mode & 0xFF);
2345 2345 }
2346 2346
2347 2347 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2348 2348 assert(isByte(mode), "invalid value");
2349 2349 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2350 2350
2351 2351 emit_byte(0xF2);
2352 2352 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2353 2353 emit_byte(0x0F);
2354 2354 emit_byte(0x70);
2355 2355 emit_byte(0xC0 | encode);
2356 2356 emit_byte(mode & 0xFF);
2357 2357 }
2358 2358
2359 2359 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2360 2360 assert(isByte(mode), "invalid value");
2361 2361 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2362 2362
2363 2363 InstructionMark im(this);
2364 2364 emit_byte(0xF2);
2365 2365 prefix(src, dst); // QQ new
2366 2366 emit_byte(0x0F);
2367 2367 emit_byte(0x70);
2368 2368 emit_operand(dst, src);
2369 2369 emit_byte(mode & 0xFF);
2370 2370 }
2371 2371
2372 2372 void Assembler::psrlq(XMMRegister dst, int shift) {
2373 2373 // HMM Table D-1 says sse2 or mmx
2374 2374 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2375 2375
2376 2376 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2377 2377 emit_byte(0x66);
2378 2378 emit_byte(0x0F);
2379 2379 emit_byte(0x73);
2380 2380 emit_byte(0xC0 | encode);
2381 2381 emit_byte(shift);
2382 2382 }
2383 2383
// SSE4.1 logical-compare (sets ZF/CF): ptest xmm, m128 (66 0F 38 17 /r).
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");

  InstructionMark im(this);
  emit_byte(0x66);   // mandatory operand-size prefix, before any REX from prefix()
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_operand(dst, src);
}
2395 2395
// SSE4.1 logical-compare, register form: ptest xmm, xmm (66 0F 38 17 /r).
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");

  emit_byte(0x66);
  // NOTE(review): uses prefixq_and_encode (REX.W) whereas the memory form uses
  // plain prefix(); REX.W has no architectural effect on ptest, but confirm
  // whether plain prefix_and_encode was intended for consistency.
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_byte(0xC0 | encode);
}
2406 2406
2407 2407 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2408 2408 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2409 2409 emit_byte(0x66);
2410 2410 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2411 2411 emit_byte(0x0F);
2412 2412 emit_byte(0x60);
2413 2413 emit_byte(0xC0 | encode);
2414 2414 }
2415 2415
2416 2416 void Assembler::push(int32_t imm32) {
2417 2417 // in 64bits we push 64bits onto the stack but only
2418 2418 // take a 32bit immediate
2419 2419 emit_byte(0x68);
2420 2420 emit_long(imm32);
2421 2421 }
2422 2422
2423 2423 void Assembler::push(Register src) {
2424 2424 int encode = prefix_and_encode(src->encoding());
2425 2425
2426 2426 emit_byte(0x50 | encode);
2427 2427 }
2428 2428
2429 2429 void Assembler::pushf() {
2430 2430 emit_byte(0x9C);
2431 2431 }
2432 2432
2433 2433 #ifndef _LP64 // no 32bit push/pop on amd64
2434 2434 void Assembler::pushl(Address src) {
2435 2435 // Note this will push 64bit on 64bit
2436 2436 InstructionMark im(this);
2437 2437 prefix(src);
2438 2438 emit_byte(0xFF);
2439 2439 emit_operand(rsi, src);
2440 2440 }
2441 2441 #endif
2442 2442
2443 2443 void Assembler::pxor(XMMRegister dst, Address src) {
2444 2444 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2445 2445 InstructionMark im(this);
2446 2446 emit_byte(0x66);
2447 2447 prefix(src, dst);
2448 2448 emit_byte(0x0F);
2449 2449 emit_byte(0xEF);
2450 2450 emit_operand(dst, src);
2451 2451 }
2452 2452
2453 2453 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2454 2454 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2455 2455 InstructionMark im(this);
2456 2456 emit_byte(0x66);
2457 2457 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2458 2458 emit_byte(0x0F);
2459 2459 emit_byte(0xEF);
2460 2460 emit_byte(0xC0 | encode);
2461 2461 }
2462 2462
// Rotate through carry left: rcl r32, imm8 (D1 /2 for a count of 1, else C1 /2 ib).
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);             // shorter one-byte-count form
    emit_byte(0xD0 | encode);    // 0xD0 = ModRM 11 010 reg, the /2 (RCL) extension
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
2475 2475
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
// Encodes rep movs{d|q} (F3 [REX.W] A5); REX.W on 64-bit makes it MOVSQ.
void Assembler::rep_mov() {
  emit_byte(0xF3);   // REP prefix
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}
2484 2484
2485 2485 // sets rcx pointer sized words with rax, value at [edi]
2486 2486 // generic
2487 2487 void Assembler::rep_set() { // rep_set
2488 2488 emit_byte(0xF3);
2489 2489 // STOSQ
2490 2490 LP64_ONLY(prefix(REX_W));
2491 2491 emit_byte(0xAB);
2492 2492 }
2493 2493
// scans rcx pointer sized words at [edi] for occurance of rax,
// generic
// Encodes repne scas{d|q} (F2 [REX.W] AF); REX.W on 64-bit makes it SCASQ.
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);   // REPNE prefix
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}
2502 2502
2503 2503 #ifdef _LP64
2504 2504 // scans rcx 4 byte words at [edi] for occurance of rax,
2505 2505 // generic
2506 2506 void Assembler::repne_scanl() { // repne_scan
2507 2507 emit_byte(0xF2);
2508 2508 // SCASL
2509 2509 emit_byte(0xAF);
2510 2510 }
2511 2511 #endif
2512 2512
// Near return: C3 for no stack adjustment, C2 iw to also pop imm16 bytes of args.
void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}
2521 2521
// Store AH into flags (9E). Guarded on 64-bit: early AMD64 parts dropped
// SAHF, so this encoder must not be reached in LP64 builds.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}
2529 2529
2530 2530 void Assembler::sarl(Register dst, int imm8) {
2531 2531 int encode = prefix_and_encode(dst->encoding());
2532 2532 assert(isShiftCount(imm8), "illegal shift count");
2533 2533 if (imm8 == 1) {
2534 2534 emit_byte(0xD1);
2535 2535 emit_byte(0xF8 | encode);
2536 2536 } else {
2537 2537 emit_byte(0xC1);
2538 2538 emit_byte(0xF8 | encode);
2539 2539 emit_byte(imm8);
2540 2540 }
2541 2541 }
2542 2542
2543 2543 void Assembler::sarl(Register dst) {
2544 2544 int encode = prefix_and_encode(dst->encoding());
2545 2545 emit_byte(0xD3);
2546 2546 emit_byte(0xF8 | encode);
2547 2547 }
2548 2548
2549 2549 void Assembler::sbbl(Address dst, int32_t imm32) {
2550 2550 InstructionMark im(this);
2551 2551 prefix(dst);
2552 2552 emit_arith_operand(0x81, rbx, dst, imm32);
2553 2553 }
2554 2554
2555 2555 void Assembler::sbbl(Register dst, int32_t imm32) {
2556 2556 prefix(dst);
2557 2557 emit_arith(0x81, 0xD8, dst, imm32);
2558 2558 }
2559 2559
2560 2560
2561 2561 void Assembler::sbbl(Register dst, Address src) {
2562 2562 InstructionMark im(this);
2563 2563 prefix(src, dst);
2564 2564 emit_byte(0x1B);
2565 2565 emit_operand(dst, src);
2566 2566 }
2567 2567
2568 2568 void Assembler::sbbl(Register dst, Register src) {
2569 2569 (void) prefix_and_encode(dst->encoding(), src->encoding());
2570 2570 emit_arith(0x1B, 0xC0, dst, src);
2571 2571 }
2572 2572
// Set byte on condition: setcc r8 (0F 90+cc /r).
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  // 'true' = byte-register operand: forces a REX on 64-bit where needed so
  // spl/bpl/sil/dil encode correctly (same flag as movzbl/testb).
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);   // condition code folded into the second opcode byte
  emit_byte(0xC0 | encode);
}
2580 2580
2581 2581 void Assembler::shll(Register dst, int imm8) {
2582 2582 assert(isShiftCount(imm8), "illegal shift count");
2583 2583 int encode = prefix_and_encode(dst->encoding());
2584 2584 if (imm8 == 1 ) {
2585 2585 emit_byte(0xD1);
2586 2586 emit_byte(0xE0 | encode);
2587 2587 } else {
2588 2588 emit_byte(0xC1);
2589 2589 emit_byte(0xE0 | encode);
2590 2590 emit_byte(imm8);
2591 2591 }
2592 2592 }
2593 2593
2594 2594 void Assembler::shll(Register dst) {
2595 2595 int encode = prefix_and_encode(dst->encoding());
2596 2596 emit_byte(0xD3);
2597 2597 emit_byte(0xE0 | encode);
2598 2598 }
2599 2599
// Logical shift right: shr r32, imm8 (C1 /5 ib).
// NOTE(review): unlike sarl/shll/rcll above, this always uses the C1 ib form
// and never the one-byte-shorter D1 form for imm8 == 1. Semantically
// equivalent, but instruction length differs — confirm no caller depends on
// it before "fixing" for consistency.
void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);   // 0xE8 = ModRM 11 101 reg, the /5 (SHR) extension
  emit_byte(imm8);
}
2607 2607
2608 2608 void Assembler::shrl(Register dst) {
2609 2609 int encode = prefix_and_encode(dst->encoding());
2610 2610 emit_byte(0xD3);
2611 2611 emit_byte(0xE8 | encode);
2612 2612 }
2613 2613
2614 2614 // copies a single word from [esi] to [edi]
2615 2615 void Assembler::smovl() {
2616 2616 emit_byte(0xA5);
2617 2617 }
2618 2618
2619 2619 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2620 2620 // HMM Table D-1 says sse2
2621 2621 // NOT_LP64(assert(VM_Version::supports_sse(), ""));
2622 2622 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2623 2623 emit_byte(0xF2);
2624 2624 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2625 2625 emit_byte(0x0F);
2626 2626 emit_byte(0x51);
2627 2627 emit_byte(0xC0 | encode);
2628 2628 }
2629 2629
2630 2630 void Assembler::stmxcsr( Address dst) {
2631 2631 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2632 2632 InstructionMark im(this);
2633 2633 prefix(dst);
2634 2634 emit_byte(0x0F);
2635 2635 emit_byte(0xAE);
2636 2636 emit_operand(as_Register(3), dst);
2637 2637 }
2638 2638
// sub m32, imm32 — uses the sign-extended 8-bit immediate form (83 /5 ib)
// when the value fits, else the full form (81 /5 id).
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  if (is8bit(imm32)) {
    emit_byte(0x83);
    emit_operand(rbp, dst, 1);   // rbp encodes the /5 (SUB) extension; 1 imm byte follows
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(0x81);
    emit_operand(rbp, dst, 4);   // /5 again; 4 imm bytes follow
    emit_long(imm32);
  }
}
2652 2652
2653 2653 void Assembler::subl(Register dst, int32_t imm32) {
2654 2654 prefix(dst);
2655 2655 emit_arith(0x81, 0xE8, dst, imm32);
2656 2656 }
2657 2657
2658 2658 void Assembler::subl(Address dst, Register src) {
2659 2659 InstructionMark im(this);
2660 2660 prefix(dst, src);
2661 2661 emit_byte(0x29);
2662 2662 emit_operand(src, dst);
2663 2663 }
2664 2664
2665 2665 void Assembler::subl(Register dst, Address src) {
2666 2666 InstructionMark im(this);
2667 2667 prefix(src, dst);
2668 2668 emit_byte(0x2B);
2669 2669 emit_operand(dst, src);
2670 2670 }
2671 2671
2672 2672 void Assembler::subl(Register dst, Register src) {
2673 2673 (void) prefix_and_encode(dst->encoding(), src->encoding());
2674 2674 emit_arith(0x2B, 0xC0, dst, src);
2675 2675 }
2676 2676
2677 2677 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2678 2678 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2679 2679 emit_byte(0xF2);
2680 2680 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2681 2681 emit_byte(0x0F);
2682 2682 emit_byte(0x5C);
2683 2683 emit_byte(0xC0 | encode);
2684 2684 }
2685 2685
2686 2686 void Assembler::subsd(XMMRegister dst, Address src) {
2687 2687 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2688 2688 InstructionMark im(this);
2689 2689 emit_byte(0xF2);
2690 2690 prefix(src, dst);
2691 2691 emit_byte(0x0F);
2692 2692 emit_byte(0x5C);
2693 2693 emit_operand(dst, src);
2694 2694 }
2695 2695
2696 2696 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2697 2697 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2698 2698 emit_byte(0xF3);
2699 2699 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2700 2700 emit_byte(0x0F);
2701 2701 emit_byte(0x5C);
2702 2702 emit_byte(0xC0 | encode);
2703 2703 }
2704 2704
2705 2705 void Assembler::subss(XMMRegister dst, Address src) {
2706 2706 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2707 2707 InstructionMark im(this);
2708 2708 emit_byte(0xF3);
2709 2709 prefix(src, dst);
2710 2710 emit_byte(0x0F);
2711 2711 emit_byte(0x5C);
2712 2712 emit_operand(dst, src);
2713 2713 }
2714 2714
2715 2715 void Assembler::testb(Register dst, int imm8) {
2716 2716 NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2717 2717 (void) prefix_and_encode(dst->encoding(), true);
2718 2718 emit_arith_b(0xF6, 0xC0, dst, imm8);
2719 2719 }
2720 2720
// test r32, imm32. Uses the short eax-specific form (A9 id) when dst is eax,
// otherwise F7 /0 id.
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_byte(0xA9);             // one byte shorter: TEST EAX, imm32
  } else {
    encode = prefix_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);    // ModRM 11 000 reg: the /0 (TEST) extension
  }
  emit_long(imm32);
}
2735 2735
2736 2736 void Assembler::testl(Register dst, Register src) {
2737 2737 (void) prefix_and_encode(dst->encoding(), src->encoding());
2738 2738 emit_arith(0x85, 0xC0, dst, src);
2739 2739 }
2740 2740
2741 2741 void Assembler::testl(Register dst, Address src) {
2742 2742 InstructionMark im(this);
2743 2743 prefix(src, dst);
2744 2744 emit_byte(0x85);
2745 2745 emit_operand(dst, src);
2746 2746 }
2747 2747
// Unordered compare scalar double: ucomisd xmm, m64 (66 0F 2E /r) —
// implemented as a 66 prefix on top of the ucomiss encoding.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}
2753 2753
// Unordered compare scalar double, register form (66 0F 2E /r) —
// a 66 prefix on top of the ucomiss encoding.
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}
2759 2759
// Unordered compare scalar single: ucomiss xmm, m32 (0F 2E /r); sets EFLAGS.
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_operand(dst, src);
}
2769 2769
// Unordered compare scalar single, register form (0F 2E /r); sets EFLAGS.
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_byte(0xC0 | encode);
}
2777 2777
2778 2778
// xadd [dst], src — 0F C1 /r, exchange-and-add (typically preceded by a
// lock prefix by callers that need atomicity).
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// xchg dst, [src] — 87 /r; note xchg with memory is implicitly locked.
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// xchg dst, src — 87 /r, register-register form.
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

// xor dst, imm32 — 81 /6 id (emit_arith supplies the modrm from 0xF0).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

// xor dst, [src] — 33 /r.
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

// xor dst, src — 33 /r, register-register form.
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
2816 2816
// xorpd dst, src — bitwise xor of packed doubles; encoded as the
// 0x66-prefixed form of xorps (66 0F 57 /r).
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  xorps(dst, src);
}

// xorpd dst, [src] — 66 0F 57 /r, memory operand form.
// The 0x66 prefix must precede any REX prefix, hence it is emitted
// before prefix(src, dst).
void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}


// xorps dst, src — 0F 57 /r, bitwise xor of packed singles.
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_byte(0xC0 | encode);
}

// xorps dst, [src] — 0F 57 /r, memory operand form.
void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}
2850 2850
#ifndef _LP64
// 32bit only pieces of the assembler

// cmp src1, imm32 where the immediate carries relocation info
// (e.g. an embedded oop literal). 81 /7 id.
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding());  // 0xF8 = modrm C0 | /7 (cmp)
  emit_data(imm32, rspec, 0);
}

// cmp [src1], imm32 with relocated immediate. 81 /7 id.
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);  // rdi supplies the /7 opcode extension
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);  // rcx supplies the /1 (cmpxchg8b) opcode extension
}

// dec dst — one-byte 48+rd form (only legal in 32-bit mode; in 64-bit
// mode these bytes are REX prefixes, see the LP64 decl() below).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_byte(0x48 | dst->encoding());
}

#endif // _LP64
2886 2886
// 64bit typically doesn't use the x87 but needs to for the trig funcs
//
// x87 encodings below: register forms go through emit_farith(opcode, base, i)
// which adds the stack index i; memory forms use emit_operand32(reg, addr)
// where the dummy Register argument supplies the /digit opcode extension.

// fabs — D9 E1: |ST(0)|.
void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}

// fadd ST(0), ST(i) — D8 C0+i.
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}

// fadd m64fp — DC /0.
void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);
}

// fadd m32fp — D8 /0.
void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);
}

// fadd ST(i), ST(0) — DC C0+i ("a" = accumulate into ST(i)).
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}

// faddp ST(i), ST(0) and pop — DE C0+i.
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}

// fchs — D9 E0: negate ST(0).
void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}

// fcom ST(i) — D8 D0+i.
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}

// fcomp ST(i) — D8 D8+i: compare and pop.
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}

// fcomp m64fp — DC /3.
void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);
}

// fcomp m32fp — D8 /3.
void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);
}

// fcompp — DE D9: compare ST(0) with ST(1) and pop both.
void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}

// fcos — D9 FF.
void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}

// fdecstp — D9 F6: decrement the x87 stack top pointer.
void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}

// fdiv ST(0), ST(i) — D8 F0+i.
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}

// fdiv m64fp — DC /6.
void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);
}

// fdiv m32fp — D8 /6.
void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);
}

// fdiv ST(i), ST(0) — DC F8+i.
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}

// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

// fdivr ST(0), ST(i) — D8 F8+i (reversed divide).
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

// fdivr m64fp — DC /7.
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

// fdivr m32fp — D8 /7.
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}
3000 3000
// fdivr ST(i), ST(0) — DC F0+i.
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

// ffree ST(i) — DD C0+i: mark register as empty.
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

// fild m64int — DF /5: load 64-bit integer.
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

// fild m32int — DB /0: load 32-bit integer.
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

// fincstp — D9 F7: increment the x87 stack top pointer.
void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

// finit — 9B DB E3: wait then initialize the FPU.
void Assembler::finit() {
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

// fist m32int — DB /2: store ST(0) as 32-bit integer.
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

// fistp m64int — DF /7: store as 64-bit integer and pop.
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

// fistp m32int — DB /3: store as 32-bit integer and pop.
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

// fld1 — D9 E8: push +1.0.
void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

// fld m64fp — DD /0.
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

// fld m32fp — D9 /0.
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}


// fld ST(index) — D9 C0+index.
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}

// fld m80fp — DB /5: load 80-bit extended precision.
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

// fldcw m16 — D9 /5: load the FPU control word.
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

// fldenv m — D9 /4: load the FPU environment.
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

// fldlg2 — D9 EC: push log10(2).
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

// fldln2 — D9 ED: push ln(2).
void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

// fldz — D9 EE: push +0.0.
void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// ln(ST(0)) computed as ln(2) * log2(ST(0)) via fyl2x.
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// log10(ST(0)) computed as log10(2) * log2(ST(0)) via fyl2x.
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}
3120 3120
// fmul ST(0), ST(i) — D8 C8+i.
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

// fmul m64fp — DC /1.
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

// fmul m32fp — D8 /1.
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

// fmul ST(i), ST(0) — DC C8+i.
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

// fmulp ST(i), ST(0) and pop — DE C8+i.
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

// fnsave m — DD /6: save FPU state without a preceding wait.
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}

// Store the FPU control word. NOTE(review): despite the fnstcw name this
// emits 9B D9 /7, i.e. the waiting form (fstcw) — presumably intentional.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

// fnstsw ax — DF E0: store the FPU status word into ax.
void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

// fprem — D9 F8: partial remainder (truncating).
void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

// fprem1 — D9 F5: IEEE partial remainder (round-to-nearest).
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

// frstor m — DD /4: restore FPU state.
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

// fsin — D9 FE.
void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

// fsqrt — D9 FA.
void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

// fst m64fp — DD /2.
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

// fst m32fp — D9 /2.
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

// fstp m64fp — DD /3: store and pop.
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

// fstp ST(index) — DD D8+index.
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}

// fstp m32fp — D9 /3: store and pop.
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

// fstp m80fp — DB /7: store 80-bit extended precision and pop.
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}
3222 3222
// fsub ST(0), ST(i) — D8 E0+i.
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

// fsub m64fp — DC /4.
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

// fsub m32fp — D8 /4.
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

// fsub ST(i), ST(0) — DC E8+i.
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

// fsubr ST(0), ST(i) — D8 E8+i (reversed subtract).
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

// fsubr m64fp — DC /5.
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

// fsubr m32fp — D8 /5.
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

// fsubr ST(i), ST(0) — DC E0+i.
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// tan(ST(0)): fptan (D9 F2) pushes the result then 1.0;
// fstp st(0) (DD D8) pops the 1.0 leaving the tangent on top.
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

// ftst — D9 E4: compare ST(0) with 0.0.
void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

// fucomi ST(0), ST(i) — DB E8+i: unordered compare setting EFLAGS.
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

// fucomip ST(0), ST(i) — DF E8+i: as fucomi, then pop.
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

// fwait — 9B: wait for pending unmasked FPU exceptions.
void Assembler::fwait() {
  emit_byte(0x9B);
}

// fxch ST(i) — D9 C8+i: exchange ST(0) with ST(i).
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

// fyl2x — D9 F1: ST(1) <- ST(1) * log2(ST(0)), pop.
void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}
3307 3307
3308 3308
3309 3309 #ifndef _LP64
3310 3310
// inc dst — one-byte 40+rd form (32-bit mode only; these bytes are REX
// prefixes in 64-bit mode).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());
}

// On 32-bit, lea is just leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// mov dword ptr [dst], imm32 with relocated immediate — C7 /0 id.
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);  // rax supplies the /0 opcode extension
  emit_data((int)imm32, rspec, 0);
}

// mov dst, imm32 with relocated immediate — B8+rd id.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

// popad — 0x61: pop all general registers.
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

// push imm32 with relocated immediate — 68 id.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

// pushad — 0x60: push all general registers.
void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// setne dst8 — 0F 95: set byte register if ZF == 0.
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// shld dst, src, cl — 0F A5 /r.
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// shrd dst, src, cl — 0F AD /r.
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
3365 3365
3366 3366 #else // LP64
3367 3367
// setne dst8 — 0F 95; 64-bit form needs a REX prefix for spl/bpl/sil/dil
// and r8b-r15b, hence prefix_and_encode with byteinst=true.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}
3374 3374
3375 3375 // 64bit only pieces of the assembler
3376 3376 // This should only be used by 64bit instructions that can use rip-relative
3377 3377 // it cannot be used by instructions that want an immediate value.
3378 3378
// Decide whether an AddressLiteral target can be referenced with a
// rip-relative 32-bit displacement from code that may end up anywhere
// in the code cache; if not, callers must materialize a 64-bit literal.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.

  // Worst-case check: displacement from both ends of the code cache.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
3445 3445
// Emit a 64-bit datum, wrapping a bare relocType in a RelocationHolder.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a 64-bit datum with relocation info attached to the enclosing
// instruction (the one started by the current InstructionMark).
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words. Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}
3470 3470
// Emit any needed REX prefix for a single register operand and return its
// low-3-bit encoding. byteinst: 8-bit register operands >= 4 (spl/bpl/sil/dil)
// need a plain REX to avoid the legacy ah/ch/dh/bh meaning.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

// As above but always emits REX.W for a 64-bit operand size.
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

// Emit any needed REX prefix for a reg,reg instruction and return the
// modrm reg/rm bits (dst in the reg field, src in the rm field).
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// As above but with REX.W set for a 64-bit operand size.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
3530 3530
// REX.B if the single register operand is r8-r15.
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX bits for a memory operand with no register operand:
// B for an extended base, X for an extended index.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// Same as prefix(Address) but with REX.W always set (64-bit operand size).
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}
3566 3566
3567 3567
// REX bits for a memory operand plus a register operand: R for an extended
// reg, B/X for extended base/index. byteinst: see prefix_and_encode.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// As prefix(Address, Register) but with REX.W always set.
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// As prefix(Address, Register) but for an XMM register operand
// (no byteinst special case applies).
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
3661 3661
// adc dst, imm32 — 81 /2 id, 64-bit form.
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// adc dst, [src] — 13 /r, 64-bit form.
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
3673 3673
3674 3674 void Assembler::adcq(Register dst, Register src) {
3675 3675 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3676 3676 emit_arith(0x13, 0xC0, dst, src);
3677 3677 }
3678 3678
// add qword ptr [dst], imm32 — 81 /0 (or sign-extended 8-bit form,
// chosen by emit_arith_operand).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}

// add [dst], src — 01 /r, 64-bit form.
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

// add dst, imm32 — 81 /0 id, 64-bit form.
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// add dst, [src] — 03 /r, 64-bit form.
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

// add dst, src — 03 /r, 64-bit register-register form.
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
3708 3708
// and dst, imm32 — 81 /4 id, 64-bit form.
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// and dst, [src] — 23 /r, 64-bit form.
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
3720 3720
3721 3721 void Assembler::andq(Register dst, Register src) {
3722 3722 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3723 3723 emit_arith(0x23, 0xC0, dst, src);
3724 3724 }
3725 3725
// bsf dst, src — 0F BC /r: bit scan forward, 64-bit.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// bsr dst, src — 0F BD /r: bit scan reverse, 64-bit. With an F3 prefix
// this encoding becomes LZCNT on supporting CPUs, hence the assert.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// bswap reg — 0F C8+rd, 64-bit byte swap.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// cqo — REX.W 99: sign-extend rax into rdx:rax.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}
3751 3751
// clflush [adr] — 0F AE /7: flush cache line.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);  // rdi supplies the /7 opcode extension
}

// cmovcc dst, src — 0F 40+cc /r, 64-bit register form.
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}

// cmovcc dst, [src] — 0F 40+cc /r, 64-bit memory form.
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
3773 3773
// cmp qword ptr [dst], imm32 — 81 /7 id (always the full 32-bit
// immediate form; no 8-bit narrowing here).
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);  // rdi supplies /7; 4 = immediate bytes to follow
  emit_long(imm32);
}

// cmp dst, imm32 — 81 /7 id, 64-bit register form.
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
3786 3786
3787 3787 void Assembler::cmpq(Address dst, Register src) {
3788 3788 InstructionMark im(this);
3789 3789 prefixq(dst, src);
3790 3790 emit_byte(0x3B);
3791 3791 emit_operand(src, dst);
3792 3792 }
3793 3793
// cmp dst, src — 3B /r, 64-bit register-register form.
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

// cmp dst, [src] — 3B /r, 64-bit memory form.
void Assembler::cmpq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// cmpxchg [adr], reg — 0F B1 /r, 64-bit: compare rax with [adr],
// store reg on match (callers add the lock prefix for atomicity).
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
3813 3813
// cvtsi2sd dst, src — F2 REX.W 0F 2A /r: 64-bit int to double.
// The mandatory F2/F3 prefix must precede the REX prefix, hence it is
// emitted before prefixq_and_encode in each of these.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtsi2ss dst, src — F3 REX.W 0F 2A /r: 64-bit int to float.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvttsd2si dst, src — F2 REX.W 0F 2C /r: double to 64-bit int, truncating.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// cvttss2si dst, src — F3 REX.W 0F 2C /r: float to 64-bit int, truncating.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
3849 3849
// DEC r32 — 0xFF /1 (ModRM 0xC8|reg).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
3857 3857
// DEC r64 — REX.W + 0xFF /1.
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
3865 3865
// DEC r/m64 — REX.W + 0xFF /1 (rcx encodes the /1 opcode extension).
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
3873 3873
// FXRSTOR — 0x0F 0xAE /1: restore x87/MMX/SSE state from the 512-byte area
// at src (as_Register(1) supplies the /1 opcode extension).
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}
3880 3880
// FXSAVE — 0x0F 0xAE /0: save x87/MMX/SSE state to the 512-byte area at dst
// (as_Register(0) supplies the /0 opcode extension).
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}
3887 3887
// IDIV r64 — REX.W + 0xF7 /7: signed divide RDX:RAX by src.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}
3893 3893
// IMUL r64, r64 — REX.W + 0x0F 0xAF /r: dst = dst * src.
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}
3900 3900
// IMUL r64, r64, imm — dst = src * value. Uses the short 0x6B form with a
// sign-extended imm8 when the immediate fits, else 0x69 with imm32.
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
3913 3913
// INC r32 — 0xFF /0 (ModRM 0xC0|reg).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
3921 3921
// INC r64 — REX.W + 0xFF /0.
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
3929 3929
// INC r/m64 — REX.W + 0xFF /0 (rax encodes the /0 opcode extension).
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
3937 3937
// Pointer-width LEA: on this (64-bit) path it is simply leaq.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}
3941 3941
// LEA r64, m — REX.W + 0x8D /r: load effective address of src into dst.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}
3948 3948
// MOV r64, imm64 — REX.W + (0xB8+reg): load a full 64-bit immediate.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}
3955 3955
// Same encoding as mov64 but the 64-bit immediate is emitted with relocation
// info (rspec) so it can be patched later.
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}
3962 3962
// MOV r32, imm32 (0xB8+reg) carrying a compressed-oop relocation: the 32-bit
// immediate is a narrow oop, tagged narrow_oop_operand for patching.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
3969 3969
// MOV r/m32, imm32 — 0xC7 /0 — storing a relocated narrow oop to memory.
// The trailing 4 passed to emit_operand accounts for the imm32 that follows.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
3977 3977
// CMP r32, imm32 — 0x81 /7 (ModRM 0xF8|reg) — comparing a register against a
// relocated narrow oop immediate.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
3985 3985
// CMP r/m32, imm32 — 0x81 /7 (rax here would be /0; see NOTE) — comparing a
// memory operand against a relocated narrow oop immediate.
// NOTE(review): emit_operand is passed rax (/0, i.e. ADD's extension) while the
// register form above uses /7 (CMP) — presumably intentional for 0x81 with a
// memory operand only if /0 is what's wanted; verify against callers/SDM.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_byte(0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
3993 3993
// LZCNT r64, r64 — 0xF3 REX.W 0x0F 0xBD: count leading zeros. On CPUs without
// LZCNT the 0xF3 prefix is ignored and this decodes as BSR (hence the assert).
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
4002 4002
// MOVQ xmm, r64 — 0x66 REX.W 0x0F 0x6E: move a GPR into an XMM register.
// The 0x66 mandatory prefix must precede the REX prefix.
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}
4012 4012
// MOVQ r64, xmm — 0x66 REX.W 0x0F 0x7E: move an XMM register into a GPR.
// For 0x7E the xmm is the ModRM reg field, so the encode arguments are swapped.
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
4023 4023
// MOV r64, r64 — REX.W + 0x8B /r.
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}
4029 4029
// MOV r64, r/m64 — REX.W + 0x8B /r: 64-bit load.
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
4036 4036
// MOV r/m64, r64 — REX.W + 0x89 /r: 64-bit store.
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
4043 4043
// MOVSX r64, r/m8 — REX.W + 0x0F 0xBE: sign-extend byte load.
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
4051 4051
// MOVSX r64, r8 — REX.W + 0x0F 0xBE: sign-extend byte register.
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
4058 4058
// Intended: load sign-extended imm32 into r64. Permanently disabled by the
// ShouldNotReachHere() below because the observed encoding was wrong (see the
// dbx evidence in the original comments).
// NOTE(review): emit_byte(0xC7 | encode) ORs the register number into the
// opcode byte itself; the REX.W C7 /0 form presumably needs a separate ModRM
// byte (0xC0 | encode) after 0xC7 — verify against the SDM before enabling.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);
  emit_long(imm32);
}
4069 4069
// MOV r/m64, imm32 — REX.W + 0xC7 /0: store a sign-extended 32-bit immediate
// to a 64-bit memory slot. The imm32 must already fit (asserted).
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}
4078 4078
// MOVSXD r64, r/m32 — REX.W + 0x63 /r: sign-extend 32-bit memory operand.
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}
4085 4085
// MOVSXD r64, r32 — REX.W + 0x63 /r: sign-extend 32-bit register.
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}
4091 4091
// MOVSX r64, r/m16 — REX.W + 0x0F 0xBF: sign-extend word load.
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
4099 4099
// MOVSX r64, r16 — REX.W + 0x0F 0xBF: sign-extend word register.
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
4106 4106
// MOVZX r64, r/m8 — REX.W + 0x0F 0xB6: zero-extend byte load.
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
4114 4114
// MOVZX r64, r8 — REX.W + 0x0F 0xB6: zero-extend byte register.
void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}
4121 4121
// MOVZX r64, r/m16 — REX.W + 0x0F 0xB7: zero-extend word load.
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}
4129 4129
// MOVZX r64, r16 — REX.W + 0x0F 0xB7: zero-extend word register.
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
4136 4136
// NEG r64 — REX.W + 0xF7 /3: two's-complement negate.
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}
4142 4142
// NOT r64 — REX.W + 0xF7 /2: bitwise complement.
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}
4148 4148
// OR r/m64, imm32 — REX.W + 0x81 /1 (rcx supplies the /1 extension).
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}
4156 4156
// OR r64, imm — REX.W prefix, then the shared arith encoder (0x81 /1).
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}
4161 4161
// OR r64, r/m64 — REX.W + 0x0B /r.
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}
4168 4168
// OR r64, r64 — REX.W + 0x0B /r via the shared arith encoder.
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
4173 4173
4174 4174 void Assembler::popa() { // 64bit
4175 4175 movq(r15, Address(rsp, 0));
4176 4176 movq(r14, Address(rsp, wordSize));
4177 4177 movq(r13, Address(rsp, 2 * wordSize));
4178 4178 movq(r12, Address(rsp, 3 * wordSize));
4179 4179 movq(r11, Address(rsp, 4 * wordSize));
4180 4180 movq(r10, Address(rsp, 5 * wordSize));
4181 4181 movq(r9, Address(rsp, 6 * wordSize));
4182 4182 movq(r8, Address(rsp, 7 * wordSize));
4183 4183 movq(rdi, Address(rsp, 8 * wordSize));
4184 4184 movq(rsi, Address(rsp, 9 * wordSize));
4185 4185 movq(rbp, Address(rsp, 10 * wordSize));
4186 4186 // skip rsp
4187 4187 movq(rbx, Address(rsp, 12 * wordSize));
4188 4188 movq(rdx, Address(rsp, 13 * wordSize));
4189 4189 movq(rcx, Address(rsp, 14 * wordSize));
4190 4190 movq(rax, Address(rsp, 15 * wordSize));
4191 4191
4192 4192 addq(rsp, 16 * wordSize);
4193 4193 }
4194 4194
// POPCNT r64, r/m64 — 0xF3 REX.W 0x0F 0xB8: population count of a memory
// operand. The 0xF3 mandatory prefix must precede the REX prefix.
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
4204 4204
// POPCNT r64, r64 — 0xF3 REX.W 0x0F 0xB8.
void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}
4213 4213
// POP r/m64 — 0x8F /0 (rax supplies the /0 extension).
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
4220 4220
4221 4221 void Assembler::pusha() { // 64bit
4222 4222 // we have to store original rsp. ABI says that 128 bytes
4223 4223 // below rsp are local scratch.
4224 4224 movq(Address(rsp, -5 * wordSize), rsp);
4225 4225
4226 4226 subq(rsp, 16 * wordSize);
4227 4227
4228 4228 movq(Address(rsp, 15 * wordSize), rax);
4229 4229 movq(Address(rsp, 14 * wordSize), rcx);
4230 4230 movq(Address(rsp, 13 * wordSize), rdx);
4231 4231 movq(Address(rsp, 12 * wordSize), rbx);
4232 4232 // skip rsp
4233 4233 movq(Address(rsp, 10 * wordSize), rbp);
4234 4234 movq(Address(rsp, 9 * wordSize), rsi);
4235 4235 movq(Address(rsp, 8 * wordSize), rdi);
4236 4236 movq(Address(rsp, 7 * wordSize), r8);
4237 4237 movq(Address(rsp, 6 * wordSize), r9);
4238 4238 movq(Address(rsp, 5 * wordSize), r10);
4239 4239 movq(Address(rsp, 4 * wordSize), r11);
4240 4240 movq(Address(rsp, 3 * wordSize), r12);
4241 4241 movq(Address(rsp, 2 * wordSize), r13);
4242 4242 movq(Address(rsp, wordSize), r14);
4243 4243 movq(Address(rsp, 0), r15);
4244 4244 }
4245 4245
// PUSH r/m64 — 0xFF /6 (rsi, encoding 6, supplies the /6 extension).
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
4252 4252
// RCL r64, imm8 — REX.W + 0xD1 /2 (shift-by-1 short form) or 0xC1 /2 imm8.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
// SAR r64, imm8 — REX.W + 0xD1 /7 (shift-by-1 short form) or 0xC1 /7 imm8.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}
4277 4277
// SAR r64, cl — REX.W + 0xD3 /7: arithmetic shift right by CL.
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
// SBB r/m64, imm — REX.W prefix, then shared arith-operand encoder
// (0x81/0x83 with /3 supplied by rbx).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
4288 4288
// SBB r64, imm — REX.W prefix, then the shared arith encoder (0x81 /3).
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}
4293 4293
// SBB r64, r/m64 — REX.W + 0x1B /r: subtract with borrow from memory.
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}
4300 4300
// SBB r64, r64 — REX.W + 0x1B /r via the shared arith encoder.
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
4305 4305
// SHL r64, imm8 — REX.W + 0xD1 /4 (shift-by-1 short form) or 0xC1 /4 imm8.
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}
4318 4318
// SHL r64, cl — REX.W + 0xD3 /4.
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
4324 4324
// SHR r64, imm8 — REX.W + 0xC1 /5 imm8.
// Unlike the sarq/shlq/rclq siblings this always uses the 0xC1 form — it does
// not take the one-byte-shorter 0xD1 path for imm8 == 1 (functionally fine,
// just one byte larger in that case).
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}
4332 4332
// SHR r64, cl — REX.W + 0xD3 /5.
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
4338 4338
// SQRTSD xmm, m64 — 0xF2 0x0F 0x51: scalar double square root from memory.
// No REX.W needed: the operand size is implied by the SSE2 encoding.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}
4348 4348
// SUB r/m64, imm — REX.W + 0x83 /5 (sign-extended imm8) or 0x81 /5 imm32.
// rbp (encoding 5) supplies the /5 opcode extension; the trailing 1 or 4
// passed to emit_operand is the immediate size that follows.
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  if (is8bit(imm32)) {
    emit_byte(0x83);
    emit_operand(rbp, dst, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(0x81);
    emit_operand(rbp, dst, 4);
    emit_long(imm32);
  }
}
4362 4362
// SUB r64, imm — REX.W prefix, then the shared arith encoder (0x81 /5).
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}
4367 4367
// SUB r/m64, r64 — REX.W + 0x29 /r: subtract register from memory.
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}
4374 4374
// SUB r64, r/m64 — REX.W + 0x2B /r: subtract memory from register.
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}
4381 4381
// SUB r64, r64 — REX.W + 0x2B /r via the shared arith encoder.
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
4386 4386
// TEST r64, imm32 — REX.W + 0xA9 (short rax-only form) or 0xF7 /0.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // rax gets the dedicated accumulator form: REX.W A9 imm32
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}
4402 4402
// TEST r64, r64 — REX.W + 0x85 /r via the shared arith encoder.
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
4407 4407
// XADD r/m64, r64 — REX.W + 0x0F 0xC1 /r: exchange-and-add (caller adds any
// LOCK prefix).
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}
4415 4415
// XCHG r64, r/m64 — REX.W + 0x87 /r (implicitly locked when memory is involved).
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}
4422 4422
// XCHG r64, r64 — REX.W + 0x87 /r.
void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}
4428 4428
// XOR r64, r64 — REX.W + 0x33 /r via the shared arith encoder.
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
4433 4433
// XOR r64, r/m64 — REX.W + 0x33 /r.
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
4440 4440
4441 4441 #endif // !LP64
4442 4442
// Negation table for condition codes: reverse[cc] is the condition that is
// true exactly when cc is false. Indexed by the Assembler::Condition value
// noted in each entry's comment (0x0 .. 0xf).
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};
4462 4462
4463 4463
4464 4464 // Implementation of MacroAssembler
4465 4465
4466 4466 // First all the versions that have distinct versions depending on 32/64 bit
4467 4467 // Unless the difference is trivial (1 line or so).
4468 4468
4469 4469 #ifndef _LP64
4470 4470
4471 4471 // 32bit versions
4472 4472
// 32-bit: an AddressLiteral's target is directly addressable, so just wrap
// it (with its reloc spec) in an Address.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}
4476 4476
// 32-bit: build an Address for an array element from an ArrayAddress.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
4480 4480
// 32-bit biased-locking fast path. Tries to take (or confirm) a bias on
// obj_reg's header for the current thread, falling through to cas_label (the
// normal CAS lock) when biasing does not apply. Jumps to 'done' on success,
// to *slow_case (if given) when the runtime must revoke/rebias.
//
// lock_reg  - holds the box/lock address; doubles as scratch space
//             (saved_mark_addr) and as tmp_reg when tmp_reg == noreg.
// swap_reg  - must be rax (cmpxchg implicit operand); on entry may already
//             hold the mark word (swap_reg_contains_mark).
// tmp_reg   - scratch, or noreg (then lock_reg is pushed/popped around uses).
// counters  - optional statistics counters, bumped via cond_inc32.
// Returns the code offset of the first instruction that can fault on a null
// obj_reg (for implicit null-check bookkeeping).
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // If no tmp_reg was supplied, reuse lock_reg and spill it around each use.
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  // Check the low lock bits for the biased-lock pattern.
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  // XOR the mark with (thread | prototype header); zero (ignoring age bits)
  // means the bias is ours and the epoch is current.
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  // cmpxchg: swap_reg (rax) holds the expected unbiased header.
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
// 32-bit: call a leaf runtime entry point, then pop the caller-pushed
// arguments off the stack (cdecl-style caller cleanup).
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}
4697 4697
// 32-bit: compare a memory word against an oop constant embedded as a
// patchable immediate (oop relocation).
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4701 4701
// Compare the oop in register 'src1' against the literal oop 'obj'
// (immediate form with oop relocation; see Address overload above).
void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4705 4705
4706 4706 void MacroAssembler::extend_sign(Register hi, Register lo) {
4707 4707 // According to Intel Doc. AP-526, "Integer Divide", p.18.
4708 4708 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
4709 4709 cdql();
4710 4710 } else {
4711 4711 movl(hi, lo);
4712 4712 sarl(hi, 31);
4713 4713 }
4714 4714 }
4715 4715
// Emit a single 5-byte instruction that does nothing: four ignored
// segment-override prefixes followed by nop.  Because it is ONE
// instruction, a thread executing it can never be mid-way through when
// the 5 bytes are atomically overwritten by patch_verified_entry.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90); // nop
}
4724 4724
// Jump to L if x87 condition flag C2 is set (e.g. fprem "incomplete").
// fnstsw_ax copies the FPU status word into ax; sahf moves ah into
// EFLAGS, which lands C2 in the parity flag.  rax is preserved via 'tmp'.
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}
4734 4734
// Jump to L if x87 condition flag C2 is CLEAR -- the inverse of jC2
// above (same FPU-status-word-to-EFLAGS dance, opposite branch).
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}
4744 4744
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
// Indirect jump through an array entry (base + scaled index), used for
// switch-table dispatch.
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}
4750 4750
// Note: y_lo will be destroyed
// Compare the 64-bit pair x_hi:x_lo against y_hi:y_lo and leave
// -1/0/+1 in x_hi (lcmp semantics).  High words compare signed,
// low words unsigned ('below'), as required for two's-complement
// 64-bit comparison done in 32-bit halves.
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);             // high words equal: tentatively 0
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);   // unsigned compare of low words
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);              // result := 1
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);             // result := -1

  bind(done);
}
4776 4776
// Load the literal address into 'dst' (32-bit: an absolute immediate
// move carrying the literal's relocation info).
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}
4780 4780
// Store the literal address into memory at 'dst'.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}
4786 4786
// Tear down the current frame: rsp := rbp, then pop the saved rbp
// (explicit two-instruction equivalent of the LEAVE instruction).
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}
4791 4791
// 64x64->64 multiply of two longs on the stack; result in rdx:rax.
// Clobbers rax, rbx, rcx, rdx.  Skips the two cross-product steps when
// both high words are zero (the 'quick' path).
void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset |
  //          [ y_lo ] /  (in bytes)   | x_rsp_offset
  //          [ y_hi ]                 | (in bytes)
  //            ....                   |
  //          [ x_lo ]                /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}
4831 4831
// Two's-complement negation of the 64-bit pair hi:lo, done in halves:
// negate lo, propagate the borrow into hi, then negate hi.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);      // lo := -lo; sets CF if lo was non-zero
  adcl(hi, 0);   // fold borrow into hi
  negl(hi);      // hi := -hi
}
4837 4837
// Shift the 64-bit pair hi:lo left by the count in rcx (mod 64),
// implementing Java's lshl.  Counts >= 32 first move lo into hi and
// zero lo; the residual (count mod 32) is then handled by shld/shl,
// which already interpret cl modulo 32.
void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                          // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                             // if (s < n)
  jcc(Assembler::less, L);                // else (s >= n)
  movl(hi, lo);                           // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                // s (mod n) < n
  shldl(hi, lo);                          // x := x << s
  shll(lo);
}
4857 4857
4858 4858
// Shift the 64-bit pair hi:lo right by the count in rcx (mod 64),
// arithmetic when sign_extension is true (Java lshr) and logical
// otherwise (lushr).  Same >=32 / residual decomposition as lshl.
void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                          // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                             // if (s < n)
  jcc(Assembler::less, L);                // else (s >= n)
  movl(lo, hi);                           // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                // s (mod n) < n
  shrdl(lo, hi);                          // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}
4879 4879
// Load the literal oop 'obj' into 'dst' as a relocated 32-bit immediate.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4883 4883
// Store the literal oop 'obj' to memory at 'dst' (relocated immediate).
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4887 4887
// Load from/of an AddressLiteral: an lval wants the literal's ADDRESS
// (with relocation); otherwise load the VALUE stored at that address.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}
4895 4895
// Store a pointer-sized register into an array slot.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}
4899 4899
// Load a pointer-sized value from an array slot.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}
4903 4903
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (real pointers need relocation info; this is for raw word constants).
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}
4908 4908
4909 4909
// Load a double from a literal address into an XMM register
// (32-bit: literal addresses are always directly addressable).
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  movsd(dst, as_Address(src));
}
4913 4913
// Restore the registers saved by push_callee_saved_registers below,
// in exact reverse order of the pushes.
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}
4920 4920
// Reload the x87 top-of-stack double from the machine stack and free
// the two words reserved by push_fTOS.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}
4925 4925
// Save rsi/rdi/rdx/rcx on the stack; undone by
// pop_callee_saved_registers above (reverse order).
void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}
4932 4932
// Spill the x87 top-of-stack double (popping the FPU stack) into two
// freshly reserved words on the machine stack.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}
4937 4937
4938 4938
// Push the literal oop 'obj' as a relocated 32-bit immediate.
void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}
4942 4942
4943 4943
// Push an AddressLiteral: the literal's address itself when it is an
// lval, otherwise the value stored at that address.
void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}
4951 4951
// dst := (ZF clear ? 1 : 0), widened to a full word.  The xor must not
// disturb flags before setcc reads them; xor of a register with itself
// here relies on set_byte_if_not_zero reading the pre-existing ZF.
// NOTE(review): xorl DOES set ZF -- presumably set_byte_if_not_zero is
// emitted from the caller's still-live comparison only via the byte
// set below; confirm flag liveness expectations at call sites.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}
4956 4956
// 32-bit C calling convention: all outgoing VM-call args go on the stack.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4960 4960
// Second outgoing argument (32-bit: pushed, see pass_arg0).
static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4964 4964
// Third outgoing argument (32-bit: pushed, see pass_arg0).
static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4968 4968
// Fourth outgoing argument (32-bit: pushed, see pass_arg0).
static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
4972 4972
4973 4973 #ifndef PRODUCT
4974 4974 extern "C" void findpc(intptr_t x);
4975 4975 #endif
4976 4976
4977 4977 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
4978 4978 // In order to get locks to work, we need to fake a in_VM state
4979 4979 JavaThread* thread = JavaThread::current();
4980 4980 JavaThreadState saved_state = thread->thread_state();
4981 4981 thread->set_thread_state(_thread_in_vm);
4982 4982 if (ShowMessageBoxOnError) {
4983 4983 JavaThread* thread = JavaThread::current();
4984 4984 JavaThreadState saved_state = thread->thread_state();
4985 4985 thread->set_thread_state(_thread_in_vm);
4986 4986 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
4987 4987 ttyLocker ttyl;
4988 4988 BytecodeCounter::print();
4989 4989 }
4990 4990 // To see where a verify_oop failed, get $ebx+40/X for this frame.
4991 4991 // This is the value of eip which points to where verify_oop will return.
4992 4992 if (os::message_box(msg, "Execution stopped, print registers?")) {
4993 4993 ttyLocker ttyl;
4994 4994 tty->print_cr("eip = 0x%08x", eip);
4995 4995 #ifndef PRODUCT
4996 4996 tty->cr();
4997 4997 findpc(eip);
4998 4998 tty->cr();
4999 4999 #endif
5000 5000 tty->print_cr("rax, = 0x%08x", rax);
5001 5001 tty->print_cr("rbx, = 0x%08x", rbx);
5002 5002 tty->print_cr("rcx = 0x%08x", rcx);
5003 5003 tty->print_cr("rdx = 0x%08x", rdx);
5004 5004 tty->print_cr("rdi = 0x%08x", rdi);
5005 5005 tty->print_cr("rsi = 0x%08x", rsi);
5006 5006 tty->print_cr("rbp, = 0x%08x", rbp);
5007 5007 tty->print_cr("rsp = 0x%08x", rsp);
5008 5008 BREAKPOINT;
5009 5009 }
5010 5010 } else {
5011 5011 ttyLocker ttyl;
5012 5012 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5013 5013 assert(false, "DEBUG MESSAGE");
5014 5014 }
5015 5015 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5016 5016 }
5017 5017
// Halt generated code with a message: pushes the message address and
// the current eip (via the call-to-next-instruction trick), saves all
// GP registers with pusha, then calls debug32 above -- whose signature
// mirrors this stack layout exactly -- and finally hlt()s.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}
5027 5027
// Emit a non-fatal warning: preserves the full CPU state around a C
// call to warning(msg) and continues execution (contrast stop()).
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}
5039 5039
5040 5040 #else // _LP64
5041 5041
5042 5042 // 64 bit versions
5043 5043
// Convert a reachable AddressLiteral into a RIP-relative Address
// (amd64 has no absolute 64-bit memory operands for most instructions).
Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}
5053 5053
// Materialize an array address: load the (possibly far) base into
// rscratch1 and return a base+index*scale Address over it.
// NOTE: clobbers rscratch1 as a side effect of the lea.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}
5062 5062
// Biased-locking fast path (64-bit).  Tries to take/keep/re-take a bias
// on obj_reg for the current thread (r15_thread):
//   - bias already ours and epoch valid  -> jump to 'done'
//   - bias anonymously available          -> CAS our thread in
//   - epoch expired                       -> CAS to rebias toward us
//   - biasing disabled for the klass      -> CAS back to prototype mark
// A failed CAS branches to *slow_case (if supplied); objects that are
// not biasable fall through to 'cas_label' for normal CAS locking.
// Returns the code offset of the mark-word load for implicit-null-check
// bookkeeping, or -1 if swap_reg already contained the mark.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    // NOTE(review): this fast path and the anonymous-CAS path below both
    // bump anonymously_biased_lock_entry_count; presumably this one
    // should be biased_lock_entry_count -- confirm against the 32-bit
    // version and BiasedLockingCounters.
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
5221 5221
// 64-bit leaf-call helper: arguments are already in registers (see the
// 64-bit pass_arg0..3 below).  Aligns rsp to 16 bytes around the call
// as the ABI requires and, on Windows, reserves the register-argument
// shadow area.  No last_Java_frame bookkeeping -- leaf calls must not
// block or be visible to stack walking.
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for it's register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp,  frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);       // rsp was 8-misaligned: pad to 16 for the call
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);            // already 16-byte aligned
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}
5255 5255
// 64-bit compare of src1 against the value stored at a literal address;
// goes through rscratch1 when the literal is out of rip-relative range.
void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}
5266 5266
// Emit ldiv/lrem with the JVM-mandated min_long / -1 special case
// (hardware idiv would #DE-overflow on it).  Returns the pc offset of
// the idivq for implicit-exception (divide-by-zero) bookkeeping.
int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();                       // sign-extend rax into rdx:rax
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
5303 5303
5304 5304 void MacroAssembler::decrementq(Register reg, int value) {
5305 5305 if (value == min_jint) { subq(reg, value); return; }
5306 5306 if (value < 0) { incrementq(reg, -value); return; }
5307 5307 if (value == 0) { ; return; }
5308 5308 if (value == 1 && UseIncDec) { decq(reg) ; return; }
5309 5309 /* else */ { subq(reg, value) ; return; }
5310 5310 }
5311 5311
5312 5312 void MacroAssembler::decrementq(Address dst, int value) {
5313 5313 if (value == min_jint) { subq(dst, value); return; }
5314 5314 if (value < 0) { incrementq(dst, -value); return; }
5315 5315 if (value == 0) { ; return; }
5316 5316 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5317 5317 /* else */ { subq(dst, value) ; return; }
5318 5318 }
5319 5319
// Emit a 5-byte patchable nop for amd64: 66 66 90 (multi-prefix nop)
// followed by 66 90, per the AMD optimization guide's recommended
// sequences.  Safe target for atomic overwrite by patch_verified_entry.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommened sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}
5330 5330
5331 5331 void MacroAssembler::incrementq(Register reg, int value) {
5332 5332 if (value == min_jint) { addq(reg, value); return; }
5333 5333 if (value < 0) { decrementq(reg, -value); return; }
5334 5334 if (value == 0) { ; return; }
5335 5335 if (value == 1 && UseIncDec) { incq(reg) ; return; }
5336 5336 /* else */ { addq(reg, value) ; return; }
5337 5337 }
5338 5338
5339 5339 void MacroAssembler::incrementq(Address dst, int value) {
5340 5340 if (value == min_jint) { addq(dst, value); return; }
5341 5341 if (value < 0) { decrementq(dst, -value); return; }
5342 5342 if (value == 0) { ; return; }
5343 5343 if (value == 1 && UseIncDec) { incq(dst) ; return; }
5344 5344 /* else */ { addq(dst, value) ; return; }
5345 5345 }
5346 5346
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
// 64-bit: the table base must be materialized in rscratch1 first, then
// we jump indirect through base + scaled index.  Clobbers rscratch1.
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}
5356 5356
// Two-register long compare is a 32-bit-only concept; must never be
// reached on amd64 (longs live in a single register).
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}
5361 5361
// Load the literal address into 'dst' as a full 64-bit relocated immediate.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}
5365 5365
// Store the literal address to memory at 'dst', staged through
// rscratch1 (no 64-bit immediate-to-memory move exists).
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}
5370 5370
// Tear down the current frame with the one-byte LEAVE instruction
// (equivalent to mov rsp,rbp; pop rbp).
void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}
5375 5375
// Register-pair long negation is 32-bit-only; must never be reached
// on amd64.
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}
5380 5380
// Load the literal oop into 'dst' as a relocated 64-bit immediate.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}
5384 5384
// Store the literal oop to memory at 'dst', staged through rscratch1
// (no 64-bit immediate-to-memory move exists).
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}
5389 5389
// Load from/of an AddressLiteral: an lval wants the literal's ADDRESS
// (relocated 64-bit immediate); otherwise load the VALUE stored there,
// going through rscratch1 when the target is out of rip-relative range.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}
5402 5402
// Store a pointer-sized register into an array slot (clobbers rscratch1
// via as_Address).
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}
5406 5406
// Load a pointer-sized value from an array slot (clobbers rscratch1
// via as_Address).
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}
5410 5410
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (real pointers need relocation info).  Stages the 64-bit constant
// through rscratch1 since there is no imm64-to-memory move.
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}
5416 5416
// These are mostly for initializing NULL
// (a sign-extended 32-bit immediate store covers the common constants).
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}
5421 5421
// Load a small constant into a pointer-sized register (sign-extended).
void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}
5425 5425
// Push the literal oop, staged through rscratch1 (no relocatable
// 64-bit push-immediate exists).
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}
5430 5430
// Push an AddressLiteral: the address itself when it is an lval,
// otherwise the value stored at that address.  Clobbers rscratch1.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}
5439 5439
// Clear the current thread's last_Java_frame anchor after returning
// from a VM call: sp is always zeroed; fp and pc are cleared on request.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}
5454 5454
// Record the current thread's last_Java_frame anchor before calling
// into the VM so the stack can be walked.  fp and pc are optional;
// sp defaults to the live rsp.  sp is stored LAST: stack walkers treat
// a non-null last_Java_sp as the signal that the anchor is complete.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
5479 5479
// 64-bit calling convention: first argument goes in c_rarg0; skip the
// mov when the value is already there.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}
5485 5485
// Second argument in c_rarg1 (see pass_arg0).
static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}
5491 5491
// Third argument in c_rarg2 (see pass_arg0).
static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}
5497 5497
// Fourth argument in c_rarg3 (see pass_arg0).
static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}
5503 5503
// Halt generated code with a message: saves all GP registers with
// pusha, passes msg / faulting rip / a pointer to the saved registers
// in the first three argument registers, aligns the stack per the ABI,
// and calls debug64 -- then hlt()s.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}
5514 5514
// Emit a non-fatal warning: saves rsp in r12 (callee-saved), aligns
// the stack, preserves the full CPU state around a leaf call to
// warning(msg), then restores everything and continues execution.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);
  pop(r12);
}
5528 5528
5529 5529 #ifndef PRODUCT
5530 5530 extern "C" void findpc(intptr_t x);
5531 5531 #endif
5532 5532
// Runtime entry reached from MacroAssembler::stop: optionally shows a message
// box and dumps the register array captured by pusha (regs[] is indexed in
// push order, hence the reversed indices below), then either breakpoints or
// just logs the message.
//   msg  - the stop message
//   pc   - code address of the stop site
//   regs - pointer to the 16 GP registers saved by pusha
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      // regs[] indices run opposite to push order: rax was pushed first,
      // so it sits at the highest index.
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
5581 5581
5582 5582 #endif // _LP64
5583 5583
5584 5584 // Now versions that are common to 32/64 bit
5585 5585
// Pointer-sized add: addq on 64-bit, addl on 32-bit.
void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// Pads the code stream with nops until the current offset is a
// multiple of `modulus`.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

// andpd with an AddressLiteral operand: uses the address directly when it
// is reachable from the code, otherwise materializes it in rscratch1.
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    andpd(dst, Address(rscratch1, 0));
  }
}

// Pointer-sized and: andq on 64-bit, andl on 32-bit.
void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increments the 32-bit counter at `counter_addr`.
// Flags are preserved via pushf/popf; the lock prefix is only
// emitted on multiprocessor systems.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}
5624 5624
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
//   size - total frame size to bang (in bytes); clobbered by the loop
//   tmp  - scratch register used as the moving bang pointer
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this is can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
5647 5647
// Emits the biased-locking unlock fast path: loads the object's mark word,
// masks out the biased-lock bits and jumps to `done` if the object is still
// biased (unlocking a biased object is a no-op).
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

// Normalizes a C-style boolean in `x` to exactly 0 or 1.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}
5671 5671
// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call to an AddressLiteral target: emits a direct call when the target is
// reachable, otherwise loads the target into rscratch1 and calls indirectly.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}
5689 5689
5690 5690 // Implementation of call_VM versions
5691 5691
// call_VM: call into the VM runtime at `entry_point`, passing up to three
// register arguments, optionally checking for pending exceptions afterwards
// and leaving an oop result (if any) in `oop_result`.
//
// The call(C) / jmp(E) dance below makes an intermediate call whose pushed
// return address lets call_VM_helper derive last_Java_pc from the stack
// (see the comment in call_VM_helper).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

// One-argument variant.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

// Two-argument variant. Arguments are passed in reverse order so that an
// earlier pass_arg cannot clobber a register a later one still needs; the
// asserts document that constraint.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

// Three-argument variant.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

// call_VM variants that take an explicit last_java_sp instead of deriving
// it from an intermediate call; these forward directly to call_VM_base.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5815 5815
// Common tail of all call_VM variants: sets up the last Java frame, makes
// the leaf call into the VM with the current thread as implicit first
// argument, restores the thread register, tears down the Java frame, and
// optionally checks for pending exceptions and fetches the oop result.
//   oop_result          - register to receive the VM result oop (noreg if none)
//   java_thread         - thread register, or an invalid register to have it
//                         determined here (r15 on 64-bit, loaded on 32-bit)
//   last_java_sp        - last Java sp to record, or invalid to use rsp
//   entry_point         - VM runtime entry to call
//   number_of_arguments - arguments already placed by the caller
//   check_exceptions    - whether to forward a pending exception on return
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // Debug-only sanity check that the callee-saved thread register still
    // holds the current thread after the C call.
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
5910 5910
// Computes last_Java_sp for the call_VM intermediate-call scheme and then
// delegates to call_VM_base (which will derive last_Java_pc from
// last_Java_sp[-1]).
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finsihed with it. This allows
  // use to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

// call_VM_leaf: call a C function without creating a Java frame; up to three
// register arguments are shuffled into the calling-convention registers.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

// Arguments are placed highest-first so an earlier pass_arg cannot clobber
// a register a later one still needs (asserted on 64-bit).
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
5960 5960
// Intentionally empty here; hook called from call_VM_base.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

// Intentionally empty here; hook called from call_VM_base.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// 32-bit compare of memory at an AddressLiteral against an immediate;
// falls back to rscratch1 when the literal is not reachable.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

// 32-bit compare of a register against memory at an AddressLiteral.
void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
5993 5993
// Compares two doubles and materializes the Java-style -1/0/+1 result in
// `dst`. The parity flag (set by ucomisd on a NaN operand) routes the
// unordered case to -1 or +1 according to `unordered_is_less`.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);  // unordered -> -1
    jcc(Assembler::below , L);  // opr1 < opr2 -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);  // equal -> 0
    increment(dst);             // otherwise greater -> 1
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);  // unordered -> 1
    jcc(Assembler::above , L);  // opr1 > opr2 -> 1
    movl(dst, 0);
    jcc(Assembler::equal , L);  // equal -> 0
    decrementl(dst);            // otherwise less -> -1
  }
  bind(L);
}

// Float version of cmpsd2int; identical flag logic using ucomiss.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


// 8-bit compare of memory at an AddressLiteral against an immediate.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}
6047 6047
// Pointer compare of a register against an AddressLiteral. When the literal
// is an lval the *address itself* is the comparand (so it is materialized
// into rscratch1 first); otherwise the memory it points at is compared.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

// Pointer compare of memory against the *address* of an AddressLiteral
// (lval only — a true mem-mem compare is not possible on x86).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

// lock-prefixed cmpxchg of `reg` against the word at `adr` (lock only on MP).
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

// Pointer-sized cmpxchg: cmpxchgq on 64-bit, cmpxchgl on 32-bit.
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
6095 6095
// comisd/comiss with an AddressLiteral operand; indirects through rscratch1
// when the literal is not reachable from the code.
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comiss(dst, Address(rscratch1, 0));
  }
}


// Atomically increments the counter at `counter_addr` when condition `cond`
// holds, by branching around the increment on the negated condition.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}
6122 6122
int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  //
  // The special case avoids the #DE hardware fault that idivl raises for
  // min_int / -1 (the quotient would overflow 32 bits).
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();         // sign-extend rax into rdx:rax as idivl requires
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}
6158 6158
6159 6159
6160 6160
// Decrements `reg` by `value`, picking the smallest encoding: decl for 1
// (when UseIncDec), nothing for 0, incrementl for negative values, subl
// otherwise. min_jint is special-cased because -min_jint overflows.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

// Memory-operand form of the above.
void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Signed division by 2^shift_value via arithmetic shift. Negative inputs
// are first biased by (2^shift - 1) so the shift rounds toward zero, as
// Java division requires.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Clears the x87 FPU register stack: one emms when MMX is available,
// otherwise ffree of each of the eight stack slots.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2
6204 6204
6205 6205
// Defines obj, preserves var_size_in_bytes
// Fast-path eden allocation via CAS on the heap top pointer. Allocates
// either `con_size_in_bytes` bytes or the variable size in
// `var_size_in_bytes` (when that register is valid), leaving the new
// object address in `obj` (must be rax for cmpxchg) and branching to
// `slow_case` when inline allocation is unavailable or fails.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

// Standard frame prologue: push old frame pointer and set up the new one.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}
6244 6244
// x87 compare of ST0 against ST(1), popping both operands.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// x87 compare of ST0 against ST(index), optionally popping the left and/or
// right operand. On CPUs with cmov, fucomi(p) sets eflags directly; on older
// CPUs the FPU status word is moved into eflags via rax (sahf), so a temp
// register is needed to preserve rax.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

// x87 compare producing a Java-style -1/0/+1 integer in `dst`, popping
// both operands.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// General form: compares ST0 with ST(index) via fcmp, then converts the
// eflags condition into -1/0/+1 in `dst`; the parity flag routes the
// unordered (NaN) case according to `unordered_is_less`.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
6309 6309
// x87 loads from AddressLiteral operands (double, float, extended, and
// control word). These assume the literal is directly addressable.
void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

// Pops the x87 stack: frees ST0 and advances the stack top pointer.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// Computes the IEEE remainder of ST0 / ST(1) by repeating fprem until the
// FPU reports the reduction complete, then removes ST1 so only the result
// remains on the stack. `tmp` preserves rax around the status-word reads.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    // Loop while the status word's C2 bit (0x400) signals an incomplete
    // partial remainder.
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}
6352 6352
6353 6353
// Increments the 32-bit value at an AddressLiteral, indirecting through
// rscratch1 when the literal is not reachable.
void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Increments `reg` by `value` with the smallest encoding: incl for 1
// (when UseIncDec), nothing for 0, decrementl for negative values, addl
// otherwise. min_jint is special-cased because -min_jint overflows.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

// Memory-operand form of the above.
void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value ==  0) {                        ; return; }
  if (value ==  1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

// Unconditional jump to an AddressLiteral: direct jmp when reachable,
// otherwise an indirect jump through rscratch1.
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}
6391 6391
// Conditional jump to an AddressLiteral. When the target is reachable the
// jcc is emitted by hand, choosing the 2-byte short form when the
// displacement fits in 8 bits (only safe for non-relocatable targets).
// When the target is out of range, the condition is reversed to branch
// around an indirect jump through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

// ldmxcsr with an AddressLiteral operand; indirects through rscratch1 when
// the literal is not reachable.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}
6429 6429
// Load the byte at 'src' sign-extended into 'dst'.  Returns the code
// offset of the load instruction itself (useful for patching).
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  // movsx is used unconditionally on 64-bit and on P6+ 32-bit CPUs;
  // older CPUs get a zero-extending load plus shift-based sign extension.
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}
6442 6442
// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.

// Load the 16-bit value at 'src' sign-extended into 'dst'.  Returns the
// code offset of the load instruction itself.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    // Pre-P6 fallback: zero-extending load, then shift the sign bit
    // back down (see the AP-526 notes on load_unsigned_byte).
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}
6462 6462
// Load the byte at 'src' zero-extended into 'dst'.  Returns the code
// offset of the load instruction itself.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    // Pre-P6: xor + partial-register byte move.  Not usable when src
    // uses dst, since the xor would clobber the address before the load.
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}
6477 6477
// Note: load_unsigned_short used to be called load_unsigned_word.

// Load the 16-bit value at 'src' zero-extended into 'dst'.  Returns the
// code offset of the load instruction itself.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    // Pre-P6: xor + partial-register 16-bit move.  Not usable when src
    // uses dst, since the xor would clobber the address before the load.
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}
6493 6493
6494 6494 void MacroAssembler::load_sized_value(Register dst, Address src,
6495 6495 int size_in_bytes, bool is_signed) {
6496 - switch (size_in_bytes ^ (is_signed ? -1 : 0)) {
6496 + switch (size_in_bytes) {
6497 6497 #ifndef _LP64
6498 6498 // For case 8, caller is responsible for manually loading
6499 6499 // the second word into another register.
6500 - case ~8: // fall through:
6501 - case 8: movl( dst, src ); break;
6500 + case 8: movl(dst, src); break;
6502 6501 #else
6503 - case ~8: // fall through:
6504 - case 8: movq( dst, src ); break;
6502 + case 8: movq(dst, src); break;
6505 6503 #endif
6506 - case ~4: // fall through:
6507 - case 4: movl( dst, src ); break;
6508 - case ~2: load_signed_short( dst, src ); break;
6509 - case 2: load_unsigned_short( dst, src ); break;
6510 - case ~1: load_signed_byte( dst, src ); break;
6511 - case 1: load_unsigned_byte( dst, src ); break;
6512 - default: ShouldNotReachHere();
6504 + case 4: movl(dst, src); break;
6505 + case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
6506 + case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
6507 + default: ShouldNotReachHere();
6513 6508 }
6514 6509 }
6515 6510
6516 6511 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6517 6512 if (reachable(dst)) {
6518 6513 movl(as_Address(dst), src);
6519 6514 } else {
6520 6515 lea(rscratch1, dst);
6521 6516 movl(Address(rscratch1, 0), src);
6522 6517 }
6523 6518 }
6524 6519
6525 6520 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6526 6521 if (reachable(src)) {
6527 6522 movl(dst, as_Address(src));
6528 6523 } else {
6529 6524 lea(rscratch1, src);
6530 6525 movl(dst, Address(rscratch1, 0));
6531 6526 }
6532 6527 }
6533 6528
6534 6529 // C++ bool manipulation
6535 6530
// Load a C++ bool from memory into 'dst', using a move sized to the
// platform's sizeof(bool) (1, 2 or 4 bytes).
void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}
6547 6542
// Store the constant 'boolconst' to a C++ bool in memory, using a move
// sized to the platform's sizeof(bool).
void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}
6559 6554
// Store the bool value in register 'src' to memory, using a move sized
// to the platform's sizeof(bool).
void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}
6571 6566
// Store the immediate byte 'src' at an array-style (base + index) address.
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}
6575 6570
6576 6571 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6577 6572 if (reachable(src)) {
6578 6573 if (UseXmmLoadAndClearUpper) {
6579 6574 movsd (dst, as_Address(src));
6580 6575 } else {
6581 6576 movlpd(dst, as_Address(src));
6582 6577 }
6583 6578 } else {
6584 6579 lea(rscratch1, src);
6585 6580 if (UseXmmLoadAndClearUpper) {
6586 6581 movsd (dst, Address(rscratch1, 0));
6587 6582 } else {
6588 6583 movlpd(dst, Address(rscratch1, 0));
6589 6584 }
6590 6585 }
6591 6586 }
6592 6587
6593 6588 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6594 6589 if (reachable(src)) {
6595 6590 movss(dst, as_Address(src));
6596 6591 } else {
6597 6592 lea(rscratch1, src);
6598 6593 movss(dst, Address(rscratch1, 0));
6599 6594 }
6600 6595 }
6601 6596
// Pointer-sized register-to-register move: movq on 64-bit, movl on 32-bit.
void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
6605 6600
// Pointer-sized load from memory: movq on 64-bit, movl on 32-bit.
void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
6609 6604
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (real pointers need relocation info, which this raw-immediate form
// does not emit).
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}
6614 6609
// Pointer-sized store to memory: movq on 64-bit, movl on 32-bit.
void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
6618 6613
6619 6614 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6620 6615 if (reachable(src)) {
6621 6616 movss(dst, as_Address(src));
6622 6617 } else {
6623 6618 lea(rscratch1, src);
6624 6619 movss(dst, Address(rscratch1, 0));
6625 6620 }
6626 6621 }
6627 6622
// Null-check 'reg'.  When needs_explicit_null_check(offset) says the
// later access at [reg + offset] would not be caught as a null-pointer
// trap, touch [reg] here so a NULL reg faults immediately; otherwise
// emit nothing and rely on the implicit OS trap at the later access.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
6642 6637
// Emit a call to os::breakpoint() rather than an inline int3.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
6648 6643
// Restore FPU state then integer state, i.e. the reverse order of
// push_CPU_state below.
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}
6653 6648
6654 6649 void MacroAssembler::pop_FPU_state() {
6655 6650 NOT_LP64(frstor(Address(rsp, 0));)
6656 6651 LP64_ONLY(fxrstor(Address(rsp, 0));)
6657 6652 addptr(rsp, FPUStateSizeInWords * wordSize);
6658 6653 }
6659 6654
// Restore integer registers and flags pushed by push_IU_state; the
// 64-bit addq undoes the alignment padding added there.
void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}
6665 6660
// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}
6672 6667
6673 6668 void MacroAssembler::push_FPU_state() {
6674 6669 subptr(rsp, FPUStateSizeInWords * wordSize);
6675 6670 #ifndef _LP64
6676 6671 fnsave(Address(rsp, 0));
6677 6672 fwait();
6678 6673 #else
6679 6674 fxsave(Address(rsp, 0));
6680 6675 #endif // LP64
6681 6676 }
6682 6677
// Save flags and all integer registers.
void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}
6690 6685
// Clear the JavaThread's last-Java-frame anchor: last_Java_sp is always
// zeroed; last_Java_fp and last_Java_pc are zeroed only when requested.
// If java_thread is not a valid register, rdi is clobbered to hold the
// current thread.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}
6707 6702
// Restore rax from where save_rax put it: popped from the stack when
// tmp == noreg, otherwise copied back from tmp (no-op if tmp == rax).
void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}
6712 6707
// Round 'reg' up to the next multiple of 'modulus' (modulus must be a
// power of two for the mask to be correct).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}
6717 6712
// Save rax: pushed onto the stack when tmp == noreg, otherwise copied
// into tmp (no-op if tmp == rax).  Paired with restore_rax above.
void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(tmp);
  else if (tmp != rax) mov(tmp, rax);
}
6722 6717
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // Derive a per-thread, int-aligned offset within the serialize page
  // from the thread pointer.
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}
6738 6733
// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  // sp is stored last so the anchor becomes "walkable" only once the
  // other fields are in place.
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
6774 6769
// Pointer-sized shift left: shlq on 64-bit, shll on 32-bit.
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}
6778 6773
// Pointer-sized logical shift right: shrq on 64-bit, shrl on 32-bit.
void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}
6782 6777
// Sign-extend the low byte of 'reg' in place.  movsx needs a
// byte-addressable register; otherwise fall back to shift left/right.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}
6791 6786
// Sign-extend the low 16 bits of 'reg' in place: movsx on 64-bit/P6+,
// shift left/right elsewhere.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}
6800 6795
6801 6796 //////////////////////////////////////////////////////////////////////////////////
6802 6797 #ifndef SERIALGC
6803 6798
// G1 SATB pre-write barrier.  If concurrent marking is active and the
// field at (obj + 0) holds a non-NULL oop, record that old value in the
// thread's SATB mark queue; when the queue is full (index == 0), call
// the SharedRuntime::g1_wb_pre stub instead.  tmp/tmp2 are scratch.
// 'tosca_live' means rax holds a live value and is preserved around the
// runtime call.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  // Thread-local SATB queue fields: active flag, fill index, buffer base.
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
#ifdef _LP64
  load_heap_oop(tmp2, Address(obj, 0));
#else
  movptr(tmp2, Address(obj, 0));
#endif
  cmpptr(tmp2, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // (index is a byte offset; 0 means the buffer is full.)

#ifdef _LP64
  movslq(tmp, index);
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);
  // Bump index down one word and store the old value at buffer + index.
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}
6878 6873
// G1 post-write barrier.  After 'new_val' has been stored at
// 'store_addr': if the store crosses heap regions, stores non-NULL, and
// the corresponding card is not already dirty, dirty the card and
// enqueue it on the thread's dirty-card queue; when the queue is full,
// call the SharedRuntime::g1_wb_post stub.  tmp/tmp2 are scratch.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?
  // (xor of the two addresses has no bits above the region-size shift
  // iff they are in the same region.)

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  // Enqueue the card on the thread's dirty-card queue, or fall back to
  // the runtime stub when the queue is full (index == 0).
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}
6972 6967
6973 6968 #endif // SERIALGC
6974 6969 //////////////////////////////////////////////////////////////////////////////////
6975 6970
6976 6971
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}
6983 6978
// Overload taking the store destination; the address is not needed by
// the card-mark itself, so this just delegates (obj is still destroyed).
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
6987 6982
6988 6983
// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: turn the oop in obj into its card-table index.
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}
6995 6990
// Part 2: mark the card indexed by obj (produced by part 1) as dirty by
// storing a zero byte at byte_map_base + card_index.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}
7027 7022
// Pointer-sized subtract of an immediate: subq on 64-bit, subl on 32-bit.
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}
7031 7026
// Pointer-sized register subtract: subq on 64-bit, subl on 32-bit.
void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}
7035 7030
7036 7031 void MacroAssembler::test32(Register src1, AddressLiteral src2) {
7037 7032 // src2 must be rval
7038 7033
7039 7034 if (reachable(src2)) {
7040 7035 testl(src1, as_Address(src2));
7041 7036 } else {
7042 7037 lea(rscratch1, src2);
7043 7038 testl(src1, Address(rscratch1, 0));
7044 7039 }
7045 7040 }
7046 7041
// C++ bool manipulation
// Test a C++ bool held in 'dst' (sets flags), sized to the platform's
// sizeof(bool).
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}
7060 7055
// Pointer-sized test: testq on 64-bit, testl on 32-bit.
void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}
7064 7059
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation from the thread-local allocation buffer:
// obj = tlab_top; new top = obj + size; jump to slow_case when the new
// top would pass tlab_end, otherwise commit the new top.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  // 32-bit must load the thread into t1; 64-bit keeps it in r15.
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary (caller may have passed the
  // same register for t2/end and var_size_in_bytes)
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
7099 7094
// Preserves rbx, and rdx.
// Refill the current thread's TLAB from eden.  If the space left in the
// TLAB exceeds the refill-waste limit, keep the TLAB and allocate the
// object directly from eden instead (jump to try_eden); otherwise fill
// the TLAB's leftover space with a dummy int-array and carve a fresh
// TLAB out of eden.  Jumps to slow_case when inline contiguous
// allocation is unavailable or eden allocation fails.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr (top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB: start/top at the eden chunk, end at
  // top + size - alignment_reserve.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}
7200 7195
// pi/4; used by trigfunc() below to test whether the argument's
// magnitude is within the fast fsin/fcos/ftan range.
static const double pi_4 = 0.7853981633974483;
7202 7197
7203 7198 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
7204 7199 // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
7205 7200 // was attempted in this code; unfortunately it appears that the
7206 7201 // switch to 80-bit precision and back causes this to be
7207 7202 // unprofitable compared with simply performing a runtime call if
7208 7203 // the argument is out of the (-pi/4, pi/4) range.
7209 7204
7210 7205 Register tmp = noreg;
7211 7206 if (!VM_Version::supports_cmov()) {
7212 7207 // fcmp needs a temporary so preserve rbx,
7213 7208 tmp = rbx;
7214 7209 push(tmp);
7215 7210 }
7216 7211
7217 7212 Label slow_case, done;
7218 7213
7219 7214 ExternalAddress pi4_adr = (address)&pi_4;
7220 7215 if (reachable(pi4_adr)) {
7221 7216 // x ?<= pi/4
7222 7217 fld_d(pi4_adr);
7223 7218 fld_s(1); // Stack: X PI/4 X
7224 7219 fabs(); // Stack: |X| PI/4 X
7225 7220 fcmp(tmp);
7226 7221 jcc(Assembler::above, slow_case);
7227 7222
7228 7223 // fastest case: -pi/4 <= x <= pi/4
7229 7224 switch(trig) {
7230 7225 case 's':
7231 7226 fsin();
7232 7227 break;
7233 7228 case 'c':
7234 7229 fcos();
7235 7230 break;
7236 7231 case 't':
7237 7232 ftan();
7238 7233 break;
7239 7234 default:
7240 7235 assert(false, "bad intrinsic");
7241 7236 break;
7242 7237 }
7243 7238 jmp(done);
7244 7239 }
7245 7240
7246 7241 // slow case: runtime call
7247 7242 bind(slow_case);
7248 7243 // Preserve registers across runtime call
7249 7244 pusha();
7250 7245 int incoming_argument_and_return_value_offset = -1;
7251 7246 if (num_fpu_regs_in_use > 1) {
7252 7247 // Must preserve all other FPU regs (could alternatively convert
7253 7248 // SharedRuntime::dsin and dcos into assembly routines known not to trash
7254 7249 // FPU state, but can not trust C compiler)
7255 7250 NEEDS_CLEANUP;
7256 7251 // NOTE that in this case we also push the incoming argument to
7257 7252 // the stack and restore it later; we also use this stack slot to
7258 7253 // hold the return value from dsin or dcos.
7259 7254 for (int i = 0; i < num_fpu_regs_in_use; i++) {
7260 7255 subptr(rsp, sizeof(jdouble));
7261 7256 fstp_d(Address(rsp, 0));
7262 7257 }
7263 7258 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
7264 7259 fld_d(Address(rsp, incoming_argument_and_return_value_offset));
7265 7260 }
7266 7261 subptr(rsp, sizeof(jdouble));
7267 7262 fstp_d(Address(rsp, 0));
7268 7263 #ifdef _LP64
7269 7264 movdbl(xmm0, Address(rsp, 0));
7270 7265 #endif // _LP64
7271 7266
7272 7267 // NOTE: we must not use call_VM_leaf here because that requires a
7273 7268 // complete interpreter frame in debug mode -- same bug as 4387334
7274 7269 // MacroAssembler::call_VM_leaf_base is perfectly safe and will
7275 7270 // do proper 64bit abi
7276 7271
7277 7272 NEEDS_CLEANUP;
7278 7273 // Need to add stack banging before this runtime call if it needs to
7279 7274 // be taken; however, there is no generic stack banging routine at
7280 7275 // the MacroAssembler level
7281 7276 switch(trig) {
7282 7277 case 's':
7283 7278 {
7284 7279 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
7285 7280 }
7286 7281 break;
7287 7282 case 'c':
7288 7283 {
7289 7284 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
7290 7285 }
7291 7286 break;
7292 7287 case 't':
7293 7288 {
7294 7289 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
7295 7290 }
7296 7291 break;
7297 7292 default:
7298 7293 assert(false, "bad intrinsic");
7299 7294 break;
7300 7295 }
7301 7296 #ifdef _LP64
7302 7297 movsd(Address(rsp, 0), xmm0);
7303 7298 fld_d(Address(rsp, 0));
7304 7299 #endif // _LP64
7305 7300 addptr(rsp, sizeof(jdouble));
7306 7301 if (num_fpu_regs_in_use > 1) {
7307 7302 // Must save return value to stack and then restore entire FPU stack
7308 7303 fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
7309 7304 for (int i = 0; i < num_fpu_regs_in_use; i++) {
7310 7305 fld_d(Address(rsp, 0));
7311 7306 addptr(rsp, sizeof(jdouble));
7312 7307 }
7313 7308 }
7314 7309 popa();
7315 7310
7316 7311 // Come here with result in F-TOS
7317 7312 bind(done);
7318 7313
7319 7314 if (tmp != noreg) {
7320 7315 pop(tmp);
7321 7316 }
7322 7317 }
7323 7318
7324 7319
7325 7320 // Look up the method for a megamorphic invokeinterface call.
7326 7321 // The target method is determined by <intf_klass, itable_index>.
7327 7322 // The receiver klass is in recv_klass.
7328 7323 // On success, the result will be in method_result, and execution falls through.
7329 7324 // On failure, execution transfers to the given label.
// Emits a peeled linear scan of the receiver's itableOffsetEntry array:
// the first iteration is unrolled ahead of the loop so the common hit
// (first entry matches) takes only forward branches.
7330 7325 void MacroAssembler::lookup_interface_method(Register recv_klass,
7331 7326 Register intf_klass,
7332 7327 RegisterOrConstant itable_index,
7333 7328 Register method_result,
7334 7329 Register scan_temp,
7335 7330 Label& L_no_such_interface) {
7336 7331 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
7337 7332 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
7338 7333 "caller must use same register for non-constant itable index as for method");
7339 7334
7340 7335 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
7341 7336 int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
7342 7337 int itentry_off = itableMethodEntry::method_offset_in_bytes();
7343 7338 int scan_step = itableOffsetEntry::size() * wordSize;
7344 7339 int vte_size = vtableEntry::size() * wordSize;
7345 7340 Address::ScaleFactor times_vte_scale = Address::times_ptr;
7346 7341 assert(vte_size == wordSize, "else adjust times_vte_scale");
7347 7342
7348 7343 movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));
7349 7344
7350 7345 // %%% Could store the aligned, prescaled offset in the klassoop.
7351 7346 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
7352 7347 if (HeapWordsPerLong > 1) {
7353 7348 // Round up to align_object_offset boundary
7354 7349 // see code for instanceKlass::start_of_itable!
7355 7350 round_to(scan_temp, BytesPerLong);
7356 7351 }
7357 7352
7358 7353 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
7359 7354 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
7360 7355 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
7361 7356
7362 7357 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
7363 7358 // if (scan->interface() == intf) {
7364 7359 // result = (klass + scan->offset() + itable_index);
7365 7360 // }
7366 7361 // }
7367 7362 Label search, found_method;
7368 7363
// peel == 1 emits the first, unrolled iteration; peel == 0 emits the loop
// body proper (with the branch sense inverted so success falls through).
7369 7364 for (int peel = 1; peel >= 0; peel--) {
7370 7365 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
7371 7366 cmpptr(intf_klass, method_result);
7372 7367
7373 7368 if (peel) {
7374 7369 jccb(Assembler::equal, found_method);
7375 7370 } else {
7376 7371 jccb(Assembler::notEqual, search);
7377 7372 // (invert the test to fall through to found_method...)
7378 7373 }
7379 7374
7380 7375 if (!peel) break;
7381 7376
7382 7377 bind(search);
7383 7378
7384 7379 // Check that the previous entry is non-null. A null entry means that
7385 7380 // the receiver class doesn't implement the interface, and wasn't the
7386 7381 // same as when the caller was compiled.
7387 7382 testptr(method_result, method_result);
7388 7383 jcc(Assembler::zero, L_no_such_interface);
7389 7384 addptr(scan_temp, scan_step);
7390 7385 }
7391 7386
7392 7387 bind(found_method);
7393 7388
7394 7389 // Got a hit.
7395 7390 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
7396 7391 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
7397 7392 }
7398 7393
7399 7394
// Convenience wrapper: full subtype check with only a success continuation.
// Emits the fast path followed immediately by the slow path; a miss in
// either falls through to L_failure, which is bound right here.
7400 7395 void MacroAssembler::check_klass_subtype(Register sub_klass,
7401 7396 Register super_klass,
7402 7397 Register temp_reg,
7403 7398 Label& L_success) {
7404 7399 Label L_failure;
7405 7400 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
7406 7401 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
7407 7402 bind(L_failure);
7408 7403 }
7409 7404
7410 7405
// Fast path of the subtype check: self-check, then the supertype display
// probe. Exactly one of L_success / L_failure / L_slow_path may be NULL,
// meaning "fall through". The branch structure at the end is chosen case
// by case so that whichever label is the fallthrough needs no jump.
7411 7406 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
7412 7407 Register super_klass,
7413 7408 Register temp_reg,
7414 7409 Label* L_success,
7415 7410 Label* L_failure,
7416 7411 Label* L_slow_path,
7417 7412 RegisterOrConstant super_check_offset) {
7418 7413 assert_different_registers(sub_klass, super_klass, temp_reg);
7419 7414 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
7420 7415 if (super_check_offset.is_register()) {
7421 7416 assert_different_registers(sub_klass, super_klass,
7422 7417 super_check_offset.as_register());
7423 7418 } else if (must_load_sco) {
7424 7419 assert(temp_reg != noreg, "supply either a temp or a register offset");
7425 7420 }
7426 7421
7427 7422 Label L_fallthrough;
7428 7423 int label_nulls = 0;
7429 7424 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
7430 7425 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
7431 7426 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
7432 7427 assert(label_nulls <= 1, "at most one NULL in the batch");
7433 7428
7434 7429 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7435 7430 Klass::secondary_super_cache_offset_in_bytes());
7436 7431 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
7437 7432 Klass::super_check_offset_offset_in_bytes());
7438 7433 Address super_check_offset_addr(super_klass, sco_offset);
7439 7434
7440 7435 // Hacked jcc, which "knows" that L_fallthrough, at least, is in
7441 7436 // range of a jccb. If this routine grows larger, reconsider at
7442 7437 // least some of these.
7443 7438 #define local_jcc(assembler_cond, label) \
7444 7439 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
7445 7440 else jcc( assembler_cond, label) /*omit semi*/
7446 7441
7447 7442 // Hacked jmp, which may only be used just before L_fallthrough.
7448 7443 #define final_jmp(label) \
7449 7444 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
7450 7445 else jmp(label) /*omit semi*/
7451 7446
7452 7447 // If the pointers are equal, we are done (e.g., String[] elements).
7453 7448 // This self-check enables sharing of secondary supertype arrays among
7454 7449 // non-primary types such as array-of-interface. Otherwise, each such
7455 7450 // type would need its own customized SSA.
7456 7451 // We move this check to the front of the fast path because many
7457 7452 // type checks are in fact trivially successful in this manner,
7458 7453 // so we get a nicely predicted branch right at the start of the check.
7459 7454 cmpptr(sub_klass, super_klass);
7460 7455 local_jcc(Assembler::equal, *L_success);
7461 7456
7462 7457 // Check the supertype display:
7463 7458 if (must_load_sco) {
7464 7459 // Positive movl does right thing on LP64.
7465 7460 movl(temp_reg, super_check_offset_addr);
7466 7461 super_check_offset = RegisterOrConstant(temp_reg);
7467 7462 }
7468 7463 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
7469 7464 cmpptr(super_klass, super_check_addr); // load displayed supertype
7470 7465
7471 7466 // This check has worked decisively for primary supers.
7472 7467 // Secondary supers are sought in the super_cache ('super_cache_addr').
7473 7468 // (Secondary supers are interfaces and very deeply nested subtypes.)
7474 7469 // This works in the same check above because of a tricky aliasing
7475 7470 // between the super_cache and the primary super display elements.
7476 7471 // (The 'super_check_addr' can address either, as the case requires.)
7477 7472 // Note that the cache is updated below if it does not help us find
7478 7473 // what we need immediately.
7479 7474 // So if it was a primary super, we can just fail immediately.
7480 7475 // Otherwise, it's the slow path for us (no success at this point).
7481 7476
// Three shapes of the final branch sequence, depending on whether the
// super-check offset is known at assembly time and whether it equals
// the secondary-super-cache offset (which forces the slow path).
7482 7477 if (super_check_offset.is_register()) {
7483 7478 local_jcc(Assembler::equal, *L_success);
7484 7479 cmpl(super_check_offset.as_register(), sc_offset);
7485 7480 if (L_failure == &L_fallthrough) {
7486 7481 local_jcc(Assembler::equal, *L_slow_path);
7487 7482 } else {
7488 7483 local_jcc(Assembler::notEqual, *L_failure);
7489 7484 final_jmp(*L_slow_path);
7490 7485 }
7491 7486 } else if (super_check_offset.as_constant() == sc_offset) {
7492 7487 // Need a slow path; fast failure is impossible.
7493 7488 if (L_slow_path == &L_fallthrough) {
7494 7489 local_jcc(Assembler::equal, *L_success);
7495 7490 } else {
7496 7491 local_jcc(Assembler::notEqual, *L_slow_path);
7497 7492 final_jmp(*L_success);
7498 7493 }
7499 7494 } else {
7500 7495 // No slow path; it's a fast decision.
7501 7496 if (L_failure == &L_fallthrough) {
7502 7497 local_jcc(Assembler::equal, *L_success);
7503 7498 } else {
7504 7499 local_jcc(Assembler::notEqual, *L_failure);
7505 7500 final_jmp(*L_success);
7506 7501 }
7507 7502 }
7508 7503
7509 7504 bind(L_fallthrough);
7510 7505
7511 7506 #undef local_jcc
7512 7507 #undef final_jmp
7513 7508 }
7514 7509
7515 7510
// Slow path of the subtype check: linear scan of sub_klass's
// secondary-supers array via repne_scan. rax/rcx/rdi are fixed operands
// of the scan instruction, so they are spilled around it unless the
// caller donated them as temps. On a hit, the result is cached in the
// secondary-super-cache so the fast path wins next time.
7516 7511 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
7517 7512 Register super_klass,
7518 7513 Register temp_reg,
7519 7514 Register temp2_reg,
7520 7515 Label* L_success,
7521 7516 Label* L_failure,
7522 7517 bool set_cond_codes) {
7523 7518 assert_different_registers(sub_klass, super_klass, temp_reg);
7524 7519 if (temp2_reg != noreg)
7525 7520 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
7526 7521 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
7527 7522
7528 7523 Label L_fallthrough;
7529 7524 int label_nulls = 0;
7530 7525 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
7531 7526 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
7532 7527 assert(label_nulls <= 1, "at most one NULL in the batch");
7533 7528
7534 7529 // a couple of useful fields in sub_klass:
7535 7530 int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
7536 7531 Klass::secondary_supers_offset_in_bytes());
7537 7532 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
7538 7533 Klass::secondary_super_cache_offset_in_bytes());
7539 7534 Address secondary_supers_addr(sub_klass, ss_offset);
7540 7535 Address super_cache_addr( sub_klass, sc_offset);
7541 7536
7542 7537 // Do a linear scan of the secondary super-klass chain.
7543 7538 // This code is rarely used, so simplicity is a virtue here.
7544 7539 // The repne_scan instruction uses fixed registers, which we must spill.
7545 7540 // Don't worry too much about pre-existing connections with the input regs.
7546 7541
7547 7542 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
7548 7543 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
7549 7544
7550 7545 // Get super_klass value into rax (even if it was in rdi or rcx).
7551 7546 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
// With compressed oops, rax must be reloaded even if super_klass == rax,
// because it will be encoded in place before the scan.
7552 7547 if (super_klass != rax || UseCompressedOops) {
7553 7548 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
7554 7549 mov(rax, super_klass);
7555 7550 }
7556 7551 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
7557 7552 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
7558 7553
7559 7554 #ifndef PRODUCT
7560 7555 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
7561 7556 ExternalAddress pst_counter_addr((address) pst_counter);
7562 7557 NOT_LP64( incrementl(pst_counter_addr) );
7563 7558 LP64_ONLY( lea(rcx, pst_counter_addr) );
7564 7559 LP64_ONLY( incrementl(Address(rcx, 0)) );
7565 7560 #endif //PRODUCT
7566 7561
7567 7562 // We will consult the secondary-super array.
7568 7563 movptr(rdi, secondary_supers_addr);
7569 7564 // Load the array length. (Positive movl does right thing on LP64.)
7570 7565 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
7571 7566 // Skip to start of data.
7572 7567 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
7573 7568
7574 7569 // Scan RCX words at [RDI] for an occurrence of RAX.
7575 7570 // Set NZ/Z based on last compare.
7576 7571 #ifdef _LP64
7577 7572 // This part is tricky, as values in supers array could be 32 or 64 bit wide
7578 7573 // and we store values in objArrays always encoded, thus we need to encode
7579 7574 // the value of rax before repne. Note that rax is dead after the repne.
7580 7575 if (UseCompressedOops) {
7581 7576 encode_heap_oop_not_null(rax);
7582 7577 // The superclass is never null; it would be a basic system error if a null
7583 7578 // pointer were to sneak in here. Note that we have already loaded the
7584 7579 // Klass::super_check_offset from the super_klass in the fast path,
7585 7580 // so if there is a null in that register, we are already in the afterlife.
7586 7581 repne_scanl();
7587 7582 } else
7588 7583 #endif // _LP64
7589 7584 repne_scan();
7590 7585
7591 7586 // Unspill the temp. registers:
7592 7587 if (pushed_rdi) pop(rdi);
7593 7588 if (pushed_rcx) pop(rcx);
7594 7589 if (pushed_rax) pop(rax);
7595 7590
7596 7591 if (set_cond_codes) {
7597 7592 // Special hack for the AD files: rdi is guaranteed non-zero.
7598 7593 assert(!pushed_rdi, "rdi must be left non-NULL");
7599 7594 // Also, the condition codes are properly set Z/NZ on succeed/failure.
7600 7595 }
7601 7596
7602 7597 if (L_failure == &L_fallthrough)
7603 7598 jccb(Assembler::notEqual, *L_failure);
7604 7599 else jcc(Assembler::notEqual, *L_failure);
7605 7600
7606 7601 // Success. Cache the super we found and proceed in triumph.
7607 7602 movptr(super_cache_addr, super_klass);
7608 7603
7609 7604 if (L_success != &L_fallthrough) {
7610 7605 jmp(*L_success);
7611 7606 }
7612 7607
7613 7608 #undef IS_A_TEMP
7614 7609
7615 7610 bind(L_fallthrough);
7616 7611 }
7617 7612
7618 7613
7619 7614 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7620 7615 ucomisd(dst, as_Address(src));
7621 7616 }
7622 7617
7623 7618 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7624 7619 ucomiss(dst, as_Address(src));
7625 7620 }
7626 7621
7627 7622 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7628 7623 if (reachable(src)) {
7629 7624 xorpd(dst, as_Address(src));
7630 7625 } else {
7631 7626 lea(rscratch1, src);
7632 7627 xorpd(dst, Address(rscratch1, 0));
7633 7628 }
7634 7629 }
7635 7630
7636 7631 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7637 7632 if (reachable(src)) {
7638 7633 xorps(dst, as_Address(src));
7639 7634 } else {
7640 7635 lea(rscratch1, src);
7641 7636 xorps(dst, Address(rscratch1, 0));
7642 7637 }
7643 7638 }
7644 7639
// Emit a call to the verify_oop stub for 'reg', tagged with message 's'.
// No-op unless -XX:+VerifyOops.
7645 7640 void MacroAssembler::verify_oop(Register reg, const char* s) {
7646 7641 if (!VerifyOops) return;
7647 7642
7648 7643 // Pass register number to verify_oop_subroutine
// NOTE: 'b' is deliberately never freed -- its address is baked into the
// generated code below and must stay valid for the code's lifetime.
7649 7644 char* b = new char[strlen(s) + 50];
7650 7645 sprintf(b, "verify_oop: %s: %s", reg->name(), s);
7651 7646 push(rax); // save rax,
7652 7647 push(reg); // pass register argument
7653 7648 ExternalAddress buffer((address) b);
7654 7649 // avoid using pushptr, as it modifies scratch registers
7655 7650 // and our contract is not to modify anything
7656 7651 movptr(rax, buffer.addr());
7657 7652 push(rax);
7658 7653 // call indirectly to solve generation ordering problem
7659 7654 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7660 7655 call(rax);
// The called stub pops the arguments and restores rax.
7661 7656 }
7662 7657
7663 7658
// Resolve a "delayed value": if *delayed_value_addr is already known,
// return it (plus offset) as a constant; otherwise emit code to load it
// indirectly at run time into 'tmp' (solving a code-generation ordering
// problem) and return the register form.
7664 7659 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
7665 7660 Register tmp,
7666 7661 int offset) {
7667 7662 intptr_t value = *delayed_value_addr;
7668 7663 if (value != 0)
7669 7664 return RegisterOrConstant(value + offset);
7670 7665
7671 7666 // load indirectly to solve generation ordering problem
7672 7667 movptr(tmp, ExternalAddress((address) delayed_value_addr));
7673 7668
7674 7669 #ifdef ASSERT
// In debug builds, trap if the delayed value is still zero at run time.
7675 7670 Label L;
7676 7671 testptr(tmp, tmp);
7677 7672 jccb(Assembler::notZero, L);
7678 7673 hlt();
7679 7674 bind(L);
7680 7675 #endif
7681 7676
7682 7677 if (offset != 0)
7683 7678 addptr(tmp, offset);
7684 7679
7685 7680 return RegisterOrConstant(tmp);
7686 7681 }
7687 7682
7688 7683
7689 7684 // registers on entry:
7690 7685 // - rax ('check' register): required MethodType
7691 7686 // - rcx: method handle
7692 7687 // - rdx, rsi, or ?: killable temp
// Compare the required MethodType against the receiver method handle's
// type field; branch to wrong_method_type on mismatch.
7693 7688 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
7694 7689 Register temp_reg,
7695 7690 Label& wrong_method_type) {
7696 7691 if (UseCompressedOops) unimplemented(); // field accesses must decode
7697 7692 // compare method type against that of the receiver
7698 7693 cmpptr(mtype_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
↓ open down ↓ |
1176 lines elided |
↑ open up ↑ |
// NOTE(review): the three lines above are webrev viewer residue, not code.
7699 7694 jcc(Assembler::notEqual, wrong_method_type);
7700 7695 }
7701 7696
7702 7697
7703 7698 // A method handle has a "vmslots" field which gives the size of its
7704 7699 // argument list in JVM stack slots. This field is either located directly
7705 7700 // in every method handle, or else is indirectly accessed through the
7706 7701 // method handle's MethodType. This macro hides the distinction.
7707 7702 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
7708 7703 Register temp_reg) {
7704 + assert_different_registers(vmslots_reg, mh_reg, temp_reg);
7709 7705 if (UseCompressedOops) unimplemented(); // field accesses must decode
7710 7706 // load mh.type.form.vmslots
7711 7707 if (java_dyn_MethodHandle::vmslots_offset_in_bytes() != 0) {
7712 7708 // hoist vmslots into every mh to avoid dependent load chain
7713 7709 movl(vmslots_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
7714 7710 } else {
// Indirect case: walk mh.type.form.vmslots, reusing vmslots_reg as the
// intermediate so no extra temp is needed.
7715 7711 Register temp2_reg = vmslots_reg;
7716 7712 movptr(temp2_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
7717 7713 movptr(temp2_reg, Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg)));
7718 7714 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_dyn_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
7719 7715 }
7720 7716 }
7721 7717
7722 7718
7723 7719 // registers on entry:
7724 7720 // - rcx: method handle
7725 7721 // - rdx: killable temp (interpreted only)
7726 7722 // - rax: killable temp (compiled only)
// Tail-jump to the interpreted entry point of the method handle in mh_reg.
7727 7723 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
7728 7724 assert(mh_reg == rcx, "caller must put MH object in rcx");
7729 7725 assert_different_registers(mh_reg, temp_reg);
7730 7726
7731 7727 if (UseCompressedOops) unimplemented(); // field accesses must decode
7732 7728
7733 7729 // pick out the interpreted side of the handler
7734 7730 movptr(temp_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
7735 7731
7736 7732 // off we go...
7737 7733 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
7738 7734
7739 7735 // for the various stubs which take control at this point,
7740 7736 // see MethodHandles::generate_method_handle_stub
7741 7737 }
7742 7738
7743 7739
// Build an Address for an interpreter argument slot on the expression
// stack. arg_slot may be a compile-time constant (folded into the
// displacement) or a register (becomes a scaled index).
7744 7740 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
7745 7741 int extra_slot_offset) {
7746 7742 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
7747 7743 int stackElementSize = Interpreter::stackElementSize();
7748 7744 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
7749 7745 #ifdef ASSERT
7750 7746 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
7751 7747 assert(offset1 - offset == stackElementSize, "correct arithmetic");
7752 7748 #endif
7753 7749 Register scale_reg = noreg;
7754 7750 Address::ScaleFactor scale_factor = Address::no_scale;
7755 7751 if (arg_slot.is_constant()) {
7756 7752 offset += arg_slot.as_constant() * stackElementSize;
7757 7753 } else {
7758 7754 scale_reg = arg_slot.as_register();
7759 7755 scale_factor = Address::times(stackElementSize);
7760 7756 }
7761 7757 offset += wordSize; // return PC is on stack
7762 7758 return Address(rsp, scale_reg, scale_factor, offset);
7763 7759 }
7764 7760
7765 7761
// Emit a call to the verify_oop stub for the oop stored at 'addr',
// tagged with message 's'. No-op unless -XX:+VerifyOops.
7766 7762 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
7767 7763 if (!VerifyOops) return;
7768 7764
7769 7765 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
7770 7766 // Pass register number to verify_oop_subroutine
// As in verify_oop(): 'b' is never freed on purpose; its address is
// embedded in the generated code and must outlive it.
7771 7767 char* b = new char[strlen(s) + 50];
7772 7768 sprintf(b, "verify_oop_addr: %s", s);
7773 7769
7774 7770 push(rax); // save rax,
7775 7771 // addr may contain rsp so we will have to adjust it based on the push
7776 7772 // we just did
7777 7773 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
7778 7774 // stores rax into addr which is backwards of what was intended.
7779 7775 if (addr.uses(rsp)) {
7780 7776 lea(rax, addr);
7781 7777 pushptr(Address(rax, BytesPerWord));
7782 7778 } else {
7783 7779 pushptr(addr);
7784 7780 }
7785 7781
7786 7782 ExternalAddress buffer((address) b);
7787 7783 // pass msg argument
7788 7784 // avoid using pushptr, as it modifies scratch registers
7789 7785 // and our contract is not to modify anything
7790 7786 movptr(rax, buffer.addr());
7791 7787 push(rax);
7792 7788
7793 7789 // call indirectly to solve generation ordering problem
7794 7790 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
7795 7791 call(rax);
7796 7792 // Caller pops the arguments and restores rax, from the stack
7797 7793 }
7798 7794
// Debug-only sanity checks on the current thread's TLAB:
// asserts start <= top <= end. Emits nothing in product builds.
7799 7795 void MacroAssembler::verify_tlab() {
7800 7796 #ifdef ASSERT
7801 7797 if (UseTLAB && VerifyOops) {
7802 7798 Label next, ok;
7803 7799 Register t1 = rsi;
// On LP64 the thread is always in r15; on 32-bit it must be fetched.
7804 7800 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
7805 7801
7806 7802 push(t1);
7807 7803 NOT_LP64(push(thread_reg));
7808 7804 NOT_LP64(get_thread(thread_reg));
7809 7805
7810 7806 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7811 7807 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7812 7808 jcc(Assembler::aboveEqual, next);
7813 7809 stop("assert(top >= start)");
7814 7810 should_not_reach_here();
7815 7811
7816 7812 bind(next);
7817 7813 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7818 7814 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7819 7815 jcc(Assembler::aboveEqual, ok);
7820 7816 stop("assert(top <= end)");
7821 7817 should_not_reach_here();
7822 7818
7823 7819 bind(ok);
7824 7820 NOT_LP64(pop(thread_reg));
7825 7821 pop(t1);
7826 7822 }
7827 7823 #endif
7828 7824 }
7829 7825
// Decoder/pretty-printer for the x87 FPU control word (exception masks,
// precision control, rounding control). Used by the FPU-state dump below.
7830 7826 class ControlWord {
7831 7827 public:
7832 7828 int32_t _value;
7833 7829
7834 7830 int rounding_control() const { return (_value >> 10) & 3 ; }
7835 7831 int precision_control() const { return (_value >> 8) & 3 ; }
7836 7832 bool precision() const { return ((_value >> 5) & 1) != 0; }
7837 7833 bool underflow() const { return ((_value >> 4) & 1) != 0; }
7838 7834 bool overflow() const { return ((_value >> 3) & 1) != 0; }
7839 7835 bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
7840 7836 bool denormalized() const { return ((_value >> 1) & 1) != 0; }
7841 7837 bool invalid() const { return ((_value >> 0) & 1) != 0; }
7842 7838
7843 7839 void print() const {
7844 7840 // rounding control
// Both switches are exhaustive over a 2-bit field, so rc/pc are always set.
7845 7841 const char* rc;
7846 7842 switch (rounding_control()) {
7847 7843 case 0: rc = "round near"; break;
7848 7844 case 1: rc = "round down"; break;
7849 7845 case 2: rc = "round up "; break;
7850 7846 case 3: rc = "chop "; break;
7851 7847 };
7852 7848 // precision control
7853 7849 const char* pc;
7854 7850 switch (precision_control()) {
7855 7851 case 0: pc = "24 bits "; break;
7856 7852 case 1: pc = "reserved"; break;
7857 7853 case 2: pc = "53 bits "; break;
7858 7854 case 3: pc = "64 bits "; break;
7859 7855 };
7860 7856 // flags
// Uppercase letter = corresponding exception mask bit is set.
7861 7857 char f[9];
7862 7858 f[0] = ' ';
7863 7859 f[1] = ' ';
7864 7860 f[2] = (precision ()) ? 'P' : 'p';
7865 7861 f[3] = (underflow ()) ? 'U' : 'u';
7866 7862 f[4] = (overflow ()) ? 'O' : 'o';
7867 7863 f[5] = (zero_divide ()) ? 'Z' : 'z';
7868 7864 f[6] = (denormalized()) ? 'D' : 'd';
7869 7865 f[7] = (invalid ()) ? 'I' : 'i';
7870 7866 f[8] = '\x0';
7871 7867 // output
7872 7868 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
7873 7869 }
7874 7870
7875 7871 };
7876 7872
// Decoder/pretty-printer for the x87 FPU status word (condition codes,
// top-of-stack pointer, exception flags).
7877 7873 class StatusWord {
7878 7874 public:
7879 7875 int32_t _value;
7880 7876
7881 7877 bool busy() const { return ((_value >> 15) & 1) != 0; }
7882 7878 bool C3() const { return ((_value >> 14) & 1) != 0; }
7883 7879 bool C2() const { return ((_value >> 10) & 1) != 0; }
7884 7880 bool C1() const { return ((_value >> 9) & 1) != 0; }
7885 7881 bool C0() const { return ((_value >> 8) & 1) != 0; }
7886 7882 int top() const { return (_value >> 11) & 7 ; }
7887 7883 bool error_status() const { return ((_value >> 7) & 1) != 0; }
7888 7884 bool stack_fault() const { return ((_value >> 6) & 1) != 0; }
7889 7885 bool precision() const { return ((_value >> 5) & 1) != 0; }
7890 7886 bool underflow() const { return ((_value >> 4) & 1) != 0; }
7891 7887 bool overflow() const { return ((_value >> 3) & 1) != 0; }
7892 7888 bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
7893 7889 bool denormalized() const { return ((_value >> 1) & 1) != 0; }
7894 7890 bool invalid() const { return ((_value >> 0) & 1) != 0; }
7895 7891
7896 7892 void print() const {
7897 7893 // condition codes
7898 7894 char c[5];
7899 7895 c[0] = (C3()) ? '3' : '-';
7900 7896 c[1] = (C2()) ? '2' : '-';
7901 7897 c[2] = (C1()) ? '1' : '-';
7902 7898 c[3] = (C0()) ? '0' : '-';
7903 7899 c[4] = '\x0';
7904 7900 // flags
7905 7901 char f[9];
7906 7902 f[0] = (error_status()) ? 'E' : '-';
7907 7903 f[1] = (stack_fault ()) ? 'S' : '-';
7908 7904 f[2] = (precision ()) ? 'P' : '-';
7909 7905 f[3] = (underflow ()) ? 'U' : '-';
7910 7906 f[4] = (overflow ()) ? 'O' : '-';
7911 7907 f[5] = (zero_divide ()) ? 'Z' : '-';
7912 7908 f[6] = (denormalized()) ? 'D' : '-';
7913 7909 f[7] = (invalid ()) ? 'I' : '-';
7914 7910 f[8] = '\x0';
7915 7911 // output
7916 7912 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
7917 7913 }
7918 7914
7919 7915 };
7920 7916
// Decoder for the x87 FPU tag word: two tag bits per physical register.
7921 7917 class TagWord {
7922 7918 public:
7923 7919 int32_t _value;
7924 7920
7925 7921 int tag_at(int i) const { return (_value >> (i*2)) & 3; }
7926 7922
7927 7923 void print() const {
7928 7924 printf("%04x", _value & 0xFFFF);
7929 7925 }
7930 7926
7931 7927 };
7932 7928
// Overlay for one 80-bit x87 register image: 64-bit mantissa in _m1:_m0,
// 16-bit sign+exponent in _ex.
7933 7929 class FPU_Register {
7934 7930 public:
7935 7931 int32_t _m0;
7936 7932 int32_t _m1;
7937 7933 int16_t _ex;
7938 7934
// True for the x87 "indefinite" QNaN bit pattern.
7939 7935 bool is_indefinite() const {
7940 7936 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
7941 7937 }
7942 7938
7943 7939 void print() const {
7944 7940 char sign = (_ex < 0) ? '-' : '+';
7945 7941 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " ";
7946 7942 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
7947 7943 };
7948 7944
7949 7945 };
7950 7946
7951 7947 class FPU_State {
7952 7948 public:
7953 7949 enum {
7954 7950 register_size = 10,
7955 7951 number_of_registers = 8,
7956 7952 register_mask = 7
7957 7953 };
7958 7954
7959 7955 ControlWord _control_word;
7960 7956 StatusWord _status_word;
7961 7957 TagWord _tag_word;
7962 7958 int32_t _error_offset;
7963 7959 int32_t _error_selector;
7964 7960 int32_t _data_offset;
7965 7961 int32_t _data_selector;
7966 7962 int8_t _register[register_size * number_of_registers];
7967 7963
7968 7964 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
7969 7965 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
7970 7966
7971 7967 const char* tag_as_string(int tag) const {
7972 7968 switch (tag) {
7973 7969 case 0: return "valid";
7974 7970 case 1: return "zero";
7975 7971 case 2: return "special";
7976 7972 case 3: return "empty";
7977 7973 }
7978 7974 ShouldNotReachHere()
7979 7975 return NULL;
7980 7976 }
7981 7977
7982 7978 void print() const {
7983 7979 // print computation registers
7984 7980 { int t = _status_word.top();
7985 7981 for (int i = 0; i < number_of_registers; i++) {
7986 7982 int j = (i - t) & register_mask;
7987 7983 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
7988 7984 st(j)->print();
7989 7985 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
7990 7986 }
7991 7987 }
7992 7988 printf("\n");
7993 7989 // print control registers
7994 7990 printf("ctrl = "); _control_word.print(); printf("\n");
7995 7991 printf("stat = "); _status_word .print(); printf("\n");
7996 7992 printf("tags = "); _tag_word .print(); printf("\n");
7997 7993 }
7998 7994
7999 7995 };
8000 7996
// Decoder/pretty-printer for the x86 EFLAGS register.
8001 7997 class Flag_Register {
8002 7998 public:
8003 7999 int32_t _value;
8004 8000
8005 8001 bool overflow() const { return ((_value >> 11) & 1) != 0; }
8006 8002 bool direction() const { return ((_value >> 10) & 1) != 0; }
8007 8003 bool sign() const { return ((_value >> 7) & 1) != 0; }
8008 8004 bool zero() const { return ((_value >> 6) & 1) != 0; }
8009 8005 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; }
8010 8006 bool parity() const { return ((_value >> 2) & 1) != 0; }
8011 8007 bool carry() const { return ((_value >> 0) & 1) != 0; }
8012 8008
8013 8009 void print() const {
8014 8010 // flags
8015 8011 char f[8];
8016 8012 f[0] = (overflow ()) ? 'O' : '-';
8017 8013 f[1] = (direction ()) ? 'D' : '-';
8018 8014 f[2] = (sign ()) ? 'S' : '-';
8019 8015 f[3] = (zero ()) ? 'Z' : '-';
8020 8016 f[4] = (auxiliary_carry()) ? 'A' : '-';
8021 8017 f[5] = (parity ()) ? 'P' : '-';
8022 8018 f[6] = (carry ()) ? 'C' : '-';
8023 8019 f[7] = '\x0';
8024 8020 // output
8025 8021 printf("%08x flags = %s", _value, f);
8026 8022 }
8027 8023
8028 8024 };
8029 8025
// One saved 32-bit integer-unit (general-purpose) register.
class IU_Register {
 public:
  int32_t _value;

  // Print the register as zero-padded hex followed by signed decimal.
  void print() const {
    const int32_t v = _value;
    printf("%08x %11d", v, v);
  }

};
8039 8035
8040 8036 class IU_State {
8041 8037 public:
8042 8038 Flag_Register _eflags;
8043 8039 IU_Register _rdi;
8044 8040 IU_Register _rsi;
8045 8041 IU_Register _rbp;
8046 8042 IU_Register _rsp;
8047 8043 IU_Register _rbx;
8048 8044 IU_Register _rdx;
8049 8045 IU_Register _rcx;
8050 8046 IU_Register _rax;
8051 8047
8052 8048 void print() const {
8053 8049 // computation registers
8054 8050 printf("rax, = "); _rax.print(); printf("\n");
8055 8051 printf("rbx, = "); _rbx.print(); printf("\n");
8056 8052 printf("rcx = "); _rcx.print(); printf("\n");
8057 8053 printf("rdx = "); _rdx.print(); printf("\n");
8058 8054 printf("rdi = "); _rdi.print(); printf("\n");
8059 8055 printf("rsi = "); _rsi.print(); printf("\n");
8060 8056 printf("rbp, = "); _rbp.print(); printf("\n");
8061 8057 printf("rsp = "); _rsp.print(); printf("\n");
8062 8058 printf("\n");
8063 8059 // control registers
8064 8060 printf("flgs = "); _eflags.print(); printf("\n");
8065 8061 }
8066 8062 };
8067 8063
8068 8064
8069 8065 class CPU_State {
8070 8066 public:
8071 8067 FPU_State _fpu_state;
8072 8068 IU_State _iu_state;
8073 8069
8074 8070 void print() const {
8075 8071 printf("--------------------------------------------------\n");
8076 8072 _iu_state .print();
8077 8073 printf("\n");
8078 8074 _fpu_state.print();
8079 8075 printf("--------------------------------------------------\n");
8080 8076 }
8081 8077
8082 8078 };
8083 8079
8084 8080
8085 8081 static void _print_CPU_state(CPU_State* state) {
8086 8082 state->print();
8087 8083 };
8088 8084
8089 8085
// Emit code that dumps the full CPU state at runtime (debugging aid).
// Saves all registers, calls _print_CPU_state with a pointer to the saved
// area, then restores everything so the surrounding code is unaffected.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state (pointer to the saved registers)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}
8097 8093
8098 8094
8099 8095 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
8100 8096 static int counter = 0;
8101 8097 FPU_State* fs = &state->_fpu_state;
8102 8098 counter++;
8103 8099 // For leaf calls, only verify that the top few elements remain empty.
8104 8100 // We only need 1 empty at the top for C2 code.
8105 8101 if( stack_depth < 0 ) {
8106 8102 if( fs->tag_for_st(7) != 3 ) {
8107 8103 printf("FPR7 not empty\n");
8108 8104 state->print();
8109 8105 assert(false, "error");
8110 8106 return false;
8111 8107 }
8112 8108 return true; // All other stack states do not matter
8113 8109 }
8114 8110
8115 8111 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
8116 8112 "bad FPU control word");
8117 8113
8118 8114 // compute stack depth
8119 8115 int i = 0;
8120 8116 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
8121 8117 int d = i;
8122 8118 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
8123 8119 // verify findings
8124 8120 if (i != FPU_State::number_of_registers) {
8125 8121 // stack not contiguous
8126 8122 printf("%s: stack not contiguous at ST%d\n", s, i);
8127 8123 state->print();
8128 8124 assert(false, "error");
8129 8125 return false;
8130 8126 }
8131 8127 // check if computed stack depth corresponds to expected stack depth
8132 8128 if (stack_depth < 0) {
8133 8129 // expected stack depth is -stack_depth or less
8134 8130 if (d > -stack_depth) {
8135 8131 // too many elements on the stack
8136 8132 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
8137 8133 state->print();
8138 8134 assert(false, "error");
8139 8135 return false;
8140 8136 }
8141 8137 } else {
8142 8138 // expected stack depth is stack_depth
8143 8139 if (d != stack_depth) {
8144 8140 // wrong stack depth
8145 8141 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
8146 8142 state->print();
8147 8143 assert(false, "error");
8148 8144 return false;
8149 8145 }
8150 8146 }
8151 8147 // everything is cool
8152 8148 return true;
8153 8149 }
8154 8150
8155 8151
// Emit code that verifies the FPU stack depth at runtime via _verify_FPU
// (above). No code is emitted unless the VerifyFPU flag is set. On failure
// the generated code executes int3 to break into the debugger.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp); // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth); // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize); // discard arguments
  // check for error: _verify_FPU returns false (0) in rax on failure
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3(); // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
8175 8171
// Load the klass pointer of the object 'src' into 'dst'. With compressed
// oops (64-bit only) the klass field is a 32-bit narrow oop that must be
// decoded; it is never NULL, so the not_null decode is safe.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
8185 8181
// Load the prototype header (mark word template) of src's klass into dst
// (presumably used by biased-locking code paths — confirm with callers).
// With compressed oops, the narrow-klass decode is folded into the load's
// addressing mode instead of going through decode_heap_oop_not_null.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
      // One instruction: heapbase + (narrow_klass << 3) + field offset.
      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    } else {
      // Zero-based, zero-shift compressed oops: narrow value is the address.
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}
8205 8201
// Store klass pointer 'src' into the object 'dst'. With compressed oops the
// value is encoded first. NOTE: encode_heap_oop_not_null modifies 'src' in
// place, so 'src' is clobbered in the compressed case.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
8215 8211
8216 8212 #ifdef _LP64
// Store 'src' into the 32-bit klass-gap slot of the object header at 'dst'.
// The gap only exists when compressed oops are enabled (the klass field
// then occupies just 4 of the 8 header bytes); otherwise nothing is emitted.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}
8223 8219
// Load an oop from memory into 'dst'. With compressed oops: 32-bit load of
// the narrow oop followed by a NULL-safe decode; otherwise a plain 64-bit
// load.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else {
    movq(dst, src);
  }
}
8232 8228
// Store oop 'src' to memory at 'dst'. With compressed oops the value is
// encoded in place first — so 'src' is clobbered, and 'src' must not be a
// register used in computing the 'dst' address (asserted below).
void MacroAssembler::store_heap_oop(Address dst, Register src) {
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else {
    movq(dst, src);
  }
}
8242 8238
// Used for storing NULLs: writes a NULL oop to 'dst' without needing a
// scratch register. Narrow NULL is a 32-bit zero; wide NULL is stored via
// a sign-extended 32-bit immediate (movslq).
void MacroAssembler::store_heap_oop_null(Address dst) {
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
}
8251 8247
// Compress the oop in 'r' in place (NULL-safe).
// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: encoding is just an (optional) right shift.
    verify_oop(r, "broken oop in encode_heap_oop");
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Sanity-check that r12 still holds the current heap base.
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  // NULL-safe encode: substitute the heap base for NULL so the subtraction
  // below yields 0, the narrow encoding of NULL.
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
8281 8277
// Compress the (known non-NULL) oop in 'r' in place. Because NULL is
// excluded, the cmov dance of encode_heap_oop is unnecessary — just
// subtract the base and shift. NULL-ness is checked only under ASSERT.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}
8303 8299
// Two-register variant: compress the (known non-NULL) oop in 'src' into
// 'dst', leaving 'src' unchanged when the registers differ. Copies first,
// then applies the same base-subtract/shift as the one-register form.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}
8328 8324
// Decompress the narrow oop in 'r' in place (NULL-safe: narrow NULL
// decodes to NULL, not to the heap base).
void MacroAssembler::decode_heap_oop(Register r) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based: decoding is just the (optional) left shift; NULL is
    // preserved automatically since 0 << n == 0.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
    verify_oop(r, "broken oop in decode_heap_oop");
    return;
  }
#ifdef ASSERT
  if (CheckCompressedOops) {
    // Sanity-check that r12 still holds the current heap base.
    Label ok;
    push(rscratch1);
    cmpptr(r12_heapbase,
           ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
    bind(ok);
    pop(rscratch1);
  }
#endif

  Label done;
  // The jccb consumes ZF as set by shlq: if the narrow oop was NULL the
  // shift result is zero and we skip the base add, so NULL stays NULL.
  shlq(r, LogMinObjAlignmentInBytes);
  jccb(Assembler::equal, done);
  addq(r, r12_heapbase);
#if 0
  // alternate decoding probably a wash.
  testq(r, r);
  jccb(Assembler::equal, done);
  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
#endif
  bind(done);
  verify_oop(r, "broken oop in decode_heap_oop");
}
8366 8362
// Decompress the (known non-NULL) narrow oop in 'r' in place, using a
// single lea so the flags are preserved and the instruction count stays
// fixed (several callers count emitted instructions).
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
    // Don't use Shift since it modifies flags.
    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
  } else {
    // Zero shift implies a NULL base; narrow value equals the address.
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}
8382 8378
// Two-register variant: decompress the (known non-NULL) narrow oop in
// 'src' into 'dst'. Same instruction-count and flags constraints as the
// one-register form above.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  } else if (dst != src) {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    movq(dst, src);
  }
}
8398 8394
// Load the narrow encoding of 'obj' into 'dst' as a patchable immediate:
// the oop is registered with the OopRecorder and the mov carries an oop
// relocation so the GC/patching machinery can update it.
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}
8407 8403
// Memory-destination variant of set_narrow_oop: store the narrow encoding
// of 'obj' to 'dst' as a patchable, oop-relocated immediate.
void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}
8416 8412
// Compare register 'dst' against the narrow encoding of 'obj', emitted as
// a patchable, oop-relocated immediate (see set_narrow_oop).
void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}
8425 8421
// Memory-operand variant: compare the narrow oop at 'dst' against the
// narrow encoding of 'obj' (patchable, oop-relocated immediate).
void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}
8434 8430
// Reload r12 with the current compressed-oops heap base from the VM
// (emitted where r12 may have been clobbered, e.g. around runtime calls —
// NOTE(review): inferred from usage pattern; confirm with callers).
// No-op when compressed oops are disabled.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  }
}
8440 8436 #endif // _LP64
8441 8437
// IndexOf substring.
// Emits an SSE4.2 (pcmpestri) substring search over UTF-16 chars:
//   result = index of str2[0..cnt2) within str1[0..cnt1), or -1.
// Register constraints (asserted below): cnt1 must be rdx, cnt2 rax and
// tmp rcx, as required by the pcmpestri instruction's implicit operands.
// Saves str1/str2/cnt2 on the native stack so a partial match can rewind
// and rescan; the three slots are popped at CLEANUP.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;

  push(str1); // string addr
  push(str2); // substr addr
  push(cnt2); // substr count
  jmpb(PREP_FOR_SCAN);

  // Substr count saved at sp
  // Substr saved at sp+1*wordSize
  // String saved at sp+2*wordSize

  // Reload substr for rescan
  bind(RELOAD_SUBSTR);
  movl(cnt2, Address(rsp, 0));
  movptr(str2, Address(rsp, wordSize));
  // We came here after the beginning of the substring was
  // matched but the rest of it was not so we need to search
  // again. Start from the next element after the previous match.
  subptr(str1, result); // Restore counter
  shrl(str1, 1);        // bytes -> chars (2 bytes per char)
  addl(cnt1, str1);
  decrementl(cnt1);
  lea(str1, Address(result, 2)); // Reload string

  // Load substr
  bind(PREP_FOR_SCAN);
  movdqu(vec, Address(str2, 0));
  addl(cnt1, 8); // prime the loop
  subptr(str1, 16);

  // Scan string for substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  subl(cnt1, 8);
  addptr(str1, 16);

  // pcmpestri
  // inputs:
  //   xmm - substring
  //   rax - substring length (elements count)
  //   mem - scanned string
  //   rdx - string length (elements count)
  //   0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  // outputs:
  //   rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  pcmpestri(vec, Address(str1, 0), 0x0d);
  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0: no match, more data
  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0: no match, string exhausted

  // Fallthrough: found a potential substr

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);
  // Compute start addr of substr
  lea(str1, Address(str1, tmp, Address::times_2));
  movptr(result, str1); // save (candidate match address)

  // Compare potential substr
  addl(cnt1, 8);     // prime the loop
  addl(cnt2, 8);
  subptr(str1, 16);
  subptr(str2, 16);

  // Scan 16-byte vectors of string and substr
  bind(SCAN_SUBSTR);
  subl(cnt1, 8);
  subl(cnt2, 8);
  addptr(str1, 16);
  addptr(str2, 16);
  movdqu(vec, Address(str2, 0));
  pcmpestri(vec, Address(str1, 0), 0x0d);
  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0: mismatch, rescan
  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0: substr not exhausted yet

  // Fallthrough: whole substring matched.
  // Compute substr offset (in chars) from the saved string start.
  subptr(result, Address(rsp, 2*wordSize));
  shrl(result, 1); // index
  jmpb(CLEANUP);

  bind(RET_NOT_FOUND);
  movl(result, -1);

  bind(CLEANUP);
  addptr(rsp, 3*wordSize); // pop the three saved slots
}
8537 8533
// Compare strings.
// Emits code computing a lexicographic comparison of two UTF-16 strings:
// result = difference of the first unequal characters, or cnt1 - cnt2 when
// one string is a prefix of the other. Uses 16-byte SSE4.2 vector compares
// when available, falling back to a char-at-a-time tail loop.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1, XMMRegister vec2) {
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);          // length difference saved on the stack
  if (VM_Version::supports_cmov()) {
    cmovl(Assembler::lessEqual, cnt2, result);  // cnt2 = min(cnt1, cnt2)
  } else {
    Label GT_LABEL;
    jccb(Assembler::greater, GT_LABEL);
    movl(cnt2, result);
    bind(GT_LABEL);
  }

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  // Advance to next character
  addptr(str1, 2);
  addptr(str2, 2);

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    // Setup to compare 16-byte vectors
    movl(cnt1, cnt2);
    andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
    andl(cnt1, 0x00000007); // cnt1 holds the tail count
    testl(cnt2, cnt2);
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vectorized region and iterate with a negative index.
    lea(str2, Address(str2, cnt2, Address::times_2));
    lea(str1, Address(str1, cnt2, Address::times_2));
    negptr(cnt2);

    bind(COMPARE_VECTORS);
    movdqu(vec1, Address(str1, cnt2, Address::times_2));
    movdqu(vec2, Address(str2, cnt2, Address::times_2));
    pxor(vec1, vec2);
    ptest(vec1, vec1);       // ZF set iff the vectors were identical
    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    addptr(cnt2, 8);
    jcc(Assembler::notZero, COMPARE_VECTORS);
    jmpb(COMPARE_TAIL);

    // Mismatched characters in the vectors: rescan the offending vector
    // with the scalar tail loop to find the first differing char.
    bind(VECTOR_NOT_EQUAL);
    lea(str1, Address(str1, cnt2, Address::times_2));
    lea(str2, Address(str2, cnt2, Address::times_2));
    movl(cnt1, 8);

    // Compare tail (< 8 chars), or rescan last vectors to
    // find 1st mismatched characters
    bind(COMPARE_TAIL);
    testl(cnt1, cnt1);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    movl(cnt2, cnt1);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, Address::times_2, 0));
  lea(str2, Address(str2, cnt2, Address::times_2, 0));
  negptr(cnt2);

  // Compare the rest of the characters
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jcc(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  addptr(rsp, wordSize);

  // That's it
  bind(DONE_LABEL);
}
8657 8653
// Compare char[] arrays aligned to 4 bytes or substrings.
// Emits code setting result = 1 if the two char sequences are equal, else 0.
// When is_array_equ, ary1/ary2 are array oops (NULL and length checks are
// emitted, and the data addresses are computed from the array base);
// otherwise they are raw char addresses and 'limit' is the char count.
// Uses 16-byte SSE4.2 compares when available, then 4-byte compares, then
// a final 2-byte tail compare.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset  = arrayOopDesc::length_offset_in_bytes();
  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args: identical addresses compare equal trivially.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // chars -> bytes; byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
    // Compare 16-byte vectors
    andl(result, 0x0000000e);  //   tail count (in bytes)
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vectorized region and iterate with a negative index.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);
    ptest(vec1, vec1);         // ZF set iff the vectors were identical
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail  char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}
8757 8753
// Return the logical complement of the given x86 condition code, i.e. the
// condition that is true exactly when 'cond' is false (used to invert
// branches).
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  // Every enumerator is handled above; the return only placates the compiler.
  ShouldNotReachHere(); return Assembler::overflow;
}
8780 8776
// RAII code-emission helper: the constructor emits a compare of the byte
// flag at 'flag_addr' against 'value' and a conditional jump to _label;
// everything emitted until the destructor runs is skipped at runtime when
// the flag equals 'value'.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}
8787 8783
// Bind the skip target, closing the region opened by the constructor.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
↓ open down ↓ |
1072 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX