Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/assembler_x86.cpp
+++ new/src/cpu/x86/vm/assembler_x86.cpp
1 1 /*
2 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "assembler_x86.inline.hpp"
27 27 #include "gc_interface/collectedHeap.inline.hpp"
28 28 #include "interpreter/interpreter.hpp"
29 29 #include "memory/cardTableModRefBS.hpp"
30 30 #include "memory/resourceArea.hpp"
31 31 #include "prims/methodHandles.hpp"
32 32 #include "runtime/biasedLocking.hpp"
33 33 #include "runtime/interfaceSupport.hpp"
34 34 #include "runtime/objectMonitor.hpp"
35 35 #include "runtime/os.hpp"
36 36 #include "runtime/sharedRuntime.hpp"
37 37 #include "runtime/stubRoutines.hpp"
38 38 #ifndef SERIALGC
39 39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
40 40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
41 41 #include "gc_implementation/g1/heapRegion.hpp"
42 42 #endif
43 43
44 44 // Implementation of AddressLiteral
45 45
46 46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
47 47 _is_lval = false;
48 48 _target = target;
49 49 switch (rtype) {
50 50 case relocInfo::oop_type:
51 51 // Oops are a special case. Normally they would be their own section
52 52 // but in cases like icBuffer they are literals in the code stream that
53 53 // we don't have a section for. We use none so that we get a literal address
54 54 // which is always patchable.
55 55 break;
56 56 case relocInfo::external_word_type:
57 57 _rspec = external_word_Relocation::spec(target);
58 58 break;
59 59 case relocInfo::internal_word_type:
60 60 _rspec = internal_word_Relocation::spec(target);
61 61 break;
62 62 case relocInfo::opt_virtual_call_type:
63 63 _rspec = opt_virtual_call_Relocation::spec();
64 64 break;
65 65 case relocInfo::static_call_type:
66 66 _rspec = static_call_Relocation::spec();
67 67 break;
68 68 case relocInfo::runtime_call_type:
69 69 _rspec = runtime_call_Relocation::spec();
70 70 break;
71 71 case relocInfo::poll_type:
72 72 case relocInfo::poll_return_type:
73 73 _rspec = Relocation::spec_simple(rtype);
74 74 break;
75 75 case relocInfo::none:
76 76 break;
77 77 default:
78 78 ShouldNotReachHere();
79 79 break;
80 80 }
81 81 }
82 82
83 83 // Implementation of Address
84 84
85 85 #ifdef _LP64
86 86
87 87 Address Address::make_array(ArrayAddress adr) {
88 88 // Not implementable on 64bit machines
89 89 // Should have been handled higher up the call chain.
90 90 ShouldNotReachHere();
91 91 return Address();
92 92 }
93 93
94 94 // exceedingly dangerous constructor
95 95 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
96 96 _base = noreg;
97 97 _index = noreg;
98 98 _scale = no_scale;
99 99 _disp = disp;
100 100 switch (rtype) {
101 101 case relocInfo::external_word_type:
102 102 _rspec = external_word_Relocation::spec(loc);
103 103 break;
104 104 case relocInfo::internal_word_type:
105 105 _rspec = internal_word_Relocation::spec(loc);
106 106 break;
107 107 case relocInfo::runtime_call_type:
108 108 // HMM
109 109 _rspec = runtime_call_Relocation::spec();
110 110 break;
111 111 case relocInfo::poll_type:
112 112 case relocInfo::poll_return_type:
113 113 _rspec = Relocation::spec_simple(rtype);
114 114 break;
115 115 case relocInfo::none:
116 116 break;
117 117 default:
118 118 ShouldNotReachHere();
119 119 }
120 120 }
121 121 #else // LP64
122 122
123 123 Address Address::make_array(ArrayAddress adr) {
124 124 AddressLiteral base = adr.base();
125 125 Address index = adr.index();
126 126 assert(index._disp == 0, "must not have disp"); // maybe it can?
127 127 Address array(index._base, index._index, index._scale, (intptr_t) base.target());
128 128 array._rspec = base._rspec;
129 129 return array;
130 130 }
131 131
132 132 // exceedingly dangerous constructor
133 133 Address::Address(address loc, RelocationHolder spec) {
134 134 _base = noreg;
135 135 _index = noreg;
136 136 _scale = no_scale;
137 137 _disp = (intptr_t) loc;
138 138 _rspec = spec;
139 139 }
140 140
141 141 #endif // _LP64
142 142
143 143
144 144
145 145 // Convert the raw encoding form into the form expected by the constructor for
146 146 // Address. An index of 4 (rsp) corresponds to having no index, so convert
147 147 // that to noreg for the Address constructor.
148 148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
149 149 RelocationHolder rspec;
150 150 if (disp_is_oop) {
151 151 rspec = Relocation::spec_simple(relocInfo::oop_type);
152 152 }
153 153 bool valid_index = index != rsp->encoding();
154 154 if (valid_index) {
155 155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
156 156 madr._rspec = rspec;
157 157 return madr;
158 158 } else {
159 159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
160 160 madr._rspec = rspec;
161 161 return madr;
162 162 }
163 163 }
164 164
165 165 // Implementation of Assembler
166 166
167 167 int AbstractAssembler::code_fill_byte() {
168 168 return (u_char)'\xF4'; // hlt
169 169 }
170 170
171 171 // make this go away someday
172 172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
173 173 if (rtype == relocInfo::none)
174 174 emit_long(data);
175 175 else emit_data(data, Relocation::spec_simple(rtype), format);
176 176 }
177 177
178 178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
179 179 assert(imm_operand == 0, "default format must be immediate in this file");
180 180 assert(inst_mark() != NULL, "must be inside InstructionMark");
181 181 if (rspec.type() != relocInfo::none) {
182 182 #ifdef ASSERT
183 183 check_relocation(rspec, format);
184 184 #endif
185 185 // Do not use AbstractAssembler::relocate, which is not intended for
186 186 // embedded words. Instead, relocate to the enclosing instruction.
187 187
188 188 // hack. call32 is too wide for mask so use disp32
189 189 if (format == call32_operand)
190 190 code_section()->relocate(inst_mark(), rspec, disp32_operand);
191 191 else
192 192 code_section()->relocate(inst_mark(), rspec, format);
193 193 }
194 194 emit_long(data);
195 195 }
196 196
197 197 static int encode(Register r) {
198 198 int enc = r->encoding();
199 199 if (enc >= 8) {
200 200 enc -= 8;
201 201 }
202 202 return enc;
203 203 }
204 204
205 205 static int encode(XMMRegister r) {
206 206 int enc = r->encoding();
207 207 if (enc >= 8) {
208 208 enc -= 8;
209 209 }
210 210 return enc;
211 211 }
212 212
// Emit an 8-bit arithmetic instruction: opcode byte, ModRM byte with
// dst in the r/m field, then the imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}
222 222
223 223
224 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
225 225 assert(isByte(op1) && isByte(op2), "wrong opcode");
226 226 assert((op1 & 0x01) == 1, "should be 32bit operation");
227 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
228 228 if (is8bit(imm32)) {
229 229 emit_byte(op1 | 0x02); // set sign bit
230 230 emit_byte(op2 | encode(dst));
231 231 emit_byte(imm32 & 0xFF);
232 232 } else {
233 233 emit_byte(op1);
234 234 emit_byte(op2 | encode(dst));
235 235 emit_long(imm32);
236 236 }
237 237 }
238 238
239 239 // immediate-to-memory forms
240 240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
241 241 assert((op1 & 0x01) == 1, "should be 32bit operation");
242 242 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
243 243 if (is8bit(imm32)) {
244 244 emit_byte(op1 | 0x02); // set sign bit
245 245 emit_operand(rm, adr, 1);
246 246 emit_byte(imm32 & 0xFF);
247 247 } else {
248 248 emit_byte(op1);
249 249 emit_operand(rm, adr, 4);
250 250 emit_long(imm32);
251 251 }
252 252 }
253 253
// Emit a 32-bit arithmetic instruction whose immediate is an oop
// literal, relocated so the GC can patch it.  32-bit only.
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  // Embed the oop address with an oop_type relocation.
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}
264 264
265 265
266 266 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
267 267 assert(isByte(op1) && isByte(op2), "wrong opcode");
268 268 emit_byte(op1);
269 269 emit_byte(op2 | encode(dst) << 3 | encode(src));
270 270 }
271 271
272 272
// Emit the ModRM byte, optional SIB byte, and displacement for the memory
// operand [base + index*scale + disp], with 'reg' going into the ModRM reg
// field.  The shortest displacement encoding is chosen, except that a
// non-none relocation forces the patchable disp32 form.
// 'rip_relative_correction' accounts for bytes (e.g. a trailing immediate)
// emitted after the disp32 of a RIP-relative instruction.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // rsp (and r12, which shares its low-3-bit encoding) always needs a SIB.
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // rbp/r13 cannot use the no-displacement form (that encoding means disp32).
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
390 390
// XMM variant: delegates to the Register form, which only uses the
// register's encoding bits.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
396 396
// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

// Partial x86 decoder: walks the instruction at 'inst' far enough to find
// the requested 32-bit operand (or, for end_pc_operand, the instruction
// end).  It only understands the encodings this assembler actually emits.
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  // First, consume any prefixes and dispatch on the opcode byte.
  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    // REX.W widens the operand (affects imm size for mov r, imm).
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2; // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--; // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    // imm is 8 bytes with REX.W, else 4; no ModRM to parse.
    if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr a
      // 64bit side says these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1; // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand) return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp rdisp32
    if (which == end_pc_operand) return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    // Skip an optional REX prefix and the 0x0F escape byte.
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    // r/m == 100 in a memory mode means a SIB byte follows.
    op3 = 0xFF & *ip++;
    base = op3 & 0x07; // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg 100][ss index base]
    // [00 reg 100][00 100 esp]
    // [00 reg base]
    // [00 reg 100][ss index 101] disp32
    // [00 reg 101] disp32

    if (base == b101) {
      // mod=00 with base 101 is the no-base disp32 form.
      if (which == disp32_operand)
        return ip; // caller wants the disp32
      ip += 4; // skip the disp32
    }
    break;

  case 1:
    // [01 reg 100][ss index base][disp8]
    // [01 reg 100][00 100 esp][disp8]
    // [01 reg base] [disp8]
    ip += 1; // skip the disp8
    break;

  case 2:
    // [10 reg 100][ss index base][disp32]
    // [10 reg 100][00 100 esp][disp32]
    // [10 reg base] [disp32]
    if (which == disp32_operand)
      return ip; // caller wants the disp32
    ip += 4; // skip the disp32
    break;

  case 3:
    // [11 reg base] (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}
752 752
// Return the address just past the end of the instruction at 'inst'.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
757 757
758 758
#ifdef ASSERT
// Debug-only sanity check: verify that the relocation being emitted
// points exactly at the operand that locate_operand finds within the
// current instruction (the one started at inst_mark()).
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT
782 782
// 32-bit-only operand emission: asserts that neither the register nor
// the address needs a REX prefix, then delegates.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
789 789
// Unpack an Address into the field-by-field emit_operand above.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}
796 796
// XMM/Address convenience overload; delegates to the XMM field form.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
801 801
// MMX operations
// MMX registers only use 3-bit encodings, so no REX is ever needed.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
807 807
// work around gcc (3.2.1-7a) bug
// Same body as the (MMXRegister, Address) overload, arguments swapped.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
813 813
814 814
// Emit a two-byte x87 instruction; 'i' selects the FP stack slot ST(i)
// by being added into the second opcode byte.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}
821 821
822 822
823 823 // Now the Assembler instructions (identical for 32/64 bits)
824 824
// adc [dst], imm32 — opcode 0x81; rdx (encoding 2) supplies the ModRM
// reg-field opcode extension.
void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}
830 830
// adc [dst], src — opcode 0x11.
void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}
837 837
// adc dst, imm32 — opcode 0x81 with ModRM base 0xD0.
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}
842 842
// adc dst, [src] — opcode 0x13.
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
849 849
// adc dst, src — opcode 0x13, register form.  The combined encoding
// returned by prefix_and_encode is unused here; the call is made for
// its effect (result deliberately discarded via the void cast).
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
854 854
// add [dst], imm32 — opcode 0x81; rax (encoding 0) supplies the ModRM
// reg-field opcode extension.
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}
860 860
// add [dst], src — opcode 0x01.
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}
867 867
// add dst, imm32 — opcode 0x81 with ModRM base 0xC0.
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}
872 872
// add dst, [src] — opcode 0x03.
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}
879 879
// add dst, src — opcode 0x03, register form; prefix_and_encode result
// intentionally discarded (call is made for its effect).
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
884 884
// Emit a 4-byte no-op (0F 1F 40 00) for code alignment padding.
void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0); // 8-bits offset (1 byte)
}
892 892
// Emit a 5-byte no-op (0F 1F 44 00 00) for code alignment padding.
void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0); // 8-bits offset (1 byte)
}
901 901
// Emit a 7-byte no-op (0F 1F 80 + disp32) for code alignment padding.
void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0); // 32-bits offset (4 bytes)
}
909 909
// Emit an 8-byte no-op (0F 1F 84 00 + disp32) for code alignment padding.
void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0); // 32-bits offset (4 bytes)
}
918 918
// ADDSD: scalar double-precision add (F2 0F 58), register-to-register.
// NOTE: the 0xF2 mandatory prefix must precede any REX byte emitted by
// prefix_and_encode, hence the call ordering here.
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

// ADDSD: scalar double-precision add, memory source.
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}

// ADDSS: scalar single-precision add (F3 0F 58), register-to-register.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

// ADDSS: scalar single-precision add, memory source.
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}
956 956
// 32-bit AND, register destination, immediate (0x81 /4, reg form 0xE0).
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

// 32-bit AND, register destination, memory source (opcode 0x23 /r).
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

// 32-bit AND, register-to-register (opcode 0x23, mod=11).
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

// ANDPD: bitwise AND of packed doubles (66 0F 54), memory source.
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x54);
  emit_operand(dst, src);
}
983 983
// BSF: bit scan forward, 32-bit (0F BC), register-to-register.
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// BSR: bit scan reverse, 32-bit (0F BD). Disallowed when LZCNT is
// supported because F3 0F BD would be decoded as LZCNT there.
void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// BSWAP: byte-swap a 32-bit register (0F C8+r).
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
1004 1004
// CALL rel32 to a label. For a bound label the (necessarily non-positive)
// displacement is computed now; otherwise a patch record is added and a
// zero displacement placeholder is emitted.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;     // E8 + 4-byte displacement
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");   // bound labels are always behind us
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);  // disp is relative to end of instruction
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);  // patched when the label is bound
  }
}
1026 1026
// Indirect CALL through a register (FF /2, reg form 0xD0).
void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}
1044 1044
1045 1045
// Indirect CALL through memory (FF /2; rdx here supplies the /2 extension digit).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}
1052 1052
// CALL rel32 to an absolute entry point, with relocation info.
// The displacement is relative to the end of the 5-byte instruction.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1065 1065
// CDQ: sign-extend EAX into EDX:EAX (opcode 0x99).
void Assembler::cdql() {
  emit_byte(0x99);
}

// CMOVcc: conditional move, 32-bit, register source (0F 40+cc).
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


// CMOVcc: conditional move, 32-bit, memory source.
// NOTE(review): no InstructionMark here, unlike sibling memory-operand
// emitters — presumably intentional (not relocatable); confirm before relying on it.
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
1086 1086
// CMP byte at memory with imm8 (0x80 /7; rdi supplies the /7 digit).
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);  // 1 = trailing immediate size, for disp fixups
  emit_byte(imm8);
}

// CMP 32-bit memory with imm32 (0x81 /7).
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// CMP 32-bit register with imm32 (0x81 /7, reg form 0xF8).
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

// CMP register with register (opcode 0x3B, mod=11).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


// CMP register with memory (opcode 0x3B /r).
void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// CMP 16-bit memory with imm16 (66 81 /7). No REX support: the address
// must not use extended registers, as asserted.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}
1129 1129
// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // Diagnostic mode (Atomics flag bit 1 set): emit an open-coded,
    // NON-atomic sequence instead of the real instruction.
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware. The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L ;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    // Real CMPXCHG (0F B1 /r). The LOCK prefix, if any, is emitted by the caller.
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}
1155 1155
// COMISD: ordered compare of scalar doubles, sets EFLAGS (66 0F 2F).
// Implemented as 0x66 prefix + the comiss encoding.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangly ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}

// COMISS: ordered compare of scalar singles, sets EFLAGS (0F 2F).
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}
1173 1173
// SSE conversion instructions. In each case any mandatory prefix (F2/F3/66)
// is emitted before the REX prefix produced by prefix_and_encode.

// CVTDQ2PD: packed int32 -> packed double (F3 0F E6).
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

// CVTDQ2PS: packed int32 -> packed single (0F 5B).
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

// CVTSD2SS: scalar double -> scalar single (F2 0F 5A).
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SD: 32-bit int -> scalar double (F2 0F 2A).
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SS: 32-bit int -> scalar single (F3 0F 2A).
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSS2SD: scalar single -> scalar double (F3 0F 5A).
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTTSD2SI: scalar double -> 32-bit int, truncating (F2 0F 2C).
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// CVTTSS2SI: scalar single -> 32-bit int, truncating (F3 0F 2C).
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
1244 1244
// DEC 32-bit memory (FF /1; rcx supplies the /1 digit).
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

// DIVSD: scalar double divide (F2 0F 5E), memory source.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSD: scalar double divide, register source.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// DIVSS: scalar single divide (F3 0F 5E), memory source.
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSS: scalar single divide, register source.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}
1290 1290
// EMMS: empty MMX state (0F 77).
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

// HLT: halt (0xF4); used as filler/trap in generated code.
void Assembler::hlt() {
  emit_byte(0xF4);
}

// IDIV: signed 32-bit divide of EDX:EAX by src (F7 /7).
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// DIV: unsigned 32-bit divide of EDX:EAX by src (F7 /6).
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}
1312 1312
// IMUL: signed multiply, register-to-register (0F AF /r).
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


// IMUL: dst = src * value. Uses the short imm8 form (0x6B) when the
// constant fits in a signed byte, otherwise the imm32 form (0x69).
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
1333 1333
// INC 32-bit memory (FF /0; rax supplies the /0 digit).
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
1341 1341
// Jcc: conditional jump to a label. A bound label within signed-8-bit range
// (and with no relocation) gets the 2-byte short form; otherwise the 6-byte
// near form. An unbound label always gets the near form plus a patch record.
void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    // Displacements are relative to the end of the instruction,
    // hence the "- short_size" / "- long_size" adjustments.
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    // is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    // an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}
1376 1376
1377 1377 void Assembler::jccb(Condition cc, Label& L) {
1378 1378 if (L.is_bound()) {
1379 1379 const int short_size = 2;
1380 1380 address entry = target(L);
1381 1381 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
1382 1382 "Dispacement too large for a short jmp");
1383 1383 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
1384 1384 // 0111 tttn #8-bit disp
1385 1385 emit_byte(0x70 | cc);
1386 1386 emit_byte((offs - short_size) & 0xFF);
1387 1387 } else {
1388 1388 InstructionMark im(this);
1389 1389 L.add_patch_at(code(), locator());
1390 1390 emit_byte(0x70 | cc);
1391 1391 emit_byte(0);
1392 1392 }
1393 1393 }
1394 1394
// Indirect JMP through memory (FF /4; rsp supplies the /4 digit).
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}
1401 1401
// JMP to a label. A bound, close, non-relocated target gets the 2-byte
// short form (EB disp8); otherwise the 5-byte near form (E9 disp32).
// An unbound label always gets the near form plus a patch record.
void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    // Displacements are relative to the end of the instruction.
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound. If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    relocate(rtype);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}
1429 1429
// Indirect JMP through a register (FF /4, reg form 0xE0).
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}
1435 1435
// JMP rel32 to an absolute destination, with relocation info.
// The displacement is relative to the end of the 5-byte instruction.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
1444 1444
1445 1445 void Assembler::jmpb(Label& L) {
1446 1446 if (L.is_bound()) {
1447 1447 const int short_size = 2;
1448 1448 address entry = target(L);
1449 1449 assert(is8bit((entry - _code_pos) + short_size),
1450 1450 "Dispacement too large for a short jmp");
1451 1451 assert(entry != NULL, "jmp most probably wrong");
1452 1452 intptr_t offs = entry - _code_pos;
1453 1453 emit_byte(0xEB);
1454 1454 emit_byte((offs - short_size) & 0xFF);
1455 1455 } else {
1456 1456 InstructionMark im(this);
1457 1457 L.add_patch_at(code(), locator());
1458 1458 emit_byte(0xEB);
1459 1459 emit_byte(0);
1460 1460 }
1461 1461 }
1462 1462
// LDMXCSR: load the MXCSR control/status register from memory (0F AE /2).
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);  // /2 extension digit
}

// LEA, 32-bit result. On LP64 an addr32 (0x67) prefix forces 32-bit
// address computation so the result is a true 32-bit lea.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}

// LOCK prefix (0xF0) — or a NOP when the Atomics diagnostic flag (bit 0)
// suppresses real locking.
void Assembler::lock() {
  if (Atomics & 1) {
    // Emit either nothing, a NOP, or a NOP: prefix
    emit_byte(0x90) ;
  } else {
    emit_byte(0xF0);
  }
}

// LZCNT: count leading zeros (F3 0F BD). Requires LZCNT support; on older
// CPUs this encoding would decode as BSR instead.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}

// Word-size register move: 64-bit on LP64, 32-bit otherwise.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
1511 1511
// MOVAPD: aligned packed-double move (66 0F 28). The REX prefix is chosen
// by hand here because the mandatory 0x66 prefix has already been emitted;
// encodings >= 8 need REX.R (dst) and/or REX.B (src) and are folded back
// into the 3-bit ModRM fields.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  emit_byte(0x66);
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);  // ModRM: mod=11, reg=dst, rm=src
}

// MOVAPS: aligned packed-single move (0F 28); same hand-rolled REX
// selection as movapd, without the 0x66 prefix.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int dstenc = dst->encoding();
  int srcenc = src->encoding();
  if (dstenc < 8) {
    if (srcenc >= 8) {
      prefix(REX_B);
      srcenc -= 8;
    }
  } else {
    if (srcenc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      srcenc -= 8;
    }
    dstenc -= 8;
  }
  emit_byte(0x0F);
  emit_byte(0x28);
  emit_byte(0xC0 | dstenc << 3 | srcenc);  // ModRM: mod=11, reg=dst, rm=src
}
1558 1558
// MOV byte: register load from memory (0x8A /r). The byte_instr flag on
// prefix() forces a REX for SPL/BPL/SIL/DIL access on LP64.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}


// MOV byte: immediate store to memory (C6 /0).
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


// MOV byte: register store to memory (0x88 /r).
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}
1584 1584
// MOVD: GP register -> XMM low dword (66 0F 6E).
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// MOVD: XMM low dword -> GP register (66 0F 7E). For 0x7E the XMM register
// occupies the ModRM reg field, so the operands are swapped for prefixing.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefix_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

// MOVD: memory dword -> XMM.
void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_operand(dst, src);
}
1613 1613
1614 1614
// MOVDQA: aligned 128-bit move (66 0F 6F / 7F), memory source.
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVDQA: register-to-register.
// NOTE(review): uses prefixq_and_encode (REX.W) where the memory forms use
// the plain prefix — REX.W appears redundant for this opcode; confirm intent.
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// MOVDQA: register store to memory.
void Assembler::movdqa(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1643 1643
// MOVDQU: unaligned 128-bit move (F3 0F 6F / 7F), memory source.
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVDQU: register-to-register.
// NOTE(review): prefixq_and_encode (REX.W) here mirrors movdqa(X,X);
// REX.W appears redundant for this opcode — confirm intent.
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// MOVDQU: register store to memory.
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
1672 1672
// Uses zero extension on 64bit

// MOV 32-bit immediate into register (B8+r imm32).
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}

// MOV register-to-register (0x8B, mod=11).
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// MOV memory load into register (0x8B /r).
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV 32-bit immediate into memory (C7 /0).
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// MOV register store to memory (0x89 /r).
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
1708 1708
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}

// MOVQ: MMX load from memory (0F 6F). No REX prefixing — MMX registers
// are limited to the low eight encodings.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVQ: MMX store to memory (0F 7F).
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// MOVQ: XMM load of 64 bits from memory (F3 0F 7E).
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// MOVQ: XMM store of 64 bits to memory (66 0F D6).
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}
1761 1761
// MOVSX byte -> 32-bit, memory source (0F BE /r).
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

// MOVSX byte -> 32-bit, register source. The byte_instr flag on
// prefix_and_encode forces a REX for SPL/BPL/SIL/DIL access on LP64.
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
1777 1777
// MOVSD: scalar double move, register-to-register (F2 0F 10).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// MOVSD: scalar double load from memory (F2 0F 10).
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// MOVSD: scalar double store to memory (F2 0F 11).
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// MOVSS: scalar single move, register-to-register (F3 0F 10).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// MOVSS: scalar single load from memory (F3 0F 10).
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// MOVSS: scalar single store to memory (F3 0F 11).
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}
1835 1835
// MOVSX r32, m16 (0F BF /r): load and sign-extend a 16-bit word.
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

// MOVSX r32, r16 (0F BF /r): register form of the sign-extending word move.
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

// MOV m16, imm16 (66 C7 /0 iw): store a 16-bit immediate.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  // rax encodes the /0 reg-field; the trailing 2 presumably tells
  // emit_operand that 2 immediate bytes follow the operand -- TODO confirm.
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}

// MOV r16, m16 (66 8B /r): 16-bit load.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);   // operand-size prefix selects 16-bit operand
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV m16, r16 (66 89 /r): 16-bit store.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// MOVZX r32, m8 (0F B6 /r): load and zero-extend a byte.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

// MOVZX r32, r8 (0F B6 /r): register form; the 'true' argument asks the
// prefix machinery for byte-register encoding (see has_byte_register assert).
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

// MOVZX r32, m16 (0F B7 /r): load and zero-extend a 16-bit word.
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

// MOVZX r32, r16 (0F B7 /r): register form.
void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
1907 1907
// MUL m32 (F7 /4): unsigned multiply EAX by memory operand; rsp encodes
// the /4 reg-field of the opcode extension, not an actual register.
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);
}

// MUL r32 (F7 /4): register form (0xE0 == ModRM 11-100-000, /4).
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);
}

// MULSD xmm, m64 (F2 0F 59 /r): scalar double multiply from memory.
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

// MULSD xmm, xmm (F2 0F 59 /r): register form.
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// MULSS xmm, m32 (F3 0F 59 /r): scalar float multiply from memory (SSE1).
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

// MULSS xmm, xmm (F3 0F 59 /r): register form.
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}
1958 1958
// NEG r32 (F7 /3): two's-complement negate (0xD8 == ModRM 11-011-000).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}
1964 1964
// Emit exactly 'i' bytes of padding.  On CPUs where UseAddressNop applies,
// multi-byte "address nops" (0x0F 0x1F [ModRM ...]) are used, with
// vendor-specific mixing rules; otherwise 0x66-prefixed 0x90 nops are used.
// Under ASSERT builds only plain single-byte 0x90s are emitted so debuggers
// can disassemble the padding.  All switch fall-throughs below are
// intentional: each case prepends one 0x66 prefix byte to the shared tail.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    // 1: 0x90
    // 2: 0x66 0x90
    // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    // 4: 0x0F 0x1F 0x40 0x00
    // 5: 0x0F 0x1F 0x44 0x00 0x00
    // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Each iteration emits 15 bytes: an 11-byte prefixed address nop plus
    // a 4-byte prefixed regular nop.
    while(i >= 15) {
      // For Intel don't generate consecutive addess nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix -- fall through
      case 13:
        emit_byte(0x66); // size prefix -- fall through
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix -- fall through
      case 10:
        emit_byte(0x66); // size prefix -- fall through
      case 9:
        emit_byte(0x66); // size prefix -- fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix -- fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix -- fall through
      case 2:
        emit_byte(0x66); // size prefix -- fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    // 1: 0x90
    // 2: 0x66 0x90
    // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    // 4: 0x0F 0x1F 0x40 0x00
    // 5: 0x0F 0x1F 0x44 0x00 0x00
    // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // Size prefixes (0x66) are added for larger sizes

    // Each iteration emits 11 bytes (3 prefixes + 8-byte address nop).
    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix -- fall through
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix -- fall through
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix -- fall through
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix -- fall through
      case 10:
        emit_byte(0x66); // size prefix -- fall through
      case 9:
        emit_byte(0x66); // size prefix -- fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix -- fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix -- fall through
      case 2:
        emit_byte(0x66); // size prefix -- fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  // 1: 0x90
  // 2: 0x66 0x90
  // 3: 0x66 0x66 0x90
  // 4: 0x66 0x66 0x66 0x90
  // 5: 0x66 0x66 0x90 0x66 0x90
  // 6: 0x66 0x66 0x90 0x66 0x66 0x90
  // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66); // fall through
    case 3:
      emit_byte(0x66); // fall through
    case 2:
      emit_byte(0x66); // fall through
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2212 2212
// NOT r/m32 (F7 /2): bitwise complement (0xD0 == ModRM 11-010-000).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

// OR m32, imm32 (81 /1): rcx encodes the /1 opcode extension for OR.
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rcx, dst, imm32);
}

// OR r32, imm32 (81 /1; emit_arith may pick the sign-extended 83 form).
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}

// OR r32, m32 (0B /r).
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}

// OR r32, r32 (0B /r).
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
2241 2241
// PCMPESTRI xmm, m128, imm8 (66 0F 3A 61 /r ib): packed explicit-length
// string compare, index result in ECX.  SSE4.2 only.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x3A);
  emit_byte(0x61);
  emit_operand(dst, src);
  emit_byte(imm8);
}
2254 2254
2255 2255 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2256 2256 assert(VM_Version::supports_sse4_2(), "");
2257 2257
2258 2258 emit_byte(0x66);
2259 2259 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2260 2260 emit_byte(0x0F);
2261 2261 emit_byte(0x3A);
2262 2262 emit_byte(0x61);
2263 2263 emit_byte(0xC0 | encode);
2264 2264 emit_byte(imm8);
2265 2265 }
2266 2266
// generic
// POP r64/r32 (58+rd): pop top of stack into dst.
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}

// POPCNT r32, m32 (F3 0F B8 /r): population count; requires POPCNT feature.
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}

// POPCNT r32, r32 (F3 0F B8 /r): register form.
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}

// POPF (9D): pop flags register from the stack.
void Assembler::popf() {
  emit_byte(0x9D);
}
2295 2295
#ifndef _LP64 // no 32bit push/pop on amd64
// POP m32 (8F /0): rax encodes the /0 opcode extension.
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
#endif
2305 2305
// Shared helper: address-size/REX prefix plus the 0x0F escape byte that
// starts every prefetch opcode below.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}

// PREFETCHNTA m8 (0F 18 /0): prefetch into non-temporal cache structure.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}

// PREFETCH m8 (3DNow! 0F 0D /0): AMD read prefetch.
void Assembler::prefetchr(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}

// PREFETCHT0 m8 (0F 18 /1): prefetch into all cache levels.
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}

// PREFETCHT1 m8 (0F 18 /2): prefetch into L2 and higher.
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}

// PREFETCHT2 m8 (0F 18 /3): prefetch into L3/outer cache.
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}

// PREFETCHW m8 (3DNow! 0F 0D /1): prefetch with intent to write.
void Assembler::prefetchw(Address src) {
  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}
2358 2358
// Emit a raw legacy prefix byte (lock, segment, size-override, ...).
void Assembler::prefix(Prefix p) {
  a_byte(p);
}
2362 2362
// POR xmm, xmm (66 0F EB /r): bitwise OR of 128-bit registers.
void Assembler::por(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);

  emit_byte(0xEB);
  emit_byte(0xC0 | encode);
}

// PSHUFD xmm, xmm, imm8 (66 0F 70 /r ib): shuffle packed doublewords.
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);

}

// PSHUFD xmm, m128, imm8 (66 0F 70 /r ib): memory form.
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}

// PSHUFLW xmm, xmm, imm8 (F2 0F 70 /r ib): shuffle low packed words.
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_byte(0xC0 | encode);
  emit_byte(mode & 0xFF);
}

// PSHUFLW xmm, m128, imm8 (F2 0F 70 /r ib): memory form.
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst); // QQ new
  emit_byte(0x0F);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}
2424 2424
2425 2425 void Assembler::psrlq(XMMRegister dst, int shift) {
2426 2426 // Shift 64 bit value logically right by specified number of bits.
2427 2427 // HMM Table D-1 says sse2 or mmx.
2428 2428 // Do not confuse it with psrldq SSE2 instruction which
2429 2429 // shifts 128 bit value in xmm register by number of bytes.
2430 2430 NOT_LP64(assert(VM_Version::supports_sse(), ""));
2431 2431
2432 2432 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
2433 2433 emit_byte(0x66);
2434 2434 emit_byte(0x0F);
2435 2435 emit_byte(0x73);
2436 2436 emit_byte(0xC0 | encode);
2437 2437 emit_byte(shift);
2438 2438 }
2439 2439
2440 2440 void Assembler::psrldq(XMMRegister dst, int shift) {
2441 2441 // Shift 128 bit value in xmm register by number of bytes.
2442 2442 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2443 2443
2444 2444 int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
2445 2445 emit_byte(0x66);
2446 2446 emit_byte(0x0F);
2447 2447 emit_byte(0x73);
2448 2448 emit_byte(0xC0 | encode);
2449 2449 emit_byte(shift);
2450 2450 }
2451 2451
// PTEST xmm, m128 (66 0F 38 17 /r): logical compare setting ZF/CF.  SSE4.1.
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");

  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x38);
  emit_byte(0x17);
  emit_operand(dst, src);
}
2463 2463
2464 2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2465 2465 assert(VM_Version::supports_sse4_1(), "");
2466 2466
2467 2467 emit_byte(0x66);
2468 2468 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2469 2469 emit_byte(0x0F);
2470 2470 emit_byte(0x38);
2471 2471 emit_byte(0x17);
2472 2472 emit_byte(0xC0 | encode);
2473 2473 }
2474 2474
// PUNPCKLBW xmm, xmm (66 0F 60 /r): interleave low-order bytes.
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x60);
  emit_byte(0xC0 | encode);
}
2483 2483
// PUSH imm32 (68 id).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}

// PUSH r64/r32 (50+rd).
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}

// PUSHF (9C): push flags register.
void Assembler::pushf() {
  emit_byte(0x9C);
}

#ifndef _LP64 // no 32bit push/pop on amd64
// PUSH m32 (FF /6): rsi encodes the /6 opcode extension.
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
#endif
2510 2510
// PXOR xmm, m128 (66 0F EF /r): bitwise XOR from memory.
void Assembler::pxor(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xEF);
  emit_operand(dst, src);
}
2520 2520
2521 2521 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
2522 2522 NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2523 2523 InstructionMark im(this);
2524 2524 emit_byte(0x66);
2525 2525 int encode = prefix_and_encode(dst->encoding(), src->encoding());
2526 2526 emit_byte(0x0F);
2527 2527 emit_byte(0xEF);
2528 2528 emit_byte(0xC0 | encode);
2529 2529 }
2530 2530
2531 2531 void Assembler::rcll(Register dst, int imm8) {
2532 2532 assert(isShiftCount(imm8), "illegal shift count");
2533 2533 int encode = prefix_and_encode(dst->encoding());
2534 2534 if (imm8 == 1) {
2535 2535 emit_byte(0xD1);
2536 2536 emit_byte(0xD0 | encode);
2537 2537 } else {
2538 2538 emit_byte(0xC1);
2539 2539 emit_byte(0xD0 | encode);
2540 2540 emit_byte(imm8);
2541 2541 }
2542 2542 }
2543 2543
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
// REP MOVS (F3 [REX.W] A5): pointer-sized string copy.
void Assembler::rep_mov() {
  emit_byte(0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}

// sets rcx pointer sized words with rax, value at [edi]
// generic
// REP STOS (F3 [REX.W] AB): pointer-sized string store.
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer sized words at [edi] for occurance of rax,
// generic
// REPNE SCAS (F2 [REX.W] AF): pointer-sized string scan.
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}
2570 2570
#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurance of rax,
// generic
// REPNE SCASD (F2 AF, no REX.W): 32-bit-element scan on 64-bit VMs.
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);
  // SCASL
  emit_byte(0xAF);
}
#endif
2580 2580
2581 2581 void Assembler::ret(int imm16) {
2582 2582 if (imm16 == 0) {
2583 2583 emit_byte(0xC3);
2584 2584 } else {
2585 2585 emit_byte(0xC2);
2586 2586 emit_word(imm16);
2587 2587 }
2588 2588 }
2589 2589
// SAHF (9E): store AH into flags.  Not available in 64-bit mode on early
// AMD64 CPUs; NOTE(review): the guard only raises ShouldNotReachHere and
// does not skip the emit -- presumably ShouldNotReachHere is fatal in all
// build flavors this runs in; verify for product builds.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}
2597 2597
2598 2598 void Assembler::sarl(Register dst, int imm8) {
2599 2599 int encode = prefix_and_encode(dst->encoding());
2600 2600 assert(isShiftCount(imm8), "illegal shift count");
2601 2601 if (imm8 == 1) {
2602 2602 emit_byte(0xD1);
2603 2603 emit_byte(0xF8 | encode);
2604 2604 } else {
2605 2605 emit_byte(0xC1);
2606 2606 emit_byte(0xF8 | encode);
2607 2607 emit_byte(imm8);
2608 2608 }
2609 2609 }
2610 2610
// SAR r/m32, CL (D3 /7): arithmetic right shift by the count in CL.
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
2616 2616
// SBB m32, imm32 (81 /3): rbx encodes the /3 opcode extension for SBB.
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

// SBB r32, imm32 (81 /3; emit_arith may pick the sign-extended 83 form).
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


// SBB r32, m32 (1B /r): subtract with borrow from memory.
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

// SBB r32, r32 (1B /r).
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
2640 2640
// SETcc r/m8 (0F 90+cc /r): set the low byte of dst on condition cc.
// The 'true' argument requests byte-register prefix handling.
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}
2648 2648
2649 2649 void Assembler::shll(Register dst, int imm8) {
2650 2650 assert(isShiftCount(imm8), "illegal shift count");
2651 2651 int encode = prefix_and_encode(dst->encoding());
2652 2652 if (imm8 == 1 ) {
2653 2653 emit_byte(0xD1);
2654 2654 emit_byte(0xE0 | encode);
2655 2655 } else {
2656 2656 emit_byte(0xC1);
2657 2657 emit_byte(0xE0 | encode);
2658 2658 emit_byte(imm8);
2659 2659 }
2660 2660 }
2661 2661
// SHL r/m32, CL (D3 /4): logical left shift by the count in CL.
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
2667 2667
2668 2668 void Assembler::shrl(Register dst, int imm8) {
2669 2669 assert(isShiftCount(imm8), "illegal shift count");
2670 2670 int encode = prefix_and_encode(dst->encoding());
2671 2671 emit_byte(0xC1);
2672 2672 emit_byte(0xE8 | encode);
2673 2673 emit_byte(imm8);
2674 2674 }
2675 2675
// SHR r/m32, CL (D3 /5): logical right shift by the count in CL.
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
2681 2681
// copies a single word from [esi] to [edi]
// MOVS (A5), no REP prefix.
void Assembler::smovl() {
  emit_byte(0xA5);
}
2686 2686
// SQRTSD xmm, xmm (F2 0F 51 /r): scalar double square root.
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  // HMM Table D-1 says sse2
  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_byte(0xC0 | encode);
}

// SQRTSD xmm, m64 (F2 0F 51 /r): memory form.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}

// SQRTSS xmm, xmm (F3 0F 51 /r): scalar float square root.
// NOTE(review): asserts sse2 although SQRTSS is an SSE1 instruction; the
// retained comment shows this is a deliberate conservative choice -- confirm
// before relaxing.
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  // HMM Table D-1 says sse2
  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_byte(0xC0 | encode);
}

// SQRTSS xmm, m32 (F3 0F 51 /r): memory form (same conservative assert).
void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x51);
  emit_operand(dst, src);
}
2728 2728
// STMXCSR m32 (0F AE /3): store the MXCSR control/status register;
// as_Register(3) supplies the /3 opcode-extension field.
void Assembler::stmxcsr( Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(3), dst);
}
2737 2737
// SUB m32, imm32 (81 /5): rbp encodes the /5 opcode extension for SUB.
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// SUB m32, r32 (29 /r).
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

// SUB r32, imm32 (81 /5; emit_arith may pick the sign-extended 83 form).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

// SUB r32, m32 (2B /r).
void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

// SUB r32, r32 (2B /r).
void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
2767 2767
// SUBSD xmm, xmm (F2 0F 5C /r): scalar double subtract.
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

// SUBSD xmm, m64 (F2 0F 5C /r): memory form.
void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}

// SUBSS xmm, xmm (F3 0F 5C /r): scalar float subtract (SSE1).
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_byte(0xC0 | encode);
}

// SUBSS xmm, m32 (F3 0F 5C /r): memory form.
void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}
2805 2805
// TEST: AND operands and set flags, discarding the result.

// test dst, imm8 (byte form, 0xF6 /0)
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);  // byteinst: REX needed for spl/bpl/sil/dil
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

// test dst, imm32
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // rax/eax gets the short accumulator form: A9 imm32
    emit_byte(0xA9);
  } else {
    // general form: F7 /0 imm32
    encode = prefix_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

// test dst, src (0x85; TEST is commutative, so operand order is immaterial)
void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// test dst, [src]
void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}
2838 2838
// Unordered scalar FP compare, setting EFLAGS: UCOMISS (0F 2E, float) and
// UCOMISD (66 0F 2E, double). UCOMISD is encoded by prepending the 0x66
// operand-size prefix to the UCOMISS emitter.

// ucomisd dst, [src]
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);   // 0x66 + UCOMISS encoding == UCOMISD
  ucomiss(dst, src);
}

// ucomisd dst, src
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

// ucomiss dst, [src]
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_operand(dst, src);
}

// ucomiss dst, src
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_byte(0xC0 | encode);
}
2868 2868
2869 2869
// xadd [dst], src (0F C1): exchange and add. Callers add a LOCK prefix
// themselves when atomicity is required.
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// xchg dst, [src] (0x87; implicitly locked by the hardware for memory forms)
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// xchg dst, src (register form)
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}
2890 2890
// 32-bit integer XOR. Opcodes: 0x81 /6 (0xF0 ModRM base) = XOR r/m32,imm32;
// 0x33 = XOR r32,r/m32.

// xor dst, imm32
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

// xor dst, [src]
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

// xor dst, src
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
2907 2907
// Packed bitwise XOR: XORPS (0F 57) and XORPD (66 0F 57). XORPD is encoded
// by prepending the 0x66 prefix to the XORPS encoding.

// xorpd dst, src
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);   // 0x66 + XORPS encoding == XORPD
  xorps(dst, src);
}

// xorpd dst, [src]
void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}


// xorps dst, src
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_byte(0xC0 | encode);
}

// xorps dst, [src]
void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}
2941 2941
2942 2942 #ifndef _LP64
2943 2943 // 32bit only pieces of the assembler
2944 2944
// cmp src1, imm32 where imm32 carries relocation info (e.g. an oop address).
// 0x81 /7 with a register-direct ModRM (0xF8 | reg).
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding());
  emit_data(imm32, rspec, 0);  // relocatable immediate
}

// cmp [src1], imm32 with relocated immediate. rdi (encoding 7) supplies the
// /7 opcode-extension field of the ModRM byte.
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
// Encoding: 0F C7 /1 (rcx supplies the /1 extension). LOCK is added by callers.
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);
}

// One-byte DEC r32 (0x48+r) -- only exists on 32-bit; on 64-bit these bytes
// are REX prefixes.
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_byte(0x48 | dst->encoding());
}
2975 2975
2976 2976 #endif // _LP64
2977 2977
2978 2978 // 64bit typically doesn't use the x87 but needs to for the trig funcs
2979 2979
// x87 floating-point instructions, part 1. emit_farith(op1, op2, i) emits the
// two-byte stack-register form op1, op2+i; the _s/_d suffixed forms take a
// 32/64-bit memory operand, with the register argument to emit_operand32
// supplying the ModRM opcode-extension field (rax=/0, rcx=/1, ... rdi=/7).

// fabs: ST(0) = |ST(0)|
void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}

// fadd ST(0), ST(i)
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}

// fadd ST(0), double at [src]  (DC /0)
void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);
}

// fadd ST(0), float at [src]  (D8 /0)
void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);
}

// fadd ST(i), ST(0)  ("a" = accumulate into ST(i))
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}

// faddp ST(i), ST(0) and pop
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}

// fchs: ST(0) = -ST(0)
void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}

// fcom ST(i): compare ST(0) with ST(i), set FPU condition codes
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}

// fcomp ST(i): compare and pop
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}

// fcomp double at [src] (DC /3)
void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);
}

// fcomp float at [src] (D8 /3)
void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);
}

// fcompp: compare ST(0) with ST(1) and pop both
void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}

// fcos: ST(0) = cos(ST(0))
void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}

// fdecstp: rotate the FPU stack pointer down one
void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}

// fdiv ST(0), ST(i)
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}

// fdiv ST(0), double at [src] (DC /6)
void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);
}

// fdiv ST(0), float at [src] (D8 /6)
void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);
}

// fdiv ST(i), ST(0)
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}
3068 3068
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

// fdivr ST(0), ST(i): reversed divide, ST(0) = ST(i) / ST(0)
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

// fdivr ST(0), double at [src] (DC /7): ST(0) = mem / ST(0)
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

// fdivr ST(0), float at [src] (D8 /7)
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}

// fdivr ST(i), ST(0)
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

// ffree ST(i): mark the stack register as empty
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

// fild 64-bit integer at [adr] (DF /5): push as extended double
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

// fild 32-bit integer at [adr] (DB /0)
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

// fincstp: rotate the FPU stack pointer up one
void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

// finit (wait form 9B DB E3): reinitialize the FPU
void Assembler::finit() {
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}
3126 3126
// x87 part 3: integer stores, loads, constants, log helpers, multiplies.

// fist 32-bit integer to [adr] (DB /2): store ST(0) rounded, no pop
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

// fistp 64-bit integer to [adr] (DF /7): store rounded and pop
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

// fistp 32-bit integer to [adr] (DB /3)
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

// fld1: push +1.0
void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

// fld double at [adr] (DD /0)
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

// fld float at [adr] (D9 /0)
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}


// fld ST(index): push a copy of a stack register
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}

// fld 80-bit extended double at [adr] (DB /5)
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

// fldcw [src] (D9 /5): load FPU control word (rounding mode, precision)
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

// fldenv [src] (D9 /4): load FPU environment
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

// fldlg2: push log10(2)
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

// fldln2: push ln(2)
void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

// fldz: push +0.0
void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// ln(x) pseudo-op: ln(x) = ln(2) * log2(x), computed via fyl2x
void Assembler::flog() {
  fldln2();
  fxch();   // bring x back to ST(0), ln(2) to ST(1)
  fyl2x();  // ST(1) * log2(ST(0)), pops once
}

// log10(x) pseudo-op: log10(x) = log10(2) * log2(x)
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

// fmul ST(0), ST(i)
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

// fmul ST(0), double at [src] (DC /1)
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

// fmul ST(0), float at [src] (D8 /1)
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

// fmul ST(i), ST(0)
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

// fmulp ST(i), ST(0) and pop
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

// fnsave [dst] (DD /6): save FPU state without a preceding wait
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}
3241 3241
// x87 part 4: status/control word, partial remainder, stores, subtracts,
// transcendental helpers.

// Store FPU control word to [src] (9B D9 /7).
// NOTE(review): the leading 0x9B wait prefix makes this the waiting form
// FSTCW rather than FNSTCW, despite the method name -- confirm callers expect
// the wait.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

// fnstsw ax (DF E0): copy FPU status word to AX without waiting
void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

// fprem: partial remainder (truncating), ST(0) = ST(0) rem ST(1)
void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

// fprem1: IEEE partial remainder (round-to-nearest)
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

// frstor [src] (DD /4): restore FPU state saved by fnsave
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

// fsin: ST(0) = sin(ST(0))
void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

// fsqrt: ST(0) = sqrt(ST(0))
void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

// fst double to [adr] (DD /2), no pop
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

// fst float to [adr] (D9 /2), no pop
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

// fstp double to [adr] (DD /3) and pop
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

// fstp ST(index) and pop
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}

// fstp float to [adr] (D9 /3) and pop
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

// fstp 80-bit extended double to [adr] (DB /7) and pop
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}

// fsub ST(0), ST(i)
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

// fsub ST(0), double at [src] (DC /4)
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

// fsub ST(0), float at [src] (D8 /4)
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

// fsub ST(i), ST(0)
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

// fsubr ST(0), ST(i): reversed subtract, ST(0) = ST(i) - ST(0)
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

// fsubr ST(0), double at [src] (DC /5): ST(0) = mem - ST(0)
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

// fsubr ST(0), float at [src] (D8 /5)
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

// fsubr ST(i), ST(0)
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// tan pseudo-op: FPTAN (D9 F2) pushes tan result then 1.0;
// FSTP ST(0) (DD D8) pops the unwanted 1.0
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

// ftst: compare ST(0) with 0.0, set FPU condition codes
void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

// fucomi ST(0), ST(i): unordered compare setting EFLAGS directly
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

// fucomip: as fucomi, then pop
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

// fwait: wait for pending unmasked FPU exceptions
void Assembler::fwait() {
  emit_byte(0x9B);
}

// fxch ST(i): exchange ST(0) with ST(i)
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

// fyl2x: ST(1) = ST(1) * log2(ST(0)), pop
void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}
3398 3398
3399 3399
3400 3400 #ifndef _LP64
3401 3401
// 32-bit-only pieces of the assembler (inside #ifndef _LP64).

// One-byte INC r32 (0x40+r) -- these bytes are REX prefixes on 64-bit.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());
}

// lea is just leal on 32-bit.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// mov dword [dst], imm32 with relocation info (C7 /0 imm32).
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);      // rax encodes the /0 extension
  emit_data((int)imm32, rspec, 0);
}

// mov dst, imm32 with relocation info (B8+r imm32).
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

// popad: pop all general registers (32-bit only encoding)
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

// push imm32 with relocation info (0x68 imm32).
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

// pushad: push all general registers (32-bit only encoding)
void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// setnz dst_byte (0F 95): dst's low byte = (ZF == 0)
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// shld dst, src, cl (0F A5): shift dst left, filling from src
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// shrd dst, src, cl (0F AD): shift dst right, filling from src
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}
3456 3456
3457 3457 #else // LP64
3458 3458
// setnz dst_byte (0F 95) -- LP64 variant; byteinst=true forces a REX prefix
// for spl/bpl/sil/dil so the low byte (not ah..bh) is addressed.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}
3465 3465
3466 3466 // 64bit only pieces of the assembler
3467 3467 // This should only be used by 64bit instructions that can use rip-relative
3468 3468 // it cannot be used by instructions that want an immediate value.
3469 3469
// Returns true when adr can be addressed rip-relatively (disp32) from any
// position this code might occupy; false forces a 64-bit literal instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  // Check both extremes of the code cache; sizeof(int) accounts for the
  // 4-byte displacement field itself.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
3536 3535
// Check if the polling page is not reachable from the code cache using rip-relative
// addressing.
// Both code-cache extremes are tested so the answer holds wherever the
// emitted code ends up within the cache.
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}
3543 +
// Emit a 64-bit data word with relocation. This overload wraps a bare
// relocType into a RelocationHolder (none means no relocation record).
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a 64-bit data word, recording the relocation against the enclosing
// instruction (the caller must hold an InstructionMark).
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}
3561 3568
// REX-prefix helpers: emit the needed REX byte(s) for the given register
// encodings and return the encodings reduced to their low 3 bits for use in
// the ModRM byte. The *q variants additionally set REX.W (64-bit operand).

// Single-register form. byteinst: a byte instruction touching spl/bpl/sil/dil
// (enc 4..7) needs a bare REX so the low byte is addressed instead of ah..bh.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

// Single-register, 64-bit operand form: always needs REX.W.
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

// Two-register form: dst lands in the ModRM reg field (REX.R), src in the
// r/m field (REX.B). Returns the combined reg<<3|rm bits.
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// Two-register, 64-bit operand form.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
3621 3628
// REX prefix for a lone register in the r/m field.
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX prefix for a memory operand alone: base register sets REX.B,
// index register sets REX.X.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// As above with REX.W for a 64-bit operand size (always emits a prefix).
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}
3657 3664
3658 3665
// REX prefix for a memory operand plus a register operand: reg sets REX.R,
// the address's base sets REX.B and index sets REX.X. byteinst: a byte
// instruction using spl/bpl/sil/dil (enc 4..7) needs a bare REX.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// As above with REX.W for 64-bit operand size (always emits a prefix).
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// Memory operand plus XMM register: same REX.R/X/B rules, but XMM registers
// never need the byte-register REX special case.
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
3752 3759
// adc dst, imm32 (64-bit): 0x81 /2 (0xD0 ModRM base), add with carry
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// adc dst, [src] (0x13)
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}
3764 3771
3765 3772 void Assembler::adcq(Register dst, Register src) {
3766 3773 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3767 3774 emit_arith(0x13, 0xC0, dst, src);
3768 3775 }
3769 3776
// 64-bit integer ADD, all operand combinations (REX.W forms of the addl
// encodings): 0x81 /0 imm32, 0x01 = ADD r/m64,r64, 0x03 = ADD r64,r/m64.

// add [dst], imm32
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);  // rax encodes the /0 extension
}

// add [dst], src
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

// add dst, imm32 (sign-extended to 64 bits)
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// add dst, [src]
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

// add dst, src
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
3799 3806
// and dst, imm32 (64-bit): 0x81 /4 (0xE0 ModRM base)
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// and dst, [src] (0x23)
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}
3811 3818
3812 3819 void Assembler::andq(Register dst, Register src) {
3813 3820 (int) prefixq_and_encode(dst->encoding(), src->encoding());
3814 3821 emit_arith(0x23, 0xC0, dst, src);
3815 3822 }
3816 3823
3817 3824 void Assembler::bsfq(Register dst, Register src) {
3818 3825 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3819 3826 emit_byte(0x0F);
3820 3827 emit_byte(0xBC);
3821 3828 emit_byte(0xC0 | encode);
3822 3829 }
3823 3830
// bsrq: BSR r64, r64  (REX.W 0x0F 0xBD /r) -- bit scan reverse.
// On CPUs with LZCNT, an F3-prefixed 0F BD is decoded as LZCNT, which has
// different result semantics; the assert keeps this raw encoding off such CPUs.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
3831 3838
// bswapq: BSWAP r64  (REX.W 0x0F 0xC8+rd) -- byte-swap the 64-bit register.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}
3837 3844
// cdqq: CQO  (REX.W 0x99) -- sign-extend RAX into RDX:RAX.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}
3842 3849
// clflush: CLFLUSH m8  (0x0F 0xAE /7) -- flush the cache line containing adr.
// rdi is not an operand; it supplies the /7 reg-field of the ModRM byte.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);
}
3849 3856
// cmovq: CMOVcc r64, r64  (REX.W 0x0F 0x40+cc /r) -- conditional move.
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}
3856 3863
// cmovq: CMOVcc r64, r/m64  (REX.W 0x0F 0x40+cc /r), memory-source form.
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}
3864 3871
// cmpq: CMP m64, imm32  (REX.W 0x81 /7 id).  rdi supplies the /7 reg field;
// the trailing 4 tells emit_operand that 4 bytes of immediate follow.
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}
3872 3879
// cmpq: CMP r64, imm32  (REX.W 0x81 /7 -- 0xF8 selects reg field 7 = CMP).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
3877 3884
3878 3885 void Assembler::cmpq(Address dst, Register src) {
3879 3886 InstructionMark im(this);
3880 3887 prefixq(dst, src);
3881 3888 emit_byte(0x3B);
3882 3889 emit_operand(src, dst);
3883 3890 }
3884 3891
// cmpq: CMP r64, r64  (REX.W 0x3B /r, register-direct).
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
3889 3896
// cmpq: CMP r64, r/m64  (REX.W 0x3B /r) -- flags reflect dst - [src].
void Assembler::cmpq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}
3896 3903
// cmpxchgq: CMPXCHG r/m64, r64  (REX.W 0x0F 0xB1 /r).  Compares RAX with the
// memory operand; caller must emit any LOCK prefix itself for atomicity.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
3904 3911
// cvtsi2sdq: CVTSI2SD xmm, r64  (0xF2 REX.W 0x0F 0x2A /r) -- int64 -> double.
// The 0xF2 mandatory prefix must precede the REX prefix, hence the emit order.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
3913 3920
// cvtsi2ssq: CVTSI2SS xmm, r64  (0xF3 REX.W 0x0F 0x2A /r) -- int64 -> float.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}
3922 3929
// cvttsd2siq: CVTTSD2SI r64, xmm  (0xF2 REX.W 0x0F 0x2C /r) -- truncating
// double -> int64 conversion.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
3931 3938
// cvttss2siq: CVTTSS2SI r64, xmm  (0xF3 REX.W 0x0F 0x2C /r) -- truncating
// float -> int64 conversion.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
3940 3947
// decl: DEC r32  (0xFF /1 -- 0xC8 selects reg field 1 = DEC).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
3948 3955
// decq: DEC r64  (REX.W 0xFF /1).
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}
3956 3963
// decq: DEC m64  (REX.W 0xFF /1).  rcx supplies the /1 reg field only.
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}
3964 3971
// fxrstor: 0x0F 0xAE /1 with REX.W -- restore FP/SSE state from src.
// as_Register(1) supplies the /1 reg field of the ModRM byte.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}
3971 3978
// fxsave: 0x0F 0xAE /0 with REX.W -- save FP/SSE state to dst.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}
3978 3985
// idivq: IDIV r64  (REX.W 0xF7 /7) -- signed divide RDX:RAX by src.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}
3984 3991
// imulq: IMUL r64, r64  (REX.W 0x0F 0xAF /r) -- two-operand signed multiply.
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}
3991 3998
// imulq: IMUL r64, r64, imm  -- three-operand signed multiply.
// Picks the short imm8 form (0x6B /r ib) when value fits in a signed byte,
// otherwise the imm32 form (0x69 /r id).
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}
4004 4011
// incl: INC r32  (0xFF /0).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
4012 4019
// incq: INC r64  (REX.W 0xFF /0).
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}
4020 4027
// incq: INC m64  (REX.W 0xFF /0).  rax supplies the /0 reg field only.
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}
4028 4035
// lea: pointer-width LEA; on 64-bit this is just leaq.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}
4032 4039
// leaq: LEA r64, m  (REX.W 0x8D /r) -- load effective address, no memory access.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}
4039 4046
// mov64: MOV r64, imm64  (REX.W 0xB8+rd io) -- full 64-bit immediate load.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}
4046 4053
// mov_literal64: same encoding as mov64, but the 64-bit immediate is emitted
// through emit_data64 so it is tracked by the given relocation.
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}
4053 4060
// mov_narrow_oop: MOV r32, imm32  (0xB8+rd id) carrying a narrow-oop
// relocation so the GC can patch the compressed-oop immediate.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4060 4067
// mov_narrow_oop: MOV m32, imm32  (0xC7 /0 id) with a narrow-oop relocation.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4068 4075
// cmp_narrow_oop: CMP r32, imm32  (0x81 /7 id) with a narrow-oop relocation.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4076 4083
// cmp_narrow_oop: CMP m32, imm32  (0x81 /7 id) with a narrow-oop relocation.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_byte(0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4084 4091
// lzcntq: LZCNT r64, r64  (0xF3 REX.W 0x0F 0xBD /r) -- count leading zeros.
// Without LZCNT support the same bytes decode as BSR, hence the assert.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}
4093 4100
// movdq: MOVQ xmm, r64  (0x66 REX.W 0x0F 0x6E /r) -- GPR -> XMM bit copy.
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}
4103 4110
// movdq: MOVQ r64, xmm  (0x66 REX.W 0x0F 0x7E /r) -- XMM -> GPR bit copy.
// 0x7E encodes the XMM register in the ModRM reg field, so the arguments to
// prefixq_and_encode are swapped relative to the C++ parameter order.
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
4114 4121
// movq: MOV r64, r64  (REX.W 0x8B /r).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}
4120 4127
// movq: MOV r64, r/m64  (REX.W 0x8B /r) -- 64-bit load.
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}
4127 4134
// movq: MOV r/m64, r64  (REX.W 0x89 /r) -- 64-bit store.
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}
4134 4141
// movsbq: MOVSX r64, m8  (REX.W 0x0F 0xBE /r) -- sign-extending byte load.
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}
4142 4149
// movsbq: MOVSX r64, r8  (REX.W 0x0F 0xBE /r, register-direct).
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
4149 4156
// movslq(Register, imm32): intended MOV r/m64, imm32 (sign-extended), but the
// emitted bytes are wrong -- 0xC7 | encode ORs the register number into the
// opcode byte itself instead of emitting 0xC7 followed by a ModRM of
// 0xC0 | encode, which is why dbx disassembles it as a bogus store (see below).
// The unconditional ShouldNotReachHere() keeps this dead until fixed.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);
  emit_long(imm32);
}
4160 4167
// movslq: MOV m64, imm32  (REX.W 0xC7 /0 id) -- store sign-extended imm32.
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}
4169 4176
// movslq: MOVSXD r64, m32  (REX.W 0x63 /r) -- sign-extending dword load.
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}
4176 4183
// movslq: MOVSXD r64, r32  (REX.W 0x63 /r, register-direct).
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}
4182 4189
// movswq: MOVSX r64, m16  (REX.W 0x0F 0xBF /r) -- sign-extending word load.
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}
4190 4197
// movswq: MOVSX r64, r16  (REX.W 0x0F 0xBF /r, register-direct).
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
4197 4204
// movzbq: MOVZX r64, m8  (REX.W 0x0F 0xB6 /r) -- zero-extending byte load.
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}
4205 4212
// movzbq: MOVZX r64, r8  (REX.W 0x0F 0xB6 /r, register-direct).
void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}
4212 4219
// movzwq: MOVZX r64, m16  (REX.W 0x0F 0xB7 /r) -- zero-extending word load.
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}
4220 4227
// movzwq: MOVZX r64, r16  (REX.W 0x0F 0xB7 /r, register-direct).
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}
4227 4234
// negq: NEG r64  (REX.W 0xF7 /3) -- two's-complement negate.
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}
4233 4240
// notq: NOT r64  (REX.W 0xF7 /2) -- one's-complement negate.
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}
4239 4246
// orq: OR m64, imm32  (REX.W 0x81 /1 id).  rcx supplies the /1 reg field.
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}
4247 4254
// orq: OR r64, imm32  (REX.W 0x81 /1 -- 0xC8 selects reg field 1 = OR).
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}
4252 4259
// orq: OR r64, r/m64  (REX.W 0x0B /r).
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}
4259 4266
// orq: OR r64, r64  (REX.W 0x0B /r, register-direct).
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
4264 4271
// popa (64-bit): restore all 15 GPRs from the 16-slot frame laid down by
// pusha(), in reverse order, then release the frame.  Slot 11 (rsp) is
// skipped -- rsp is restored implicitly by the final addq.
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}
4285 4292
// popcntq: POPCNT r64, m64  (0xF3 REX.W 0x0F 0xB8 /r).  The mandatory 0xF3
// prefix must come before the REX prefix, hence its emission before prefixq.
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
4295 4302
// popcntq: POPCNT r64, r64  (0xF3 REX.W 0x0F 0xB8 /r, register-direct).
void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}
4304 4311
// popq: POP m64  (0x8F /0).  rax supplies the /0 reg field only.
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
4311 4318
// pusha (64-bit): save all 15 GPRs into a 16-slot frame (slot 11 reserved for
// rsp).  The original rsp is stored first at rsp - 5*wordSize, which after the
// subsequent subq(rsp, 16*wordSize) is exactly slot 11 (11 - 16 == -5); the
// Unix ABI's 128-byte red zone below rsp makes that pre-sub store legal.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}
4336 4343
// pushq: PUSH m64  (0xFF /6).  rsi supplies the /6 reg field only.
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
4343 4350
// rclq: RCL r64, imm8 -- rotate left through carry.  Uses the one-byte form
// 0xD1 /2 for a rotate by 1, otherwise 0xC1 /2 ib.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}
// sarq: SAR r64, imm8 -- arithmetic right shift.  Short form 0xD1 /7 for a
// shift by 1, otherwise 0xC1 /7 ib.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}
4368 4375
// sarq: SAR r64, CL  (REX.W 0xD3 /7) -- shift count taken from CL.
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}
4374 4381
// sbbq: SBB m64, imm32  (REX.W 0x81 /3; rbx supplies the /3 reg field).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
4380 4387
// sbbq: SBB r64, imm32  (REX.W 0x81 /3 -- 0xD8 selects reg field 3 = SBB).
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}
4385 4392
// sbbq: SBB r64, r/m64  (REX.W 0x1B /r).
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}
4392 4399
// sbbq: SBB r64, r64  (REX.W 0x1B /r, register-direct).
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
4397 4404
// shlq: SHL r64, imm8.  Short form 0xD1 /4 for a shift by 1, otherwise
// 0xC1 /4 ib.
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}
4410 4417
// shlq: SHL r64, CL  (REX.W 0xD3 /4).
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}
4416 4423
4417 4424 void Assembler::shrq(Register dst, int imm8) {
4418 4425 assert(isShiftCount(imm8 >> 1), "illegal shift count");
4419 4426 int encode = prefixq_and_encode(dst->encoding());
4420 4427 emit_byte(0xC1);
4421 4428 emit_byte(0xE8 | encode);
4422 4429 emit_byte(imm8);
4423 4430 }
4424 4431
// shrq: SHR r64, CL  (REX.W 0xD3 /5).
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}
4430 4437
// subq: SUB m64, imm32  (REX.W 0x81 /5; rbp supplies the /5 reg field).
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}
4436 4443
// subq: SUB r/m64, r64  (REX.W 0x29 /r) -- subtract src from the memory operand.
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}
4443 4450
// subq: SUB r64, imm32  (REX.W 0x81 /5 -- 0xE8 selects reg field 5 = SUB).
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}
4448 4455
// subq: SUB r64, r/m64  (REX.W 0x2B /r).
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}
4455 4462
// subq: SUB r64, r64  (REX.W 0x2B /r, register-direct).
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
4460 4467
// testq: TEST r64, imm32 (sign-extended).  Uses the one-byte-shorter
// RAX-special form (REX.W 0xA9 id) when dst is rax (encoding 0), otherwise
// the general form REX.W 0xF7 /0 id.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}
4476 4483
// testq: TEST r64, r64  (REX.W 0x85 /r) -- AND without storing, sets flags.
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}
4481 4488
// xaddq: XADD r/m64, r64  (REX.W 0x0F 0xC1 /r) -- exchange-and-add.
// Caller must emit a LOCK prefix itself for atomicity.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}
4489 4496
// xchgq: XCHG r64, m64  (REX.W 0x87 /r) -- implicitly locked by the CPU.
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}
4496 4503
4497 4504 void Assembler::xchgq(Register dst, Register src) {
4498 4505 int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4499 4506 emit_byte(0x87);
4500 4507 emit_byte(0xc0 | encode);
4501 4508 }
4502 4509
// xorq: XOR r64, r64  (REX.W 0x33 /r, register-direct).
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
4507 4514
// xorq: XOR r64, r/m64  (REX.W 0x33 /r).
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}
4514 4521
#endif // _LP64
4516 4523
// Maps each x86 condition code to its logical negation (reverse[cc] == !cc),
// indexed by the Assembler::Condition encoding 0x0..0xF.  Used to invert the
// sense of a conditional jump/cmov without recomputing flags.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};
4536 4543
4537 4544
4538 4545 // Implementation of MacroAssembler
4539 4546
4540 4547 // First all the versions that have distinct versions depending on 32/64 bit
4541 4548 // Unless the difference is trivial (1 line or so).
4542 4549
4543 4550 #ifndef _LP64
4544 4551
4545 4552 // 32bit versions
4546 4553
// 32-bit: an AddressLiteral's absolute target is directly addressable, so
// just wrap it (plus its relocation) in an Address.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}
4550 4557
// 32-bit: delegate to Address::make_array to build the scaled-index form.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}
4554 4561
4555 4562 int MacroAssembler::biased_locking_enter(Register lock_reg,
4556 4563 Register obj_reg,
4557 4564 Register swap_reg,
4558 4565 Register tmp_reg,
4559 4566 bool swap_reg_contains_mark,
4560 4567 Label& done,
4561 4568 Label* slow_case,
4562 4569 BiasedLockingCounters* counters) {
4563 4570 assert(UseBiasedLocking, "why call this otherwise?");
4564 4571 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
4565 4572 assert_different_registers(lock_reg, obj_reg, swap_reg);
4566 4573
4567 4574 if (PrintBiasedLockingStatistics && counters == NULL)
4568 4575 counters = BiasedLocking::counters();
4569 4576
4570 4577 bool need_tmp_reg = false;
4571 4578 if (tmp_reg == noreg) {
4572 4579 need_tmp_reg = true;
4573 4580 tmp_reg = lock_reg;
4574 4581 } else {
4575 4582 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
4576 4583 }
4577 4584 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
4578 4585 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
4579 4586 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
4580 4587 Address saved_mark_addr(lock_reg, 0);
4581 4588
4582 4589 // Biased locking
4583 4590 // See whether the lock is currently biased toward our thread and
4584 4591 // whether the epoch is still valid
4585 4592 // Note that the runtime guarantees sufficient alignment of JavaThread
4586 4593 // pointers to allow age to be placed into low bits
4587 4594 // First check to see whether biasing is even enabled for this object
4588 4595 Label cas_label;
4589 4596 int null_check_offset = -1;
4590 4597 if (!swap_reg_contains_mark) {
4591 4598 null_check_offset = offset();
4592 4599 movl(swap_reg, mark_addr);
4593 4600 }
4594 4601 if (need_tmp_reg) {
4595 4602 push(tmp_reg);
4596 4603 }
4597 4604 movl(tmp_reg, swap_reg);
4598 4605 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
4599 4606 cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
4600 4607 if (need_tmp_reg) {
4601 4608 pop(tmp_reg);
4602 4609 }
4603 4610 jcc(Assembler::notEqual, cas_label);
4604 4611 // The bias pattern is present in the object's header. Need to check
4605 4612 // whether the bias owner and the epoch are both still current.
4606 4613 // Note that because there is no current thread register on x86 we
4607 4614 // need to store off the mark word we read out of the object to
4608 4615 // avoid reloading it and needing to recheck invariants below. This
4609 4616 // store is unfortunate but it makes the overall code shorter and
4610 4617 // simpler.
4611 4618 movl(saved_mark_addr, swap_reg);
4612 4619 if (need_tmp_reg) {
4613 4620 push(tmp_reg);
4614 4621 }
4615 4622 get_thread(tmp_reg);
4616 4623 xorl(swap_reg, tmp_reg);
4617 4624 if (swap_reg_contains_mark) {
4618 4625 null_check_offset = offset();
4619 4626 }
4620 4627 movl(tmp_reg, klass_addr);
4621 4628 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4622 4629 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
4623 4630 if (need_tmp_reg) {
4624 4631 pop(tmp_reg);
4625 4632 }
4626 4633 if (counters != NULL) {
4627 4634 cond_inc32(Assembler::zero,
4628 4635 ExternalAddress((address)counters->biased_lock_entry_count_addr()));
4629 4636 }
4630 4637 jcc(Assembler::equal, done);
4631 4638
4632 4639 Label try_revoke_bias;
4633 4640 Label try_rebias;
4634 4641
4635 4642 // At this point we know that the header has the bias pattern and
4636 4643 // that we are not the bias owner in the current epoch. We need to
4637 4644 // figure out more details about the state of the header in order to
4638 4645 // know what operations can be legally performed on the object's
4639 4646 // header.
4640 4647
4641 4648 // If the low three bits in the xor result aren't clear, that means
4642 4649 // the prototype header is no longer biased and we have to revoke
4643 4650 // the bias on this object.
4644 4651 testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
4645 4652 jcc(Assembler::notZero, try_revoke_bias);
4646 4653
4647 4654 // Biasing is still enabled for this data type. See whether the
4648 4655 // epoch of the current bias is still valid, meaning that the epoch
4649 4656 // bits of the mark word are equal to the epoch bits of the
4650 4657 // prototype header. (Note that the prototype header's epoch bits
4651 4658 // only change at a safepoint.) If not, attempt to rebias the object
4652 4659 // toward the current thread. Note that we must be absolutely sure
4653 4660 // that the current epoch is invalid in order to do this because
4654 4661 // otherwise the manipulations it performs on the mark word are
4655 4662 // illegal.
4656 4663 testl(swap_reg, markOopDesc::epoch_mask_in_place);
4657 4664 jcc(Assembler::notZero, try_rebias);
4658 4665
4659 4666 // The epoch of the current bias is still valid but we know nothing
4660 4667 // about the owner; it might be set or it might be clear. Try to
4661 4668 // acquire the bias of the object using an atomic operation. If this
4662 4669 // fails we will go in to the runtime to revoke the object's bias.
4663 4670 // Note that we first construct the presumed unbiased header so we
4664 4671 // don't accidentally blow away another thread's valid bias.
4665 4672 movl(swap_reg, saved_mark_addr);
4666 4673 andl(swap_reg,
4667 4674 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
4668 4675 if (need_tmp_reg) {
4669 4676 push(tmp_reg);
4670 4677 }
4671 4678 get_thread(tmp_reg);
4672 4679 orl(tmp_reg, swap_reg);
4673 4680 if (os::is_MP()) {
4674 4681 lock();
4675 4682 }
4676 4683 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4677 4684 if (need_tmp_reg) {
4678 4685 pop(tmp_reg);
4679 4686 }
4680 4687 // If the biasing toward our thread failed, this means that
4681 4688 // another thread succeeded in biasing it toward itself and we
4682 4689 // need to revoke that bias. The revocation will occur in the
4683 4690 // interpreter runtime in the slow case.
4684 4691 if (counters != NULL) {
4685 4692 cond_inc32(Assembler::zero,
4686 4693 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
4687 4694 }
4688 4695 if (slow_case != NULL) {
4689 4696 jcc(Assembler::notZero, *slow_case);
4690 4697 }
4691 4698 jmp(done);
4692 4699
4693 4700 bind(try_rebias);
4694 4701 // At this point we know the epoch has expired, meaning that the
4695 4702 // current "bias owner", if any, is actually invalid. Under these
4696 4703 // circumstances _only_, we are allowed to use the current header's
4697 4704 // value as the comparison value when doing the cas to acquire the
4698 4705 // bias in the current epoch. In other words, we allow transfer of
4699 4706 // the bias from one thread to another directly in this situation.
4700 4707 //
4701 4708 // FIXME: due to a lack of registers we currently blow away the age
4702 4709 // bits in this situation. Should attempt to preserve them.
4703 4710 if (need_tmp_reg) {
4704 4711 push(tmp_reg);
4705 4712 }
4706 4713 get_thread(tmp_reg);
4707 4714 movl(swap_reg, klass_addr);
4708 4715 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4709 4716 movl(swap_reg, saved_mark_addr);
4710 4717 if (os::is_MP()) {
4711 4718 lock();
4712 4719 }
4713 4720 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4714 4721 if (need_tmp_reg) {
4715 4722 pop(tmp_reg);
4716 4723 }
4717 4724 // If the biasing toward our thread failed, then another thread
4718 4725 // succeeded in biasing it toward itself and we need to revoke that
4719 4726 // bias. The revocation will occur in the runtime in the slow case.
4720 4727 if (counters != NULL) {
4721 4728 cond_inc32(Assembler::zero,
4722 4729 ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
4723 4730 }
4724 4731 if (slow_case != NULL) {
4725 4732 jcc(Assembler::notZero, *slow_case);
4726 4733 }
4727 4734 jmp(done);
4728 4735
4729 4736 bind(try_revoke_bias);
4730 4737 // The prototype mark in the klass doesn't have the bias bit set any
4731 4738 // more, indicating that objects of this data type are not supposed
4732 4739 // to be biased any more. We are going to try to reset the mark of
4733 4740 // this object to the prototype value and fall through to the
4734 4741 // CAS-based locking scheme. Note that if our CAS fails, it means
4735 4742 // that another thread raced us for the privilege of revoking the
4736 4743 // bias of this particular object, so it's okay to continue in the
4737 4744 // normal locking code.
4738 4745 //
4739 4746 // FIXME: due to a lack of registers we currently blow away the age
4740 4747 // bits in this situation. Should attempt to preserve them.
4741 4748 movl(swap_reg, saved_mark_addr);
4742 4749 if (need_tmp_reg) {
4743 4750 push(tmp_reg);
4744 4751 }
4745 4752 movl(tmp_reg, klass_addr);
4746 4753 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
4747 4754 if (os::is_MP()) {
4748 4755 lock();
4749 4756 }
4750 4757 cmpxchgptr(tmp_reg, Address(obj_reg, 0));
4751 4758 if (need_tmp_reg) {
4752 4759 pop(tmp_reg);
4753 4760 }
4754 4761 // Fall through to the normal CAS-based lock, because no matter what
4755 4762 // the result of the above CAS, some thread must have succeeded in
4756 4763 // removing the bias bit from the object's header.
4757 4764 if (counters != NULL) {
4758 4765 cond_inc32(Assembler::zero,
4759 4766 ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
4760 4767 }
4761 4768
4762 4769 bind(cas_label);
4763 4770
4764 4771 return null_check_offset;
4765 4772 }
// Call a C runtime routine at entry_point. The arguments have already been
// pushed on the stack by the caller; since the callee follows the C calling
// convention on x86-32, the caller pops them, which is done here by bumping
// rsp after the call returns.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  // caller-pops: remove the pushed C arguments
  increment(rsp, number_of_arguments * wordSize);
}
4771 4778
// Compare a memory operand against an oop constant. The oop is embedded as a
// 32-bit immediate and recorded with an oop relocation.
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Register flavor of the oop-immediate compare.
void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4779 4786
// Sign-extend the 32-bit value in 'lo' into the register pair hi:lo.
// When the pair is rdx:rax on a P6-class CPU, the single-byte cdq
// instruction is used; otherwise hi is filled with copies of lo's sign bit.
void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);  // replicate the sign bit across hi
  }
}
4789 4796
// Emit a 5-byte nop encoded as four segment-override prefixes followed by a
// one-byte nop; the fixed 5-byte length lets the whole sequence be replaced
// atomically when patching (see patch_verified_entry).
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}
4798 4805
// Branch to L if FPU condition flag C2 is set. The FPU status word is read
// into ax (clobbering rax, which is saved/restored via 'tmp') and sahf maps
// C2 onto the parity flag, so a 'parity' jump tests C2.
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch taken when C2 (now PF) is set
  jcc(Assembler::parity, L);
}
4808 4815
// Branch to L if FPU condition flag C2 is clear — the inverse of jC2.
// Same C2 -> PF transfer via fnstsw/sahf, then a 'noParity' jump.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch taken when C2 (now PF) is clear
  jcc(Assembler::noParity, L);
}
4818 4825
// Indirect jump through a case-table entry (used for switch dispatch).
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class, so the ArrayAddress is converted first.
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}
4824 4831
// Note: y_lo will be destroyed
// Compare the 64-bit values x_hi:x_lo and y_hi:y_lo and leave -1, 0 or +1
// in x_hi (x < y, x == y, x > y respectively). The high halves are compared
// signed; on equality the low halves are compared unsigned ('below').
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);            // high halves equal: tentative result 0
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);  // unsigned compare of the low halves
  jcc(Assembler::equal, done);
  // fall through: x_lo above y_lo, so x > y

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);             // result +1
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);            // result -1

  bind(done);
}
4850 4857
// Load the literal address itself (not its contents) into dst, recording the
// literal's relocation so it can be patched.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

// Store the literal address into a memory location.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}
4860 4867
// Tear down the current frame: restore rsp from rbp, then pop the saved rbp
// (the mov/pop pair is equivalent to the LEAVE instruction).
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}
4865 4872
// 64-bit multiply of two longs on the stack; clobbers rax, rbx, rcx and rdx.
void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset |
  //          [ y_lo ] /  (in bytes)   | x_rsp_offset
  //          [ y_hi ]                 | (in bytes)
  //            ....                   |
  //          [ x_lo ]                /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}
4905 4912
// Two's-complement negation of the 64-bit value in hi:lo.
// neg(lo) sets the carry when lo != 0; adc folds that borrow into hi
// before hi itself is negated.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}
4911 4918
// 64-bit left shift of hi:lo by the count in rcx (Java << for longs).
void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}
4931 4938
4932 4939
// 64-bit right shift of hi:lo by the count in rcx: arithmetic (Java >>)
// when sign_extension is true, logical (Java >>>) otherwise.
void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}
4953 4960
// Load an oop constant into a register; the oop is embedded as a 32-bit
// immediate and recorded with an oop relocation.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Store an oop constant directly to memory, with the same oop relocation.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}
4961 4968
4962 4969 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
4963 4970 if (src.is_lval()) {
4964 4971 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
4965 4972 } else {
4966 4973 movl(dst, as_Address(src));
4967 4974 }
4968 4975 }
4969 4976
// Store a register into an array slot addressed by an ArrayAddress.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

// Load a register from an array slot addressed by an ArrayAddress.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}
4977 4984
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (real pointers need a relocation record, which a plain immediate lacks).
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}
4982 4989
4983 4990
// Restore rcx, rdx, rdi and rsi, popped in the exact reverse of the order
// push_callee_saved_registers() pushed them.
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}
4990 4997
// Reload the FPU top-of-stack: load the double stored at [rsp] onto the FPU
// stack, then release the two words it occupied.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}
4995 5002
// Save rsi, rdi, rdx and rcx on the stack; undone by
// pop_callee_saved_registers(), which pops in the reverse order.
void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}
5002 5009
// Spill the FPU top-of-stack: reserve two words on the stack, then store
// (and pop) the FPU TOS there as a double. Inverse of pop_fTOS().
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}
5007 5014
5008 5015
// Push an oop constant; embedded as a 32-bit immediate with an oop relocation.
void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}
5012 5019
5013 5020
5014 5021 void MacroAssembler::pushptr(AddressLiteral src) {
5015 5022 if (src.is_lval()) {
5016 5023 push_literal32((int32_t)src.target(), src.rspec());
5017 5024 } else {
5018 5025 pushl(as_Address(src));
5019 5026 }
5020 5027 }
5021 5028
// Materialize the current not-zero condition as a full-word 0/1 in dst.
// NOTE(review): xorl(dst, dst) modifies EFLAGS (it sets ZF), so the setcc
// issued by set_byte_if_not_zero appears to observe the xor's flags, not the
// caller's comparison — verify against set_byte_if_not_zero's implementation
// and this function's call sites.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}
5026 5033
// On 32-bit, VM-call arguments are passed on the stack: each pass_argN
// helper simply pushes its argument. Separate helpers exist so shared
// call_VM code can be written uniformly across 32- and 64-bit (where the
// 64-bit versions move into argument registers instead).
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
5042 5049
5043 5050 #ifndef PRODUCT
5044 5051 extern "C" void findpc(intptr_t x);
5045 5052 #endif
5046 5053
5047 5054 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
5048 5055 // In order to get locks to work, we need to fake a in_VM state
5049 5056 JavaThread* thread = JavaThread::current();
5050 5057 JavaThreadState saved_state = thread->thread_state();
5051 5058 thread->set_thread_state(_thread_in_vm);
5052 5059 if (ShowMessageBoxOnError) {
5053 5060 JavaThread* thread = JavaThread::current();
5054 5061 JavaThreadState saved_state = thread->thread_state();
5055 5062 thread->set_thread_state(_thread_in_vm);
5056 5063 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
5057 5064 ttyLocker ttyl;
5058 5065 BytecodeCounter::print();
5059 5066 }
5060 5067 // To see where a verify_oop failed, get $ebx+40/X for this frame.
5061 5068 // This is the value of eip which points to where verify_oop will return.
5062 5069 if (os::message_box(msg, "Execution stopped, print registers?")) {
5063 5070 ttyLocker ttyl;
5064 5071 tty->print_cr("eip = 0x%08x", eip);
5065 5072 #ifndef PRODUCT
5066 5073 if ((WizardMode || Verbose) && PrintMiscellaneous) {
5067 5074 tty->cr();
5068 5075 findpc(eip);
5069 5076 tty->cr();
5070 5077 }
5071 5078 #endif
5072 5079 tty->print_cr("rax = 0x%08x", rax);
5073 5080 tty->print_cr("rbx = 0x%08x", rbx);
5074 5081 tty->print_cr("rcx = 0x%08x", rcx);
5075 5082 tty->print_cr("rdx = 0x%08x", rdx);
5076 5083 tty->print_cr("rdi = 0x%08x", rdi);
5077 5084 tty->print_cr("rsi = 0x%08x", rsi);
5078 5085 tty->print_cr("rbp = 0x%08x", rbp);
5079 5086 tty->print_cr("rsp = 0x%08x", rsp);
5080 5087 BREAKPOINT;
5081 5088 assert(false, "start up GDB");
5082 5089 }
5083 5090 } else {
5084 5091 ttyLocker ttyl;
5085 5092 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
5086 5093 assert(false, "DEBUG MESSAGE");
5087 5094 }
5088 5095 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
5089 5096 }
5090 5097
// Emit code that halts execution with a message: push the message address
// and the current eip, save all registers, then call debug32 (which prints
// and asserts) and finally hlt.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }               // push eip
  pusha();                                                      // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}
5100 5107
// Emit code that prints a warning message and continues: the full CPU state
// is saved around the call to the VM's warning() routine, so the generated
// code's registers are undisturbed.
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}
5112 5119
5113 5120 #else // _LP64
5114 5121
5115 5122 // 64 bit versions
5116 5123
// Convert a reachable rval AddressLiteral to an Address whose displacement
// is relative to the current pc (rip-relative addressing on amd64).
Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}
5126 5133
// Convert an ArrayAddress to a concrete Address by materializing the array
// base into rscratch1 (which this therefore clobbers) and reusing the
// index/scale of the original.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}
5135 5142
5136 5143 int MacroAssembler::biased_locking_enter(Register lock_reg,
5137 5144 Register obj_reg,
5138 5145 Register swap_reg,
5139 5146 Register tmp_reg,
5140 5147 bool swap_reg_contains_mark,
5141 5148 Label& done,
5142 5149 Label* slow_case,
5143 5150 BiasedLockingCounters* counters) {
5144 5151 assert(UseBiasedLocking, "why call this otherwise?");
5145 5152 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
5146 5153 assert(tmp_reg != noreg, "tmp_reg must be supplied");
5147 5154 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
5148 5155 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
5149 5156 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
5150 5157 Address saved_mark_addr(lock_reg, 0);
5151 5158
5152 5159 if (PrintBiasedLockingStatistics && counters == NULL)
5153 5160 counters = BiasedLocking::counters();
5154 5161
5155 5162 // Biased locking
5156 5163 // See whether the lock is currently biased toward our thread and
5157 5164 // whether the epoch is still valid
5158 5165 // Note that the runtime guarantees sufficient alignment of JavaThread
5159 5166 // pointers to allow age to be placed into low bits
5160 5167 // First check to see whether biasing is even enabled for this object
5161 5168 Label cas_label;
5162 5169 int null_check_offset = -1;
5163 5170 if (!swap_reg_contains_mark) {
5164 5171 null_check_offset = offset();
5165 5172 movq(swap_reg, mark_addr);
5166 5173 }
5167 5174 movq(tmp_reg, swap_reg);
5168 5175 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5169 5176 cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
5170 5177 jcc(Assembler::notEqual, cas_label);
5171 5178 // The bias pattern is present in the object's header. Need to check
5172 5179 // whether the bias owner and the epoch are both still current.
5173 5180 load_prototype_header(tmp_reg, obj_reg);
5174 5181 orq(tmp_reg, r15_thread);
5175 5182 xorq(tmp_reg, swap_reg);
5176 5183 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
5177 5184 if (counters != NULL) {
5178 5185 cond_inc32(Assembler::zero,
5179 5186 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5180 5187 }
5181 5188 jcc(Assembler::equal, done);
5182 5189
5183 5190 Label try_revoke_bias;
5184 5191 Label try_rebias;
5185 5192
5186 5193 // At this point we know that the header has the bias pattern and
5187 5194 // that we are not the bias owner in the current epoch. We need to
5188 5195 // figure out more details about the state of the header in order to
5189 5196 // know what operations can be legally performed on the object's
5190 5197 // header.
5191 5198
5192 5199 // If the low three bits in the xor result aren't clear, that means
5193 5200 // the prototype header is no longer biased and we have to revoke
5194 5201 // the bias on this object.
5195 5202 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
5196 5203 jcc(Assembler::notZero, try_revoke_bias);
5197 5204
5198 5205 // Biasing is still enabled for this data type. See whether the
5199 5206 // epoch of the current bias is still valid, meaning that the epoch
5200 5207 // bits of the mark word are equal to the epoch bits of the
5201 5208 // prototype header. (Note that the prototype header's epoch bits
5202 5209 // only change at a safepoint.) If not, attempt to rebias the object
5203 5210 // toward the current thread. Note that we must be absolutely sure
5204 5211 // that the current epoch is invalid in order to do this because
5205 5212 // otherwise the manipulations it performs on the mark word are
5206 5213 // illegal.
5207 5214 testq(tmp_reg, markOopDesc::epoch_mask_in_place);
5208 5215 jcc(Assembler::notZero, try_rebias);
5209 5216
5210 5217 // The epoch of the current bias is still valid but we know nothing
5211 5218 // about the owner; it might be set or it might be clear. Try to
5212 5219 // acquire the bias of the object using an atomic operation. If this
5213 5220 // fails we will go in to the runtime to revoke the object's bias.
5214 5221 // Note that we first construct the presumed unbiased header so we
5215 5222 // don't accidentally blow away another thread's valid bias.
5216 5223 andq(swap_reg,
5217 5224 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
5218 5225 movq(tmp_reg, swap_reg);
5219 5226 orq(tmp_reg, r15_thread);
5220 5227 if (os::is_MP()) {
5221 5228 lock();
5222 5229 }
5223 5230 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5224 5231 // If the biasing toward our thread failed, this means that
5225 5232 // another thread succeeded in biasing it toward itself and we
5226 5233 // need to revoke that bias. The revocation will occur in the
5227 5234 // interpreter runtime in the slow case.
5228 5235 if (counters != NULL) {
5229 5236 cond_inc32(Assembler::zero,
5230 5237 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
5231 5238 }
5232 5239 if (slow_case != NULL) {
5233 5240 jcc(Assembler::notZero, *slow_case);
5234 5241 }
5235 5242 jmp(done);
5236 5243
5237 5244 bind(try_rebias);
5238 5245 // At this point we know the epoch has expired, meaning that the
5239 5246 // current "bias owner", if any, is actually invalid. Under these
5240 5247 // circumstances _only_, we are allowed to use the current header's
5241 5248 // value as the comparison value when doing the cas to acquire the
5242 5249 // bias in the current epoch. In other words, we allow transfer of
5243 5250 // the bias from one thread to another directly in this situation.
5244 5251 //
5245 5252 // FIXME: due to a lack of registers we currently blow away the age
5246 5253 // bits in this situation. Should attempt to preserve them.
5247 5254 load_prototype_header(tmp_reg, obj_reg);
5248 5255 orq(tmp_reg, r15_thread);
5249 5256 if (os::is_MP()) {
5250 5257 lock();
5251 5258 }
5252 5259 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5253 5260 // If the biasing toward our thread failed, then another thread
5254 5261 // succeeded in biasing it toward itself and we need to revoke that
5255 5262 // bias. The revocation will occur in the runtime in the slow case.
5256 5263 if (counters != NULL) {
5257 5264 cond_inc32(Assembler::zero,
5258 5265 ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
5259 5266 }
5260 5267 if (slow_case != NULL) {
5261 5268 jcc(Assembler::notZero, *slow_case);
5262 5269 }
5263 5270 jmp(done);
5264 5271
5265 5272 bind(try_revoke_bias);
5266 5273 // The prototype mark in the klass doesn't have the bias bit set any
5267 5274 // more, indicating that objects of this data type are not supposed
5268 5275 // to be biased any more. We are going to try to reset the mark of
5269 5276 // this object to the prototype value and fall through to the
5270 5277 // CAS-based locking scheme. Note that if our CAS fails, it means
5271 5278 // that another thread raced us for the privilege of revoking the
5272 5279 // bias of this particular object, so it's okay to continue in the
5273 5280 // normal locking code.
5274 5281 //
5275 5282 // FIXME: due to a lack of registers we currently blow away the age
5276 5283 // bits in this situation. Should attempt to preserve them.
5277 5284 load_prototype_header(tmp_reg, obj_reg);
5278 5285 if (os::is_MP()) {
5279 5286 lock();
5280 5287 }
5281 5288 cmpxchgq(tmp_reg, Address(obj_reg, 0));
5282 5289 // Fall through to the normal CAS-based lock, because no matter what
5283 5290 // the result of the above CAS, some thread must have succeeded in
5284 5291 // removing the bias bit from the object's header.
5285 5292 if (counters != NULL) {
5286 5293 cond_inc32(Assembler::zero,
5287 5294 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
5288 5295 }
5289 5296
5290 5297 bind(cas_label);
5291 5298
5292 5299 return null_check_offset;
5293 5300 }
5294 5301
// Call a C runtime routine (64-bit): arguments are already in registers.
// Ensures the 16-byte stack alignment the C ABI requires at the call, and
// on Windows reserves/releases the register-argument home area.
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for its register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp,  frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}
5328 5335
// 64-bit compare of src1 against the value at an external address. If the
// address is not rip-reachable it is first materialized into rscratch1
// (which is therefore clobbered in that case).
void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}
5339 5346
// Emit a Java-correct 64-bit divide: idivq raises #DE for min_long / -1,
// so that combination is special-cased to yield min_long with remainder 0.
int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();                       // sign-extend rax into rdx:rax
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
5376 5383
5377 5384 void MacroAssembler::decrementq(Register reg, int value) {
5378 5385 if (value == min_jint) { subq(reg, value); return; }
5379 5386 if (value < 0) { incrementq(reg, -value); return; }
5380 5387 if (value == 0) { ; return; }
5381 5388 if (value == 1 && UseIncDec) { decq(reg) ; return; }
5382 5389 /* else */ { subq(reg, value) ; return; }
5383 5390 }
5384 5391
5385 5392 void MacroAssembler::decrementq(Address dst, int value) {
5386 5393 if (value == min_jint) { subq(dst, value); return; }
5387 5394 if (value < 0) { incrementq(dst, -value); return; }
5388 5395 if (value == 0) { ; return; }
5389 5396 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5390 5397 /* else */ { subq(dst, value) ; return; }
5391 5398 }
5392 5399
// Emit a fixed 5-byte nop (66 66 90 66 90) that can be atomically
// overwritten when patching the verified entry point.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommended sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}
5403 5410
5404 5411 void MacroAssembler::incrementq(Register reg, int value) {
5405 5412 if (value == min_jint) { addq(reg, value); return; }
5406 5413 if (value < 0) { decrementq(reg, -value); return; }
5407 5414 if (value == 0) { ; return; }
5408 5415 if (value == 1 && UseIncDec) { incq(reg) ; return; }
5409 5416 /* else */ { addq(reg, value) ; return; }
5410 5417 }
5411 5418
5412 5419 void MacroAssembler::incrementq(Address dst, int value) {
5413 5420 if (value == min_jint) { addq(dst, value); return; }
5414 5421 if (value < 0) { decrementq(dst, -value); return; }
5415 5422 if (value == 0) { ; return; }
5416 5423 if (value == 1 && UseIncDec) { incq(dst) ; return; }
5417 5424 /* else */ { addq(dst, value) ; return; }
5418 5425 }
5419 5426
// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
// 64-bit case-table jump: materialize the table base into rscratch1
// (clobbered) and jump through base + index*scale.
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}
5429 5436
// Not used on 64-bit (longs live in a single register); kept only to
// satisfy the shared interface, and guarded by ShouldNotReachHere().
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}
5434 5441
// Load the literal address itself into dst as a 64-bit immediate, with the
// literal's relocation.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

// Store the literal address to memory; goes through rscratch1 (clobbered)
// since a 64-bit immediate cannot be stored directly.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}
5443 5450
// Tear down the current frame using the single-byte LEAVE instruction.
void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}
5448 5455
// Not used on 64-bit (longs live in a single register); guarded by
// ShouldNotReachHere().
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}
5453 5460
// Load an oop constant as a 64-bit immediate with an oop relocation.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

// Store an oop constant to memory; staged through rscratch1 (clobbered)
// since a 64-bit immediate cannot be stored directly.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}
5462 5469
5463 5470 void MacroAssembler::movptr(Register dst, AddressLiteral src) {
5464 5471 if (src.is_lval()) {
5465 5472 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
5466 5473 } else {
5467 5474 if (reachable(src)) {
5468 5475 movq(dst, as_Address(src));
5469 5476 } else {
5470 5477 lea(rscratch1, src);
5471 5478 movq(dst, Address(rscratch1,0));
5472 5479 }
5473 5480 }
5474 5481 }
5475 5482
// Store a register into an array slot; as_Address clobbers rscratch1.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

// Load a register from an array slot; as_Address clobbers rscratch1.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}
5483 5490
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (real pointers need relocations). The 64-bit immediate is staged through
// rscratch1 (clobbered) because movq cannot store a 64-bit immediate.
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}
5489 5496
// These are mostly for initializing NULL
// Store a sign-extended 32-bit immediate into a pointer-sized memory slot.
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

// Load a sign-extended 32-bit immediate into a register.
void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}
5498 5505
// Push an oop constant on the stack (staged through rscratch1 so the
// oop relocation is recorded by movoop).
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}
5503 5510
// Push either the literal address itself (lval case) or the word it points
// to. lea materializes the address in rscratch1, which both branches need.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}
5512 5519
// Clear the JavaThread's last_Java_frame anchor. sp is always zeroed
// (a zero sp is what marks "no walkable frame"); fp and pc are cleared
// only on request.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}
5527 5534
// Record the current Java frame in the thread's anchor so the stack can be
// walked across the upcoming native call. fp and pc are optional; sp is
// written last (see reset_last_Java_frame: a non-zero sp marks the frame set).
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
5552 5559
5553 5560 static void pass_arg0(MacroAssembler* masm, Register arg) {
5554 5561 if (c_rarg0 != arg ) {
5555 5562 masm->mov(c_rarg0, arg);
5556 5563 }
5557 5564 }
5558 5565
5559 5566 static void pass_arg1(MacroAssembler* masm, Register arg) {
5560 5567 if (c_rarg1 != arg ) {
5561 5568 masm->mov(c_rarg1, arg);
5562 5569 }
5563 5570 }
5564 5571
5565 5572 static void pass_arg2(MacroAssembler* masm, Register arg) {
5566 5573 if (c_rarg2 != arg ) {
5567 5574 masm->mov(c_rarg2, arg);
5568 5575 }
5569 5576 }
5570 5577
5571 5578 static void pass_arg3(MacroAssembler* masm, Register arg) {
5572 5579 if (c_rarg3 != arg ) {
5573 5580 masm->mov(c_rarg3, arg);
5574 5581 }
5575 5582 }
5576 5583
// Emit code that prints msg plus the full register state via debug64, then
// halts. The rip is captured before pusha so debug64 can report where the
// stop was planted.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}
5587 5594
// Emit code that prints a warning message and continues execution.
// The original rsp is pushed first so it can be restored exactly after
// the alignment below; push/pop_CPU_state preserve all registers.
void MacroAssembler::warn(const char* msg) {
  push(rsp);
  andq(rsp, -16); // align stack as required by push_CPU_state and call

  push_CPU_state(); // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  pop(rsp);
}
5598 5605
5599 5606 #ifndef PRODUCT
5600 5607 extern "C" void findpc(intptr_t x);
5601 5608 #endif
5602 5609
// Runtime side of MacroAssembler::stop(): called with the stop message, the
// faulting rip, and a pointer to the pusha register save area. regs[] is
// indexed from the stack top, so regs[0] is r15 (pushed last) and regs[15]
// is rax (pushed first).
// NOTE(review): the "%016lx" formats assume sizeof(long) == 8 (LP64); they
// would truncate on LLP64 targets such as Win64 — confirm build targets.
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8 = 0x%016lx", regs[7]);
      tty->print_cr("r9 = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}
5651 5658
5652 5659 #endif // _LP64
5653 5660
5654 5661 // Now versions that are common to 32/64 bit
5655 5662
// Pointer-sized add of an immediate: addq on 64-bit, addl on 32-bit.
void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}
5659 5666
// Pointer-sized register-to-register add.
void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
5663 5670
// Pointer-sized add of a register into memory.
void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
5667 5674
// Pad the code buffer with nops until the current offset is a multiple of
// modulus.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}
5673 5680
5674 5681 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
5675 5682 if (reachable(src)) {
5676 5683 andpd(dst, as_Address(src));
5677 5684 } else {
5678 5685 lea(rscratch1, src);
5679 5686 andpd(dst, Address(rscratch1, 0));
5680 5687 }
5681 5688 }
5682 5689
// Pointer-sized AND with a sign-extended 32-bit immediate.
void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}
5686 5693
// Atomically increment a 32-bit counter in memory. pushf/popf preserve the
// caller's flags, which the locked increment would otherwise clobber.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}
5694 5701
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}
5717 5724
// If the object's mark word shows the biased-lock pattern, unlocking is a
// no-op: branch straight to done. temp_reg is clobbered.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}
5732 5739
// Normalize a C-style boolean: x := (x == 0 ? 0 : 1).
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  // since C-style booleans are stored in one byte
  // only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}
5741 5748
// Wouldn't need if AddressLiteral version had new name
// Direct call to a (possibly unbound) label.
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}
5746 5753
// Indirect call through a register.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}
5750 5757
5751 5758 void MacroAssembler::call(AddressLiteral entry) {
5752 5759 if (reachable(entry)) {
5753 5760 Assembler::call_literal(entry.target(), entry.rspec());
5754 5761 } else {
5755 5762 lea(rscratch1, entry);
5756 5763 Assembler::call(rscratch1);
5757 5764 }
5758 5765 }
5759 5766
5760 5767 // Implementation of call_VM versions
5761 5768
// call_VM with no arguments. The local call to C plants a return address
// just below rsp, which call_VM_helper uses to derive last_Java_pc; E skips
// over the out-of-line call sequence.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}
5775 5782
// call_VM with one register argument (see the zero-argument form for the
// C/E trampoline explanation).
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}
5791 5798
// call_VM with two register arguments. Arguments are marshaled last-first
// so an earlier move cannot clobber a later source register; the assert
// checks the remaining hazard.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}
5812 5819
// call_VM with three register arguments, marshaled last-first; the asserts
// verify no earlier move smashes a later source register.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}
5838 5845
// call_VM with an explicit last_java_sp and no register arguments.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}
5847 5854
// call_VM with an explicit last_java_sp and one register argument.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}
5856 5863
// call_VM with an explicit last_java_sp and two register arguments,
// marshaled last-first to avoid clobbering later sources.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}
5869 5876
// call_VM with an explicit last_java_sp and three register arguments,
// marshaled last-first to avoid clobbering later sources.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
5885 5892
// The workhorse behind every call_VM variant: sets the last_Java_frame
// anchor, calls the VM entry point with the thread as first argument,
// restores the thread register, clears the anchor, optionally forwards a
// pending exception, and fetches the oop result from the thread.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
#endif // ASSERT

  assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}
5984 5991
// Compute last_Java_sp for the trampoline-style call_VM variants and
// delegate to call_VM_base (rax carries the computed sp).
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
6007 6014
// Leaf VM call (no Java frame anchor, no exception check), no arguments.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
6011 6018
// Leaf VM call with one register argument.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}
6016 6023
// Leaf VM call with two register arguments, marshaled last-first so an
// earlier move cannot clobber a later source register.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}
6024 6031
// Leaf VM call with three register arguments, marshaled last-first.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
6034 6041
// Intentionally empty hook; the interpreter macro assembler overrides it.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
6037 6044
// Intentionally empty hook; the interpreter macro assembler overrides it.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
6040 6047
6041 6048 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
6042 6049 if (reachable(src1)) {
6043 6050 cmpl(as_Address(src1), imm);
6044 6051 } else {
6045 6052 lea(rscratch1, src1);
6046 6053 cmpl(Address(rscratch1, 0), imm);
6047 6054 }
6048 6055 }
6049 6056
6050 6057 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
6051 6058 assert(!src2.is_lval(), "use cmpptr");
6052 6059 if (reachable(src2)) {
6053 6060 cmpl(src1, as_Address(src2));
6054 6061 } else {
6055 6062 lea(rscratch1, src2);
6056 6063 cmpl(src1, Address(rscratch1, 0));
6057 6064 }
6058 6065 }
6059 6066
// 32-bit register/immediate compare.
void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}
6063 6070
// 32-bit register/memory compare.
void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}
6067 6074
// Compare two doubles and materialize the Java fcmpl/fcmpg result
// (-1/0/+1) in dst. ucomisd sets PF on an unordered (NaN) operand, which
// is mapped to -1 or +1 per unordered_is_less.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
6089 6096
// Compare two floats and materialize the Java fcmpl/fcmpg result
// (-1/0/+1) in dst; single-precision twin of cmpsd2int.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
6111 6118
6112 6119
6113 6120 void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
6114 6121 if (reachable(src1)) {
6115 6122 cmpb(as_Address(src1), imm);
6116 6123 } else {
6117 6124 lea(rscratch1, src1);
6118 6125 cmpb(Address(rscratch1, 0), imm);
6119 6126 }
6120 6127 }
6121 6128
// Pointer-sized compare of a register against an AddressLiteral: an lval
// compares against the literal address itself, otherwise against the word
// stored at that address.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}
6141 6148
// Pointer-sized compare of a memory operand against a literal address
// (lval only — a true mem-mem compare does not exist on x86).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}
6152 6159
// Pointer-sized compare-and-exchange against a memory location, with a
// lock prefix on MP systems for atomicity.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}
6165 6172
// Pointer-sized cmpxchg: cmpxchgq on 64-bit, cmpxchgl on 32-bit.
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
6169 6176
6170 6177 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
6171 6178 if (reachable(src)) {
6172 6179 comisd(dst, as_Address(src));
6173 6180 } else {
6174 6181 lea(rscratch1, src);
6175 6182 comisd(dst, Address(rscratch1, 0));
6176 6183 }
6177 6184 }
6178 6185
6179 6186 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
6180 6187 if (reachable(src)) {
6181 6188 comiss(dst, as_Address(src));
6182 6189 } else {
6183 6190 lea(rscratch1, src);
6184 6191 comiss(dst, Address(rscratch1, 0));
6185 6192 }
6186 6193 }
6187 6194
6188 6195
// Atomically increment a 32-bit counter iff cond holds: branch around the
// increment on the negated condition.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}
6196 6203
// Emit idivl with the min_int / -1 special case handled (plain idiv would
// raise #DE on that overflowing division); returns the code offset of the
// idivl itself for implicit-exception bookkeeping.
int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}
6232 6239
6233 6240
6234 6241
// Subtract value from a 32-bit register, choosing the cheapest encoding.
// min_jint cannot be negated, so it goes straight to subl.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}
6242 6249
// Subtract value from a 32-bit memory operand, choosing the cheapest
// encoding; min_jint cannot be negated, so it goes straight to subl.
void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}
6250 6257
// Signed division by a power of two via arithmetic shift: negative values
// are biased by (2^shift - 1) first so the shift rounds toward zero.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}
6267 6274
6268 6275 // !defined(COMPILER2) is because of stupid core builds
6269 6276 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Free all x87 stack registers: EMMS does it in one instruction when MMX
// is available, otherwise ffree each of the eight slots.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
6277 6284 #endif // !LP64 || C1 || !C2
6278 6285
6279 6286
// Defines obj, preserves var_size_in_bytes
// Inline eden allocation: CAS-bump the shared heap top pointer, retrying
// on contention; falls through to slow_case when inline allocation is
// unsupported, on address-space wraparound, or when eden is exhausted.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}
6313 6320
// Set up a standard frame: save caller's rbp and make rbp the frame base.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}
6318 6325
// x87 compare of ST0 with ST1, popping both operands.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}
6322 6329
// x87 compare of ST0 with ST(index), optionally popping either operand.
// Uses fucomi(p) directly into eflags when CMOV is supported; otherwise
// routes the FPU status word through rax (tmp saves/restores rax).
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
6358 6365
// x87 compare of ST0 with ST1 yielding -1/0/+1 in dst; pops both operands.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}
6362 6369
// x87 compare yielding the Java fcmpl/fcmpg result (-1/0/+1) in dst;
// PF (unordered/NaN) maps to -1 or +1 per unordered_is_less.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
6383 6390
// AddressLiteral convenience wrappers for x87 loads (double, float, extended)
// and for loading the FPU control word. These assume the literal address is
// directly encodable (as_Address); no scratch-register fallback is provided.
6384 6391 void MacroAssembler::fld_d(AddressLiteral src) {
6385 6392 fld_d(as_Address(src));
6386 6393 }
6387 6394
6388 6395 void MacroAssembler::fld_s(AddressLiteral src) {
6389 6396 fld_s(as_Address(src));
6390 6397 }
6391 6398
6392 6399 void MacroAssembler::fld_x(AddressLiteral src) {
6393 6400 Assembler::fld_x(as_Address(src));
6394 6401 }
6395 6402
6396 6403 void MacroAssembler::fldcw(AddressLiteral src) {
6397 6404 Assembler::fldcw(as_Address(src));
6398 6405 }
6399 6406
// Pop the x87 stack: free ST0's slot, then advance the top-of-stack pointer.
6400 6407 void MacroAssembler::fpop() {
6401 6408 ffree();
6402 6409 fincstp();
6403 6410 }
6404 6411
// Compute the partial remainder of ST0 / ST1 via fprem, looping until the
// FPU signals completion (fprem may need multiple iterations). The loop test
// differs by platform: 64-bit checks the C2 bit (0x400) of the status word
// directly; 32-bit moves the status flags to eflags with sahf and tests parity.
// 'tmp' is used to preserve rax around fnstsw_ax.
6405 6412 void MacroAssembler::fremr(Register tmp) {
6406 6413 save_rax(tmp);
6407 6414 { Label L;
6408 6415 bind(L);
6409 6416 fprem();
6410 6417 fwait(); fnstsw_ax();
6411 6418 #ifdef _LP64
6412 6419 testl(rax, 0x400);
6413 6420 jcc(Assembler::notEqual, L);
6414 6421 #else
6415 6422 sahf();
6416 6423 jcc(Assembler::parity, L);
6417 6424 #endif // _LP64
6418 6425 }
6419 6426 restore_rax(tmp);
6420 6427 // Result is in ST0.
6421 6428 // Note: fxch & fpop to get rid of ST1
6422 6429 // (otherwise FPU stack could overflow eventually)
6423 6430 fxch(1);
6424 6431 fpop();
6425 6432 }
6426 6433
6427 6434
// Increment a 32-bit value at an absolute address. If the literal is not
// reachable with a 32-bit displacement, the address goes through rscratch1.
// The ArrayAddress overload assumes reachability (no fallback).
6428 6435 void MacroAssembler::incrementl(AddressLiteral dst) {
6429 6436 if (reachable(dst)) {
6430 6437 incrementl(as_Address(dst));
6431 6438 } else {
6432 6439 lea(rscratch1, dst);
6433 6440 incrementl(Address(rscratch1, 0));
6434 6441 }
6435 6442 }
6436 6443
6437 6444 void MacroAssembler::incrementl(ArrayAddress dst) {
6438 6445 incrementl(as_Address(dst));
6439 6446 }
6440 6447
// Add a 32-bit constant, picking the cheapest encoding: inc for +1 (when
// UseIncDec), dec-based path for negatives, nothing for 0, addl otherwise.
// min_jint is special-cased first because negating it (-value) overflows.
6441 6448 void MacroAssembler::incrementl(Register reg, int value) {
6442 6449 if (value == min_jint) {addl(reg, value) ; return; }
6443 6450 if (value < 0) { decrementl(reg, -value); return; }
6444 6451 if (value == 0) { ; return; }
6445 6452 if (value == 1 && UseIncDec) { incl(reg) ; return; }
6446 6453 /* else */ { addl(reg, value) ; return; }
6447 6454 }
6448 6455
6449 6456 void MacroAssembler::incrementl(Address dst, int value) {
6450 6457 if (value == min_jint) {addl(dst, value) ; return; }
6451 6458 if (value < 0) { decrementl(dst, -value); return; }
6452 6459 if (value == 0) { ; return; }
6453 6460 if (value == 1 && UseIncDec) { incl(dst) ; return; }
6454 6461 /* else */ { addl(dst, value) ; return; }
6455 6462 }
6456 6463
// Unconditional jump to an absolute target; falls back to an indirect jmp
// through rscratch1 when the target is out of rip-relative range.
6457 6464 void MacroAssembler::jump(AddressLiteral dst) {
6458 6465 if (reachable(dst)) {
6459 6466 jmp_literal(dst.target(), dst.rspec());
6460 6467 } else {
6461 6468 lea(rscratch1, dst);
6462 6469 jmp(rscratch1);
6463 6470 }
6464 6471 }
6465 6472
// Conditional jump to an absolute target. When reachable, emits a short
// (8-bit) or near (32-bit) Jcc directly; a short form is only used when there
// is no relocation, since a relocated displacement must stay 32 bits wide.
// When unreachable, there is no 64-bit conditional jump, so the condition is
// reversed around an indirect jmp through rscratch1.
6466 6473 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
6467 6474 if (reachable(dst)) {
6468 6475 InstructionMark im(this);
6469 6476 relocate(dst.reloc());
6470 6477 const int short_size = 2;
6471 6478 const int long_size = 6;
6472 6479 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
6473 6480 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
6474 6481 // 0111 tttn #8-bit disp
6475 6482 emit_byte(0x70 | cc);
6476 6483 emit_byte((offs - short_size) & 0xFF);
6477 6484 } else {
6478 6485 // 0000 1111 1000 tttn #32-bit disp
6479 6486 emit_byte(0x0F);
6480 6487 emit_byte(0x80 | cc);
6481 6488 emit_long(offs - long_size);
6482 6489 }
6483 6490 } else {
6484 6491 #ifdef ASSERT
6485 6492 warning("reversing conditional branch");
6486 6493 #endif /* ASSERT */
6487 6494 Label skip;
6488 6495 jccb(reverse[cc], skip);
6489 6496 lea(rscratch1, dst);
6490 6497 Assembler::jmp(rscratch1);
6491 6498 bind(skip);
6492 6499 }
6493 6500 }
6494 6501
// Load the MXCSR (SSE control/status) register from an absolute address,
// via rscratch1 when the address is not directly reachable.
6495 6502 void MacroAssembler::ldmxcsr(AddressLiteral src) {
6496 6503 if (reachable(src)) {
6497 6504 Assembler::ldmxcsr(as_Address(src));
6498 6505 } else {
6499 6506 lea(rscratch1, src);
6500 6507 Assembler::ldmxcsr(Address(rscratch1, 0));
6501 6508 }
6502 6509 }
6503 6510
// Load a byte from 'src' sign-extended into 'dst'. Returns the code offset of
// the load instruction (used by implicit-null-check bookkeeping). 64-bit and
// P6+ use movsx; older 32-bit chips emulate it with shift-left/shift-right.
6504 6511 int MacroAssembler::load_signed_byte(Register dst, Address src) {
6505 6512 int off;
6506 6513 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6507 6514 off = offset();
6508 6515 movsbl(dst, src); // movsxb
6509 6516 } else {
6510 6517 off = load_unsigned_byte(dst, src);
6511 6518 shll(dst, 24);
6512 6519 sarl(dst, 24);
6513 6520 }
6514 6521 return off;
6515 6522 }
6516 6523
6517 6524 // Note: load_signed_short used to be called load_signed_word.
6518 6525 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler
6519 6526 // manual, which means 16 bits, that usage is found nowhere in HotSpot code.
6520 6527 // The term "word" in HotSpot means a 32- or 64-bit machine word.
// Load a 16-bit value from 'src' sign-extended into 'dst'; returns the code
// offset of the load. The pre-P6 fallback emulates movsx with shifts.
6521 6528 int MacroAssembler::load_signed_short(Register dst, Address src) {
6522 6529 int off;
6523 6530 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6524 6531 // This is dubious to me since it seems safe to do a signed 16 => 64 bit
6525 6532 // version but this is what 64bit has always done. This seems to imply
6526 6533 // that users are only using 32bits worth.
6527 6534 off = offset();
6528 6535 movswl(dst, src); // movsxw
6529 6536 } else {
6530 6537 off = load_unsigned_short(dst, src);
6531 6538 shll(dst, 16);
6532 6539 sarl(dst, 16);
6533 6540 }
6534 6541 return off;
6535 6542 }
6536 6543
// Load a byte zero-extended into 'dst'; returns the code offset of the load.
// The xorl+movb fallback (pre-P6 partial-register trick) cannot be used when
// 'src' is addressed through 'dst', since the xor would clobber the address.
6537 6544 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
6538 6545 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6539 6546 // and "3.9 Partial Register Penalties", p. 22).
6540 6547 int off;
6541 6548 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
6542 6549 off = offset();
6543 6550 movzbl(dst, src); // movzxb
6544 6551 } else {
6545 6552 xorl(dst, dst);
6546 6553 off = offset();
6547 6554 movb(dst, src);
6548 6555 }
6549 6556 return off;
6550 6557 }
6551 6558
6552 6559 // Note: load_unsigned_short used to be called load_unsigned_word.
// Load a 16-bit value zero-extended into 'dst'; returns the code offset of
// the load. Same pre-P6 xorl+movw fallback and src.uses(dst) caveat as
// load_unsigned_byte above.
6553 6560 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
6554 6561 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
6555 6562 // and "3.9 Partial Register Penalties", p. 22).
6556 6563 int off;
6557 6564 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
6558 6565 off = offset();
6559 6566 movzwl(dst, src); // movzxw
6560 6567 } else {
6561 6568 xorl(dst, dst);
6562 6569 off = offset();
6563 6570 movw(dst, src);
6564 6571 }
6565 6572 return off;
6566 6573 }
6567 6574
// Load a 1/2/4/8-byte value from 'src' with the requested extension.
// On 32-bit, an 8-byte load is split across two registers: 'dst' gets the
// low word and 'dst2' the high word; 'dst2' is unused otherwise.
6568 6575 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
6569 6576 switch (size_in_bytes) {
6570 6577 #ifndef _LP64
6571 6578 case 8:
6572 6579 assert(dst2 != noreg, "second dest register required");
6573 6580 movl(dst, src);
6574 6581 movl(dst2, src.plus_disp(BytesPerInt));
6575 6582 break;
6576 6583 #else
6577 6584 case 8: movq(dst, src); break;
6578 6585 #endif
6579 6586 case 4: movl(dst, src); break;
6580 6587 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
6581 6588 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
6582 6589 default: ShouldNotReachHere();
6583 6590 }
6584 6591 }
6585 6592
// Store a 1/2/4/8-byte value to 'dst'. On 32-bit, an 8-byte store takes the
// low word from 'src' and the high word from 'src2'; 'src2' is unused otherwise.
6586 6593 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
6587 6594 switch (size_in_bytes) {
6588 6595 #ifndef _LP64
6589 6596 case 8:
6590 6597 assert(src2 != noreg, "second source register required");
6591 6598 movl(dst, src);
6592 6599 movl(dst.plus_disp(BytesPerInt), src2);
6593 6600 break;
6594 6601 #else
6595 6602 case 8: movq(dst, src); break;
6596 6603 #endif
6597 6604 case 4: movl(dst, src); break;
6598 6605 case 2: movw(dst, src); break;
6599 6606 case 1: movb(dst, src); break;
6600 6607 default: ShouldNotReachHere();
6601 6608 }
6602 6609 }
6603 6610
// 32-bit moves between a register and an absolute address, routing through
// rscratch1 when the literal is not reachable with a 32-bit displacement.
6604 6611 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
6605 6612 if (reachable(dst)) {
6606 6613 movl(as_Address(dst), src);
6607 6614 } else {
6608 6615 lea(rscratch1, dst);
6609 6616 movl(Address(rscratch1, 0), src);
6610 6617 }
6611 6618 }
6612 6619
6613 6620 void MacroAssembler::mov32(Register dst, AddressLiteral src) {
6614 6621 if (reachable(src)) {
6615 6622 movl(dst, as_Address(src));
6616 6623 } else {
6617 6624 lea(rscratch1, src);
6618 6625 movl(dst, Address(rscratch1, 0));
6619 6626 }
6620 6627 }
6621 6628
6622 6629 // C++ bool manipulation
6623 6630
// Moves of C++ 'bool' values. sizeof(bool) is implementation-defined, so the
// store/load width is selected at compile time; any other size is unsupported.
6624 6631 void MacroAssembler::movbool(Register dst, Address src) {
6625 6632 if(sizeof(bool) == 1)
6626 6633 movb(dst, src);
6627 6634 else if(sizeof(bool) == 2)
6628 6635 movw(dst, src);
6629 6636 else if(sizeof(bool) == 4)
6630 6637 movl(dst, src);
6631 6638 else
6632 6639 // unsupported
6633 6640 ShouldNotReachHere();
6634 6641 }
6635 6642
6636 6643 void MacroAssembler::movbool(Address dst, bool boolconst) {
6637 6644 if(sizeof(bool) == 1)
6638 6645 movb(dst, (int) boolconst);
6639 6646 else if(sizeof(bool) == 2)
6640 6647 movw(dst, (int) boolconst);
6641 6648 else if(sizeof(bool) == 4)
6642 6649 movl(dst, (int) boolconst);
6643 6650 else
6644 6651 // unsupported
6645 6652 ShouldNotReachHere();
6646 6653 }
6647 6654
6648 6655 void MacroAssembler::movbool(Address dst, Register src) {
6649 6656 if(sizeof(bool) == 1)
6650 6657 movb(dst, src);
6651 6658 else if(sizeof(bool) == 2)
6652 6659 movw(dst, src);
6653 6660 else if(sizeof(bool) == 4)
6654 6661 movl(dst, src);
6655 6662 else
6656 6663 // unsupported
6657 6664 ShouldNotReachHere();
6658 6665 }
6659 6666
// Store an immediate byte into an array element; the address must be reachable.
6660 6667 void MacroAssembler::movbyte(ArrayAddress dst, int src) {
6661 6668 movb(as_Address(dst), src);
6662 6669 }
6663 6670
// Load a double into an XMM register from an absolute address. movsd also
// clears the upper half of the register (preferred when
// UseXmmLoadAndClearUpper); movlpd writes only the low 64 bits. Falls back
// to rscratch1 when the address is unreachable.
6664 6671 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
6665 6672 if (reachable(src)) {
6666 6673 if (UseXmmLoadAndClearUpper) {
6667 6674 movsd (dst, as_Address(src));
6668 6675 } else {
6669 6676 movlpd(dst, as_Address(src));
6670 6677 }
6671 6678 } else {
6672 6679 lea(rscratch1, src);
6673 6680 if (UseXmmLoadAndClearUpper) {
6674 6681 movsd (dst, Address(rscratch1, 0));
6675 6682 } else {
6676 6683 movlpd(dst, Address(rscratch1, 0));
6677 6684 }
6678 6685 }
6679 6686 }
6680 6687
// Load a float into an XMM register from an absolute address (rscratch1 fallback).
6681 6688 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
6682 6689 if (reachable(src)) {
6683 6690 movss(dst, as_Address(src));
6684 6691 } else {
6685 6692 lea(rscratch1, src);
6686 6693 movss(dst, Address(rscratch1, 0));
6687 6694 }
6688 6695 }
6689 6696
// Pointer-sized moves: movq on 64-bit, movl on 32-bit.
6690 6697 void MacroAssembler::movptr(Register dst, Register src) {
6691 6698 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6692 6699 }
6693 6700
6694 6701 void MacroAssembler::movptr(Register dst, Address src) {
6695 6702 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6696 6703 }
6697 6704
6698 6705 // src should NEVER be a real pointer. Use AddressLiteral for true pointers
6699 6706 void MacroAssembler::movptr(Register dst, intptr_t src) {
6700 6707 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
6701 6708 }
6702 6709
6703 6710 void MacroAssembler::movptr(Address dst, Register src) {
6704 6711 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
6705 6712 }
6706 6713
// movss from an absolute address, via rscratch1 when the address is unreachable.
6707 6714 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
6708 6715 if (reachable(src)) {
6709 6716 movss(dst, as_Address(src));
6710 6717 } else {
6711 6718 lea(rscratch1, src);
6712 6719 movss(dst, Address(rscratch1, 0));
6713 6720 }
6714 6721 }
6715 6722
// Explicit null check: when 'offset' is too large for the hardware trap at
// the eventual access site to be relied upon, touch M[reg] now so a NULL reg
// raises the OS exception here. Only condition codes are clobbered.
6716 6723 void MacroAssembler::null_check(Register reg, int offset) {
6717 6724 if (needs_explicit_null_check(offset)) {
6718 6725 // provoke OS NULL exception if reg = NULL by
6719 6726 // accessing M[reg] w/o changing any (non-CC) registers
6720 6727 // NOTE: cmpl is plenty here to provoke a segv
6721 6728 cmpptr(rax, Address(reg, 0));
6722 6729 // Note: should probably use testl(rax, Address(reg, 0));
6723 6730 // may be shorter code (however, this version of
6724 6731 // testl needs to be implemented first)
6725 6732 } else {
6726 6733 // nothing to do, (later) access of M[reg + offset]
6727 6734 // will provoke OS NULL exception if reg = NULL
6728 6735 }
6729 6736 }
6730 6737
// Emit a call to the runtime's breakpoint routine rather than an inline trap.
6731 6738 void MacroAssembler::os_breakpoint() {
6732 6739 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
6733 6740 // (e.g., MSVC can't call ps() otherwise)
6734 6741 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
6735 6742 }
6736 6743
// Restore processor state saved by push_CPU_state, in reverse order:
// FPU/SSE state first (frstor on 32-bit, fxrstor on 64-bit), then the
// integer registers. pop_IU_state drops the 8-byte alignment pad that
// push_IU_state inserted on 64-bit before restoring flags.
6737 6744 void MacroAssembler::pop_CPU_state() {
6738 6745 pop_FPU_state();
6739 6746 pop_IU_state();
6740 6747 }
6741 6748
6742 6749 void MacroAssembler::pop_FPU_state() {
6743 6750 NOT_LP64(frstor(Address(rsp, 0));)
6744 6751 LP64_ONLY(fxrstor(Address(rsp, 0));)
6745 6752 addptr(rsp, FPUStateSizeInWords * wordSize);
6746 6753 }
6747 6754
6748 6755 void MacroAssembler::pop_IU_state() {
6749 6756 popa();
6750 6757 LP64_ONLY(addq(rsp, 8));
6751 6758 popf();
6752 6759 }
6753 6760
6754 6761 // Save Integer and Float state
6755 6762 // Warning: Stack must be 16 byte aligned (64bit)
// Integer state is pushed first, FPU/SSE state second, so pop_CPU_state can
// unwind in the opposite order. fnsave (32-bit) needs a trailing fwait;
// fxsave (64-bit) requires the 16-byte alignment noted above.
6756 6763 void MacroAssembler::push_CPU_state() {
6757 6764 push_IU_state();
6758 6765 push_FPU_state();
6759 6766 }
6760 6767
6761 6768 void MacroAssembler::push_FPU_state() {
6762 6769 subptr(rsp, FPUStateSizeInWords * wordSize);
6763 6770 #ifndef _LP64
6764 6771 fnsave(Address(rsp, 0));
6765 6772 fwait();
6766 6773 #else
6767 6774 fxsave(Address(rsp, 0));
6768 6775 #endif // LP64
6769 6776 }
6770 6777
6771 6778 void MacroAssembler::push_IU_state() {
6772 6779 // Push flags first because pusha kills them
6773 6780 pushf();
6774 6781 // Make sure rsp stays 16-byte aligned
6775 6782 LP64_ONLY(subq(rsp, 8));
6776 6783 pusha();
6777 6784 }
6778 6785
// Clear the JavaThread's last-Java-frame anchor after returning from C land.
// sp must always be zeroed (that is what marks the frame as cleared); fp and
// pc are cleared only on request.
6779 6786 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
6780 6787 // determine java_thread register
6781 6788 if (!java_thread->is_valid()) {
6782 6789 java_thread = rdi;
6783 6790 get_thread(java_thread);
6784 6791 }
6785 6792 // we must set sp to zero to clear frame
6786 6793 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
6787 6794 if (clear_fp) {
6788 6795 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
6789 6796 }
6790 6797
6791 6798 if (clear_pc)
6792 6799 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
6793 6800
6794 6801 }
6795 6802
// save_rax/restore_rax: preserve rax either on the stack (tmp == noreg) or in
// 'tmp'; a no-op when tmp is rax itself. round_to rounds 'reg' up to a
// multiple of 'modulus' using the add/and trick, which requires modulus to be
// a power of two.
6796 6803 void MacroAssembler::restore_rax(Register tmp) {
6797 6804 if (tmp == noreg) pop(rax);
6798 6805 else if (tmp != rax) mov(rax, tmp);
6799 6806 }
6800 6807
6801 6808 void MacroAssembler::round_to(Register reg, int modulus) {
6802 6809 addptr(reg, modulus - 1);
6803 6810 andptr(reg, -modulus);
6804 6811 }
6805 6812
6806 6813 void MacroAssembler::save_rax(Register tmp) {
6807 6814 if (tmp == noreg) push(rax);
6808 6815 else if (tmp != rax) mov(tmp, rax);
6809 6816 }
6810 6817
6811 6818 // Write serialization page so VM thread can do a pseudo remote membar.
6812 6819 // We use the current thread pointer to calculate a thread specific
6813 6820 // offset to write to within the page. This minimizes bus traffic
6814 6821 // due to cache line collision.
// The mask keeps the per-thread offset within one page and int-aligned; the
// store width (movl) must match that masking.
6815 6822 void MacroAssembler::serialize_memory(Register thread, Register tmp) {
6816 6823 movl(tmp, thread);
6817 6824 shrl(tmp, os::get_serialize_page_shift_count());
6818 6825 andl(tmp, (os::vm_page_size() - sizeof(int)));
6819 6826
6820 6827 Address index(noreg, tmp, Address::times_1);
6821 6828 ExternalAddress page(os::get_memory_serialize_page());
6822 6829
6823 6830 // Size of store must match masking code above
6824 6831 movl(as_Address(ArrayAddress(page, index)), tmp);
6825 6832 }
6826 6833
6827 6834 // Calls to C land
6828 6835 //
6829 6836 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded
6830 6837 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
6831 6838 // has to be reset to 0. This is required to allow proper stack traversal.
// Record the last Java sp/fp/pc in the JavaThread anchor before entering C
// land (see the comment block above). fp and pc are optional; defaults are
// substituted for invalid thread/sp registers.
6832 6839 void MacroAssembler::set_last_Java_frame(Register java_thread,
6833 6840 Register last_java_sp,
6834 6841 Register last_java_fp,
6835 6842 address last_java_pc) {
6836 6843 // determine java_thread register
6837 6844 if (!java_thread->is_valid()) {
6838 6845 java_thread = rdi;
6839 6846 get_thread(java_thread);
6840 6847 }
6841 6848 // determine last_java_sp register
6842 6849 if (!last_java_sp->is_valid()) {
6843 6850 last_java_sp = rsp;
6844 6851 }
6845 6852
6846 6853 // last_java_fp is optional
6847 6854
6848 6855 if (last_java_fp->is_valid()) {
6849 6856 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
6850 6857 }
6851 6858
6852 6859 // last_java_pc is optional
6853 6860
6854 6861 if (last_java_pc != NULL) {
6855 6862 lea(Address(java_thread,
6856 6863 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
6857 6864 InternalAddress(last_java_pc));
6858 6865
6859 6866 }
// sp is stored last — presumably it publishes the anchor as complete, since
// reset_last_Java_frame clears the frame by zeroing sp. TODO(review): confirm.
6860 6867 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
6861 6868 }
6862 6869
// Pointer-sized shifts: 64-bit forms on LP64, 32-bit forms otherwise.
6863 6870 void MacroAssembler::shlptr(Register dst, int imm8) {
6864 6871 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
6865 6872 }
6866 6873
6867 6874 void MacroAssembler::shrptr(Register dst, int imm8) {
6868 6875 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
6869 6876 }
6870 6877
// Sign-extend the low byte of 'reg' to 32 bits. movsx needs a byte-addressable
// register; pre-P6 chips or registers without a byte form use the shift pair.
6871 6878 void MacroAssembler::sign_extend_byte(Register reg) {
6872 6879 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
6873 6880 movsbl(reg, reg); // movsxb
6874 6881 } else {
6875 6882 shll(reg, 24);
6876 6883 sarl(reg, 24);
6877 6884 }
6878 6885 }
↓ open down ↓ |
3332 lines elided |
↑ open up ↑ |
6879 6886
// Sign-extend the low 16 bits of 'reg' to 32 bits (shift pair on pre-P6).
6880 6887 void MacroAssembler::sign_extend_short(Register reg) {
6881 6888 if (LP64_ONLY(true ||) VM_Version::is_P6()) {
6882 6889 movswl(reg, reg); // movsxw
6883 6890 } else {
6884 6891 shll(reg, 16);
6885 6892 sarl(reg, 16);
6886 6893 }
6887 6894 }
6888 6895
// testl against a 32-bit value at an absolute address. Unlike the removed
// test32, this asserts reachability instead of spilling to rscratch1.
6896 +void MacroAssembler::testl(Register dst, AddressLiteral src) {
6897 + assert(reachable(src), "Address should be reachable");
6898 + testl(dst, as_Address(src));
6899 +}
6900 +
6889 6901 //////////////////////////////////////////////////////////////////////////////////
6890 6902 #ifndef SERIALGC
6891 6903
// G1 SATB pre-barrier. 'obj' holds the address of the oop field about to be
// overwritten. If concurrent marking is active and the field's current value
// is non-NULL, that value is recorded in the thread's SATB queue; when the
// queue is full (index == 0) the runtime leaf SharedRuntime::g1_wb_pre is
// called instead. tmp/tmp2 are clobbered; rax is preserved when tosca_live.
6892 6904 void MacroAssembler::g1_write_barrier_pre(Register obj,
6893 6905 #ifndef _LP64
6894 6906 Register thread,
6895 6907 #endif
6896 6908 Register tmp,
6897 6909 Register tmp2,
6898 6910 bool tosca_live) {
6899 6911 LP64_ONLY(Register thread = r15_thread;)
6900 6912 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6901 6913 PtrQueue::byte_offset_of_active()));
6902 6914
6903 6915 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6904 6916 PtrQueue::byte_offset_of_index()));
6905 6917 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
6906 6918 PtrQueue::byte_offset_of_buf()));
6907 6919
6908 6920
6909 6921 Label done;
6910 6922 Label runtime;
6911 6923
6912 6924 // if (!marking_in_progress) goto done;
6913 6925 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
6914 6926 cmpl(in_progress, 0);
6915 6927 } else {
6916 6928 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
6917 6929 cmpb(in_progress, 0);
6918 6930 }
6919 6931 jcc(Assembler::equal, done);
6920 6932
6921 6933 // if (x.f == NULL) goto done;
6922 6934 #ifdef _LP64
6923 6935 load_heap_oop(tmp2, Address(obj, 0));
6924 6936 #else
6925 6937 movptr(tmp2, Address(obj, 0));
6926 6938 #endif
6927 6939 cmpptr(tmp2, (int32_t) NULL_WORD);
6928 6940 jcc(Assembler::equal, done);
6929 6941
6930 6942 // Can we store original value in the thread's buffer?
6931 6943
// The queue index is a byte offset that counts down; 0 means the buffer is full.
6932 6944 #ifdef _LP64
6933 6945 movslq(tmp, index);
6934 6946 cmpq(tmp, 0);
6935 6947 #else
6936 6948 cmpl(index, 0);
6937 6949 #endif
6938 6950 jcc(Assembler::equal, runtime);
6939 6951 #ifdef _LP64
6940 6952 subq(tmp, wordSize);
6941 6953 movl(index, tmp);
6942 6954 addq(tmp, buffer);
6943 6955 #else
6944 6956 subl(index, wordSize);
6945 6957 movl(tmp, buffer);
6946 6958 addl(tmp, index);
6947 6959 #endif
6948 6960 movptr(Address(tmp, 0), tmp2);
6949 6961 jmp(done);
6950 6962 bind(runtime);
6951 6963 // save the live input values
6952 6964 if(tosca_live) push(rax);
6953 6965 push(obj);
6954 6966 #ifdef _LP64
6955 6967 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
6956 6968 #else
6957 6969 push(thread);
6958 6970 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
6959 6971 pop(thread);
6960 6972 #endif
6961 6973 pop(obj);
6962 6974 if(tosca_live) pop(rax);
6963 6975 bind(done);
6964 6976
6965 6977 }
6966 6978
// G1 post-barrier for a completed store of 'new_val' at 'store_addr'.
// Only stores that cross heap regions and store a non-NULL oop are logged:
// the corresponding card is dirtied (0 is the dirty value — see the
// preceding cmpb against 0) and its address is pushed on the thread's
// dirty-card queue, calling SharedRuntime::g1_wb_post when the queue is full.
// tmp/tmp2 (and rscratch1 on 64-bit) are clobbered.
6967 6979 void MacroAssembler::g1_write_barrier_post(Register store_addr,
6968 6980 Register new_val,
6969 6981 #ifndef _LP64
6970 6982 Register thread,
6971 6983 #endif
6972 6984 Register tmp,
6973 6985 Register tmp2) {
6974 6986
6975 6987 LP64_ONLY(Register thread = r15_thread;)
6976 6988 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6977 6989 PtrQueue::byte_offset_of_index()));
6978 6990 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
6979 6991 PtrQueue::byte_offset_of_buf()));
6980 6992 BarrierSet* bs = Universe::heap()->barrier_set();
6981 6993 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
6982 6994 Label done;
6983 6995 Label runtime;
6984 6996
6985 6997 // Does store cross heap regions?
6986 6998
// xor + shift leaves zero iff both addresses share the same region.
6987 6999 movptr(tmp, store_addr);
6988 7000 xorptr(tmp, new_val);
6989 7001 shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
6990 7002 jcc(Assembler::equal, done);
6991 7003
6992 7004 // crosses regions, storing NULL?
6993 7005
6994 7006 cmpptr(new_val, (int32_t) NULL_WORD);
6995 7007 jcc(Assembler::equal, done);
6996 7008
6997 7009 // storing region crossing non-NULL, is card already dirty?
6998 7010
6999 7011 ExternalAddress cardtable((address) ct->byte_map_base);
7000 7012 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
7001 7013 #ifdef _LP64
7002 7014 const Register card_addr = tmp;
7003 7015
7004 7016 movq(card_addr, store_addr);
7005 7017 shrq(card_addr, CardTableModRefBS::card_shift);
7006 7018
7007 7019 lea(tmp2, cardtable);
7008 7020
7009 7021 // get the address of the card
7010 7022 addq(card_addr, tmp2);
7011 7023 #else
// Note: card_index and card_addr alias the same register; after the lea,
// tmp holds the card's address.
7012 7024 const Register card_index = tmp;
7013 7025
7014 7026 movl(card_index, store_addr);
7015 7027 shrl(card_index, CardTableModRefBS::card_shift);
7016 7028
7017 7029 Address index(noreg, card_index, Address::times_1);
7018 7030 const Register card_addr = tmp;
7019 7031 lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
7020 7032 #endif
7021 7033 cmpb(Address(card_addr, 0), 0);
7022 7034 jcc(Assembler::equal, done);
7023 7035
7024 7036 // storing a region crossing, non-NULL oop, card is clean.
7025 7037 // dirty card and log.
7026 7038
7027 7039 movb(Address(card_addr, 0), 0);
7028 7040
7029 7041 cmpl(queue_index, 0);
7030 7042 jcc(Assembler::equal, runtime);
7031 7043 subl(queue_index, wordSize);
7032 7044 movptr(tmp2, buffer);
7033 7045 #ifdef _LP64
7034 7046 movslq(rscratch1, queue_index);
7035 7047 addq(tmp2, rscratch1);
7036 7048 movq(Address(tmp2, 0), card_addr);
7037 7049 #else
7038 7050 addl(tmp2, queue_index);
7039 7051 movl(Address(tmp2, 0), card_index);
7040 7052 #endif
7041 7053 jmp(done);
7042 7054
7043 7055 bind(runtime);
7044 7056 // save the live input values
7045 7057 push(store_addr);
7046 7058 push(new_val);
7047 7059 #ifdef _LP64
7048 7060 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
7049 7061 #else
7050 7062 push(thread);
7051 7063 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
7052 7064 pop(thread);
7053 7065 #endif
7054 7066 pop(new_val);
7055 7067 pop(store_addr);
7056 7068
7057 7069 bind(done);
7058 7070
7059 7071 }
7060 7072
7061 7073 #endif // SERIALGC
7062 7074 //////////////////////////////////////////////////////////////////////////////////
7063 7075
7064 7076
// Card-table store check: dirties the card covering 'obj'. Clobbers obj.
// The (Register, Address) overload ignores 'dst' — the card is derived from
// the object address alone.
7065 7077 void MacroAssembler::store_check(Register obj) {
7066 7078 // Does a store check for the oop in register obj. The content of
7067 7079 // register obj is destroyed afterwards.
7068 7080 store_check_part_1(obj);
7069 7081 store_check_part_2(obj);
7070 7082 }
7071 7083
7072 7084 void MacroAssembler::store_check(Register obj, Address dst) {
7073 7085 store_check(obj);
7074 7086 }
7075 7087
7076 7088
7077 7089 // split the store check operation so that other instructions can be scheduled inbetween
// First half of the split store check: turn the oop address in 'obj' into a
// card-table index (address >> card_shift). Only valid for CardTableModRef.
7078 7090 void MacroAssembler::store_check_part_1(Register obj) {
7079 7091 BarrierSet* bs = Universe::heap()->barrier_set();
7080 7092 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
7081 7093 shrptr(obj, CardTableModRefBS::card_shift);
7082 7094 }
7083 7095
// Second half of the split store check: 'obj' now holds a card index; write
// 0 at byte_map_base[index] to dirty the card. byte_map_base is used as a
// plain displacement when it fits in 32 bits; otherwise it goes through an
// ExternalAddress (see the discussion below about rip-relative forms).
7084 7096 void MacroAssembler::store_check_part_2(Register obj) {
7085 7097 BarrierSet* bs = Universe::heap()->barrier_set();
7086 7098 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
7087 7099 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
7088 7100 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
7089 7101
7090 7102 // The calculation for byte_map_base is as follows:
7091 7103 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
7092 7104 // So this essentially converts an address to a displacement and
7093 7105 // it will never need to be relocated. On 64bit however the value may be too
7094 7106 // large for a 32bit displacement
7095 7107
7096 7108 intptr_t disp = (intptr_t) ct->byte_map_base;
7097 7109 if (is_simm32(disp)) {
7098 7110 Address cardtable(noreg, obj, Address::times_1, disp);
7099 7111 movb(cardtable, 0);
7100 7112 } else {
7101 7113 // By doing it as an ExternalAddress disp could be converted to a rip-relative
7102 7114 // displacement and done in a single instruction given favorable mapping and
7103 7115 // a smarter version of as_Address. Worst case it is two instructions which
7104 7116 // is no worse off then loading disp into a register and doing as a simple
7105 7117 // Address() as above.
7106 7118 // We can't do as ExternalAddress as the only style since if disp == 0 we'll
7107 7119 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
7108 7120 // in some cases we'll get a single instruction version.
7109 7121
7110 7122 ExternalAddress cardtable((address)disp);
7111 7123 Address index(noreg, obj, Address::times_1);
7112 7124 movb(as_Address(ArrayAddress(cardtable, index)), 0);
7113 7125 }
↓ open down ↓ |
215 lines elided |
↑ open up ↑ |
7114 7126 }
7115 7127
// Pointer-sized subtraction: subq on 64-bit, subl on 32-bit.
7116 7128 void MacroAssembler::subptr(Register dst, int32_t imm32) {
7117 7129 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
7118 7130 }
7119 7131
7120 7132 void MacroAssembler::subptr(Register dst, Register src) {
7121 7133 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
7122 7134 }
7123 7135
// (Removed by this change.) test32 tolerated unreachable literals by spilling
// through rscratch1; its replacement, testl(Register, AddressLiteral) above,
// asserts reachability instead.
7124 -void MacroAssembler::test32(Register src1, AddressLiteral src2) {
7125 - // src2 must be rval
7126 -
7127 - if (reachable(src2)) {
7128 - testl(src1, as_Address(src2));
7129 - } else {
7130 - lea(rscratch1, src2);
7131 - testl(src1, Address(rscratch1, 0));
7132 - }
7133 -}
7134 -
7134 -
7135 7136 // C++ bool manipulation
// Test a C++ 'bool' for truth, sized by sizeof(bool); 2-byte bools would need
// a testw implementation and are currently unsupported.
7136 7137 void MacroAssembler::testbool(Register dst) {
7137 7138 if(sizeof(bool) == 1)
7138 7139 testb(dst, 0xff);
7139 7140 else if(sizeof(bool) == 2) {
7140 7141 // testw implementation needed for two byte bools
7141 7142 ShouldNotReachHere();
7142 7143 } else if(sizeof(bool) == 4)
7143 7144 testl(dst, dst);
7144 7145 else
7145 7146 // unsupported
7146 7147 ShouldNotReachHere();
7147 7148 }
7148 7149
// Pointer-sized test: testq on 64-bit, testl on 32-bit.
7149 7150 void MacroAssembler::testptr(Register dst, Register src) {
7150 7151 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
7151 7152 }
7152 7153
7153 7154 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation from the thread's TLAB: 'obj' gets the current top,
// 'end' (t2) the proposed new top. Jumps to slow_case when the TLAB cannot
// satisfy the request. Size is either the constant con_size_in_bytes or the
// register var_size_in_bytes; when t2 aliases var_size_in_bytes the size is
// recomputed afterwards (end - obj).
7154 7155 void MacroAssembler::tlab_allocate(Register obj,
7155 7156 Register var_size_in_bytes,
7156 7157 int con_size_in_bytes,
7157 7158 Register t1,
7158 7159 Register t2,
7159 7160 Label& slow_case) {
7160 7161 assert_different_registers(obj, t1, t2);
7161 7162 assert_different_registers(obj, var_size_in_bytes, t1);
7162 7163 Register end = t2;
7163 7164 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
7164 7165
7165 7166 verify_tlab();
7166 7167
7167 7168 NOT_LP64(get_thread(thread));
7168 7169
7169 7170 movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
7170 7171 if (var_size_in_bytes == noreg) {
7171 7172 lea(end, Address(obj, con_size_in_bytes));
7172 7173 } else {
7173 7174 lea(end, Address(obj, var_size_in_bytes, Address::times_1));
7174 7175 }
7175 7176 cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
7176 7177 jcc(Assembler::above, slow_case);
7177 7178
7178 7179 // update the tlab top pointer
7179 7180 movptr(Address(thread, JavaThread::tlab_top_offset()), end);
7180 7181
7181 7182 // recover var_size_in_bytes if necessary
7182 7183 if (var_size_in_bytes == end) {
7183 7184 subptr(var_size_in_bytes, obj);
7184 7185 }
7185 7186 verify_tlab();
7186 7187 }
7187 7188
7188 7189 // Preserves rbx, and rdx.
// Refill the thread's TLAB from eden, or route the caller elsewhere:
// - jumps to slow_case when inline contiguous allocation is unavailable;
// - jumps to try_eden when the current TLAB's free space exceeds the
//   refill-waste limit (the TLAB is retained);
// - otherwise fills the discarded TLAB remainder with a filler int-array
//   (keeping the heap walkable), allocates a fresh TLAB from eden, and
//   jumps back to 'retry'.
// Returns the thread register for the caller's use. Uses rax/rcx/rsi (+rdi
// on 32-bit); rbx and rdx are preserved as noted above.
7189 7190 Register MacroAssembler::tlab_refill(Label& retry,
7190 7191 Label& try_eden,
7191 7192 Label& slow_case) {
7192 7193 Register top = rax;
7193 7194 Register t1 = rcx;
7194 7195 Register t2 = rsi;
7195 7196 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
7196 7197 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
7197 7198 Label do_refill, discard_tlab;
7198 7199
7199 7200 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
7200 7201 // No allocation in the shared eden.
7201 7202 jmp(slow_case);
7202 7203 }
7203 7204
7204 7205 NOT_LP64(get_thread(thread_reg));
7205 7206
7206 7207 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
7207 7208 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
7208 7209
7209 7210 // calculate amount of free space
7210 7211 subptr(t1, top);
7211 7212 shrptr(t1, LogHeapWordSize);
7212 7213
7213 7214 // Retain tlab and allocate object in shared space if
7214 7215 // the amount free in the tlab is too large to discard.
7215 7216 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
7216 7217 jcc(Assembler::lessEqual, discard_tlab);
7217 7218
7218 7219 // Retain
7219 7220 // %%% yuck as movptr...
7220 7221 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
7221 7222 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
7222 7223 if (TLABStats) {
7223 7224 // increment number of slow_allocations
7224 7225 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
7225 7226 }
7226 7227 jmp(try_eden);
7227 7228
7228 7229 bind(discard_tlab);
7229 7230 if (TLABStats) {
7230 7231 // increment number of refills
7231 7232 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
7232 7233 // accumulate wastage -- t1 is amount free in tlab
7233 7234 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
7234 7235 }
7235 7236
7236 7237 // if tlab is currently allocated (top or end != null) then
7237 7238 // fill [top, end + alignment_reserve) with array object
7238 7239 testptr(top, top);
7239 7240 jcc(Assembler::zero, do_refill);
7240 7241
7241 7242 // set up the mark word
7242 7243 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
7243 7244 // set the length to the remaining space
7244 7245 subptr(t1, typeArrayOopDesc::header_size(T_INT));
7245 7246 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
7246 7247 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
7247 7248 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
7248 7249 // set klass to intArrayKlass
7249 7250 // dubious reloc why not an oop reloc?
7250 7251 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
7251 7252 // store klass last. concurrent gcs assumes klass length is valid if
7252 7253 // klass field is not null.
7253 7254 store_klass(top, t1);
7254 7255
7255 7256 movptr(t1, top);
7256 7257 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
7257 7258 incr_allocated_bytes(thread_reg, t1, 0);
7258 7259
7259 7260 // refill the tlab with an eden allocation
7260 7261 bind(do_refill);
7261 7262 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7262 7263 shlptr(t1, LogHeapWordSize);
7263 7264 // allocate new tlab, address returned in top
7264 7265 eden_allocate(top, t1, 0, t2, slow_case);
7265 7266
7266 7267 // Check that t1 was preserved in eden_allocate.
7267 7268 #ifdef ASSERT
7268 7269 if (UseTLAB) {
7269 7270 Label ok;
7270 7271 Register tsize = rsi;
7271 7272 assert_different_registers(tsize, thread_reg, t1);
7272 7273 push(tsize);
7273 7274 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
7274 7275 shlptr(tsize, LogHeapWordSize);
7275 7276 cmpptr(t1, tsize);
7276 7277 jcc(Assembler::equal, ok);
7277 7278 stop("assert(t1 != tlab size)");
7278 7279 should_not_reach_here();
7279 7280
7280 7281 bind(ok);
7281 7282 pop(tsize);
7282 7283 }
7283 7284 #endif
7284 7285 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
7285 7286 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
7286 7287 addptr(top, t1);
7287 7288 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
7288 7289 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
7289 7290 verify_tlab();
7290 7291 jmp(retry);
7291 7292
7292 7293 return thread_reg; // for use by caller
7293 7294 }
7294 7295
// Accumulate into JavaThread::_allocated_bytes the size (in bytes) of an
// allocation just performed by this thread.  The size comes from
// var_size_in_bytes when that register is valid, otherwise from the
// compile-time constant con_size_in_bytes.
//
// thread            - register holding the current JavaThread*; on 32-bit it
//                     may be noreg, in which case t1 is clobbered to load it
// var_size_in_bytes - runtime allocation size, or noreg to use the constant
// con_size_in_bytes - constant allocation size (ignored if var_size is valid)
// t1                - temp register (needed on 32-bit only when thread is noreg)
void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
#ifdef _LP64
  // 64-bit: the counter is updated with a single addq.
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (!thread->is_valid()) {
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
  }

  // 32-bit: the counter is 64 bits wide, so add into the low word and then
  // propagate the carry into the high word with adcl.  The adcl must
  // immediately follow the addl so the carry flag is still live.
  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}
7320 7321
// pi/4: trigfunc() compares fabs(argument) against this to decide whether
// fsin/fcos/ftan can be used directly (full precision inside (-pi/4, pi/4)).
static const double pi_4 = 0.7853981633974483;
7322 7323
// Emit code computing sin, cos or tan (selected by 'trig': 's', 'c' or 't')
// of the value on top of the x87 FPU stack, leaving the result in F-TOS.
// Arguments in (-pi/4, pi/4) are handled inline with fsin/fcos/ftan; larger
// arguments fall back to a runtime call (SharedRuntime::dsin/dcos/dtan).
// num_fpu_regs_in_use says how many live x87 stack slots must be spilled
// around the runtime call.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    // Reload the argument (spilled last, so it is deepest on the stack).
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  // Pass the argument on the C stack (and, on 64-bit, in xmm0 for the ABI).
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  // 64-bit ABI returns the result in xmm0; move it back to the x87 stack.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}
7443 7444
7444 7445
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
// Clobbers: recv_klass and scan_temp are overwritten during the scan.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  // Load the vtable length so we can skip over the vtable to the itable.
  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The first iteration is peeled (peel == 1) so the common case -- a hit on
  // the very first itable entry -- takes the short fall-through path.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel) break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}
7518 7519
7519 7520
// Full subtype check: branches to L_success if sub_klass is a subtype of
// super_klass, otherwise falls through.  Convenience wrapper that chains the
// fast path (supertype display / secondary-super-cache probe) into the slow
// path (linear scan of the secondary supers array).
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  // Fast path falls through to the slow path only when inconclusive
  // (NULL slow-path label means "fall through").
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}
7529 7530
7530 7531
// Fast-path part of the subtype check.  Probes sub_klass's supertype display
// at super_check_offset (loaded from super_klass if not supplied, which
// requires temp_reg).  Branches to one of L_success, L_failure or
// L_slow_path; any ONE of them may be NULL, meaning that outcome falls
// through.  The slow path is taken only when the probe lands on the
// secondary-super-cache slot, where a miss is inconclusive.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  // -1 (constant_or_zero's register sentinel is not -1 here) means "not
  // supplied": the offset must be loaded from super_klass.
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  // NULL labels are redirected to the common fall-through point.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset only known at runtime: decide between fail and slow path by
    // comparing it against the cache slot's offset.
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}
7634 7635
7635 7636
// Slow-path part of the subtype check: a linear scan (repne scas) of
// sub_klass's secondary supers array looking for super_klass.  On success
// the super is stored into the secondary super cache.  The scan uses the
// fixed registers rax/rcx/rdi, which are spilled and restored unless they
// double as temps.  Either L_success or L_failure may be NULL (fall
// through); with set_cond_codes the Z flag reports the result.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  // (With compressed oops rax is always needed: it gets encoded below.)
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Count slow-path probes for diagnostics.
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne.  Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax); // Changes flags.
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here.  Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    testl(rax,rax); // Set Z = 0
    repne_scanl();
  } else
#endif // _LP64
  {
    testptr(rax,rax); // Set Z = 0
    repne_scan();
  }
  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}
7743 7744
7744 7745
7745 7746 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
7746 7747 ucomisd(dst, as_Address(src));
7747 7748 }
7748 7749
7749 7750 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
7750 7751 ucomiss(dst, as_Address(src));
7751 7752 }
7752 7753
7753 7754 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
7754 7755 if (reachable(src)) {
7755 7756 xorpd(dst, as_Address(src));
7756 7757 } else {
7757 7758 lea(rscratch1, src);
7758 7759 xorpd(dst, Address(rscratch1, 0));
7759 7760 }
7760 7761 }
7761 7762
7762 7763 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
7763 7764 if (reachable(src)) {
7764 7765 xorps(dst, as_Address(src));
7765 7766 } else {
7766 7767 lea(rscratch1, src);
7767 7768 xorps(dst, Address(rscratch1, 0));
7768 7769 }
7769 7770 }
7770 7771
// Verify that 'reg' holds a valid oop (no-op unless VerifyOops).  Calls the
// verify_oop_subroutine stub with the oop and a message naming the register
// and call site.  Contract: nothing is clobbered here; the stub pops both
// pushed arguments and restores rax (and r10 on 64-bit).
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  // NOTE: the message buffer is deliberately never freed -- its address is
  // baked into the generated code below and must outlive it.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
}
7792 7793
7793 7794
// Materialize a "delayed value": a constant whose final value may not be
// known yet at code-generation time.  If *delayed_value_addr is already
// non-zero, return it (plus offset) as a constant.  Otherwise emit code
// that loads it indirectly at runtime into tmp (with a debug-mode check
// that it has been filled in by then) and return tmp.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    // Already computed: fold into a compile-time constant.
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  // Trap (verbosely in WizardMode) if the value is still zero at runtime.
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      stop(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}
7825 7826
7826 7827
// Branch to wrong_method_type unless the method handle in mh_reg has the
// required MethodType held in mtype_reg.
// registers on entry:
//  - rax ('check' register): required MethodType
//  - rcx: method handle
//  - rdx, rsi, or ?: killable temp
void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
  // compare method type against that of the receiver
  if (UseCompressedOops) {
    // The type field holds a narrow oop: decode it into temp_reg first,
    // then compare full pointers.
    load_heap_oop(temp_reg, type_addr);
    cmpptr(mtype_reg, temp_reg);
  } else {
    // Uncompressed: compare directly against the memory operand.
    cmpptr(mtype_reg, type_addr);
  }
  jcc(Assembler::notEqual, wrong_method_type);
}
7844 7845
7845 7846
// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots.  This field is either located directly
// in every method handle, or else is indirectly accessed through the
// method handle's MethodType.  This macro hides the distinction.
void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
                                                Register temp_reg) {
  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
  // load mh.type.form.vmslots
  if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
    // hoist vmslots into every mh to avoid dependent load chain
    movl(vmslots_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
  } else {
    // Walk mh.type.form, reusing vmslots_reg for the intermediate oops
    // before the final vmslots load overwrites it.
    Register temp2_reg = vmslots_reg;
    load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
    load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
    movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
  }
}
7864 7865
7865 7866
// Tail-jump into the interpreted entry point of the method handle in
// mh_reg; control does not return here.
// registers on entry:
//  - rcx: method handle
//  - rdx: killable temp (interpreted only)
//  - rax: killable temp (compiled only)
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}
7884 7885
7885 7886
7886 7887 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
7887 7888 int extra_slot_offset) {
7888 7889 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
7889 7890 int stackElementSize = Interpreter::stackElementSize;
7890 7891 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
7891 7892 #ifdef ASSERT
7892 7893 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
7893 7894 assert(offset1 - offset == stackElementSize, "correct arithmetic");
7894 7895 #endif
7895 7896 Register scale_reg = noreg;
7896 7897 Address::ScaleFactor scale_factor = Address::no_scale;
7897 7898 if (arg_slot.is_constant()) {
7898 7899 offset += arg_slot.as_constant() * stackElementSize;
7899 7900 } else {
7900 7901 scale_reg = arg_slot.as_register();
7901 7902 scale_factor = Address::times(stackElementSize);
7902 7903 }
7903 7904 offset += wordSize; // return PC is on stack
7904 7905 return Address(rsp, scale_reg, scale_factor, offset);
7905 7906 }
7906 7907
7907 7908
// Verify that the oop stored at 'addr' is valid (no-op unless VerifyOops).
// Like verify_oop(), but the oop is loaded from memory; if addr is
// rsp-relative it is compensated for the push done here before being read.
// Contract: nothing is clobbered; the stub pops both pushed arguments and
// restores rax (and r10 on 64-bit).
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  // NOTE: the message buffer is deliberately never freed -- its address is
  // baked into the generated code below and must outlive it.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}
7943 7944
// Debug check (ASSERT builds, only when UseTLAB && VerifyOops) that the
// current thread's TLAB satisfies start <= top <= end.  All registers used
// are saved and restored, so this can be sprinkled anywhere.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    // On 64-bit the thread is permanently in r15; on 32-bit load it into rbx.
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    // Check top >= start.
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    // Check end >= top.
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}
7974 7975
// Image of the x87 FPU control word (low 16 bits of _value): the rounding
// and precision control fields plus the six exception mask bits.  Used by
// FPU-state debugging/print code.
class ControlWord {
 public:
  int32_t _value;

  // Field accessors; bit positions follow the IA-32 x87 control word layout.
  int rounding_control() const        { return (_value >> 10) & 3      ; }
  int precision_control() const       { return (_value >>  8) & 3      ; }
  bool precision() const              { return ((_value >>  5) & 1) != 0; }
  bool underflow() const              { return ((_value >>  4) & 1) != 0; }
  bool overflow() const               { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const            { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const           { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                { return ((_value >>  0) & 1) != 0; }

  // Print the control word as "<value> masks = <flags>, <rounding>, <precision>";
  // mask letters are upper-case when the corresponding exception is masked.
  void print() const {
    // rounding control
    // Initialized so the compiler cannot warn about an uninitialized read;
    // the 2-bit field makes all four cases exhaustive.
    const char* rc = "?";
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    }
    // precision control
    const char* pc = "?";
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    }
    // flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
8021 8022
// Pretty-printer for an x87 FPU status-word image (low 16 bits of _value).
class StatusWord {
 public:
  int32_t _value;

  bool busy()         const { return bit(15); }
  bool C3()           const { return bit(14); }
  bool C2()           const { return bit(10); }
  bool C1()           const { return bit(9);  }
  bool C0()           const { return bit(8);  }
  int  top()          const { return (_value >> 11) & 7; }
  bool error_status() const { return bit(7);  }
  bool stack_fault()  const { return bit(6);  }
  bool precision()    const { return bit(5);  }
  bool underflow()    const { return bit(4);  }
  bool overflow()     const { return bit(3);  }
  bool zero_divide()  const { return bit(2);  }
  bool denormalized() const { return bit(1);  }
  bool invalid()      const { return bit(0);  }

  void print() const {
    // Condition codes: digit when set, '-' when clear.
    char c[5];
    c[0] = C3() ? '3' : '-';
    c[1] = C2() ? '2' : '-';
    c[2] = C1() ? '1' : '-';
    c[3] = C0() ? '0' : '-';
    c[4] = '\x0';
    // Exception/status flags: letter when set, '-' when clear.
    const char letters[8] = { 'E', 'S', 'P', 'U', 'O', 'Z', 'D', 'I' };
    const bool states[8]  = { error_status(), stack_fault(), precision(), underflow(),
                              overflow(), zero_divide(), denormalized(), invalid() };
    char f[9];
    for (int i = 0; i < 8; i++) {
      f[i] = states[i] ? letters[i] : '-';
    }
    f[8] = '\x0';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

 private:
  // Test a single bit of the status word.
  bool bit(int pos) const { return ((_value >> pos) & 1) != 0; }
};
8065 8066
// Pretty-printer for an x87 tag-word image: two tag bits per physical register.
class TagWord {
 public:
  int32_t _value;

  // Tag for physical register i (0 = valid, 1 = zero, 2 = special, 3 = empty).
  int tag_at(int i) const { return (_value >> (2 * i)) & 0x3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
8077 8078
// Image of one 80-bit x87 data register: 64-bit significand in _m1:_m0
// plus the 16-bit sign/exponent word in _ex.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the x87 "indefinite" value: exponent word all ones and
  // significand 0xC0000000_00000000.
  bool is_indefinite() const {
    if (_ex != -1) return false;
    if (_m0 != 0)  return false;
    return _m1 == (int32_t)0xC0000000;
  }

  void print() const {
    char sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  }

};
8095 8096
// Snapshot of the full x87 FPU state (environment words plus the eight
// 80-bit data registers) as saved into memory by push_CPU_state.
class FPU_State {
 public:
  enum {
    register_size       = 10,   // bytes per 80-bit x87 register image
    number_of_registers =  8,
    register_mask       =  7    // for modulo-8 stack-index arithmetic
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Tag of stack-relative register ST(i): the tag word is indexed by
  // physical register number, so add the current top-of-stack.
  int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  // Saved image of register slot i in the save area.
  // NOTE(review): callers index this with a stack-relative value (see
  // print() below), implying the save area is in stack order — confirm
  // against push_CPU_state.
  FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();   // tags are 2 bits, so 0..3 covers all values
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        // j is the stack-relative index of physical register i;
        // '*' marks the current top of stack (ST0).
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};
8145 8146
// Pretty-printer for an EFLAGS/RFLAGS register image.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow()        const { return (_value & (1 << 11)) != 0; }
  bool direction()       const { return (_value & (1 << 10)) != 0; }
  bool sign()            const { return (_value & (1 <<  7)) != 0; }
  bool zero()            const { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const { return (_value & (1 <<  4)) != 0; }
  bool parity()          const { return (_value & (1 <<  2)) != 0; }
  bool carry()           const { return (_value & (1 <<  0)) != 0; }

  void print() const {
    // One character per flag: its letter when set, '-' when clear.
    const char letters[7] = { 'O', 'D', 'S', 'Z', 'A', 'P', 'C' };
    const bool states[7]  = { overflow(), direction(), sign(), zero(),
                              auxiliary_carry(), parity(), carry() };
    char f[8];
    for (int i = 0; i < 7; i++) {
      f[i] = states[i] ? letters[i] : '-';
    }
    f[7] = '\x0';
    // output
    printf("%08x flags = %s", _value, f);
  }

};
8174 8175
// Image of one 32-bit integer-unit register, printed as hex and
// signed decimal.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    const int32_t v = _value;
    printf("%08x  %11d", v, v);
  }

};
8184 8185
// Snapshot of the integer-unit registers.  Member order matches the
// memory layout of the save area passed in (eflags at the lowest
// address, then rdi..rax).
// NOTE(review): the stray commas in "rax, =" / "rbx, =" / "rbp, ="
// below are historic output-format artifacts; do not "fix" them
// without checking tools that parse this output.
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax,  = "); _rax.print(); printf("\n");
    printf("rbx,  = "); _rbx.print(); printf("\n");
    printf("rcx  = "); _rcx.print(); printf("\n");
    printf("rdx  = "); _rdx.print(); printf("\n");
    printf("rdi  = "); _rdi.print(); printf("\n");
    printf("rsi  = "); _rsi.print(); printf("\n");
    printf("rbp,  = "); _rbp.print(); printf("\n");
    printf("rsp  = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};
8212 8213
8213 8214
// Complete CPU snapshot (integer unit + FPU) in the layout produced by
// push_CPU_state; used by the debugging helpers below.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  // Dump both register files between separator lines.
  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};
8228 8229
8229 8230
// C callback invoked from generated code by MacroAssembler::print_CPU_state;
// 'state' points at the register save area pushed by push_CPU_state.
static void _print_CPU_state(CPU_State* state) {
  state->print();
};
8233 8234
8234 8235
// Emit code that dumps the current CPU state to stdout (debugging aid).
// Saves all registers, passes the save area's address (rsp) to the C
// helper _print_CPU_state, then restores the registers.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}
8242 8243
8243 8244
8244 8245 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
8245 8246 static int counter = 0;
8246 8247 FPU_State* fs = &state->_fpu_state;
8247 8248 counter++;
8248 8249 // For leaf calls, only verify that the top few elements remain empty.
8249 8250 // We only need 1 empty at the top for C2 code.
8250 8251 if( stack_depth < 0 ) {
8251 8252 if( fs->tag_for_st(7) != 3 ) {
8252 8253 printf("FPR7 not empty\n");
8253 8254 state->print();
8254 8255 assert(false, "error");
8255 8256 return false;
8256 8257 }
8257 8258 return true; // All other stack states do not matter
8258 8259 }
8259 8260
8260 8261 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
8261 8262 "bad FPU control word");
8262 8263
8263 8264 // compute stack depth
8264 8265 int i = 0;
8265 8266 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
8266 8267 int d = i;
8267 8268 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
8268 8269 // verify findings
8269 8270 if (i != FPU_State::number_of_registers) {
8270 8271 // stack not contiguous
8271 8272 printf("%s: stack not contiguous at ST%d\n", s, i);
8272 8273 state->print();
8273 8274 assert(false, "error");
8274 8275 return false;
8275 8276 }
8276 8277 // check if computed stack depth corresponds to expected stack depth
8277 8278 if (stack_depth < 0) {
8278 8279 // expected stack depth is -stack_depth or less
8279 8280 if (d > -stack_depth) {
8280 8281 // too many elements on the stack
8281 8282 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
8282 8283 state->print();
8283 8284 assert(false, "error");
8284 8285 return false;
8285 8286 }
8286 8287 } else {
8287 8288 // expected stack depth is stack_depth
8288 8289 if (d != stack_depth) {
8289 8290 // wrong stack depth
8290 8291 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
8291 8292 state->print();
8292 8293 assert(false, "error");
8293 8294 return false;
8294 8295 }
8295 8296 }
8296 8297 // everything is cool
8297 8298 return true;
8298 8299 }
8299 8300
8300 8301
// Emit a runtime check of the x87 FPU stack depth (enabled by
// -XX:+VerifyFPU).  Saves all registers, calls the C helper _verify_FPU
// with (stack_depth, s, CPU_State*) pushed on the stack, and executes
// int3 (debugger breakpoint) if the helper returns 0 in rax.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
8320 8321
// Load the klass pointer of the object at 'src' into 'dst'.
// With compressed oops the klass field is a 32-bit narrow oop that is
// known to be non-null, so decode_heap_oop_not_null suffices.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
8330 8331
// Load the prototype header of the klass of the object at 'src' into
// 'dst'.  With compressed oops the narrow-klass decode (shift + add of
// r12_heapbase) is folded into the addressing mode of the final load
// where possible, saving instructions.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        // Shift of 3 fits a scale factor: decode in the address expression.
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      } else {
        // OK to use shift since we don't need to preserve flags.
        shlq(dst, LogMinObjAlignmentInBytes);
        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      }
    } else {
      // Zero shift (and zero base): the narrow value is the full address.
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}
8355 8356
// Store the klass pointer 'src' into the object at 'dst'.
// NOTE: with compressed oops this destroys 'src' (encodes it in place).
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
8365 8366
// Load a (possibly null) heap oop from 'src' into 'dst', decompressing
// it when compressed oops are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);   // handles the null case
  } else
#endif
    movptr(dst, src);
}
8375 8376
// Store the (possibly null) heap oop in 'src' to 'dst', compressing it
// when compressed oops are in use.
// NOTE: destroys 'src' in the compressed case, so 'dst' must not use it.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}
8386 8387
// Used for storing NULLs.
// A narrow null is a 32-bit zero; a wide null on LP64 is written with
// movslq (sign-extended 32-bit immediate store to a 64-bit slot).
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}
8399 8400
8400 8401 #ifdef _LP64
// With compressed oops the klass field is only 32 bits, leaving a
// 32-bit gap in the object header; fill it from 'src'.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}
8407 8408
#ifdef ASSERT
// Debug-only: emit a check that r12_heapbase still holds the narrow-oop
// base; stops with 'msg' if it was clobbered (-XX:+CheckCompressedOops).
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop(msg);
    bind(ok);
    pop(rscratch1);
  }
}
#endif
8423 8424
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compress the (possibly null) oop in 'r' in place.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based mode: encoding is just the alignment shift (if any).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // Null check without a branch: if r is NULL substitute the heap base,
  // so that (r - base) >> shift yields a narrow null (0).
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
8442 8443
// Compress the oop in 'r' in place; 'r' must not be null (checked in
// debug builds), so the null-preserving cmov of encode_heap_oop is
// unnecessary.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}
8463 8464
// Two-register variant: compress the non-null oop in 'src' into 'dst',
// leaving 'src' unchanged when dst != src.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}
8487 8488
// Decompress the (possibly null) narrow oop in 'r' in place.
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based mode: decoding is just the alignment shift (if any);
    // a narrow null stays null.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    // shlq sets ZF from its result, so a narrow null is detected here
    // and the base add is skipped to keep null mapped to null.
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}
8506 8507
// Decompress the narrow oop in 'r' in place; 'r' must not be null, so
// no null check is emitted.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    // Zero shift implies zero base: nothing to do.
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}
8524 8525
// Two-register variant: decompress the non-null narrow oop in 'src'
// into 'dst'.  When the shift is 3 the whole decode is a single leaq.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      // base + (src << 3) in one instruction.
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    // Zero shift implies zero base: a plain register copy suffices.
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}
8552 8553
// Load the narrow-oop encoding of constant 'obj' into 'dst', recording
// an oop relocation so the GC can update the embedded value.
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}
8561 8562
// Store the narrow-oop encoding of constant 'obj' to memory at 'dst',
// recording an oop relocation so the GC can update the embedded value.
void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}
8570 8571
// Compare register 'dst' with the narrow-oop encoding of constant
// 'obj' (relocated so the GC can update the embedded value).
void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}
8579 8580
// Compare the memory operand 'dst' with the narrow-oop encoding of
// constant 'obj' (relocated so the GC can update the embedded value).
void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}
8588 8589
// Reload r12_heapbase with the current narrow-oop base (needed after
// calls that may clobber r12 or after the base changes).
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  }
}
8594 8595 #endif // _LP64
8595 8596
// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
// On exit 'result' holds the index of the first occurrence of the
// substring (in chars), or -1 if not found.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses pcmpestri instruction with bound registers
  // inputs:
  //   xmm - substring
  //   rax - substring length (elements count)
  //   mem - scanned string
  //   rdx - string length (elements count)
  //   0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  // outputs:
  //   rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");

  // Load substring (first 16 bytes).
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);    // advance string pointer by one char

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1: partial/full match
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1: whole substr matched
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR); // head matched, check the rest
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);   // cnt2 is now -(remaining substr chars)

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jccb(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // byte offset -> char index
  bind(EXIT);

} // string_indexofC8
8746 8747
8747 8748 // Small strings are loaded through stack if they cross page boundary.
8748 8749 void MacroAssembler::string_indexof(Register str1, Register str2,
8749 8750 Register cnt1, Register cnt2,
8750 8751 int int_cnt2, Register result,
8751 8752 XMMRegister vec, Register tmp) {
8752 8753 assert(UseSSE42Intrinsics, "SSE4.2 is required");
8753 8754 //
8754 8755 // int_cnt2 is length of small (< 8 chars) constant substring
8755 8756 // or (-1) for non constant substring in which case its length
8756 8757 // is in cnt2 register.
8757 8758 //
8758 8759 // Note, inline_string_indexOf() generates checks:
8759 8760 // if (substr.count > string.count) return -1;
8760 8761 // if (substr.count == 0) return 0;
8761 8762 //
8762 8763 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
8763 8764
8764 8765 // This method uses pcmpestri inxtruction with bound registers
8765 8766 // inputs:
8766 8767 // xmm - substring
8767 8768 // rax - substring length (elements count)
8768 8769 // mem - scanned string
8769 8770 // rdx - string length (elements count)
8770 8771 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
8771 8772 // outputs:
8772 8773 // rcx - matched index in string
8773 8774 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
8774 8775
8775 8776 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
8776 8777 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
8777 8778 FOUND_CANDIDATE;
8778 8779
8779 8780 { //========================================================
8780 8781 // We don't know where these strings are located
8781 8782 // and we can't read beyond them. Load them through stack.
8782 8783 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
8783 8784
8784 8785 movptr(tmp, rsp); // save old SP
8785 8786
8786 8787 if (int_cnt2 > 0) { // small (< 8 chars) constant substring
8787 8788 if (int_cnt2 == 1) { // One char
8788 8789 load_unsigned_short(result, Address(str2, 0));
8789 8790 movdl(vec, result); // move 32 bits
8790 8791 } else if (int_cnt2 == 2) { // Two chars
8791 8792 movdl(vec, Address(str2, 0)); // move 32 bits
8792 8793 } else if (int_cnt2 == 4) { // Four chars
8793 8794 movq(vec, Address(str2, 0)); // move 64 bits
8794 8795 } else { // cnt2 = { 3, 5, 6, 7 }
8795 8796 // Array header size is 12 bytes in 32-bit VM
8796 8797 // + 6 bytes for 3 chars == 18 bytes,
8797 8798 // enough space to load vec and shift.
8798 8799 assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
8799 8800 movdqu(vec, Address(str2, (int_cnt2*2)-16));
8800 8801 psrldq(vec, 16-(int_cnt2*2));
8801 8802 }
8802 8803 } else { // not constant substring
8803 8804 cmpl(cnt2, 8);
8804 8805 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
8805 8806
8806 8807 // We can read beyond string if srt+16 does not cross page boundary
8807 8808 // since heaps are aligned and mapped by pages.
8808 8809 assert(os::vm_page_size() < (int)G, "default page should be small");
8809 8810 movl(result, str2); // We need only low 32 bits
8810 8811 andl(result, (os::vm_page_size()-1));
8811 8812 cmpl(result, (os::vm_page_size()-16));
8812 8813 jccb(Assembler::belowEqual, CHECK_STR);
8813 8814
8814 8815 // Move small strings to stack to allow load 16 bytes into vec.
8815 8816 subptr(rsp, 16);
8816 8817 int stk_offset = wordSize-2;
8817 8818 push(cnt2);
8818 8819
8819 8820 bind(COPY_SUBSTR);
8820 8821 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
8821 8822 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
8822 8823 decrement(cnt2);
8823 8824 jccb(Assembler::notZero, COPY_SUBSTR);
8824 8825
8825 8826 pop(cnt2);
8826 8827 movptr(str2, rsp); // New substring address
8827 8828 } // non constant
8828 8829
8829 8830 bind(CHECK_STR);
8830 8831 cmpl(cnt1, 8);
8831 8832 jccb(Assembler::aboveEqual, BIG_STRINGS);
8832 8833
8833 8834 // Check cross page boundary.
8834 8835 movl(result, str1); // We need only low 32 bits
8835 8836 andl(result, (os::vm_page_size()-1));
8836 8837 cmpl(result, (os::vm_page_size()-16));
8837 8838 jccb(Assembler::belowEqual, BIG_STRINGS);
8838 8839
8839 8840 subptr(rsp, 16);
8840 8841 int stk_offset = -2;
8841 8842 if (int_cnt2 < 0) { // not constant
8842 8843 push(cnt2);
8843 8844 stk_offset += wordSize;
8844 8845 }
8845 8846 movl(cnt2, cnt1);
8846 8847
8847 8848 bind(COPY_STR);
8848 8849 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
8849 8850 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
8850 8851 decrement(cnt2);
8851 8852 jccb(Assembler::notZero, COPY_STR);
8852 8853
8853 8854 if (int_cnt2 < 0) { // not constant
8854 8855 pop(cnt2);
8855 8856 }
8856 8857 movptr(str1, rsp); // New string address
8857 8858
8858 8859 bind(BIG_STRINGS);
8859 8860 // Load substring.
8860 8861 if (int_cnt2 < 0) { // -1
8861 8862 movdqu(vec, Address(str2, 0));
8862 8863 push(cnt2); // substr count
8863 8864 push(str2); // substr addr
8864 8865 push(str1); // string addr
8865 8866 } else {
8866 8867 // Small (< 8 chars) constant substrings are loaded already.
8867 8868 movl(cnt2, int_cnt2);
8868 8869 }
8869 8870 push(tmp); // original SP
8870 8871
8871 8872 } // Finished loading
8872 8873
8873 8874 //========================================================
8874 8875 // Start search
8875 8876 //
8876 8877
8877 8878 movptr(result, str1); // string addr
8878 8879
8879 8880 if (int_cnt2 < 0) { // Only for non constant substring
8880 8881 jmpb(SCAN_TO_SUBSTR);
8881 8882
8882 8883 // SP saved at sp+0
8883 8884 // String saved at sp+1*wordSize
8884 8885 // Substr saved at sp+2*wordSize
8885 8886 // Substr count saved at sp+3*wordSize
8886 8887
8887 8888 // Reload substr for rescan, this code
8888 8889 // is executed only for large substrings (> 8 chars)
8889 8890 bind(RELOAD_SUBSTR);
8890 8891 movptr(str2, Address(rsp, 2*wordSize));
8891 8892 movl(cnt2, Address(rsp, 3*wordSize));
8892 8893 movdqu(vec, Address(str2, 0));
8893 8894 // We came here after the beginning of the substring was
8894 8895 // matched but the rest of it was not so we need to search
8895 8896 // again. Start from the next element after the previous match.
8896 8897 subptr(str1, result); // Restore counter
8897 8898 shrl(str1, 1);
8898 8899 addl(cnt1, str1);
8899 8900 decrementl(cnt1); // Shift to next element
8900 8901 cmpl(cnt1, cnt2);
8901 8902 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
8902 8903
8903 8904 addptr(result, 2);
8904 8905 } // non constant
8905 8906
8906 8907 // Scan string for start of substr in 16-byte vectors
8907 8908 bind(SCAN_TO_SUBSTR);
8908 8909 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
8909 8910 pcmpestri(vec, Address(result, 0), 0x0d);
8910 8911 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
8911 8912 subl(cnt1, 8);
8912 8913 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
8913 8914 cmpl(cnt1, cnt2);
8914 8915 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
8915 8916 addptr(result, 16);
8916 8917
8917 8918 bind(ADJUST_STR);
8918 8919 cmpl(cnt1, 8); // Do not read beyond string
8919 8920 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
8920 8921 // Back-up string to avoid reading beyond string.
8921 8922 lea(result, Address(result, cnt1, Address::times_2, -16));
8922 8923 movl(cnt1, 8);
8923 8924 jmpb(SCAN_TO_SUBSTR);
8924 8925
8925 8926 // Found a potential substr
8926 8927 bind(FOUND_CANDIDATE);
8927 8928 // After pcmpestri tmp(rcx) contains matched element index
8928 8929
8929 8930 // Make sure string is still long enough
8930 8931 subl(cnt1, tmp);
8931 8932 cmpl(cnt1, cnt2);
8932 8933 jccb(Assembler::greaterEqual, FOUND_SUBSTR);
8933 8934 // Left less then substring.
8934 8935
8935 8936 bind(RET_NOT_FOUND);
8936 8937 movl(result, -1);
8937 8938 jmpb(CLEANUP);
8938 8939
8939 8940 bind(FOUND_SUBSTR);
8940 8941 // Compute start addr of substr
8941 8942 lea(result, Address(result, tmp, Address::times_2));
8942 8943
8943 8944 if (int_cnt2 > 0) { // Constant substring
8944 8945 // Repeat search for small substring (< 8 chars)
8945 8946 // from new point without reloading substring.
8946 8947 // Have to check that we don't read beyond string.
8947 8948 cmpl(tmp, 8-int_cnt2);
8948 8949 jccb(Assembler::greater, ADJUST_STR);
8949 8950 // Fall through if matched whole substring.
8950 8951 } else { // non constant
8951 8952 assert(int_cnt2 == -1, "should be != 0");
8952 8953
8953 8954 addl(tmp, cnt2);
8954 8955 // Found result if we matched whole substring.
8955 8956 cmpl(tmp, 8);
8956 8957 jccb(Assembler::lessEqual, RET_FOUND);
8957 8958
8958 8959 // Repeat search for small substring (<= 8 chars)
8959 8960 // from new point 'str1' without reloading substring.
8960 8961 cmpl(cnt2, 8);
8961 8962 // Have to check that we don't read beyond string.
8962 8963 jccb(Assembler::lessEqual, ADJUST_STR);
8963 8964
8964 8965 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
8965 8966 // Compare the rest of substring (> 8 chars).
8966 8967 movptr(str1, result);
8967 8968
8968 8969 cmpl(tmp, cnt2);
8969 8970 // First 8 chars are already matched.
8970 8971 jccb(Assembler::equal, CHECK_NEXT);
8971 8972
8972 8973 bind(SCAN_SUBSTR);
8973 8974 pcmpestri(vec, Address(str1, 0), 0x0d);
8974 8975 // Need to reload strings pointers if not matched whole vector
8975 8976 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
8976 8977
8977 8978 bind(CHECK_NEXT);
8978 8979 subl(cnt2, 8);
8979 8980 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
8980 8981 addptr(str1, 16);
8981 8982 addptr(str2, 16);
8982 8983 subl(cnt1, 8);
8983 8984 cmpl(cnt2, 8); // Do not read beyond substring
8984 8985 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
8985 8986 // Back-up strings to avoid reading beyond substring.
8986 8987 lea(str2, Address(str2, cnt2, Address::times_2, -16));
8987 8988 lea(str1, Address(str1, cnt2, Address::times_2, -16));
8988 8989 subl(cnt1, cnt2);
8989 8990 movl(cnt2, 8);
8990 8991 addl(cnt1, 8);
8991 8992 bind(CONT_SCAN_SUBSTR);
8992 8993 movdqu(vec, Address(str2, 0));
8993 8994 jmpb(SCAN_SUBSTR);
8994 8995
8995 8996 bind(RET_FOUND_LONG);
8996 8997 movptr(str1, Address(rsp, wordSize));
8997 8998 } // non constant
8998 8999
8999 9000 bind(RET_FOUND);
9000 9001 // Compute substr offset
9001 9002 subptr(result, str1);
9002 9003 shrl(result, 1); // index
9003 9004
9004 9005 bind(CLEANUP);
9005 9006 pop(rsp); // restore SP
9006 9007
9007 9008 } // string_indexof
9008 9009
// Compare strings (java.lang.String.compareTo semantics): scan the two
// char sequences up to min(cnt1, cnt2) elements and set 'result' to the
// difference of the first mismatching pair of chars; if no mismatch is
// found within the minimum length, 'result' is cnt1 - cnt2.
//
// Inputs:
//   str1, str2 - addresses of the first char of each string
//   cnt1, cnt2 - element (char) counts of str1 and str2
// Output:
//   result     - <0, 0 or >0 per compareTo
// Temps:
//   vec1       - used only on the UseSSE42Intrinsics path
// When UseSSE42Intrinsics is set, the fixed register assignment
// result==rax, cnt2==rdx, cnt1==rcx is required by pcmpestri (asserted
// below). All input registers are clobbered; str1/str2 are advanced.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);                  // cnt1 - cnt2 kept on the stack until the end
  if (VM_Version::supports_cmov()) {
    cmovl(Assembler::lessEqual, cnt2, result);  // cnt2 = min(cnt1, cnt2)
  } else {
    // No cmov: branchy equivalent of the conditional move above.
    Label GT_LABEL;
    jccb(Assembler::greater, GT_LABEL);
    movl(cnt2, result);
    bind(GT_LABEL);
  }

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);            // first char already compared
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;              // elements per 16-byte vector (2-byte chars)

  // Advance to next element: 16/stride == 2 bytes, i.e. skip the one
  // char that was already compared above.
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point str1/str2 past the compared region and index back with a
    // negative 'result' so one register serves as both counter and index.
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    // inputs:
    //   vec1- substring
    //   rax - negative string length (elements count)
    //   mem - scaned string
    //   rdx - string length (elements count)
    //   pcmpmask - cmp mode: 11000 (string compare with negated result)
    //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
    // outputs:
    //   rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);     // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail: 'result' is zero only if the whole
    // remaining length was a multiple of the vector size.
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    // Re-read the last full vector (overlapping the already-compared
    // region) to cover the < stride leftover elements.
    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);      // negative base index + mismatch offset
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);

  // Compare the rest of the elements one char at a time.
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length.  Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);                 // cnt1 - cnt2 pushed at entry
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}
9151 9152
// Compare char[] arrays aligned to 4 bytes or substrings.
// Sets 'result' to 1 when the two char sequences are identical, 0 otherwise.
//
// is_array_equ == true : ary1/ary2 are array oops; performs null checks,
//                        compares lengths, loads 'limit' (char count) from
//                        the array headers and advances past base_offset.
// is_array_equ == false: ary1/ary2 already point at the first char and
//                        'limit' holds the char count on entry.
// Temps: chr, vec1, vec2.  ary1, ary2 and limit are clobbered.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset  = arrayOopDesc::length_offset_in_bytes();
  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args: identical addresses compare equal trivially.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0  (chars are 2 bytes)
  movl(result, limit); // copy of the byte count for the tail checks

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e);  //   tail count (in bytes), 0..14 and even
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Index from the end with a negative counter.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);          // non-zero iff the vectors differ

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    // Tail: re-read the last 16 bytes, overlapping the region already
    // compared, so no per-char loop is needed.
    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any.
  // ary1/ary2 now point just past the 4-byte-compared region.
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}
9264 9265
9265 9266 #ifdef PRODUCT
9266 9267 #define BLOCK_COMMENT(str) /* nothing */
9267 9268 #else
9268 9269 #define BLOCK_COMMENT(str) block_comment(str)
9269 9270 #endif
9270 9271
9271 9272 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Emit code that fills 'count' elements of type 't' (T_BYTE, T_SHORT or
// T_INT only) starting at 'to' with the low bits of 'value'.
//
// Inputs:
//   t       - element type; determines 'shift' so that (1 << shift) is the
//             number of elements per 32-bit word (byte:4, short:2, int:1)
//   aligned - caller guarantees 'to' is at least 4-byte aligned
//   to      - destination address
//   value   - fill value; its low element is replicated to a 32-bit pattern
//   count   - element count (not bytes)
// Temps: rtmp (replication), xtmp (SSE2+ wide stores).
// to, value, count and rtmp are clobbered.
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate the element value across all 32 bits of 'value':
  // a byte is first doubled to 16 bits, then (for byte and short)
  // the 16-bit pattern is doubled to 32 bits.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));  // 2 bytes == (1 << (shift-1)) elements
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // No SSE2: fill with 32-bit integer stores only.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks; (8 << shift) elements == 32 bytes.
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);    // undo the biasing of the loop counter
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));  // 8 bytes == (1 << (shift+1)) elements
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);    // broadcast the 32-bit pattern across xtmp

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);  // undo the biasing of the loop counter
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);       // at least one 32-bit word left?
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      // T_SHORT: nothing smaller than an element remains.
      BIND(L_fill_byte);
    }
  } else {
    // T_INT: no sub-word tail possible.
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
9441 9442 #undef BIND
9442 9443 #undef BLOCK_COMMENT
9443 9444
9444 9445
9445 9446 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
9446 9447 switch (cond) {
9447 9448 // Note some conditions are synonyms for others
9448 9449 case Assembler::zero: return Assembler::notZero;
9449 9450 case Assembler::notZero: return Assembler::zero;
9450 9451 case Assembler::less: return Assembler::greaterEqual;
9451 9452 case Assembler::lessEqual: return Assembler::greater;
9452 9453 case Assembler::greater: return Assembler::lessEqual;
9453 9454 case Assembler::greaterEqual: return Assembler::less;
9454 9455 case Assembler::below: return Assembler::aboveEqual;
9455 9456 case Assembler::belowEqual: return Assembler::above;
9456 9457 case Assembler::above: return Assembler::belowEqual;
9457 9458 case Assembler::aboveEqual: return Assembler::below;
9458 9459 case Assembler::overflow: return Assembler::noOverflow;
9459 9460 case Assembler::noOverflow: return Assembler::overflow;
9460 9461 case Assembler::negative: return Assembler::positive;
9461 9462 case Assembler::positive: return Assembler::negative;
9462 9463 case Assembler::parity: return Assembler::noParity;
9463 9464 case Assembler::noParity: return Assembler::parity;
9464 9465 }
9465 9466 ShouldNotReachHere(); return Assembler::overflow;
9466 9467 }
9467 9468
9468 9469 SkipIfEqual::SkipIfEqual(
9469 9470 MacroAssembler* masm, const bool* flag_addr, bool value) {
9470 9471 _masm = masm;
9471 9472 _masm->cmp8(ExternalAddress((address)flag_addr), value);
9472 9473 _masm->jcc(Assembler::equal, _label);
9473 9474 }
9474 9475
// Bind the skip target emitted by the constructor: the guarded code is
// jumped over when the flag byte equaled 'value' at runtime.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
↓ open down ↓ |
2333 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX