1 #ifdef USE_PRAGMA_IDENT_SRC
   2 #pragma ident "@(#)templateTable_x86_64.cpp     1.58 07/09/17 09:25:59 JVM"
   3 #endif
   4 /*
   5  * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
   6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   7  *
   8  * This code is free software; you can redistribute it and/or modify it
   9  * under the terms of the GNU General Public License version 2 only, as
  10  * published by the Free Software Foundation.
  11  *
  12  * This code is distributed in the hope that it will be useful, but WITHOUT
  13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15  * version 2 for more details (a copy is included in the LICENSE file that
  16  * accompanied this code).
  17  *
  18  * You should have received a copy of the GNU General Public License version
  19  * 2 along with this work; if not, write to the Free Software Foundation,
  20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  21  *
  22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  23  * CA 95054 USA or visit www.sun.com if you need additional information or
  24  * have any questions.
  25  *  
  26  */
  27 
  28 #include "incls/_precompiled.incl"
  29 #include "incls/_templateTable_x86_64.cpp.incl"
  30 
  31 #define __ _masm->
  32 
  33 // Platform-dependent initialization
  34 
  35 void TemplateTable::pd_initialize() {
  36   // No amd64 specific initialization
  37 }
  38 
  39 // Address computation: local variables
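     //
     // A rough sketch of the layout these helpers assume: r14 holds the
     // interpreter's locals pointer and locals are laid out toward lower
     // addresses, so the register-indexed forms expect an index that has
     // already been negated (see locals_index()).  A two-slot (category 2)
     // local is accessed through its second, lower-addressed slot, which is
     // why laddress(n) == iaddress(n + 1).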
  40 
  41 static inline Address iaddress(int n) { 
  42   return Address(r14, Interpreter::local_offset_in_bytes(n));
  43 }
  44 
  45 static inline Address laddress(int n) { 
  46   return iaddress(n + 1); 
  47 }
  48 
  49 static inline Address faddress(int n) {
  50   return iaddress(n);
  51 }
  52 
  53 static inline Address daddress(int n) { 
  54   return laddress(n);
  55 }
  56 
  57 static inline Address aaddress(int n) {
  58   return iaddress(n);
  59 }
  60 
  61 static inline Address iaddress(Register r) {
  62   return Address(r14, r, Address::times_8, Interpreter::value_offset_in_bytes()); 
  63 }
  64 
  65 static inline Address laddress(Register r) {
  66   return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
  67 }
  68 
  69 static inline Address faddress(Register r) {
  70   return iaddress(r);
  71 }
  72 
  73 static inline Address daddress(Register r) {
  74   return laddress(r);
  75 }
  76 
  77 static inline Address aaddress(Register r) { 
  78   return iaddress(r);
  79 }
  80 
  81 static inline Address at_rsp() {
  82   return Address(rsp, 0); 
  83 }
  84 
  85 // At the top of the Java expression stack, which may be different from
  86 // rsp().  It isn't for category 1 values.
  87 static inline Address at_tos   () {
  88   return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
  89 }
  90   
  91 static inline Address at_tos_p1() {
  92   return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
  93 }
  94   
  95 static inline Address at_tos_p2() {
  96   return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
  97 }
  98   
  99 static inline Address at_tos_p3() {
 100   return Address(rsp,  Interpreter::expr_offset_in_bytes(3));
 101 }
 102 
 103 // Condition conversion
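     // j_not() returns the x86 condition for the negation of the bytecode-level
     // condition, so the branch templates can jump around the taken-branch code;
     // e.g. if_icmp(less) emits a cmpl followed by
     // jcc(Assembler::greaterEqual, not_taken).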
 104 static Assembler::Condition j_not(TemplateTable::Condition cc) {
 105   switch (cc) {
 106   case TemplateTable::equal        : return Assembler::notEqual;
 107   case TemplateTable::not_equal    : return Assembler::equal;
 108   case TemplateTable::less         : return Assembler::greaterEqual;
 109   case TemplateTable::less_equal   : return Assembler::greater;
 110   case TemplateTable::greater      : return Assembler::lessEqual;
 111   case TemplateTable::greater_equal: return Assembler::less;
 112   }
 113   ShouldNotReachHere();
 114   return Assembler::zero;
 115 }
 116 
 117 
 118 // Miscellaneous helper routines
 119 
 120 Address TemplateTable::at_bcp(int offset) {
 121   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
 122   return Address(r13, offset);
 123 }
 124 
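     // Rewrites the bytecode at the current bcp with the faster variant passed
     // in bc.  If JVMTI breakpoints can be posted and the byte at bcp is
     // currently _breakpoint, the rewrite goes through the runtime
     // (InterpreterRuntime::set_original_bytecode_at) so the breakpoint table
     // keeps the original bytecode; debug builds additionally verify that the
     // byte being replaced is either the original bytecode or its fast form.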
 125 void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
 126                                    Register scratch,
 127                                    bool load_bc_into_scratch/*=true*/) {
 128   if (!RewriteBytecodes) {
 129     return;
 130   }
 131   // the pair bytecodes have already done the load.
 132   if (load_bc_into_scratch) {
 133     __ movl(bc, bytecode);
 134   }
 135   Label patch_done;
 136   if (JvmtiExport::can_post_breakpoint()) {
 137     Label fast_patch;
 138     // if a breakpoint is present we can't rewrite the stream directly
 139     __ movzbl(scratch, at_bcp(0));
 140     __ cmpl(scratch, Bytecodes::_breakpoint);
 141     __ jcc(Assembler::notEqual, fast_patch);
 142     __ get_method(scratch);
 143     // Let breakpoint table handling rewrite to quicker bytecode 
 144     __ call_VM(noreg, 
 145                CAST_FROM_FN_PTR(address, 
 146                                 InterpreterRuntime::set_original_bytecode_at),
 147                scratch, r13, bc);
 148 #ifndef ASSERT
 149     __ jmpb(patch_done);
 150     __ bind(fast_patch);
 151   }
 152 #else
 153     __ jmp(patch_done);
 154     __ bind(fast_patch);
 155   }
 156   Label okay;
 157   __ load_unsigned_byte(scratch, at_bcp(0));
 158   __ cmpl(scratch, (int) Bytecodes::java_code(bytecode));
 159   __ jcc(Assembler::equal, okay);
 160   __ cmpl(scratch, bc);
 161   __ jcc(Assembler::equal, okay);
 162   __ stop("patching the wrong bytecode");
 163   __ bind(okay);
 164 #endif
 165   // patch bytecode
 166   __ movb(at_bcp(0), bc);
 167   __ bind(patch_done);
 168 }
 169 
 170 
 171 // Individual instructions
 172 
 173 void TemplateTable::nop() {
 174   transition(vtos, vtos);
 175   // nothing to do
 176 }
 177 
 178 void TemplateTable::shouldnotreachhere() {
 179   transition(vtos, vtos);
 180   __ stop("shouldnotreachhere bytecode");
 181 }
 182 
 183 void TemplateTable::aconst_null() {
 184   transition(vtos, atos);
 185   __ xorl(rax, rax);
 186 }
 187 
 188 void TemplateTable::iconst(int value) {
 189   transition(vtos, itos);
 190   if (value == 0) {
 191     __ xorl(rax, rax);
 192   } else {
 193     __ movl(rax, value);
 194   }
 195 }
 196 
 197 void TemplateTable::lconst(int value) {
 198   transition(vtos, ltos);
 199   if (value == 0) {
 200     __ xorl(rax, rax);
 201   } else {
 202     __ movl(rax, value);
 203   }
 204 }
 205 
 206 void TemplateTable::fconst(int value) {
 207   transition(vtos, ftos);
 208   static float one = 1.0f, two = 2.0f;
 209   switch (value) {
 210   case 0:
 211     __ xorps(xmm0, xmm0);
 212     break;
 213   case 1:
 214     __ movflt(xmm0, ExternalAddress((address) &one));
 215     break;
 216   case 2:
 217     __ movflt(xmm0, ExternalAddress((address) &two));
 218     break;
 219   default:
 220     ShouldNotReachHere();
 221     break;
 222   }
 223 }
 224 
 225 void TemplateTable::dconst(int value) {
 226   transition(vtos, dtos);
 227   static double one = 1.0;
 228   switch (value) {
 229   case 0:
 230     __ xorpd(xmm0, xmm0);
 231     break;
 232   case 1:
 233     __ movdbl(xmm0, ExternalAddress((address) &one));
 234     break;
 235   default:
 236     ShouldNotReachHere();
 237     break;
 238   }
 239 }
 240 
 241 void TemplateTable::bipush() {
 242   transition(vtos, itos);
 243   __ load_signed_byte(rax, at_bcp(1));
 244 }
 245 
 246 void TemplateTable::sipush() {
 247   transition(vtos, itos);
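       // The 16-bit operand is big-endian in the bytecode stream; the
       // little-endian word load leaves its two bytes swapped in the low half
       // of eax, bswapl moves them (now correctly ordered) into the high half,
       // and the arithmetic shift brings the sign-extended value back down.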
 248   __ load_unsigned_word(rax, at_bcp(1));
 249   __ bswapl(rax);
 250   __ sarl(rax, 16);
 251 }
 252 
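     // ldc dispatches on the constant-pool tag: unresolved String/Class entries
     // (including a class whose resolution failed) and a resolved Class all go
     // to InterpreterRuntime::ldc, the latter because the java mirror has to be
     // fetched; a Float constant is pushed as ftos, an Integer as itos, and
     // anything else (a resolved String oop) is pushed as a pointer.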
 253 void TemplateTable::ldc(bool wide) {
 254   transition(vtos, vtos);
 255   Label call_ldc, notFloat, notClass, Done;
 256 
 257   if (wide) {
 258     __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
 259   } else {
 260     __ load_unsigned_byte(rbx, at_bcp(1));
 261   }
 262 
 263   __ get_cpool_and_tags(rcx, rax);
 264   const int base_offset = constantPoolOopDesc::header_size() * wordSize;
 265   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
 266 
 267   // get type
 268   __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
 269 
 270   // unresolved string - get the resolved string
 271   __ cmpl(rdx, JVM_CONSTANT_UnresolvedString);
 272   __ jccb(Assembler::equal, call_ldc);
 273 
 274   // unresolved class - get the resolved class
 275   __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
 276   __ jccb(Assembler::equal, call_ldc);
 277 
 278   // unresolved class in error state - call into runtime to throw the error 
 279   // from the first resolution attempt
 280   __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
 281   __ jccb(Assembler::equal, call_ldc);
 282 
 283   // resolved class - need to call vm to get java mirror of the class
 284   __ cmpl(rdx, JVM_CONSTANT_Class);
 285   __ jcc(Assembler::notEqual, notClass);
 286 
 287   __ bind(call_ldc);
 288   __ movl(c_rarg1, wide);
 289   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
 290   __ push_ptr(rax);
 291   __ verify_oop(rax);
 292   __ jmp(Done);
 293 
 294   __ bind(notClass);
 295   __ cmpl(rdx, JVM_CONSTANT_Float);
 296   __ jccb(Assembler::notEqual, notFloat);
 297   // ftos
 298   __ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
 299   __ push_f();
 300   __ jmp(Done);
 301 
 302   __ bind(notFloat);
 303 #ifdef ASSERT
 304   { 
 305     Label L;
 306     __ cmpl(rdx, JVM_CONSTANT_Integer);
 307     __ jcc(Assembler::equal, L);
 308     __ cmpl(rdx, JVM_CONSTANT_String);
 309     __ jcc(Assembler::equal, L);
 310     __ stop("unexpected tag type in ldc");
 311     __ bind(L);
 312   }
 313 #endif
 314   // atos and itos
 315   Label isOop;
 316   __ cmpl(rdx, JVM_CONSTANT_Integer);
 317   __ jcc(Assembler::notEqual, isOop);
 318   __ movl(rax, Address(rcx, rbx, Address::times_8, base_offset));
 319   __ push_i(rax);
 320   __ jmp(Done);
 321 
 322   __ bind(isOop);
 323   __ movq(rax, Address(rcx, rbx, Address::times_8, base_offset));
 324   __ push_ptr(rax);
 325 
 326   if (VerifyOops) {
 327     __ verify_oop(rax);
 328   }
 329 
 330   __ bind(Done);
 331 }
 332 
 333 void TemplateTable::ldc2_w() {
 334   transition(vtos, vtos);
 335   Label Long, Done;
 336   __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
 337 
 338   __ get_cpool_and_tags(rcx, rax);
 339   const int base_offset = constantPoolOopDesc::header_size() * wordSize;
 340   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
 341 
 342   // get type
 343   __ cmpb(Address(rax, rbx, Address::times_1, tags_offset), 
 344           JVM_CONSTANT_Double);
 345   __ jccb(Assembler::notEqual, Long);
 346   // dtos
 347   __ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
 348   __ push_d();
 349   __ jmpb(Done);
 350 
 351   __ bind(Long);
 352   // ltos
 353   __ movq(rax, Address(rcx, rbx, Address::times_8, base_offset));
 354   __ push_l();
 355 
 356   __ bind(Done);
 357 }
 358 
 359 void TemplateTable::locals_index(Register reg, int offset) {
 360   __ load_unsigned_byte(reg, at_bcp(offset));
 361   __ negq(reg); 
 362   if (TaggedStackInterpreter) __ shlq(reg, 1);  // index = index*2
 363 }
 364 
 365 void TemplateTable::iload() {
 366   transition(vtos, itos);
 367   if (RewriteFrequentPairs) { 
 368     Label rewrite, done;
 369     const Register bc = c_rarg3;
 370     assert(rbx != bc, "register damaged");
 371 
 372     // get next byte
 373     __ load_unsigned_byte(rbx, 
 374                           at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
 375     // if _iload, wait to rewrite to _fast_iload2.  We only want to rewrite
 376     // the last two iloads in a pair.  Seeing _fast_iload next means that the
 377     // bytecode after it is neither an iload nor a caload, so these two
 378     // iloads form the final pair.
 379     __ cmpl(rbx, Bytecodes::_iload);
 380     __ jcc(Assembler::equal, done);
 381 
 382     __ cmpl(rbx, Bytecodes::_fast_iload);
 383     __ movl(bc, Bytecodes::_fast_iload2);
 384     __ jccb(Assembler::equal, rewrite);
 385 
 386     // if _caload, rewrite to fast_icaload
 387     __ cmpl(rbx, Bytecodes::_caload);
 388     __ movl(bc, Bytecodes::_fast_icaload);
 389     __ jccb(Assembler::equal, rewrite);
 390 
 391     // rewrite so iload doesn't check again.
 392     __ movl(bc, Bytecodes::_fast_iload);
 393 
 394     // rewrite
 395     // bc: fast bytecode
 396     __ bind(rewrite);
 397     patch_bytecode(Bytecodes::_iload, bc, rbx, false);
 398     __ bind(done);
 399   }
 400 
 401   // Get the local value into tos
 402   locals_index(rbx);
 403   __ movl(rax, iaddress(rbx));
 404   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 405 }
 406 
 407 void TemplateTable::fast_iload2() {
 408   transition(vtos, itos);
 409   locals_index(rbx);
 410   __ movl(rax, iaddress(rbx));
 411   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 412   __ push(itos);
 413   locals_index(rbx, 3);
 414   __ movl(rax, iaddress(rbx));
 415   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 416 }
 417   
 418 void TemplateTable::fast_iload() {
 419   transition(vtos, itos);
 420   locals_index(rbx);
 421   __ movl(rax, iaddress(rbx));
 422   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 423 }
 424 
 425 void TemplateTable::lload() {
 426   transition(vtos, ltos);
 427   locals_index(rbx);
 428   __ movq(rax, laddress(rbx));
 429   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 430 }
 431 
 432 void TemplateTable::fload() {
 433   transition(vtos, ftos);
 434   locals_index(rbx);
 435   __ movflt(xmm0, faddress(rbx));
 436   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 437 }
 438 
 439 void TemplateTable::dload() {
 440   transition(vtos, dtos);
 441   locals_index(rbx);
 442   __ movdbl(xmm0, daddress(rbx));
 443   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 444 }
 445 
 446 void TemplateTable::aload() {
 447   transition(vtos, atos);
 448   locals_index(rbx);
 449   __ movq(rax, aaddress(rbx));
 450   debug_only(__ verify_local_tag(frame::TagReference, rbx));
 451 }
 452 
 453 void TemplateTable::locals_index_wide(Register reg) {
 454   __ movl(reg, at_bcp(2));
 455   __ bswapl(reg);
 456   __ shrl(reg, 16);
 457   __ negq(reg);
 458   if (TaggedStackInterpreter) __ shlq(reg, 1);  // index = index*2
 459 }
 460 
 461 void TemplateTable::wide_iload() {
 462   transition(vtos, itos);
 463   locals_index_wide(rbx);
 464   __ movl(rax, iaddress(rbx));
 465   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 466 }
 467 
 468 void TemplateTable::wide_lload() {
 469   transition(vtos, ltos);
 470   locals_index_wide(rbx);
 471   __ movq(rax, laddress(rbx));
 472   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 473 }
 474 
 475 void TemplateTable::wide_fload() {
 476   transition(vtos, ftos);
 477   locals_index_wide(rbx);
 478   __ movflt(xmm0, faddress(rbx));
 479   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 480 }
 481 
 482 void TemplateTable::wide_dload() {
 483   transition(vtos, dtos);
 484   locals_index_wide(rbx);
 485   __ movdbl(xmm0, daddress(rbx));
 486   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 487 }
 488 
 489 void TemplateTable::wide_aload() {
 490   transition(vtos, atos);
 491   locals_index_wide(rbx);
 492   __ movq(rax, aaddress(rbx));
 493   debug_only(__ verify_local_tag(frame::TagReference, rbx));
 494 }
 495 
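     // Bounds/null checking shared by the array load/store templates: the null
     // check relies on the access of the length field to fault for a NULL array
     // (an explicit test is emitted only where that is not possible), and a
     // single unsigned compare of the sign-extended index against the length
     // catches both too-large and negative indices, jumping to the shared
     // ArrayIndexOutOfBoundsException entry with the offending index in ebx.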
 496 void TemplateTable::index_check(Register array, Register index) {
 497   // destroys rbx
 498   // check array
 499   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
 500   // sign extend index for use by indexed load
 501   __ movslq(index, index);
 502   // check index
 503   __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
 504   if (index != rbx) {
 505     // ??? convention: move aberrant index into ebx for exception message
 506     assert(rbx != array, "different registers");
 507     __ movl(rbx, index);
 508   }
 509   __ jump_cc(Assembler::aboveEqual,
 510              ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
 511 }
 512 
 513 void TemplateTable::iaload() {
 514   transition(itos, itos);
 515   __ pop_ptr(rdx);
 516   // eax: index
 517   // rdx: array
 518   index_check(rdx, rax); // kills rbx
 519   __ movl(rax, Address(rdx, rax, 
 520                        Address::times_4,
 521                        arrayOopDesc::base_offset_in_bytes(T_INT)));
 522 }
 523 
 524 void TemplateTable::laload() {
 525   transition(itos, ltos);
 526   __ pop_ptr(rdx);
 527   // eax: index
 528   // rdx: array
 529   index_check(rdx, rax); // kills rbx
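       // note: index_check() has copied the index into rbx, which is why rbx
       // can be used as the scaled index register below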
 530   __ movq(rax, Address(rdx, rbx, 
 531                        Address::times_8, 
 532                        arrayOopDesc::base_offset_in_bytes(T_LONG)));
 533 }
 534 
 535 void TemplateTable::faload() {
 536   transition(itos, ftos);
 537   __ pop_ptr(rdx);
 538   // eax: index
 539   // rdx: array
 540   index_check(rdx, rax); // kills rbx
 541   __ movflt(xmm0, Address(rdx, rax, 
 542                          Address::times_4,
 543                          arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
 544 }
 545 
 546 void TemplateTable::daload() {
 547   transition(itos, dtos);
 548   __ pop_ptr(rdx);
 549   // eax: index
 550   // rdx: array
 551   index_check(rdx, rax); // kills rbx
 552   __ movdbl(xmm0, Address(rdx, rax, 
 553                           Address::times_8,
 554                           arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
 555 }
 556 
 557 void TemplateTable::aaload() {
 558   transition(itos, atos);
 559   __ pop_ptr(rdx);
 560   // eax: index
 561   // rdx: array
 562   index_check(rdx, rax); // kills rbx
 563   __ movq(rax, Address(rdx, rax, 
 564                        Address::times_8,
 565                        arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 566 }
 567 
 568 void TemplateTable::baload() {
 569   transition(itos, itos);
 570   __ pop_ptr(rdx);
 571   // eax: index
 572   // rdx: array
 573   index_check(rdx, rax); // kills rbx
 574   __ load_signed_byte(rax, 
 575                       Address(rdx, rax, 
 576                               Address::times_1, 
 577                               arrayOopDesc::base_offset_in_bytes(T_BYTE)));
 578 }
 579 
 580 void TemplateTable::caload() {
 581   transition(itos, itos);
 582   __ pop_ptr(rdx);
 583   // eax: index
 584   // rdx: array
 585   index_check(rdx, rax); // kills rbx
 586   __ load_unsigned_word(rax, 
 587                         Address(rdx, rax, 
 588                                 Address::times_2,
 589                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 590 }
 591 
 592 // iload followed by caload frequent pair
 593 void TemplateTable::fast_icaload() {
 594   transition(vtos, itos);
 595   // load index out of locals
 596   locals_index(rbx);
 597   __ movl(rax, iaddress(rbx));
 598   debug_only(__ verify_local_tag(frame::TagValue, rbx));
 599 
 600   // eax: index
 601   // rdx: array
 602   __ pop_ptr(rdx);
 603   index_check(rdx, rax); // kills rbx
 604   __ load_unsigned_word(rax, 
 605                         Address(rdx, rax, 
 606                                 Address::times_2,
 607                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 608 }
 609 
 610 void TemplateTable::saload() {
 611   transition(itos, itos);
 612   __ pop_ptr(rdx);
 613   // eax: index
 614   // rdx: array
 615   index_check(rdx, rax); // kills rbx
 616   __ load_signed_word(rax,
 617                       Address(rdx, rax, 
 618                               Address::times_2,
 619                               arrayOopDesc::base_offset_in_bytes(T_SHORT)));
 620 }
 621 
 622 void TemplateTable::iload(int n) {
 623   transition(vtos, itos);
 624   __ movl(rax, iaddress(n));
 625   debug_only(__ verify_local_tag(frame::TagValue, n));
 626 }
 627 
 628 void TemplateTable::lload(int n) {
 629   transition(vtos, ltos);
 630   __ movq(rax, laddress(n));
 631   debug_only(__ verify_local_tag(frame::TagCategory2, n));
 632 }
 633 
 634 void TemplateTable::fload(int n) {
 635   transition(vtos, ftos);
 636   __ movflt(xmm0, faddress(n));
 637   debug_only(__ verify_local_tag(frame::TagValue, n));
 638 }
 639 
 640 void TemplateTable::dload(int n) {
 641   transition(vtos, dtos);
 642   __ movdbl(xmm0, daddress(n));
 643   debug_only(__ verify_local_tag(frame::TagCategory2, n));
 644 }
 645 
 646 void TemplateTable::aload(int n) {
 647   transition(vtos, atos);
 648   __ movq(rax, aaddress(n));
 649   debug_only(__ verify_local_tag(frame::TagReference, n));
 650 }
 651 
 652 void TemplateTable::aload_0() {
 653   transition(vtos, atos);
 654   // According to bytecode histograms, the pairs:
 655   //
 656   // _aload_0, _fast_igetfield
 657   // _aload_0, _fast_agetfield
 658   // _aload_0, _fast_fgetfield
 659   //
 660   // occur frequently. If RewriteFrequentPairs is set, the (slow)
 661   // _aload_0 bytecode checks if the next bytecode is either
 662   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
 663   // rewrites the current bytecode into a pair bytecode; otherwise it
 664   // rewrites the current bytecode into _fast_aload_0 that doesn't do
 665   // the pair check anymore.
 666   //
 667   // Note: If the next bytecode is _getfield, the rewrite must be
 668   //       delayed, otherwise we may miss an opportunity for a pair.
 669   //
 670   // Also rewrite frequent pairs
 671   //   aload_0, aload_1
 672   //   aload_0, iload_1
 673   // These pairs need only a small amount of code, so they are the most
 674   // profitable to rewrite.
 675   if (RewriteFrequentPairs) {
 676     Label rewrite, done;
 677     const Register bc = c_rarg3;
 678     assert(rbx != bc, "register damaged");
 679     // get next byte
 680     __ load_unsigned_byte(rbx, 
 681                           at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
 682 
 683     // do actual aload_0
 684     aload(0);
 685 
 686     // if _getfield then wait with rewrite
 687     __ cmpl(rbx, Bytecodes::_getfield);
 688     __ jcc(Assembler::equal, done);
 689 
 690     // if _igetfield then rewrite to _fast_iaccess_0
 691     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == 
 692            Bytecodes::_aload_0, 
 693            "fix bytecode definition");
 694     __ cmpl(rbx, Bytecodes::_fast_igetfield);
 695     __ movl(bc, Bytecodes::_fast_iaccess_0);
 696     __ jccb(Assembler::equal, rewrite);
 697 
 698     // if _agetfield then rewrite to _fast_aaccess_0
 699     assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == 
 700            Bytecodes::_aload_0, 
 701            "fix bytecode definition");
 702     __ cmpl(rbx, Bytecodes::_fast_agetfield);
 703     __ movl(bc, Bytecodes::_fast_aaccess_0);
 704     __ jccb(Assembler::equal, rewrite);
 705 
 706     // if _fgetfield then rewrite to _fast_faccess_0
 707     assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == 
 708            Bytecodes::_aload_0,
 709            "fix bytecode definition");
 710     __ cmpl(rbx, Bytecodes::_fast_fgetfield);
 711     __ movl(bc, Bytecodes::_fast_faccess_0);
 712     __ jccb(Assembler::equal, rewrite);
 713 
 714     // else rewrite to _fast_aload_0
 715     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == 
 716            Bytecodes::_aload_0,
 717            "fix bytecode definition");
 718     __ movl(bc, Bytecodes::_fast_aload_0);
 719 
 720     // rewrite
 721     // bc: fast bytecode
 722     __ bind(rewrite);
 723     patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);
 724 
 725     __ bind(done);
 726   } else {
 727     aload(0);
 728   }
 729 }
 730 
 731 void TemplateTable::istore() {
 732   transition(itos, vtos);
 733   locals_index(rbx);
 734   __ movl(iaddress(rbx), rax);
 735   __ tag_local(frame::TagValue, rbx);
 736 }
 737 
 738 void TemplateTable::lstore() {
 739   transition(ltos, vtos);
 740   locals_index(rbx);
 741   __ movq(laddress(rbx), rax);
 742   __ tag_local(frame::TagCategory2, rbx);
 743 }
 744 
 745 void TemplateTable::fstore() {
 746   transition(ftos, vtos);
 747   locals_index(rbx);
 748   __ movflt(faddress(rbx), xmm0);
 749   __ tag_local(frame::TagValue, rbx);
 750 }
 751 
 752 void TemplateTable::dstore() {
 753   transition(dtos, vtos);
 754   locals_index(rbx);
 755   __ movdbl(daddress(rbx), xmm0);
 756   __ tag_local(frame::TagCategory2, rbx);
 757 }
 758 
 759 void TemplateTable::astore() {
 760   transition(vtos, vtos);
 761   __ pop_ptr(rax, rdx);    // will need to pop tag too
 762   locals_index(rbx);
 763   __ movq(aaddress(rbx), rax);
 764   __ tag_local(rdx, rbx);  // store tag from stack, might be returnAddr
 765 }
 766 
 767 void TemplateTable::wide_istore() {
 768   transition(vtos, vtos);
 769   __ pop_i();
 770   locals_index_wide(rbx);
 771   __ movl(iaddress(rbx), rax);
 772   __ tag_local(frame::TagValue, rbx);
 773 }
 774 
 775 void TemplateTable::wide_lstore() {
 776   transition(vtos, vtos);
 777   __ pop_l();
 778   locals_index_wide(rbx);
 779   __ movq(laddress(rbx), rax);
 780   __ tag_local(frame::TagCategory2, rbx);
 781 }
 782 
 783 void TemplateTable::wide_fstore() {
 784   transition(vtos, vtos);
 785   __ pop_f();
 786   locals_index_wide(rbx);
 787   __ movflt(faddress(rbx), xmm0);
 788   __ tag_local(frame::TagValue, rbx);
 789 }
 790 
 791 void TemplateTable::wide_dstore() {
 792   transition(vtos, vtos);
 793   __ pop_d();
 794   locals_index_wide(rbx);
 795   __ movdbl(daddress(rbx), xmm0);
 796   __ tag_local(frame::TagCategory2, rbx);
 797 }
 798 
 799 void TemplateTable::wide_astore() {
 800   transition(vtos, vtos);
 801   __ pop_ptr(rax, rdx);    // will need to pop tag too
 802   locals_index_wide(rbx);
 803   __ movq(aaddress(rbx), rax);
 804   __ tag_local(rdx, rbx);  // store tag from stack, might be returnAddr
 805 }
 806 
 807 void TemplateTable::iastore() {
 808   transition(itos, vtos);
 809   __ pop_i(rbx);
 810   __ pop_ptr(rdx);
 811   // eax: value
 812   // ebx: index
 813   // rdx: array
 814   index_check(rdx, rbx); // prefer index in ebx
 815   __ movl(Address(rdx, rbx, 
 816                   Address::times_4,
 817                   arrayOopDesc::base_offset_in_bytes(T_INT)),
 818           rax);
 819 }
 820 
 821 void TemplateTable::lastore() {
 822   transition(ltos, vtos);
 823   __ pop_i(rbx);
 824   __ pop_ptr(rdx);
 825   // rax: value
 826   // ebx: index
 827   // rdx: array
 828   index_check(rdx, rbx); // prefer index in ebx
 829   __ movq(Address(rdx, rbx, 
 830                   Address::times_8, 
 831                   arrayOopDesc::base_offset_in_bytes(T_LONG)),
 832           rax);
 833 }
 834 
 835 void TemplateTable::fastore() {
 836   transition(ftos, vtos);
 837   __ pop_i(rbx);
 838   __ pop_ptr(rdx);
 839   // xmm0: value
 840   // ebx:  index
 841   // rdx:  array
 842   index_check(rdx, rbx); // prefer index in ebx
 843   __ movflt(Address(rdx, rbx, 
 844                    Address::times_4, 
 845                    arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
 846            xmm0);
 847 }
 848 
 849 void TemplateTable::dastore() {
 850   transition(dtos, vtos);
 851   __ pop_i(rbx);
 852   __ pop_ptr(rdx);
 853   // xmm0: value
 854   // ebx:  index
 855   // rdx:  array
 856   index_check(rdx, rbx); // prefer index in ebx
 857   __ movdbl(Address(rdx, rbx, 
 858                    Address::times_8, 
 859                    arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
 860            xmm0);
 861 }
 862 
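     // aastore needs a dynamic store check: a NULL value is stored directly
     // (after profile_null_seen), otherwise the value's klass is checked against
     // the array's element klass via gen_subtype_check.  On success the oop is
     // stored and the card table is dirtied with store_check(); on failure
     // control goes to the shared ArrayStoreException entry.  Note that the
     // three operands stay on the expression stack until the very end.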
 863 void TemplateTable::aastore() {
 864   Label is_null, ok_is_subtype, done;
 865   transition(vtos, vtos);
 866   // stack: ..., array, index, value
 867   __ movq(rax, at_tos());    // value
 868   __ movl(rcx, at_tos_p1()); // index
 869   __ movq(rdx, at_tos_p2()); // array
 870   index_check(rdx, rcx);     // kills rbx
 871   // do array store check - check for NULL value first
 872   __ testq(rax, rax);
 873   __ jcc(Assembler::zero, is_null);
 874 
 875   // Move subklass into rbx
 876   __ movq(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
 877   // Move superklass into rax
 878   __ movq(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
 879   __ movq(rax, Address(rax, 
 880                        sizeof(oopDesc) + 
 881                        objArrayKlass::element_klass_offset_in_bytes()));
 882   // Compress array + index*8 + the element base offset into a single register.  Frees rcx.
 883   __ leaq(rdx, Address(rdx, rcx, 
 884                        Address::times_8, 
 885                        arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 886 
 887   // Generate subtype check.  Blows rcx, rdi
 888   // Superklass in rax.  Subklass in rbx.
 889   __ gen_subtype_check(rbx, ok_is_subtype);
 890 
 891   // Come here on failure
 892   // object is at TOS
 893   __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
 894 
 895   // Come here on success
 896   __ bind(ok_is_subtype);
 897   __ movq(rax, at_tos()); // Value
 898   __ movq(Address(rdx, 0), rax);
 899   __ store_check(rdx);
 900   __ jmp(done);
 901 
 902   // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
 903   __ bind(is_null);
 904   __ profile_null_seen(rbx);
 905   __ movq(Address(rdx, rcx, 
 906                   Address::times_8, 
 907                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)), 
 908           rax);
 909 
 910   // Pop stack arguments
 911   __ bind(done);
 912   __ addq(rsp, 3 * Interpreter::stackElementSize());
 913 }
 914 
 915 void TemplateTable::bastore() {
 916   transition(itos, vtos);
 917   __ pop_i(rbx);
 918   __ pop_ptr(rdx);
 919   // eax: value
 920   // ebx: index
 921   // rdx: array
 922   index_check(rdx, rbx); // prefer index in ebx
 923   __ movb(Address(rdx, rbx, 
 924                   Address::times_1, 
 925                   arrayOopDesc::base_offset_in_bytes(T_BYTE)), 
 926           rax);
 927 }
 928 
 929 void TemplateTable::castore() {
 930   transition(itos, vtos);
 931   __ pop_i(rbx);
 932   __ pop_ptr(rdx);
 933   // eax: value
 934   // ebx: index
 935   // rdx: array
 936   index_check(rdx, rbx);  // prefer index in ebx
 937   __ movw(Address(rdx, rbx, 
 938                   Address::times_2,
 939                   arrayOopDesc::base_offset_in_bytes(T_CHAR)),
 940           rax);
 941 }
 942 
 943 void TemplateTable::sastore() {
 944   castore();
 945 }
 946 
 947 void TemplateTable::istore(int n) {
 948   transition(itos, vtos);
 949   __ movl(iaddress(n), rax);
 950   __ tag_local(frame::TagValue, n);
 951 }
 952 
 953 void TemplateTable::lstore(int n) {
 954   transition(ltos, vtos);
 955   __ movq(laddress(n), rax);
 956   __ tag_local(frame::TagCategory2, n);
 957 }
 958 
 959 void TemplateTable::fstore(int n) {
 960   transition(ftos, vtos);
 961   __ movflt(faddress(n), xmm0);
 962   __ tag_local(frame::TagValue, n);
 963 }
 964 
 965 void TemplateTable::dstore(int n) {
 966   transition(dtos, vtos);
 967   __ movdbl(daddress(n), xmm0);
 968   __ tag_local(frame::TagCategory2, n);
 969 }
 970 
 971 void TemplateTable::astore(int n) {
 972   transition(vtos, vtos);
 973   __ pop_ptr(rax, rdx);
 974   __ movq(aaddress(n), rax);
 975   __ tag_local(rdx, n);
 976 }
 977 
 978 void TemplateTable::pop() {
 979   transition(vtos, vtos);
 980   __ addq(rsp, Interpreter::stackElementSize());
 981 }
 982 
 983 void TemplateTable::pop2() {
 984   transition(vtos, vtos);
 985   __ addq(rsp, 2 * Interpreter::stackElementSize());
 986 }
 987 
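     // The dup*/swap templates below shuffle raw expression-stack slots with
     // load_ptr_and_tag()/store_ptr_and_tag()/push_ptr(), which move a value
     // together with its slot tag when TaggedStackInterpreter is on (and just
     // the value otherwise), so the same code serves both stack layouts.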
 988 void TemplateTable::dup() {
 989   transition(vtos, vtos);
 990   __ load_ptr_and_tag(0, rax, rdx);
 991   __ push_ptr(rax, rdx);
 992   // stack: ..., a, a
 993 }
 994 
 995 void TemplateTable::dup_x1() {
 996   transition(vtos, vtos);
 997   // stack: ..., a, b
 998   __ load_ptr_and_tag(0, rax, rdx);  // load b
 999   __ load_ptr_and_tag(1, rcx, rbx);  // load a
1000   __ store_ptr_and_tag(1, rax, rdx); // store b
1001   __ store_ptr_and_tag(0, rcx, rbx); // store a
1002   __ push_ptr(rax, rdx);             // push b
1003   // stack: ..., b, a, b
1004 }
1005 
1006 void TemplateTable::dup_x2() {
1007   transition(vtos, vtos);
1008   // stack: ..., a, b, c
1009   __ load_ptr_and_tag(0, rax, rdx);  // load c
1010   __ load_ptr_and_tag(2, rcx, rbx);  // load a
1011   __ store_ptr_and_tag(2, rax, rdx); // store c in a
1012   __ push_ptr(rax, rdx);             // push c
1013   // stack: ..., c, b, c, c
1014   __ load_ptr_and_tag(2, rax, rdx);  // load b
1015   __ store_ptr_and_tag(2, rcx, rbx); // store a in b
1016   // stack: ..., c, a, c, c
1017   __ store_ptr_and_tag(1, rax, rdx); // store b in c
1018   // stack: ..., c, a, b, c
1019 }
1020 
1021 void TemplateTable::dup2() {
1022   transition(vtos, vtos);
1023   // stack: ..., a, b
1024   __ load_ptr_and_tag(1, rax, rdx);  // load a
1025   __ push_ptr(rax, rdx);             // push a
1026   __ load_ptr_and_tag(1, rax, rdx);  // load b
1027   __ push_ptr(rax, rdx);             // push b
1028   // stack: ..., a, b, a, b
1029 }
1030 
1031 void TemplateTable::dup2_x1() {
1032   transition(vtos, vtos);
1033   // stack: ..., a, b, c
1034   __ load_ptr_and_tag(0, rcx, rbx);  // load c
1035   __ load_ptr_and_tag(1, rax, rdx);  // load b
1036   __ push_ptr(rax, rdx);             // push b
1037   __ push_ptr(rcx, rbx);             // push c
1038   // stack: ..., a, b, c, b, c
1039   __ store_ptr_and_tag(3, rcx, rbx); // store c in b
1040   // stack: ..., a, c, c, b, c
1041   __ load_ptr_and_tag(4, rcx, rbx);  // load a
1042   __ store_ptr_and_tag(2, rcx, rbx); // store a in 2nd c
1043   // stack: ..., a, c, a, b, c
1044   __ store_ptr_and_tag(4, rax, rdx); // store b in a
1045   // stack: ..., b, c, a, b, c
1046 }
1047 
1048 void TemplateTable::dup2_x2() {
1049   transition(vtos, vtos);
1050   // stack: ..., a, b, c, d
1051   __ load_ptr_and_tag(0, rcx, rbx);  // load d
1052   __ load_ptr_and_tag(1, rax, rdx);  // load c
1053   __ push_ptr(rax, rdx);             // push c
1054   __ push_ptr(rcx, rbx);             // push d
1055   // stack: ..., a, b, c, d, c, d
1056   __ load_ptr_and_tag(4, rax, rdx);  // load b
1057   __ store_ptr_and_tag(2, rax, rdx); // store b in d
1058   __ store_ptr_and_tag(4, rcx, rbx); // store d in b
1059   // stack: ..., a, d, c, b, c, d
1060   __ load_ptr_and_tag(5, rcx, rbx);  // load a
1061   __ load_ptr_and_tag(3, rax, rdx);  // load c
1062   __ store_ptr_and_tag(3, rcx, rbx); // store a in c
1063   __ store_ptr_and_tag(5, rax, rdx); // store c in a
1064   // stack: ..., c, d, a, b, c, d
1065 }
1066 
1067 void TemplateTable::swap() {
1068   transition(vtos, vtos);
1069   // stack: ..., a, b
1070   __ load_ptr_and_tag(1, rcx, rbx);  // load a
1071   __ load_ptr_and_tag(0, rax, rdx);  // load b
1072   __ store_ptr_and_tag(0, rcx, rbx); // store a in b
1073   __ store_ptr_and_tag(1, rax, rdx); // store b in a
1074   // stack: ..., b, a
1075 }
1076 
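     // In the binary int/long templates the right operand sits in rax (tos) and
     // the left operand is popped from the expression stack.  Commutative ops
     // just pop into rdx and combine into rax; subtraction swaps operands first,
     // and the shifts move the count into rcx because the hardware shift
     // instructions take their count in cl.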
1077 void TemplateTable::iop2(Operation op) {
1078   transition(itos, itos);
1079   switch (op) {
1080   case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
1081   case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
1082   case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
1083   case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
1084   case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
1085   case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
1086   case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
1087   case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
1088   case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
1089   default   : ShouldNotReachHere();
1090   }
1091 }
1092 
1093 void TemplateTable::lop2(Operation op) {
1094   transition(ltos, ltos);
1095   switch (op) {
1096   case add  :                    __ pop_l(rdx); __ addq (rax, rdx); break;
1097   case sub  : __ movq(rdx, rax); __ pop_l(rax); __ subq (rax, rdx); break;
1098   case _and :                    __ pop_l(rdx); __ andq (rax, rdx); break;
1099   case _or  :                    __ pop_l(rdx); __ orq  (rax, rdx); break;
1100   case _xor :                    __ pop_l(rdx); __ xorq (rax, rdx); break;
1101   default : ShouldNotReachHere();
1102   }
1103 }
1104 
1105 void TemplateTable::idiv() {
1106   transition(itos, itos);
1107   __ movl(rcx, rax);
1108   __ pop_i(rax);
1109   // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
1110   //       they are not equal, one could do a normal division (no correction
1111   //       needed), which may speed up this implementation for the common case.
1112   //       (see also JVM spec., p.243 & p.271)
1113   __ corrected_idivl(rcx);
1114 }
1115 
1116 void TemplateTable::irem() {
1117   transition(itos, itos);
1118   __ movl(rcx, rax);
1119   __ pop_i(rax);
1120   // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
1121   //       they are not equal, one could do a normal division (no correction
1122   //       needed), which may speed up this implementation for the common case.
1123   //       (see also JVM spec., p.243 & p.271)
1124   __ corrected_idivl(rcx);
1125   __ movl(rax, rdx);
1126 }
1127 
1128 void TemplateTable::lmul() {
1129   transition(ltos, ltos);
1130   __ pop_l(rdx);
1131   __ imulq(rax, rdx);
1132 }
1133 
1134 void TemplateTable::ldiv() {
1135   transition(ltos, ltos);
1136   __ movq(rcx, rax);
1137   __ pop_l(rax);
1138   // generate explicit div0 check
1139   __ testq(rcx, rcx);
1140   __ jump_cc(Assembler::zero,
1141              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1142   // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1143   //       they are not equal, one could do a normal division (no correction
1144   //       needed), which may speed up this implementation for the common case.
1145   //       (see also JVM spec., p.243 & p.271)
1146   __ corrected_idivq(rcx); // kills rbx
1147 }
1148 
1149 void TemplateTable::lrem() {
1150   transition(ltos, ltos);
1151   __ movq(rcx, rax);
1152   __ pop_l(rax);
1153   __ testq(rcx, rcx);
1154   __ jump_cc(Assembler::zero,
1155              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1156   // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1157   //       they are not equal, one could do a normal division (no correction
1158   //       needed), which may speed up this implementation for the common case.
1159   //       (see also JVM spec., p.243 & p.271)
1160   __ corrected_idivq(rcx); // kills rbx
1161   __ movq(rax, rdx);
1162 }
1163 
1164 void TemplateTable::lshl() {
1165   transition(itos, ltos);
1166   __ movl(rcx, rax);                             // get shift count
1167   __ pop_l(rax);                                 // get shift value
1168   __ shlq(rax);
1169 }
1170 
1171 void TemplateTable::lshr() {
1172   transition(itos, ltos);
1173   __ movl(rcx, rax);                             // get shift count
1174   __ pop_l(rax);                                 // get shift value
1175   __ sarq(rax);
1176 }
1177 
1178 void TemplateTable::lushr() {
1179   transition(itos, ltos);
1180   __ movl(rcx, rax);                             // get shift count
1181   __ pop_l(rax);                                 // get shift value
1182   __ shrq(rax);
1183 }
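     // Note for lshl/lshr/lushr above: no explicit masking of the shift count is
     // needed, because the hardware masks a 64-bit shift count to its low 6 bits,
     // which is exactly the 6-bit count the JVM specification prescribes for
     // long shifts.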
1184 
1185 void TemplateTable::fop2(Operation op) {
1186   transition(ftos, ftos);
1187   switch (op) {
1188   case add:
1189     __ addss(xmm0, at_rsp());
1190     __ addq(rsp, Interpreter::stackElementSize());
1191     break;
1192   case sub:
1193     __ movflt(xmm1, xmm0);
1194     __ pop_f(xmm0);
1195     __ subss(xmm0, xmm1);
1196     break;
1197   case mul:
1198     __ mulss(xmm0, at_rsp());
1199     __ addq(rsp, Interpreter::stackElementSize());
1200     break;
1201   case div:
1202     __ movflt(xmm1, xmm0);
1203     __ pop_f(xmm0);
1204     __ divss(xmm0, xmm1);
1205     break;
1206   case rem:
1207     __ movflt(xmm1, xmm0);
1208     __ pop_f(xmm0);
1209     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
1210     break;
1211   default:
1212     ShouldNotReachHere();
1213     break;
1214   }
1215 }
1216 
1217 void TemplateTable::dop2(Operation op) {
1218   transition(dtos, dtos);
1219   switch (op) {
1220   case add:
1221     __ addsd(xmm0, at_rsp());
1222     __ addq(rsp, 2 * Interpreter::stackElementSize());
1223     break;
1224   case sub:
1225     __ movdbl(xmm1, xmm0);
1226     __ pop_d(xmm0); 
1227     __ subsd(xmm0, xmm1);
1228     break;
1229   case mul:
1230     __ mulsd(xmm0, at_rsp());
1231     __ addq(rsp, 2 * Interpreter::stackElementSize());
1232     break;
1233   case div:
1234     __ movdbl(xmm1, xmm0);
1235     __ pop_d(xmm0);
1236     __ divsd(xmm0, xmm1);
1237     break;
1238   case rem:
1239     __ movdbl(xmm1, xmm0);
1240     __ pop_d(xmm0);
1241     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
1242     break;
1243   default:
1244     ShouldNotReachHere();
1245     break;
1246   }
1247 }
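     // In the rem cases above there is no SSE remainder instruction, so the
     // divisor is saved in xmm1, the dividend is popped into xmm0, and
     // SharedRuntime::frem/drem is called as a leaf routine (the two
     // floating-point arguments travel in xmm0/xmm1); the Java-semantics
     // remainder comes back in xmm0.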
1248 
1249 void TemplateTable::ineg() {
1250   transition(itos, itos);
1251   __ negl(rax);
1252 }
1253 
1254 void TemplateTable::lneg() {
1255   transition(ltos, ltos);
1256   __ negq(rax);
1257 }
1258 
1259 // Note: 'double' and 'long long' have 32-bit alignment on x86.
1260 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
1261   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
1262   // for the 128-bit operands of SSE instructions.
1263   jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
1264   // Store the value into a 128-bit operand.
1265   operand[0] = lo;
1266   operand[1] = hi;
1267   return operand;
1268 }
1269 
1270 // Buffer for 128-bit masks used by SSE instructions.
1271 static jlong float_signflip_pool[2*2];
1272 static jlong double_signflip_pool[2*2];
1273 
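     // double_quadword() picks a 16-byte-aligned slot inside the oversized pools
     // above (hence the 2*2 sizing) and stores the two given halves there,
     // producing a mask usable as a 128-bit SSE memory operand.  fneg and dneg
     // then simply xor the sign bit(s) of xmm0 with that mask, flipping the sign
     // without affecting NaNs or raising exceptions.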
1274 void TemplateTable::fneg() {
1275   transition(ftos, ftos);
1276   static jlong *float_signflip  = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000); 
1277   __ xorps(xmm0, ExternalAddress((address) float_signflip));
1278 }
1279 
1280 void TemplateTable::dneg() {
1281   transition(dtos, dtos);
1282   static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000); 
1283   __ xorpd(xmm0, ExternalAddress((address) double_signflip));
1284 }
1285 
1286 void TemplateTable::iinc() {
1287   transition(vtos, vtos);
1288   __ load_signed_byte(rdx, at_bcp(2)); // get constant
1289   locals_index(rbx);
1290   __ addl(iaddress(rbx), rdx);
1291 }
1292 
1293 void TemplateTable::wide_iinc() {
1294   transition(vtos, vtos);
1295   __ movl(rdx, at_bcp(4)); // get constant
1296   locals_index_wide(rbx);
1297   __ bswapl(rdx); // swap bytes & sign-extend constant
1298   __ sarl(rdx, 16);
1299   __ addl(iaddress(rbx), rdx);
1300   // Note: should probably use only one movl to get both
1301   //       the index and the constant -> fix this
1302 }
1303 
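     // For the f2i/f2l/d2i/d2l cases below: cvttss2si/cvttsd2si return the
     // "integer indefinite" value (0x80000000 resp. 0x8000000000000000) when the
     // input is NaN or out of range, so the generated code compares against that
     // value and only in that rare case calls the SharedRuntime stub, which
     // supplies the Java-mandated result (0 for NaN, saturation to min/max
     // otherwise).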
1304 void TemplateTable::convert() {
1305   // Checking
1306 #ifdef ASSERT
1307   {
1308     TosState tos_in  = ilgl;
1309     TosState tos_out = ilgl;
1310     switch (bytecode()) {
1311     case Bytecodes::_i2l: // fall through
1312     case Bytecodes::_i2f: // fall through
1313     case Bytecodes::_i2d: // fall through
1314     case Bytecodes::_i2b: // fall through
1315     case Bytecodes::_i2c: // fall through
1316     case Bytecodes::_i2s: tos_in = itos; break;
1317     case Bytecodes::_l2i: // fall through
1318     case Bytecodes::_l2f: // fall through
1319     case Bytecodes::_l2d: tos_in = ltos; break;
1320     case Bytecodes::_f2i: // fall through
1321     case Bytecodes::_f2l: // fall through
1322     case Bytecodes::_f2d: tos_in = ftos; break;
1323     case Bytecodes::_d2i: // fall through
1324     case Bytecodes::_d2l: // fall through
1325     case Bytecodes::_d2f: tos_in = dtos; break;
1326     default             : ShouldNotReachHere();
1327     }
1328     switch (bytecode()) {
1329     case Bytecodes::_l2i: // fall through
1330     case Bytecodes::_f2i: // fall through
1331     case Bytecodes::_d2i: // fall through
1332     case Bytecodes::_i2b: // fall through
1333     case Bytecodes::_i2c: // fall through
1334     case Bytecodes::_i2s: tos_out = itos; break;
1335     case Bytecodes::_i2l: // fall through
1336     case Bytecodes::_f2l: // fall through
1337     case Bytecodes::_d2l: tos_out = ltos; break;
1338     case Bytecodes::_i2f: // fall through
1339     case Bytecodes::_l2f: // fall through
1340     case Bytecodes::_d2f: tos_out = ftos; break;
1341     case Bytecodes::_i2d: // fall through
1342     case Bytecodes::_l2d: // fall through
1343     case Bytecodes::_f2d: tos_out = dtos; break;
1344     default             : ShouldNotReachHere();
1345     }
1346     transition(tos_in, tos_out);
1347   }
1348 #endif // ASSERT
1349 
1350   static const int64_t is_nan = 0x8000000000000000L;
1351 
1352   // Conversion
1353   switch (bytecode()) {
1354   case Bytecodes::_i2l:
1355     __ movslq(rax, rax);
1356     break;
1357   case Bytecodes::_i2f:
1358     __ cvtsi2ssl(xmm0, rax);
1359     break;
1360   case Bytecodes::_i2d:
1361     __ cvtsi2sdl(xmm0, rax);
1362     break;
1363   case Bytecodes::_i2b:
1364     __ movsbl(rax, rax);
1365     break;
1366   case Bytecodes::_i2c:
1367     __ movzwl(rax, rax);
1368     break;
1369   case Bytecodes::_i2s:
1370     __ movswl(rax, rax);
1371     break;
1372   case Bytecodes::_l2i:
1373     __ movl(rax, rax);
1374     break;
1375   case Bytecodes::_l2f:
1376     __ cvtsi2ssq(xmm0, rax);
1377     break;
1378   case Bytecodes::_l2d:
1379     __ cvtsi2sdq(xmm0, rax);
1380     break;
1381   case Bytecodes::_f2i:
1382   {
1383     Label L;
1384     __ cvttss2sil(rax, xmm0);
1385     __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1386     __ jcc(Assembler::notEqual, L);
1387     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1388     __ bind(L);
1389   }
1390     break;
1391   case Bytecodes::_f2l:
1392   {
1393     Label L;
1394     __ cvttss2siq(rax, xmm0);
1395     // NaN or overflow/underflow?
1396     __ cmp64(rax, ExternalAddress((address) &is_nan));
1397     __ jcc(Assembler::notEqual, L);
1398     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1399     __ bind(L);
1400   }
1401     break;
1402   case Bytecodes::_f2d:
1403     __ cvtss2sd(xmm0, xmm0);
1404     break;
1405   case Bytecodes::_d2i:
1406   {
1407     Label L;
1408     __ cvttsd2sil(rax, xmm0);
1409     __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1410     __ jcc(Assembler::notEqual, L);
1411     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
1412     __ bind(L);
1413   }
1414     break;
1415   case Bytecodes::_d2l:
1416   {
1417     Label L;
1418     __ cvttsd2siq(rax, xmm0);
1419     // NaN or overflow/underflow?
1420     __ cmp64(rax, ExternalAddress((address) &is_nan));
1421     __ jcc(Assembler::notEqual, L);
1422     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
1423     __ bind(L);
1424   }
1425     break;
1426   case Bytecodes::_d2f:
1427     __ cvtsd2ss(xmm0, xmm0);
1428     break;
1429   default:
1430     ShouldNotReachHere();
1431   }
1432 }
1433 
1434 void TemplateTable::lcmp() {
1435   transition(ltos, itos);
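       // rdx holds value1 (popped), rax holds value2 (tos).  -1 is preloaded;
       // if value1 < value2 that is the result, otherwise setb(notEqual)
       // produces 0 for equal and 1 for greater, and movzbl clears the upper
       // bits of rax.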
1436   Label done;
1437   __ pop_l(rdx);
1438   __ cmpq(rdx, rax);
1439   __ movl(rax, -1);
1440   __ jccb(Assembler::less, done);
1441   __ setb(Assembler::notEqual, rax);
1442   __ movzbl(rax, rax);
1443   __ bind(done);
1444 }
1445 
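     // unordered_result distinguishes fcmpl/dcmpl (-1: a NaN operand compares as
     // "less") from fcmpg/dcmpg (+1: NaN compares as "greater").  ucomiss/ucomisd
     // set the parity flag for an unordered compare, so the jccb(parity, ...)
     // branches select the required value whenever either operand is NaN.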
1446 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
1447   Label done;
1448   if (is_float) {
1449     // XXX get rid of pop here, use ... reg, mem32
1450     __ pop_f(xmm1);
1451     __ ucomiss(xmm1, xmm0);
1452   } else {
1453     // XXX get rid of pop here, use ... reg, mem64
1454     __ pop_d(xmm1);
1455     __ ucomisd(xmm1, xmm0);
1456   }
1457   if (unordered_result < 0) {
1458     __ movl(rax, -1);
1459     __ jccb(Assembler::parity, done);
1460     __ jccb(Assembler::below, done);
1461     __ setb(Assembler::notEqual, rdx);
1462     __ movzbl(rax, rdx);
1463   } else {
1464     __ movl(rax, 1);
1465     __ jccb(Assembler::parity, done);
1466     __ jccb(Assembler::above, done);
1467     __ movl(rax, 0);
1468     __ jccb(Assembler::equal, done);
1469     __ decrementl(rax);
1470   }
1471   __ bind(done);
1472 }
1473 
1474 void TemplateTable::branch(bool is_jsr, bool is_wide) {
1475   __ get_method(rcx); // rcx holds method
1476   __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
1477                                      // holds bumped taken count
1478 
1479   const ByteSize be_offset = methodOopDesc::backedge_counter_offset() + 
1480                              InvocationCounter::counter_offset();
1481   const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() +
1482                               InvocationCounter::counter_offset();
1483   const int method_offset = frame::interpreter_frame_method_offset * wordSize;
1484 
1485   // Load up edx with the branch displacement
1486   __ movl(rdx, at_bcp(1));
1487   __ bswapl(rdx);
1488 
1489   if (!is_wide) {
1490     __ sarl(rdx, 16);
1491   }
1492   __ movslq(rdx, rdx);
1493 
1494   // Handle all the JSR stuff here, then exit.
1495   // It's much shorter and cleaner than intermingling with the non-JSR
1496   // normal-branch stuff occurring below.
1497   if (is_jsr) {
1498     // Pre-load the next target bytecode into rbx
1499     __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1, 0));
1500 
1501     // compute return address as bci in rax
1502     __ leaq(rax, at_bcp((is_wide ? 5 : 3) -
1503                         in_bytes(constMethodOopDesc::codes_offset())));
1504     __ subq(rax, Address(rcx, methodOopDesc::const_offset()));
1505     // Adjust the bcp in r13 by the displacement in rdx
1506     __ addq(r13, rdx);
1507     // jsr returns atos that is not an oop
1508     __ push_i(rax);
1509     __ dispatch_only(vtos);
1510     return;
1511   }
1512 
1513   // Normal (non-jsr) branch handling
1514 
1515   // Adjust the bcp in r13 by the displacement in rdx  
1516   __ addq(r13, rdx);
1517 
1518   assert(UseLoopCounter || !UseOnStackReplacement, 
1519          "on-stack-replacement requires loop counters");
1520   Label backedge_counter_overflow;
1521   Label profile_method;
1522   Label dispatch;
1523   if (UseLoopCounter) {
1524     // increment backedge counter for backward branches
1525     // rax: MDO
1526     // ebx: MDO bumped taken-count
1527     // rcx: method
1528     // rdx: target offset
1529     // r13: target bcp
1530     // r14: locals pointer
1531     __ testl(rdx, rdx);             // check if forward or backward branch
1532     __ jcc(Assembler::positive, dispatch); // count only if backward branch
1533 
1534     // increment counter 
1535     __ movl(rax, Address(rcx, be_offset));        // load backedge counter
1536     __ incrementl(rax, InvocationCounter::count_increment); // increment
1537                                                             // counter
1538     __ movl(Address(rcx, be_offset), rax);        // store counter
1539 
1540     __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
1541     __ andl(rax, InvocationCounter::count_mask_value); // and the status bits
1542     __ addl(rax, Address(rcx, be_offset));        // add both counters
1543 
1544     if (ProfileInterpreter) {
1545       // Test to see if we should create a method data oop
1546       __ cmp32(rax,
1547                ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
1548       __ jcc(Assembler::less, dispatch);
1549 
1550       // if no method data exists, go to profile method
1551       __ test_method_data_pointer(rax, profile_method);
1552 
1553       if (UseOnStackReplacement) {
1554         // check for overflow against ebx which is the MDO taken count
1555         __ cmp32(rbx,
1556                  ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
1557         __ jcc(Assembler::below, dispatch);
1558 
1559         // When ProfileInterpreter is on, the backedge_count comes
1560         // from the methodDataOop, whose value does not get reset on
1561         // the call to frequency_counter_overflow().  To avoid
1562         // excessive calls to the overflow routine while the method is
1563         // being compiled, add a second test to make sure the overflow
1564         // function is called only once every overflow_frequency.
1565         const int overflow_frequency = 1024;
1566         __ andl(rbx, overflow_frequency - 1);
1567         __ jcc(Assembler::zero, backedge_counter_overflow);
1568 
1569       }
1570     } else {
1571       if (UseOnStackReplacement) {
1572         // check for overflow against eax, which is the sum of the
1573         // counters
1574         __ cmp32(rax,
1575                  ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
1576         __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
1577 
1578       }
1579     }
1580     __ bind(dispatch);
1581   }
1582 
1583   // Pre-load the next target bytecode into rbx
1584   __ load_unsigned_byte(rbx, Address(r13, 0));
1585 
1586   // continue with the bytecode @ target
1587   // eax: return bci for jsr's, unused otherwise
1588   // ebx: target bytecode
1589   // r13: target bcp
1590   __ dispatch_only(vtos);
1591 
1592   if (UseLoopCounter) {
1593     if (ProfileInterpreter) {
1594       // Out-of-line code to allocate method data oop.
1595       __ bind(profile_method);
1596       __ call_VM(noreg, 
1597                  CAST_FROM_FN_PTR(address, 
1598                                   InterpreterRuntime::profile_method), r13);
1599       __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
1600       __ movq(rcx, Address(rbp, method_offset));
1601       __ movq(rcx, Address(rcx, 
1602                            in_bytes(methodOopDesc::method_data_offset())));
1603       __ movq(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize),
1604               rcx);
1605       __ test_method_data_pointer(rcx, dispatch);
1606       // offset non-null mdp by MDO::data_offset() + IR::profile_method()
1607       __ addq(rcx, in_bytes(methodDataOopDesc::data_offset()));
1608       __ addq(rcx, rax);
1609       __ movq(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize),
1610               rcx);
1611       __ jmp(dispatch);
1612     }
1613 
1614     if (UseOnStackReplacement) {
1615       // backedge counter overflow
1616       __ bind(backedge_counter_overflow);
1617       __ negq(rdx);
1618       __ addq(rdx, r13); // branch bcp
1619       // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
1620       __ call_VM(noreg,
1621                  CAST_FROM_FN_PTR(address, 
1622                                   InterpreterRuntime::frequency_counter_overflow),
1623                  rdx);
1624       __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
1625 
1626       // rax: osr nmethod (osr ok) or NULL (osr not possible)
1627       // ebx: target bytecode
1628       // rdx: scratch
1629       // r14: locals pointer
1630       // r13: bcp
1631       __ testq(rax, rax);                        // test result
1632       __ jcc(Assembler::zero, dispatch);         // no osr if null
1633       // nmethod may have been invalidated (VM may block upon call_VM return)
1634       __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
1635       __ cmpl(rcx, InvalidOSREntryBci);
1636       __ jcc(Assembler::equal, dispatch);
1637       
1638       // We have the address of an on stack replacement routine in eax        
1639       // We need to prepare to execute the OSR method. First we must
1640       // migrate the locals and monitors off of the stack.
1641 
1642       __ movq(r13, rax);                             // save the nmethod
1643 
1644       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
1645 
1646       // eax is OSR buffer, move it to expected parameter location
1647       __ movq(j_rarg0, rax);
1648 
1649       // Use the j_rarg* names here so that the registers we pick cannot collide
1650       // with the OSR nmethod's parameter registers, which differ across platforms;
1651       // we are in the midst of the calling sequence to it. These are NOT parameters.
1652 
1653       const Register retaddr = j_rarg2;
1654       const Register sender_sp = j_rarg1;
1655 
1656       // pop the interpreter frame
1657       __ movq(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
1658       __ leave();                                // remove frame anchor
1659       __ popq(retaddr);                            // get return address
1660       __ movq(rsp, sender_sp);                   // set sp to sender sp
1661       // Ensure compiled code always sees stack at proper alignment
1662       __ andq(rsp, -(StackAlignmentInBytes));
1663 
1664       // unlike 32-bit x86, we need no specialized return from compiled code
1665       // to the interpreter or the call stub.
1666 
1667       // push the return address
1668       __ pushq(retaddr);
1669 
1670       // and begin the OSR nmethod
1671       __ jmp(Address(r13, nmethod::osr_entry_point_offset()));
1672     }
1673   }
1674 }
1675 
1676 
1677 void TemplateTable::if_0cmp(Condition cc) {
1678   transition(itos, vtos);
1679   // assume branch is more often taken than not (loops use backward branches)
1680   Label not_taken;
1681   __ testl(rax, rax);
1682   __ jcc(j_not(cc), not_taken);
1683   branch(false, false);
1684   __ bind(not_taken);
1685   __ profile_not_taken_branch(rax);
1686 }
1687 
1688 void TemplateTable::if_icmp(Condition cc) {
1689   transition(itos, vtos);
1690   // assume branch is more often taken than not (loops use backward branches)
1691   Label not_taken;
1692   __ pop_i(rdx);
1693   __ cmpl(rdx, rax);
1694   __ jcc(j_not(cc), not_taken);
1695   branch(false, false);
1696   __ bind(not_taken);
1697   __ profile_not_taken_branch(rax);
1698 }
1699 
1700 void TemplateTable::if_nullcmp(Condition cc) {
1701   transition(atos, vtos);
1702   // assume branch is more often taken than not (loops use backward branches)
1703   Label not_taken;
1704   __ testq(rax, rax);
1705   __ jcc(j_not(cc), not_taken);
1706   branch(false, false);
1707   __ bind(not_taken);
1708   __ profile_not_taken_branch(rax);
1709 }
1710 
1711 void TemplateTable::if_acmp(Condition cc) {
1712   transition(atos, vtos);
1713   // assume branch is more often taken than not (loops use backward branches)
1714   Label not_taken;
1715   __ pop_ptr(rdx);
1716   __ cmpq(rdx, rax);
1717   __ jcc(j_not(cc), not_taken);
1718   branch(false, false);
1719   __ bind(not_taken);
1720   __ profile_not_taken_branch(rax);
1721 }
1722 
1723 void TemplateTable::ret() {
1724   transition(vtos, vtos);
1725   locals_index(rbx);
1726   __ movq(rbx, aaddress(rbx)); // get return bci, compute return bcp
1727   __ profile_ret(rbx, rcx);
1728   __ get_method(rax);
1729   __ movq(r13, Address(rax, methodOopDesc::const_offset()));
1730   __ leaq(r13, Address(r13, rbx, Address::times_1,
1731                        constMethodOopDesc::codes_offset()));
1732   __ dispatch_next(vtos);
1733 }
1734 
1735 void TemplateTable::wide_ret() {
1736   transition(vtos, vtos);
1737   locals_index_wide(rbx);
1738   __ movq(rbx, aaddress(rbx)); // get return bci, compute return bcp
1739   __ profile_ret(rbx, rcx);
1740   __ get_method(rax);
1741   __ movq(r13, Address(rax, methodOopDesc::const_offset()));
1742   __ leaq(r13, Address(r13, rbx, Address::times_1, constMethodOopDesc::codes_offset()));
1743   __ dispatch_next(vtos);
1744 }
1745 
1746 void TemplateTable::tableswitch() {
1747   Label default_case, continue_execution;
1748   transition(itos, vtos);
1749   // align r13
1750   __ leaq(rbx, at_bcp(BytesPerInt));
1751   __ andq(rbx, -BytesPerInt);
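       // rbx now points at the 4-byte-aligned tableswitch operands: the default
       // offset at +0, low at +4, high at +8, and the jump table starting at +12,
       // all stored as big-endian ints (class file order), hence the bswaps below.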
1752   // load lo & hi
1753   __ movl(rcx, Address(rbx, BytesPerInt));
1754   __ movl(rdx, Address(rbx, 2 * BytesPerInt));
1755   __ bswapl(rcx);
1756   __ bswapl(rdx);
1757   // check against lo & hi
1758   __ cmpl(rax, rcx);
1759   __ jcc(Assembler::less, default_case);
1760   __ cmpl(rax, rdx);
1761   __ jcc(Assembler::greater, default_case);
1762   // lookup dispatch offset
1763   __ subl(rax, rcx);
1764   __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
1765   __ profile_switch_case(rax, rbx, rcx);
1766   // continue execution
1767   __ bind(continue_execution);
1768   __ bswapl(rdx);
1769   __ movslq(rdx, rdx);
1770   __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
1771   __ addq(r13, rdx);
1772   __ dispatch_only(vtos);
1773   // handle default
1774   __ bind(default_case);
1775   __ profile_switch_default(rax);
1776   __ movl(rdx, Address(rbx, 0));
1777   __ jmp(continue_execution);
1778 }
1779 
1780 void TemplateTable::lookupswitch() {
1781   transition(itos, itos);
1782   __ stop("lookupswitch bytecode should have been rewritten");
1783 }
1784 
1785 void TemplateTable::fast_linearswitch() {
1786   transition(itos, vtos);
1787   Label loop_entry, loop, found, continue_execution;  
1788   // bswap rax so we can avoid bswapping the table entries
1789   __ bswapl(rax);
1790   // align r13
1791   __ leaq(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of
1792                                      // this instruction (change offsets
1793                                      // below)
1794   __ andq(rbx, -BytesPerInt);
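       // rbx now points at the aligned lookupswitch operands: the default offset
       // at +0, npairs at +4, and npairs (match, offset) pairs of big-endian ints
       // starting at +8 -- hence the Address::times_8 scaling in the loop below.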
1795   // set counter
1796   __ movl(rcx, Address(rbx, BytesPerInt));  
1797   __ bswapl(rcx);
1798   __ jmpb(loop_entry);
1799   // table search
1800   __ bind(loop);
1801   __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt));
1802   __ jcc(Assembler::equal, found);
1803   __ bind(loop_entry);
1804   __ decrementl(rcx);
1805   __ jcc(Assembler::greaterEqual, loop);
1806   // default case
1807   __ profile_switch_default(rax);
1808   __ movl(rdx, Address(rbx, 0));
1809   __ jmp(continue_execution);
1810   // entry found -> get offset
1811   __ bind(found);
1812   __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt));
1813   __ profile_switch_case(rcx, rax, rbx);
1814   // continue execution
1815   __ bind(continue_execution);  
1816   __ bswapl(rdx);
1817   __ movslq(rdx, rdx);
1818   __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
1819   __ addq(r13, rdx);
1820   __ dispatch_only(vtos);
1821 }
1822 
1823 void TemplateTable::fast_binaryswitch() {
1824   transition(itos, vtos);
1825   // Implementation using the following core algorithm:
1826   //
1827   // int binary_search(int key, LookupswitchPair* array, int n) {
1828   //   // Binary search according to "Methodik des Programmierens" by
1829   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
1830   //   int i = 0;
1831   //   int j = n;
1832   //   while (i+1 < j) {
1833   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
1834   //     // with      Q: for all i: 0 <= i < n: key < a[i]
1835   //     // where a stands for the array and assuming that the (nonexistent)
1836   //     // element a[n] is infinitely big.
1837   //     int h = (i + j) >> 1;
1838   //     // i < h < j
1839   //     if (key < array[h].fast_match()) {
1840   //       j = h;
1841   //     } else {
1842   //       i = h;
1843   //     }
1844   //   }
1845   //   // R: a[i] <= key < a[i+1] or Q
1846   //   // (i.e., if key is within array, i is the correct index)
1847   //   return i;
1848   // }
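       // Each LookupswitchPair occupies 8 bytes in the bytecode stream: a 4-byte
       // big-endian match value followed by a 4-byte big-endian branch offset.
       // This is why the lookups below scale the index by Address::times_8 and
       // read the offset at displacement BytesPerInt.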
1849 
1850   // Register allocation
1851   const Register key   = rax; // already set (tosca)
1852   const Register array = rbx;
1853   const Register i     = rcx;
1854   const Register j     = rdx;
1855   const Register h     = rdi;
1856   const Register temp  = rsi;
1857 
1858   // Find array start
1859   __ leaq(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
1860                                            // get rid of this
1861                                            // instruction (change
1862                                            // offsets below)
1863   __ andq(array, -BytesPerInt);
1864 
1865   // Initialize i & j
1866   __ xorl(i, i);                            // i = 0;
1867   __ movl(j, Address(array, -BytesPerInt)); // j = length(array);    
1868 
1869   // Convert j into native byte ordering
1870   __ bswapl(j);
1871 
1872   // And start
1873   Label entry;
1874   __ jmp(entry);
1875 
1876   // binary search loop
1877   { 
1878     Label loop;
1879     __ bind(loop);
1880     // int h = (i + j) >> 1;
1881     __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
1882     __ sarl(h, 1);                               // h = (i + j) >> 1;
1883     // if (key < array[h].fast_match()) {
1884     //   j = h;
1885     // } else {
1886     //   i = h;
1887     // }
1888     // Convert array[h].match to native byte-ordering before compare
1889     __ movl(temp, Address(array, h, Address::times_8));
1890     __ bswapl(temp);
1891     __ cmpl(key, temp);
1892     // j = h if (key <  array[h].fast_match())
1893     __ cmovl(Assembler::less, j, h);
1894     // i = h if (key >= array[h].fast_match())
1895     __ cmovl(Assembler::greaterEqual, i, h);
1896     // while (i+1 < j)
1897     __ bind(entry);
1898     __ leal(h, Address(i, 1)); // i+1
1899     __ cmpl(h, j);             // i+1 < j
1900     __ jcc(Assembler::less, loop);
1901   }
1902 
1903   // end of binary search, result index is i (must check again!)
1904   Label default_case;
1905   // Convert array[i].match to native byte-ordering before compare
1906   __ movl(temp, Address(array, i, Address::times_8));
1907   __ bswapl(temp);
1908   __ cmpl(key, temp);
1909   __ jcc(Assembler::notEqual, default_case);
1910 
1911   // entry found -> j = offset
1912   __ movl(j , Address(array, i, Address::times_8, BytesPerInt));
1913   __ profile_switch_case(i, key, array);
1914   __ bswapl(j);
1915   __ movslq(j, j);
1916   __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
1917   __ addq(r13, j);
1918   __ dispatch_only(vtos);
1919 
1920   // default case -> j = default offset
1921   __ bind(default_case);
1922   __ profile_switch_default(i);
1923   __ movl(j, Address(array, -2 * BytesPerInt));
1924   __ bswapl(j);
1925   __ movslq(j, j);
1926   __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
1927   __ addq(r13, j);
1928   __ dispatch_only(vtos);
1929 }
1930 
1931 
1932 void TemplateTable::_return(TosState state) {
1933   transition(state, state);
1934   assert(_desc->calls_vm(), 
1935          "inconsistent calls_vm information"); // call in remove_activation
1936 
1937   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
1938     assert(state == vtos, "only valid state");
1939     __ movq(c_rarg1, aaddress(0));
1940     __ movq(rdi, Address(c_rarg1, oopDesc::klass_offset_in_bytes()));
1941     __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
1942     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
1943     Label skip_register_finalizer;
1944     __ jcc(Assembler::zero, skip_register_finalizer);
1945 
1946     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
1947 
1948     __ bind(skip_register_finalizer);
1949   }
1950 
1951   __ remove_activation(state, r13);
1952   __ jmp(r13);
1953 }
1954 
1955 // ----------------------------------------------------------------------------
1956 // Volatile variables demand that their effects be made known to all CPUs
1957 // in order.  Store buffers on most chips allow reads & writes to
1958 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
1959 // without some kind of memory barrier (i.e., it is not sufficient that
1960 // the interpreter does not reorder volatile references; the hardware
1961 // must not reorder them either).
1962 //
1963 // According to the new Java Memory Model (JMM):
1964 // (1) All volatiles are serialized with respect to each other.  ALSO reads
1965 //     & writes act as acquire & release, so:
1966 // (2) A read cannot let unrelated NON-volatile memory refs that
1967 //     happen after the read float up to before the read.  It's OK for
1968 //     non-volatile memory refs that happen before the volatile read to
1969 //     float down below it.
1970 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
1971 //     memory refs that happen BEFORE the write float down to after the
1972 //     write.  It's OK for non-volatile memory refs that happen after the
1973 //     volatile write to float up before it.
1974 //
1975 // We only put in barriers around volatile refs (they are expensive),
1976 // not _between_ memory refs (that would require us to track the
1977 // flavor of the previous memory refs).  Requirements (2) and (3)
1978 // require some barriers before volatile stores and after volatile
1979 // loads.  These nearly cover requirement (1) but miss the
1980 // volatile-store-volatile-load case.  This final case is placed after
1981 // volatile-stores although it could just as well go before
1982 // volatile-loads.
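     //
     // Note: on x86-64 the hardware keeps loads ordered with respect to other
     // loads and stores ordered with respect to other stores; only the
     // store-load case can reorder.  In practice, therefore, only barriers that
     // include StoreLoad (emitted after volatile stores below) require an
     // actual fence instruction.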
1983 void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits 
1984                                      order_constraint) {
1985   // Helper function to emit a memory barrier for volatile accesses
1986   if (os::is_MP()) { // Not needed on single CPU
1987     __ membar(order_constraint);
1988   }
1989 }
1990 
1991 void TemplateTable::resolve_cache_and_index(int byte_no,
1992                                             Register Rcache,
1993                                             Register index) {
1994   assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
1995 
1996   const Register temp = rbx;
1997   assert_different_registers(Rcache, index, temp);
1998 
1999   const int shift_count = (1 + byte_no) * BitsPerByte;
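       // ConstantPoolCacheEntry::_indices packs the original constant pool index
       // in its low 16 bits and the bytecodes recorded for byte_no 1 and 2 in the
       // next two bytes; shifting right by (1 + byte_no) * BitsPerByte and masking
       // with 0xFF therefore extracts the bytecode that resolved this entry.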
2000   Label resolved;
2001   __ get_cache_and_index_at_bcp(Rcache, index, 1);
2002   __ movl(temp, Address(Rcache, 
2003                         index, Address::times_8, 
2004                         constantPoolCacheOopDesc::base_offset() + 
2005                         ConstantPoolCacheEntry::indices_offset()));
2006   __ shrl(temp, shift_count);
2007   // have we resolved this bytecode? 
2008   __ andl(temp, 0xFF);
2009   __ cmpl(temp, (int) bytecode());
2010   __ jcc(Assembler::equal, resolved);
2011 
2012   // resolve first time through
2013   address entry;
2014   switch (bytecode()) {
2015   case Bytecodes::_getstatic:
2016   case Bytecodes::_putstatic:
2017   case Bytecodes::_getfield:
2018   case Bytecodes::_putfield:
2019     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
2020     break;
2021   case Bytecodes::_invokevirtual:
2022   case Bytecodes::_invokespecial:
2023   case Bytecodes::_invokestatic:
2024   case Bytecodes::_invokeinterface: 
2025     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
2026     break;
2027   default:
2028     ShouldNotReachHere();
2029     break;
2030   }
2031   __ movl(temp, (int) bytecode());
2032   __ call_VM(noreg, entry, temp);
2033 
2034   // Update registers with resolved info
2035   __ get_cache_and_index_at_bcp(Rcache, index, 1);
2036   __ bind(resolved);
2037 }
2038 
2039 // The Rcache and index registers must be set before the call
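     // For field entries, f2 holds the field offset, f1 holds the field holder
     // klass (loaded into obj only for static accesses), and flags packs the
     // field's TosState in its top bits along with the volatile bit tested by
     // the get/put code below.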
2040 void TemplateTable::load_field_cp_cache_entry(Register obj,
2041                                               Register cache,
2042                                               Register index,
2043                                               Register off,
2044                                               Register flags,
2045                                               bool is_static = false) {
2046   assert_different_registers(cache, index, flags, off);
2047 
2048   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
2049   // Field offset
2050   __ movq(off, Address(cache, index, Address::times_8, 
2051                        in_bytes(cp_base_offset + 
2052                                 ConstantPoolCacheEntry::f2_offset())));
2053   // Flags    
2054   __ movl(flags, Address(cache, index, Address::times_8,
2055                          in_bytes(cp_base_offset + 
2056                                   ConstantPoolCacheEntry::flags_offset())));
2057 
2058   // for static fields, obj is overwritten with the field holder klass (f1)
2059   if (is_static) {
2060     __ movq(obj, Address(cache, index, Address::times_8,
2061                          in_bytes(cp_base_offset + 
2062                                   ConstantPoolCacheEntry::f1_offset())));
2063   }
2064 }
2065 
2066 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2067                                                Register method,
2068                                                Register itable_index,
2069                                                Register flags,
2070                                                bool is_invokevirtual,
2071                                                bool is_invokevfinal /*unused*/) {
2072   // setup registers
2073   const Register cache = rcx;
2074   const Register index = rdx;
2075   assert_different_registers(method, flags);
2076   assert_different_registers(method, cache, index);
2077   assert_different_registers(itable_index, flags);
2078   assert_different_registers(itable_index, cache, index);
2079   // determine constant pool cache field offsets
2080   const int method_offset = in_bytes(
2081     constantPoolCacheOopDesc::base_offset() +
2082       (is_invokevirtual
2083        ? ConstantPoolCacheEntry::f2_offset()
2084        : ConstantPoolCacheEntry::f1_offset()));
2085   const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
2086                                     ConstantPoolCacheEntry::flags_offset());
2087   // access constant pool cache fields
2088   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
2089                                     ConstantPoolCacheEntry::f2_offset());
2090 
2091   resolve_cache_and_index(byte_no, cache, index);
2092 
2093   assert(wordSize == 8, "adjust code below");
2094   __ movq(method, Address(cache, index, Address::times_8, method_offset));
2095   if (itable_index != noreg) {
2096     __ movq(itable_index, 
2097             Address(cache, index, Address::times_8, index_offset));
2098   }
2099   __ movl(flags , Address(cache, index, Address::times_8, flags_offset));
2100 }
2101 
2102 
2103 // The cache and index registers are expected to be set before the call.
2104 // Their values are preserved across the call.
2105 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
2106                                             bool is_static, bool has_tos) {
2107   // do the JVMTI work here to avoid disturbing the register state below
2108   // We use c_rarg registers here because we want to use the register used in
2109   // the call to the VM
2110   if (JvmtiExport::can_post_field_access()) {
2111     // Check to see if a field access watch has been set before we
2112     // take the time to call into the VM.
2113     Label L1;
2114     assert_different_registers(cache, index, rax);
2115     __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2116     __ testl(rax, rax);
2117     __ jcc(Assembler::zero, L1);
2118 
2119     __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
2120 
2121     // cache entry pointer
2122     __ addq(c_rarg2, in_bytes(constantPoolCacheOopDesc::base_offset()));
2123     __ shll(c_rarg3, LogBytesPerWord);
2124     __ addq(c_rarg2, c_rarg3);
2125     if (is_static) {
2126       __ xorl(c_rarg1, c_rarg1); // NULL object reference
2127     } else {
2128       __ movq(c_rarg1, at_tos()); // get object pointer without popping it
2129       __ verify_oop(c_rarg1);
2130     }
2131     // c_rarg1: object pointer or NULL
2132     // c_rarg2: cache entry pointer
2133     // c_rarg3: jvalue object on the stack
2134     __ call_VM(noreg, CAST_FROM_FN_PTR(address, 
2135                                        InterpreterRuntime::post_field_access),
2136                c_rarg1, c_rarg2, c_rarg3);
2137     __ get_cache_and_index_at_bcp(cache, index, 1);
2138     __ bind(L1);
2139   } 
2140 }
2141 
2142 void TemplateTable::pop_and_check_object(Register r) {
2143   __ pop_ptr(r);
2144   __ null_check(r);  // for field access must check obj.
2145   __ verify_oop(r);
2146 }
2147 
2148 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
2149   transition(vtos, vtos);
2150 
2151   const Register cache = rcx;
2152   const Register index = rdx;
2153   const Register obj   = c_rarg3;
2154   const Register off   = rbx;
2155   const Register flags = rax;
2156   const Register bc = c_rarg3; // uses same reg as obj, so don't mix them
2157 
2158   resolve_cache_and_index(byte_no, cache, index);
2159   jvmti_post_field_access(cache, index, is_static, false);
2160   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2161 
2162   if (!is_static) {
2163     // obj is on the stack
2164     pop_and_check_object(obj);   
2165   }
2166 
2167   const Address field(obj, off, Address::times_1);
2168 
2169   Label Done, notByte, notInt, notShort, notChar, 
2170               notLong, notFloat, notObj, notDouble;
2171 
2172   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2173   assert(btos == 0, "change code, btos != 0");
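       // flags now holds the field's TosState.  Since btos == 0, a single
       // mask-and-test handles the byte case; the chain below then compares
       // against each remaining state (atos, itos, ctos, stos, ltos, ftos, dtos).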
2174 
2175   __ andl(flags, 0x0F);
2176   __ jcc(Assembler::notZero, notByte);
2177   // btos
2178   __ load_signed_byte(rax, field);
2179   __ push(btos);
2180   // Rewrite bytecode to be faster
2181   if (!is_static) {
2182     patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
2183   }
2184   __ jmp(Done);
2185 
2186   __ bind(notByte);
2187   __ cmpl(flags, atos);
2188   __ jcc(Assembler::notEqual, notObj);
2189   // atos
2190   __ movq(rax, field);
2191   __ push(atos);
2192   if (!is_static) {
2193     patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
2194   }
2195   __ jmp(Done);
2196 
2197   __ bind(notObj);
2198   __ cmpl(flags, itos);
2199   __ jcc(Assembler::notEqual, notInt);
2200   // itos
2201   __ movl(rax, field);
2202   __ push(itos);
2203   // Rewrite bytecode to be faster
2204   if (!is_static) {
2205     patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
2206   }
2207   __ jmp(Done);
2208 
2209   __ bind(notInt);
2210   __ cmpl(flags, ctos);
2211   __ jcc(Assembler::notEqual, notChar);
2212   // ctos
2213   __ load_unsigned_word(rax, field);
2214   __ push(ctos);
2215   // Rewrite bytecode to be faster
2216   if (!is_static) {
2217     patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
2218   }
2219   __ jmp(Done);
2220 
2221   __ bind(notChar);
2222   __ cmpl(flags, stos);
2223   __ jcc(Assembler::notEqual, notShort);
2224   // stos
2225   __ load_signed_word(rax, field);
2226   __ push(stos);
2227   // Rewrite bytecode to be faster
2228   if (!is_static) {
2229     patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
2230   }
2231   __ jmp(Done);
2232 
2233   __ bind(notShort);
2234   __ cmpl(flags, ltos);
2235   __ jcc(Assembler::notEqual, notLong);
2236   // ltos
2237   __ movq(rax, field);
2238   __ push(ltos);
2239   // Rewrite bytecode to be faster
2240   if (!is_static) {
2241     patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx);
2242   }
2243   __ jmp(Done);
2244 
2245   __ bind(notLong);
2246   __ cmpl(flags, ftos);
2247   __ jcc(Assembler::notEqual, notFloat);
2248   // ftos
2249   __ movflt(xmm0, field);
2250   __ push(ftos);
2251   // Rewrite bytecode to be faster
2252   if (!is_static) {
2253     patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
2254   }
2255   __ jmp(Done);
2256 
2257   __ bind(notFloat);
2258 #ifdef ASSERT
2259   __ cmpl(flags, dtos);
2260   __ jcc(Assembler::notEqual, notDouble);
2261 #endif
2262   // dtos
2263   __ movdbl(xmm0, field);
2264   __ push(dtos);
2265   // Rewrite bytecode to be faster
2266   if (!is_static) {
2267     patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
2268   }
2269 #ifdef ASSERT
2270   __ jmp(Done);
2271 
2272   __ bind(notDouble);
2273   __ stop("Bad state");
2274 #endif
2275 
2276   __ bind(Done);
2277   // [jk] not needed currently
2278   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadLoad |
2279   //                                              Assembler::LoadStore));
2280 }
2281 
2282 
2283 void TemplateTable::getfield(int byte_no) {
2284   getfield_or_static(byte_no, false);
2285 }
2286 
2287 void TemplateTable::getstatic(int byte_no) {
2288   getfield_or_static(byte_no, true);
2289 }
2290 
2291 // The cache and index registers are expected to be set before the call.
2292 // The function may clobber various other registers, but preserves cache and index.
2293 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2294   transition(vtos, vtos);
2295 
2296   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
2297 
2298   if (JvmtiExport::can_post_field_modification()) {
2299     // Check to see if a field modification watch has been set before
2300     // we take the time to call into the VM.
2301     Label L1;
2302     assert_different_registers(cache, index, rax);
2303     __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2304     __ testl(rax, rax);
2305     __ jcc(Assembler::zero, L1);
2306 
2307     __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1);
2308 
2309     if (is_static) {
2310       // Life is simple.  Null out the object pointer.
2311       __ xorl(c_rarg1, c_rarg1);
2312     } else {
2313       // Life is harder. The stack holds the value on top, followed by
2314       // the object.  We don't know the size of the value, though; it
2315       // could be one or two words depending on its type. As a result,
2316       // we must find the type to determine where the object is.
2317       __ movl(c_rarg3, Address(c_rarg2, rscratch1, 
2318                            Address::times_8, 
2319                            in_bytes(cp_base_offset +
2320                                      ConstantPoolCacheEntry::flags_offset())));
2321       __ shrl(c_rarg3, ConstantPoolCacheEntry::tosBits);
2322       // Make sure we don't need to mask c_rarg3 for tosBits after the
2323       // above shift
2324       ConstantPoolCacheEntry::verify_tosBits();
2325       __ movq(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
2326       __ cmpl(c_rarg3, ltos);
2327       __ cmovq(Assembler::equal,
2328                c_rarg1, at_tos_p2()); // ltos (two word jvalue)
2329       __ cmpl(c_rarg3, dtos);
2330       __ cmovq(Assembler::equal,
2331                c_rarg1, at_tos_p2()); // dtos (two word jvalue)
2332     }
2333     // cache entry pointer
2334     __ addq(c_rarg2, in_bytes(cp_base_offset));
2335     __ shll(rscratch1, LogBytesPerWord);
2336     __ addq(c_rarg2, rscratch1);
2337     // object (tos)
2338     __ movq(c_rarg3, rsp);
2339     // c_rarg1: object pointer set up above (NULL if static)
2340     // c_rarg2: cache entry pointer
2341     // c_rarg3: jvalue object on the stack
2342     __ call_VM(noreg, 
2343                CAST_FROM_FN_PTR(address, 
2344                                 InterpreterRuntime::post_field_modification),
2345                c_rarg1, c_rarg2, c_rarg3);
2346     __ get_cache_and_index_at_bcp(cache, index, 1);
2347     __ bind(L1);
2348   }
2349 }
2350 
2351 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
2352   transition(vtos, vtos);
2353 
2354   const Register cache = rcx;
2355   const Register index = rdx;
2356   const Register obj   = rcx;
2357   const Register off   = rbx;
2358   const Register flags = rax;
2359   const Register bc    = c_rarg3;
2360 
2361   resolve_cache_and_index(byte_no, cache, index);
2362   jvmti_post_field_mod(cache, index, is_static);
2363   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2364 
2365   // [jk] not needed currently
2366   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
2367   //                                              Assembler::StoreStore));
2368 
2369   Label notVolatile, Done;
2370   __ movl(rdx, flags);
2371   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2372   __ andl(rdx, 0x1);
2373 
2374   // field address
2375   const Address field(obj, off, Address::times_1);
2376 
2377   Label notByte, notInt, notShort, notChar, 
2378         notLong, notFloat, notObj, notDouble;
2379 
2380   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2381 
2382   assert(btos == 0, "change code, btos != 0");
2383   __ andl(flags, 0x0f);
2384   __ jcc(Assembler::notZero, notByte);
2385   // btos
2386   __ pop(btos);
2387   if (!is_static) pop_and_check_object(obj);
2388   __ movb(field, rax);
2389   if (!is_static) {
2390     patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx);
2391   }
2392   __ jmp(Done);
2393 
2394   __ bind(notByte);
2395   __ cmpl(flags, atos);
2396   __ jcc(Assembler::notEqual, notObj);
2397   // atos
2398   __ pop(atos);
2399   if (!is_static) pop_and_check_object(obj);
2400   __ movq(field, rax);
2401   __ store_check(obj, field); // Need to mark card
2402   if (!is_static) {
2403     patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
2404   }
2405   __ jmp(Done);
2406 
2407   __ bind(notObj);
2408   __ cmpl(flags, itos);
2409   __ jcc(Assembler::notEqual, notInt);
2410   // itos
2411   __ pop(itos);
2412   if (!is_static) pop_and_check_object(obj);
2413   __ movl(field, rax);
2414   if (!is_static) {
2415     patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx);
2416   }
2417   __ jmp(Done);
2418 
2419   __ bind(notInt);
2420   __ cmpl(flags, ctos);
2421   __ jcc(Assembler::notEqual, notChar);
2422   // ctos
2423   __ pop(ctos);
2424   if (!is_static) pop_and_check_object(obj);
2425   __ movw(field, rax);
2426   if (!is_static) {
2427     patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx);
2428   }
2429   __ jmp(Done);
2430 
2431   __ bind(notChar);
2432   __ cmpl(flags, stos);
2433   __ jcc(Assembler::notEqual, notShort);
2434   // stos
2435   __ pop(stos);
2436   if (!is_static) pop_and_check_object(obj);
2437   __ movw(field, rax);
2438   if (!is_static) {
2439     patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx);
2440   }
2441   __ jmp(Done);
2442 
2443   __ bind(notShort);
2444   __ cmpl(flags, ltos);
2445   __ jcc(Assembler::notEqual, notLong);
2446   // ltos
2447   __ pop(ltos);
2448   if (!is_static) pop_and_check_object(obj);
2449   __ movq(field, rax);
2450   if (!is_static) {
2451     patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx);
2452   }
2453   __ jmp(Done);
2454 
2455   __ bind(notLong);
2456   __ cmpl(flags, ftos);
2457   __ jcc(Assembler::notEqual, notFloat);
2458   // ftos
2459   __ pop(ftos);
2460   if (!is_static) pop_and_check_object(obj);
2461   __ movflt(field, xmm0);
2462   if (!is_static) {
2463     patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx);
2464   }
2465   __ jmp(Done);
2466 
2467   __ bind(notFloat);
2468 #ifdef ASSERT
2469   __ cmpl(flags, dtos);
2470   __ jcc(Assembler::notEqual, notDouble);
2471 #endif
2472   // dtos
2473   __ pop(dtos);
2474   if (!is_static) pop_and_check_object(obj);
2475   __ movdbl(field, xmm0);
2476   if (!is_static) {
2477     patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx);
2478   }
2479 
2480 #ifdef ASSERT
2481   __ jmp(Done);
2482 
2483   __ bind(notDouble);
2484   __ stop("Bad state");
2485 #endif
2486 
2487   __ bind(Done);
2488   // Check for volatile store
2489   __ testl(rdx, rdx);
2490   __ jcc(Assembler::zero, notVolatile);
2491   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
2492                                                Assembler::StoreStore));
2493 
2494   __ bind(notVolatile);
2495 }
2496 
2497 void TemplateTable::putfield(int byte_no) {
2498   putfield_or_static(byte_no, false);
2499 }
2500 
2501 void TemplateTable::putstatic(int byte_no) {
2502   putfield_or_static(byte_no, true);
2503 }
2504 
2505 void TemplateTable::jvmti_post_fast_field_mod() {
2506   if (JvmtiExport::can_post_field_modification()) {
2507     // Check to see if a field modification watch has been set before
2508     // we take the time to call into the VM.
2509     Label L2;
2510     __ mov32(c_rarg3, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2511     __ testl(c_rarg3, c_rarg3);
2512     __ jcc(Assembler::zero, L2);
2513     __ pop_ptr(rbx);                  // copy the object pointer from tos
2514     __ verify_oop(rbx);
2515     __ push_ptr(rbx);                 // put the object pointer back on tos
2516     __ subq(rsp, sizeof(jvalue));  // add space for a jvalue object
2517     __ movq(c_rarg3, rsp);
2518     const Address field(c_rarg3, 0);
2519 
2520     switch (bytecode()) {          // load values into the jvalue object
2521     case Bytecodes::_fast_aputfield: // fall through
2522     case Bytecodes::_fast_lputfield: __ movq(field, rax); break;
2523     case Bytecodes::_fast_iputfield: __ movl(field, rax); break;
2524     case Bytecodes::_fast_bputfield: __ movb(field, rax); break;
2525     case Bytecodes::_fast_sputfield: // fall through
2526     case Bytecodes::_fast_cputfield: __ movw(field, rax); break;
2527     case Bytecodes::_fast_fputfield: __ movflt(field, xmm0); break;
2528     case Bytecodes::_fast_dputfield: __ movdbl(field, xmm0); break;
2529     default:
2530       ShouldNotReachHere();
2531     }
2532 
2533     // Save rax because call_VM() will clobber it, then use it for
2534     // JVMTI purposes
2535     __ pushq(rax);
2536     // access constant pool cache entry
2537     __ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1);
2538     __ verify_oop(rbx);
2539     // rbx: object pointer copied above
2540     // c_rarg2: cache entry pointer
2541     // c_rarg3: jvalue object on the stack
2542     __ call_VM(noreg,
2543                CAST_FROM_FN_PTR(address, 
2544                                 InterpreterRuntime::post_field_modification),
2545                rbx, c_rarg2, c_rarg3);
2546     __ popq(rax);     // restore the saved rax value
2547     __ addq(rsp, sizeof(jvalue));  // release jvalue object space
2548     __ bind(L2);
2549   }
2550 }
2551 
2552 void TemplateTable::fast_storefield(TosState state) {
2553   transition(state, vtos);
2554 
2555   ByteSize base = constantPoolCacheOopDesc::base_offset();
2556 
2557   jvmti_post_fast_field_mod();
2558 
2559   // access constant pool cache
2560   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
2561 
2562   // test for volatile with rdx
2563   __ movl(rdx, Address(rcx, rbx, Address::times_8, 
2564                        in_bytes(base +
2565                                 ConstantPoolCacheEntry::flags_offset())));
2566 
2567   // replace index with field offset from cache entry
2568   __ movq(rbx, Address(rcx, rbx, Address::times_8,
2569                        in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
2570 
2571   // [jk] not needed currently
2572   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
2573   //                                              Assembler::StoreStore));
2574 
2575   Label notVolatile;
2576   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2577   __ andl(rdx, 0x1);
2578 
2579   // Get object from stack
2580   pop_and_check_object(rcx);
2581 
2582   // field address
2583   const Address field(rcx, rbx, Address::times_1);
2584 
2585   // access field
2586   switch (bytecode()) {
2587   case Bytecodes::_fast_aputfield: 
2588     __ movq(field, rax); 
2589     __ store_check(rcx, field);
2590     break;
2591   case Bytecodes::_fast_lputfield:
2592     __ movq(field, rax);
2593     break;
2594   case Bytecodes::_fast_iputfield:
2595     __ movl(field, rax);
2596     break;
2597   case Bytecodes::_fast_bputfield:
2598     __ movb(field, rax);
2599     break;
2600   case Bytecodes::_fast_sputfield:
2601     // fall through
2602   case Bytecodes::_fast_cputfield:
2603     __ movw(field, rax);
2604     break;
2605   case Bytecodes::_fast_fputfield:
2606     __ movflt(field, xmm0);
2607     break;
2608   case Bytecodes::_fast_dputfield:
2609     __ movdbl(field, xmm0); 
2610     break;
2611   default:
2612     ShouldNotReachHere();
2613   }
2614 
2615   // Check for volatile store
2616   __ testl(rdx, rdx);
2617   __ jcc(Assembler::zero, notVolatile);
2618   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
2619                                                Assembler::StoreStore));
2620   __ bind(notVolatile);
2621 }
2622 
2623 
2624 void TemplateTable::fast_accessfield(TosState state) {
2625   transition(atos, state);
2626 
2627   // Do the JVMTI work here to avoid disturbing the register state below
2628   if (JvmtiExport::can_post_field_access()) {
2629     // Check to see if a field access watch has been set before we
2630     // take the time to call into the VM.
2631     Label L1;
2632     __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2633     __ testl(rcx, rcx);
2634     __ jcc(Assembler::zero, L1);
2635     // access constant pool cache entry
2636     __ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1);
2637     __ movq(r12, rax);  // save object pointer before call_VM() clobbers it
2638     __ verify_oop(rax);
2639     __ movq(c_rarg1, rax);
2640     // c_rarg1: object pointer copied above
2641     // c_rarg2: cache entry pointer
2642     __ call_VM(noreg, 
2643                CAST_FROM_FN_PTR(address, 
2644                                 InterpreterRuntime::post_field_access),
2645                c_rarg1, c_rarg2);
2646     __ movq(rax, r12); // restore object pointer
2647     __ bind(L1);
2648   }
2649 
2650   // access constant pool cache
2651   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
2652   // replace index with field offset from cache entry
2653   // [jk] not needed currently
2654   // if (os::is_MP()) {
2655   //   __ movl(rdx, Address(rcx, rbx, Address::times_8, 
2656   //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
2657   //                                 ConstantPoolCacheEntry::flags_offset())));
2658   //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2659   //   __ andl(rdx, 0x1);
2660   // }
2661   __ movq(rbx, Address(rcx, rbx, Address::times_8,
2662                        in_bytes(constantPoolCacheOopDesc::base_offset() + 
2663                                 ConstantPoolCacheEntry::f2_offset())));
2664 
2665   // rax: object
2666   __ verify_oop(rax);
2667   __ null_check(rax);
2668   Address field(rax, rbx, Address::times_1);
2669 
2670   // access field
2671   switch (bytecode()) {
2672   case Bytecodes::_fast_agetfield:
2673     __ movq(rax, field);
2674     __ verify_oop(rax);
2675     break;
2676   case Bytecodes::_fast_lgetfield:
2677     __ movq(rax, field);
2678     break;
2679   case Bytecodes::_fast_igetfield:
2680     __ movl(rax, field);
2681     break;
2682   case Bytecodes::_fast_bgetfield: 
2683     __ movsbl(rax, field);
2684     break;
2685   case Bytecodes::_fast_sgetfield:
2686     __ load_signed_word(rax, field);
2687     break;
2688   case Bytecodes::_fast_cgetfield:
2689     __ load_unsigned_word(rax, field);
2690     break;
2691   case Bytecodes::_fast_fgetfield:
2692     __ movflt(xmm0, field);
2693     break;
2694   case Bytecodes::_fast_dgetfield:
2695     __ movdbl(xmm0, field);
2696     break;
2697   default:
2698     ShouldNotReachHere();
2699   }
2700   // [jk] not needed currently
2701   // if (os::is_MP()) { 
2702   //   Label notVolatile;
2703   //   __ testl(rdx, rdx);
2704   //   __ jcc(Assembler::zero, notVolatile);
2705   //   __ membar(Assembler::LoadLoad);
2706   //   __ bind(notVolatile);
2707   //};
2708 }
2709 
2710 void TemplateTable::fast_xaccess(TosState state) {
2711   transition(vtos, state);
2712 
2713   // get receiver
2714   __ movq(rax, aaddress(0));
2715   debug_only(__ verify_local_tag(frame::TagReference, 0));
2716   // access constant pool cache
2717   __ get_cache_and_index_at_bcp(rcx, rdx, 2);
2718   __ movq(rbx, 
2719           Address(rcx, rdx, Address::times_8, 
2720                   in_bytes(constantPoolCacheOopDesc::base_offset() + 
2721                            ConstantPoolCacheEntry::f2_offset())));
2722   // make sure exception is reported in correct bcp range (getfield is
2723   // next instruction)
2724   __ incrementq(r13);
2725   __ null_check(rax);
2726   switch (state) {
2727   case itos: 
2728     __ movl(rax, Address(rax, rbx, Address::times_1));
2729     break;
2730   case atos:
2731     __ movq(rax, Address(rax, rbx, Address::times_1));
2732     __ verify_oop(rax);
2733     break;
2734   case ftos:
2735     __ movflt(xmm0, Address(rax, rbx, Address::times_1));
2736     break;
2737   default:
2738     ShouldNotReachHere();
2739   }
2740 
2741   // [jk] not needed currently
2742   // if (os::is_MP()) {
2743   //   Label notVolatile;
2744   //   __ movl(rdx, Address(rcx, rdx, Address::times_8, 
2745   //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
2746   //                                 ConstantPoolCacheEntry::flags_offset())));
2747   //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2748   //   __ testl(rdx, 0x1);
2749   //   __ jcc(Assembler::zero, notVolatile);
2750   //   __ membar(Assembler::LoadLoad);
2751   //   __ bind(notVolatile);
2752   // }
2753 
2754   __ decrementq(r13);
2755 }
2756 
2757 
2758 
2759 //-----------------------------------------------------------------------------
2760 // Calls
2761 
2762 void TemplateTable::count_calls(Register method, Register temp) {  
2763   // implemented elsewhere
2764   ShouldNotReachHere();
2765 }
2766 
2767 void TemplateTable::prepare_invoke(Register method, 
2768                                    Register index, 
2769                                    int byte_no, 
2770                                    Bytecodes::Code code) {
2771   // determine flags
2772   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
2773   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
2774   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
2775   const bool load_receiver       = code != Bytecodes::_invokestatic;
2776   const bool receiver_null_check = is_invokespecial;
2777   const bool save_flags = is_invokeinterface || is_invokevirtual;
2778   // setup registers & access constant pool cache
2779   const Register recv   = rcx;
2780   const Register flags  = rdx;  
2781   assert_different_registers(method, index, recv, flags);
2782 
2783   // save 'interpreter return address'
2784   __ save_bcp();
2785 
2786   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual);
2787 
2788   // load receiver if needed (note: no return address pushed yet)
2789   if (load_receiver) {
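         // the low byte of flags holds the number of parameter slots; the
         // receiver is the deepest argument, so that count indexes it on the
         // expression stack (doubled when TaggedStackInterpreter adds a tag word
         // per slot)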
2790     __ movl(recv, flags);
2791     __ andl(recv, 0xFF);
2792     if (TaggedStackInterpreter) __ shll(recv, 1);  // index*2
2793     __ movq(recv, Address(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1)));
2794     __ verify_oop(recv);
2795   }
2796 
2797   // do null check if needed
2798   if (receiver_null_check) {
2799     __ null_check(recv);
2800   }
2801 
2802   if (save_flags) {
2803     __ movl(r13, flags);
2804   }
2805 
2806   // compute return type
2807   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2808   // Make sure we don't need to mask flags for tosBits after the above shift
2809   ConstantPoolCacheEntry::verify_tosBits();
2810   // load return address
2811   { 
2812     ExternalAddress return_5((address)Interpreter::return_5_addrs_by_index_table());
2813     ExternalAddress return_3((address)Interpreter::return_3_addrs_by_index_table());
2814     __ lea(rscratch1, (is_invokeinterface ? return_5 : return_3));
2815     __ movq(flags, Address(rscratch1, flags, Address::times_8));
2816   }
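       // return_5/return_3 are tables of interpreter return entries indexed by
       // TosState; they resume execution after a 5-byte bytecode (invokeinterface)
       // or a 3-byte bytecode (the other invokes), respectively.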
2817 
2818   // push return address
2819   __ pushq(flags);
2820 
2821   // Restore the flags field from the constant pool cache, and restore r13
2822   // (the bytecode pointer) for later null checks.
2823   if (save_flags) {
2824     __ movl(flags, r13);
2825     __ restore_bcp();
2826   }
2827 }
2828 
2829 
2830 void TemplateTable::invokevirtual_helper(Register index,
2831                                          Register recv,
2832                                          Register flags) {
2833   // Uses temporary registers rax, rdx
2834   assert_different_registers(index, recv, rax, rdx);
2835   // Test for an invoke of a final method
2836   Label notFinal;
2837   __ movl(rax, flags);
2838   __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
2839   __ jcc(Assembler::zero, notFinal);
2840 
2841   const Register method = index;  // method must be rbx
2842   assert(method == rbx,
2843          "methodOop must be rbx for interpreter calling convention");
2844 
2845   // do the call - the index is actually the method to call
2846   __ verify_oop(method);
2847 
2848   // It's final, need a null check here!
2849   __ null_check(recv);
2850 
2851   // profile this call
2852   __ profile_final_call(rax);
2853 
2854   __ jump_from_interpreted(method, rax);
2855 
2856   __ bind(notFinal);
2857 
2858   // get receiver klass
2859   __ null_check(recv, oopDesc::klass_offset_in_bytes());
2860   __ movq(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
2861 
2862   __ verify_oop(rax);
2863 
2864   // profile this call
2865   __ profile_virtual_call(rax, r14, rdx);
2866 
2867   // get target methodOop & entry point
2868   const int base = instanceKlass::vtable_start_offset() * wordSize;    
2869   assert(vtableEntry::size() * wordSize == 8, 
2870          "adjust the scaling in the code below");
2871   __ movq(method, Address(rax, index, 
2872                           Address::times_8, 
2873                           base + vtableEntry::method_offset_in_bytes()));  
2874   __ movq(rdx, Address(method, methodOopDesc::interpreter_entry_offset()));  
2875   __ jump_from_interpreted(method, rdx);
2876 }
2877 
2878 
2879 void TemplateTable::invokevirtual(int byte_no) {
2880   transition(vtos, vtos);
2881   prepare_invoke(rbx, noreg, byte_no, bytecode());
2882 
2883   // rbx: index
2884   // rcx: receiver    
2885   // rdx: flags    
2886 
2887   invokevirtual_helper(rbx, rcx, rdx);
2888 }
2889 
2890 
2891 void TemplateTable::invokespecial(int byte_no) {
2892   transition(vtos, vtos);
2893   prepare_invoke(rbx, noreg, byte_no, bytecode());
2894   // do the call
2895   __ verify_oop(rbx);
2896   __ profile_call(rax);
2897   __ jump_from_interpreted(rbx, rax);
2898 }
2899 
2900 
2901 void TemplateTable::invokestatic(int byte_no) {
2902   transition(vtos, vtos);
2903   prepare_invoke(rbx, noreg, byte_no, bytecode());
2904   // do the call
2905   __ verify_oop(rbx);
2906   __ profile_call(rax);
2907   __ jump_from_interpreted(rbx, rax);
2908 }
2909 
2910 void TemplateTable::fast_invokevfinal(int byte_no) {
2911   transition(vtos, vtos);
2912   __ stop("fast_invokevfinal not used on amd64");
2913 }
2914 
2915 void TemplateTable::invokeinterface(int byte_no) {
2916   transition(vtos, vtos);
2917   prepare_invoke(rax, rbx, byte_no, bytecode());
2918   
2919   // rax: Interface
2920   // rbx: index
2921   // rcx: receiver    
2922   // rdx: flags
2923 
2924   // Special case of invokeinterface called for virtual method of
2925   // java.lang.Object.  See cpCacheOop.cpp for details.
2926   // This code isn't produced by javac, but could be produced by
2927   // another compliant java compiler.
2928   Label notMethod;
2929   __ movl(r14, rdx);
2930   __ andl(r14, (1 << ConstantPoolCacheEntry::methodInterface));
2931   __ jcc(Assembler::zero, notMethod);
2932 
2933   invokevirtual_helper(rbx, rcx, rdx);
2934   __ bind(notMethod);
2935 
2936   // Get receiver klass into rdx - also a null check
2937   __ restore_locals(); // restore r14
2938   __ movq(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
2939   __ verify_oop(rdx);
2940 
2941   // profile this call
2942   __ profile_virtual_call(rdx, r13, r14);
2943 
2944   __ movq(r14, rdx); // Save klassOop in r14
2945 
2946   // Compute start of first itableOffsetEntry (which is at the end of
2947   // the vtable)
2948   const int base = instanceKlass::vtable_start_offset() * wordSize;
2949   // Get length of vtable
2950   assert(vtableEntry::size() * wordSize == 8, 
2951          "adjust the scaling in the code below");
2952   __ movl(r13, Address(rdx, 
2953                        instanceKlass::vtable_length_offset() * wordSize));
2954   __ leaq(rdx, Address(rdx, r13, Address::times_8, base));
2955   
2956   if (HeapWordsPerLong > 1) {
2957     // Round up to align_object_offset boundary
2958     __ round_to_q(rdx, BytesPerLong);
2959   }
2960 
2961   Label entry, search, interface_ok;
2962   
2963   __ jmpb(entry);   
2964   __ bind(search);
2965   __ addq(rdx, itableOffsetEntry::size() * wordSize);
2966   
2967   __ bind(entry);    
2968 
2969   // Check that the entry is non-null.  A null entry means that the
2970   // receiver class doesn't implement the interface, and wasn't the
2971   // same as the receiver class checked when the interface was
2972   // resolved.
2973   __ pushq(rdx);
2974   __ movq(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
2975   __ testq(rdx, rdx);
2976   __ jcc(Assembler::notZero, interface_ok);
2977   // throw exception
2978   __ popq(rdx); // pop saved register first.
2979   __ popq(rbx); // pop return address (pushed by prepare_invoke)
2980   __ restore_bcp(); // r13 must be correct for exception handler (was
2981                     // destroyed)
2982   __ restore_locals(); // make sure locals pointer is correct as well
2983                        // (was destroyed)
2984   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
2985                    InterpreterRuntime::throw_IncompatibleClassChangeError));
2986   // the call_VM checks for exception, so we should never return here.
2987   __ should_not_reach_here();
2988   __ bind(interface_ok);
2989 
2990   __ popq(rdx);
2991 
2992   __ cmpq(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
2993   __ jcc(Assembler::notEqual, search);
2994         
2995   __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
2996 
2997   __ addq(rdx, r14); // Add offset to klassOop
2998   assert(itableMethodEntry::size() * wordSize == 8,
2999          "adjust the scaling in the code below");
3000   __ movq(rbx, Address(rdx, rbx, Address::times_8));
3001   // rbx: methodOop to call
3002   // rcx: receiver
3003   // Check for abstract method error
3004   // Note: This should be done more efficiently via a
3005   // throw_abstract_method_error interpreter entry point and a
3006   // conditional jump to it in case of a null method.
3007   { 
3008     Label L;
3009     __ testq(rbx, rbx);
3010     __ jcc(Assembler::notZero, L);
3011     // throw exception
3012     // note: must restore interpreter registers to canonical
3013     //       state for exception handling to work correctly!
3014     __ popq(rbx);  // pop return address (pushed by prepare_invoke)
3015     __ restore_bcp(); // r13 must be correct for exception handler
3016                       // (was destroyed)
3017     __ restore_locals(); // make sure locals pointer is correct as
3018                          // well (was destroyed)
3019     __ call_VM(noreg, 
3020                CAST_FROM_FN_PTR(address, 
3021                              InterpreterRuntime::throw_AbstractMethodError));
3022     // the call_VM checks for exception, so we should never return here.
3023     __ should_not_reach_here();
3024     __ bind(L);
3025   }
3026 
3027   __ movq(rcx, Address(rbx, methodOopDesc::interpreter_entry_offset()));  
3028 
3029   // do the call
3030   // rcx: receiver
3031   // rbx: methodOop
3032   __ jump_from_interpreted(rbx, rdx);
3033 }
3034 
3035 //-----------------------------------------------------------------------------
3036 // Allocation
3037 
3038 void TemplateTable::_new() {
3039   transition(vtos, atos);
3040   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
3041   Label slow_case;
3042   Label done;
3043   Label initialize_header;
3044   Label initialize_object; // including clearing the fields
3045   Label allocate_shared;
3046   ExternalAddress top((address)Universe::heap()->top_addr());
3047   ExternalAddress end((address)Universe::heap()->end_addr());
3048 
3049   __ get_cpool_and_tags(rsi, rax);
3050   // get instanceKlass
3051   __ movq(rsi, Address(rsi, rdx, 
3052                        Address::times_8, sizeof(constantPoolOopDesc)));
3053 
3054   // make sure the class we're about to instantiate has been
3055   // resolved. Note: slow_case does a pop of stack, which is why we
3056   // loaded class/pushed above
3057   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
3058   __ cmpb(Address(rax, rdx, Address::times_1, tags_offset),
3059           JVM_CONSTANT_Class);
3060   __ jcc(Assembler::notEqual, slow_case);
3061 
3062   // make sure klass is initialized & doesn't have finalizer
3063   // make sure klass is fully initialized
3064   __ cmpl(Address(rsi, 
3065                   instanceKlass::init_state_offset_in_bytes() + 
3066                   sizeof(oopDesc)), 
3067           instanceKlass::fully_initialized);
3068   __ jcc(Assembler::notEqual, slow_case);
3069 
3070   // get instance_size in instanceKlass (scaled to a count of bytes)
3071   __ movl(rdx, 
3072           Address(rsi, 
3073                   Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
3074   // test to see if it has a finalizer or is malformed in some way
3075   __ testl(rdx, Klass::_lh_instance_slow_path_bit);
3076   __ jcc(Assembler::notZero, slow_case);
3077 
3078   // Allocate the instance
3079   // 1) Try to allocate in the TLAB
3080   // 2) if fail and the object is large allocate in the shared Eden
3081   // 3) if the above fails (or is not applicable), go to a slow case
3082   // (creates a new TLAB, etc.)
3083 
3084   const bool allow_shared_alloc =
3085     Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
3086 
3087   if (UseTLAB) {
3088     __ movq(rax, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
3089     __ leaq(rbx, Address(rax, rdx, Address::times_1));
3090     __ cmpq(rbx, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
3091     __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
3092     __ movq(Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
3093     if (ZeroTLAB) {
3094       // the fields have already been cleared
3095       __ jmp(initialize_header);
3096     } else {
3097       // initialize both the header and fields
3098       __ jmp(initialize_object);
3099     }
3100   }
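       // The TLAB path above is a simple bump of tlab_top with no atomic
       // operation, since the TLAB is private to the allocating thread; only the
       // shared Eden path below needs the locked cmpxchg.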
3101 
3102   // Allocation in the shared Eden, if allowed.
3103   //
3104   // rdx: instance size in bytes
3105   if (allow_shared_alloc) {
3106     __ bind(allocate_shared);
3107 
3108     const Register RtopAddr = rscratch1;
3109     const Register RendAddr = rscratch2;
3110 
3111     __ lea(RtopAddr, top);
3112     __ lea(RendAddr, end);
3113     __ movq(rax, Address(RtopAddr, 0));
3114 
3115     // For retries rax gets set by cmpxchgq
3116     Label retry;
3117     __ bind(retry);
3118     __ leaq(rbx, Address(rax, rdx, Address::times_1));
3119     __ cmpq(rbx, Address(RendAddr, 0));
3120     __ jcc(Assembler::above, slow_case);
3121 
3122     // Compare rax with the current top value; if they are still equal, store
3123     // rbx (the new top) through the top pointer.  Sets ZF if the values were
3124     // equal and clears it otherwise.  Use the lock prefix for atomicity on MPs.
3125     //
3126     // rax: object begin
3127     // rbx: object end
3128     // rdx: instance size in bytes
3129     if (os::is_MP()) {
3130       __ lock();
3131     }
3132     __ cmpxchgq(rbx, Address(RtopAddr, 0));
3133 
3134     // if someone beat us on the allocation, try again, otherwise continue 
3135     __ jcc(Assembler::notEqual, retry);
3136   }
3137 
3138   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
3139     // The object is initialized before the header.  If the object size is
3140     // zero, go directly to the header initialization.
3141     __ bind(initialize_object);
3142     __ decrementl(rdx, sizeof(oopDesc));
3143     __ jcc(Assembler::zero, initialize_header);
3144 
3145     // Initialize object fields
3146     __ xorl(rcx, rcx); // use zero reg to clear memory (shorter code)
3147     __ shrl(rdx, LogBytesPerLong);  // divide by oopSize to simplify the loop
3148     { 
3149       Label loop;
3150       __ bind(loop);
3151       __ movq(Address(rax, rdx, Address::times_8,
3152                       sizeof(oopDesc) - oopSize), 
3153               rcx);
3154       __ decrementl(rdx);
3155       __ jcc(Assembler::notZero, loop);
3156     }
3157 
3158     // initialize object header only.
3159     __ bind(initialize_header);
3160     if (UseBiasedLocking) {
3161       __ movq(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
3162       __ movq(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1);
3163     } else {
3164       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), 
3165                (intptr_t) markOopDesc::prototype()); // header (address 0x1)
3166     }
3167     __ movq(Address(rax, oopDesc::klass_offset_in_bytes()), rsi);  // klass
3168     __ jmp(done);
3169   }
3170 
3171   {
3172     SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
3173     // Trigger dtrace event for fastpath
3174     __ push(atos); // save the return value
3175     __ call_VM_leaf(
3176          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
3177     __ pop(atos); // restore the return value
3178   }
3179 
3180   // slow case
3181   __ bind(slow_case);
3182   __ get_constant_pool(c_rarg1);
3183   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3184   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
3185   __ verify_oop(rax);
3186 
3187   // continue
3188   __ bind(done);
3189 }
3190 
3191 void TemplateTable::newarray() {
3192   transition(itos, atos);
3193   __ load_unsigned_byte(c_rarg1, at_bcp(1));
3194   __ movl(c_rarg2, rax);
3195   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
3196           c_rarg1, c_rarg2);
3197 }
3198 
3199 void TemplateTable::anewarray() {
3200   transition(itos, atos);
3201   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3202   __ get_constant_pool(c_rarg1);
3203   __ movl(c_rarg3, rax);
3204   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), 
3205           c_rarg1, c_rarg2, c_rarg3);
3206 }
3207 
3208 void TemplateTable::arraylength() {
3209   transition(atos, itos);
3210   __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
3211   __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
3212 }
3213 
3214 void TemplateTable::checkcast() {
3215   transition(atos, atos);
3216   Label done, is_null, ok_is_subtype, quicked, resolved;
3217   __ testq(rax, rax); // object is in rax
3218   __ jcc(Assembler::zero, is_null);
3219 
3220   // Get cpool & tags index
3221   __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
3222   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
3223   // See if bytecode has already been quicked
3224   __ cmpb(Address(rdx, rbx, 
3225                   Address::times_1, 
3226                   typeArrayOopDesc::header_size(T_BYTE) * wordSize),
3227           JVM_CONSTANT_Class);
3228   __ jcc(Assembler::equal, quicked);
3229 
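  // Not quicked: have the runtime resolve the class.  The resolved klass is
  // returned in rax (the call_VM result register), and rcx is preserved in
  // r12 across the call.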
3230   __ movq(r12, rcx); // save rcx XXX
3231   __ push(atos); // save receiver for result, and for GC
3232   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3233   __ pop_ptr(rdx); // restore receiver
3234   __ movq(rcx, r12); // restore rcx XXX
3235   __ jmpb(resolved);
3236 
3237   // Get superklass in rax and subklass in rbx
3238   __ bind(quicked);
3239   __ movq(rdx, rax); // Save object in rdx; rax needed for subtype check
3240   __ movq(rax, Address(rcx, rbx, 
3241                        Address::times_8, sizeof(constantPoolOopDesc)));
3242 
3243   __ bind(resolved);
3244   __ movq(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
3245 
3246   // Generate subtype check.  Blows rcx, rdi.  Object in rdx.
3247   // Superklass in rax.  Subklass in rbx.
3248   __ gen_subtype_check(rbx, ok_is_subtype);
3249 
3250   // Come here on failure
3251   __ push_ptr(rdx);
3252   // object is at TOS
3253   __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
3254 
3255   // Come here on success
3256   __ bind(ok_is_subtype);
3257   __ movq(rax, rdx); // Restore object from rdx
3258 
3259   // Collect counts on whether this check-cast sees NULLs a lot or not.
3260   if (ProfileInterpreter) {
3261     __ jmp(done);
3262     __ bind(is_null);
3263     __ profile_null_seen(rcx);
3264   } else {
3265     __ bind(is_null);   // same as 'done'
3266   }
3267   __ bind(done);
3268 }
3269 
3270 void TemplateTable::instanceof() {
3271   transition(atos, itos);
3272   Label done, is_null, ok_is_subtype, quicked, resolved;
3273   __ testq(rax, rax);
3274   __ jcc(Assembler::zero, is_null);
3275 
3276   // Get cpool & tags index
3277   __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
3278   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
3279   // See if bytecode has already been quicked
3280   __ cmpb(Address(rdx, rbx,
3281                   Address::times_1, 
3282                   typeArrayOopDesc::header_size(T_BYTE) * wordSize),
3283           JVM_CONSTANT_Class);
3284   __ jcc(Assembler::equal, quicked);
3285 
3286   __ movq(r12, rcx); // save rcx
3287   __ push(atos); // save receiver for result, and for GC
3288   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3289   __ pop_ptr(rdx); // restore receiver
3290   __ movq(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
3291   __ movq(rcx, r12); // restore rcx
3292   __ jmpb(resolved);
3293 
3294   // Get superklass in rax and subklass in rdx
3295   __ bind(quicked);
3296   __ movq(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
3297   __ movq(rax, Address(rcx, rbx,
3298                        Address::times_8, sizeof(constantPoolOopDesc)));
3299 
3300   __ bind(resolved);
3301 
3302   // Generate subtype check.  Blows rcx, rdi
3303   // Superklass in rax.  Subklass in rdx.
3304   __ gen_subtype_check(rdx, ok_is_subtype);
3305 
3306   // Come here on failure
3307   __ xorl(rax, rax);
3308   __ jmpb(done);
3309   // Come here on success
3310   __ bind(ok_is_subtype);
3311   __ movl(rax, 1);
3312 
3313   // Collect counts on whether this test sees NULLs a lot or not.
3314   if (ProfileInterpreter) {
3315     __ jmp(done);
3316     __ bind(is_null);
3317     __ profile_null_seen(rcx);
3318   } else {
3319     __ bind(is_null);   // same as 'done'
3320   }
3321   __ bind(done);
3322   // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
3323   // rax = 1: obj != NULL and obj is     an instanceof the specified klass
3324 }
3325 
3326 //-----------------------------------------------------------------------------
3327 // Breakpoints
3328 void TemplateTable::_breakpoint() {
3329   // Note: We get here even if we are single stepping.
3330   // jbug insists on setting breakpoints at every bytecode
3331   // even if we are in single step mode.
3332  
3333   transition(vtos, vtos);
3334 
3335   // get the unpatched byte code
3336   __ get_method(c_rarg1);
3337   __ call_VM(noreg, 
3338              CAST_FROM_FN_PTR(address, 
3339                               InterpreterRuntime::get_original_bytecode_at),
3340              c_rarg1, r13);
3341   __ movq(rbx, rax);
3342 
3343   // post the breakpoint event
3344   __ get_method(c_rarg1);
3345   __ call_VM(noreg, 
3346              CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
3347              c_rarg1, r13);
3348 
3349   // complete the execution of original bytecode
3350   __ dispatch_only_normal(vtos);
3351 } 
3352 
3353 //-----------------------------------------------------------------------------
3354 // Exceptions
3355 
3356 void TemplateTable::athrow() {
3357   transition(atos, vtos);
3358   __ null_check(rax);
3359   __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
3360 }
3361 
3362 //-----------------------------------------------------------------------------
3363 // Synchronization
3364 //
3365 // Note: monitorenter & exit are symmetric routines, which is reflected
3366 //       in the assembly code structure as well
3367 //
3368 // Stack layout:
3369 //
3370 // [expressions  ] <--- rsp               = expression stack top
3371 // ..
3372 // [expressions  ]
3373 // [monitor entry] <--- monitor block top = expression stack bot
3374 // ..
3375 // [monitor entry]
3376 // [frame data   ] <--- monitor block bot
3377 // ...
3378 // [saved rbp    ] <--- rbp
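//
// monitorenter scans the monitor block for a free slot (obj == NULL) or an
// entry that already holds the same object; if neither is found, the block
// is grown by one entry and the expression stack is slid down to make room.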
3379 void TemplateTable::monitorenter() {
3380   transition(atos, vtos);
3381 
3382   // check for NULL object
3383   __ null_check(rax);
3384 
3385   const Address monitor_block_top(
3386         rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3387   const Address monitor_block_bot(
3388         rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
3389   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3390 
3391   Label allocated;
3392 
3393   // initialize entry pointer
3394   __ xorl(c_rarg1, c_rarg1); // points to free slot or NULL
3395 
3396   // find a free slot in the monitor block (result in c_rarg1)
3397   { 
3398     Label entry, loop, exit;
3399     __ movq(c_rarg3, monitor_block_top); // points to current entry,
3400                                      // starting with top-most entry
3401     __ leaq(c_rarg2, monitor_block_bot); // points to word before bottom
3402                                      // of monitor block
3403     __ jmpb(entry);
3404 
3405     __ bind(loop);
3406     // check if current entry is used
3407     __ cmpq(Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()), (int) NULL);
3408     // if not used then remember entry in c_rarg1
3409     __ cmovq(Assembler::equal, c_rarg1, c_rarg3); 
3410     // check if current entry is for same object
3411     __ cmpq(rax, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
3412     // if same object then stop searching
3413     __ jccb(Assembler::equal, exit);
3414     // otherwise advance to next entry
3415     __ addq(c_rarg3, entry_size);
3416     __ bind(entry);
3417     // check if bottom reached
3418     __ cmpq(c_rarg3, c_rarg2);
3419     // if not at bottom then check this entry
3420     __ jcc(Assembler::notEqual, loop);
3421     __ bind(exit);
3422   }
3423 
3424   __ testq(c_rarg1, c_rarg1); // check if a slot has been found
3425   __ jcc(Assembler::notZero, allocated); // if found, continue with that one
3426 
3427   // allocate one if there's no free slot
3428   { 
3429     Label entry, loop;
3430     // 1. compute new pointers       // rsp: old expression stack top
3431     __ movq(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom
3432     __ subq(rsp, entry_size);        // move expression stack top
3433     __ subq(c_rarg1, entry_size);        // move expression stack bottom
3434     __ movq(c_rarg3, rsp);               // set start value for copy loop
3435     __ movq(monitor_block_bot, c_rarg1); // set new monitor block bottom
3436     __ jmp(entry);
3437     // 2. move expression stack contents
3438     __ bind(loop);
3439     __ movq(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
3440                                             // word from old location
3441     __ movq(Address(c_rarg3, 0), c_rarg2);  // and store it at new location
3442     __ addq(c_rarg3, wordSize);             // advance to next word
3443     __ bind(entry);
3444     __ cmpq(c_rarg3, c_rarg1);              // check if bottom reached
3445     __ jcc(Assembler::notEqual, loop);      // if not at bottom then
3446                                             // copy next word
3447   }
3448   
3449   // call run-time routine
3450   // c_rarg1: points to monitor entry
3451   __ bind(allocated);
3452 
3453   // Increment bcp to point to the next bytecode, so exception
3454   // handling for async. exceptions works correctly.
3455   // The object has already been popped from the stack, so the
3456   // expression stack looks correct.
3457   __ incrementq(r13);
3458 
3459   // store object  
3460   __ movq(Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()), rax); 
3461   __ lock_object(c_rarg1);
3462 
3463   // check to make sure this monitor doesn't cause stack overflow after locking
3464   __ save_bcp();  // in case of exception
3465   __ generate_stack_overflow_check(0);
3466 
3467   // The bcp has already been incremented. Just need to dispatch to
3468   // next instruction.
3469   __ dispatch_next(vtos);
3470 }
3471 
3472 
3473 void TemplateTable::monitorexit() {
3474   transition(atos, vtos);
3475 
3476   // check for NULL object
3477   __ null_check(rax);
3478 
3479   const Address monitor_block_top(
3480         rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3481   const Address monitor_block_bot(
3482         rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
3483   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3484 
3485   Label found;
3486 
3487   // find matching slot
3488   { 
3489     Label entry, loop;
3490     __ movq(c_rarg1, monitor_block_top); // points to current entry,
3491                                      // starting with top-most entry
3492     __ leaq(c_rarg2, monitor_block_bot); // points to word before bottom
3493                                      // of monitor block
3494     __ jmpb(entry);
3495 
3496     __ bind(loop);
3497     // check if current entry is for same object
3498     __ cmpq(rax, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
3499     // if same object then stop searching
3500     __ jcc(Assembler::equal, found);
3501     // otherwise advance to next entry
3502     __ addq(c_rarg1, entry_size);
3503     __ bind(entry);
3504     // check if bottom reached
3505     __ cmpq(c_rarg1, c_rarg2);
3506     // if not at bottom then check this entry
3507     __ jcc(Assembler::notEqual, loop);
3508   }
3509 
3510   // Error handling. Unlocking was not block-structured.
3511   __ call_VM(noreg, CAST_FROM_FN_PTR(address, 
3512                    InterpreterRuntime::throw_illegal_monitor_state_exception));
3513   __ should_not_reach_here();
3514 
3515   // call run-time routine
3516   // c_rarg1: points to monitor entry
3517   __ bind(found);
3518   __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)  
3519   __ unlock_object(c_rarg1);    
3520   __ pop_ptr(rax); // discard object  
3521 }
3522 
3523 
3524 // Wide instructions
3525 void TemplateTable::wide() {
3526   transition(vtos, vtos);
3527   __ load_unsigned_byte(rbx, at_bcp(1));
3528   __ lea(rscratch1, ExternalAddress((address)Interpreter::_wentry_point));
3529   __ jmp(Address(rscratch1, rbx, Address::times_8));
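  // In effect: dispatch through the table of wide-bytecode entry points,
  // i.e. goto Interpreter::_wentry_point[bytecode at bcp + 1].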
3530   // Note: the r13 increment step is part of the individual wide
3531   // bytecode implementations
3532 }
3533 
3534 
3535 // Multi arrays
3536 void TemplateTable::multianewarray() {
3537   transition(vtos, atos);
3538   __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
3539   // last dim is on top of stack; we want address of first one:
3540   // first_addr = last_addr + (ndims - 1) * wordSize
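  // For example, with 3 dimensions on the stack, c_rarg1 ends up as
  // rsp + 2*wordSize, the slot holding the first dimension (when the stack
  // is tagged, the index is doubled first, see below).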
3541   if (TaggedStackInterpreter) __ shll(rax, 1);  // index*2
3542   __ leaq(c_rarg1, Address(rsp, rax, Address::times_8, -wordSize));
3543   call_VM(rax, 
3544           CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
3545           c_rarg1);
3546   __ load_unsigned_byte(rbx, at_bcp(3));
3547   if (TaggedStackInterpreter) __ shll(rbx, 1);  // index*2
3548   __ leaq(rsp, Address(rsp, rbx, Address::times_8));
3549 }