hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp

rev 611 : Merge

@@ -1,10 +1,7 @@
-#ifdef USE_PRAGMA_IDENT_SRC
-#pragma ident "@(#)templateTable_x86_32.cpp     1.323 07/09/17 09:26:00 JVM"
-#endif
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.

@@ -108,10 +105,82 @@
 
 
 //----------------------------------------------------------------------------------------------------
 // Miscellaneous helper routines
 
+// Store an oop (or NULL) at the address described by obj.
+// If val == noreg this means store a NULL
+
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == rax, "parameter is just for looks");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        // We do it regardless of precise because we need the registers
+        if (obj.index() == noreg && obj.disp() == 0) {
+          if (obj.base() != rdx) {
+            __ movl(rdx, obj.base());
+          }
+        } else {
+          __ leal(rdx, obj);
+        }
+        __ get_thread(rcx);
+        __ save_bcp();
+        __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg);
+
+        // Do the actual store
+        // noreg means NULL
+        if (val == noreg) {
+          __ movl(Address(rdx, 0), NULL_WORD);
+          // No post barrier for NULL
+        } else {
+          __ movl(Address(rdx, 0), val);
+          __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi);
+        }
+        __ restore_bcp();
+
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (val == noreg) {
+          __ movl(obj, NULL_WORD);
+        } else {
+          __ movl(obj, val);
+          // flatten object address if needed
+          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
+            __ store_check(obj.base());
+          } else {
+            __ leal(rdx, obj);
+            __ store_check(rdx);
+          }
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      if (val == noreg) {
+        __ movl(obj, NULL_WORD);
+      } else {
+        __ movl(obj, val);
+      }
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+
 Address TemplateTable::at_bcp(int offset) {
   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
   return Address(rsi, offset);
 }
 

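The new do_oop_store helper above centralizes GC-barrier selection for every
oop store in the template table: the G1 cases wrap the store in a SATB
pre-barrier and a dirty-card post-barrier, the card-table cases emit only the
post-store card mark, and ModRef/Other store with no barrier at all. Call
sites follow one pattern, as the aastore and putfield hunks below show:

    // store a real value held in rax
    do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
    // store NULL: pass noreg instead of a value register
    do_oop_store(_masm, element_address, noreg, _bs->kind(), true);

The precise flag only matters for the card-table cases: true (array stores)
forces the card mark to cover the exact element address, while false
(instance fields) lets it use the object base.
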
@@ -120,16 +189,18 @@
                                    Register scratch,
                                    bool load_bc_into_scratch/*=true*/) {
                                    
   if (!RewriteBytecodes) return;
   // the pair bytecodes have already done the load.
-  if (load_bc_into_scratch) __ movl(bc, bytecode);
+  if (load_bc_into_scratch) {
+    __ movl(bc, bytecode);
+  }
   Label patch_done;
   if (JvmtiExport::can_post_breakpoint()) {
     Label fast_patch;
     // if a breakpoint is present we can't rewrite the stream directly
-    __ movzxb(scratch, at_bcp(0));
+    __ movzbl(scratch, at_bcp(0));
     __ cmpl(scratch, Bytecodes::_breakpoint);
     __ jcc(Assembler::notEqual, fast_patch);
     __ get_method(scratch);
     // Let breakpoint table handling rewrite to quicker bytecode 
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, rsi, bc);

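A note on the renames recurring throughout this change: the assembler
mnemonics were split by operand size so the same templates can serve 32- and
64-bit builds. A summary of the mapping, as visible in the hunks below:

    movzxb  -> movzbl      // zero-extend byte to 32 bits
    bswap   -> bswapl      // byte-swap a 32-bit register
    pushl   -> push        // operand size implied by the stack width
    popl    -> pop
    movl    -> mov/movptr  // "ptr" forms are 32-bit here, 64-bit on LP64
    addl    -> addptr      // likewise subptr, andptr, negptr, cmpptr,
                           // testptr, xorptr for pointer-sized operands
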
@@ -170,33 +241,33 @@
 
 
 
 void TemplateTable::aconst_null() {
   transition(vtos, atos);
-  __ xorl(rax, rax);
+  __ xorptr(rax, rax);
 }
 
 
 void TemplateTable::iconst(int value) {
   transition(vtos, itos);
   if (value == 0) {
-    __ xorl(rax, rax);
+    __ xorptr(rax, rax);
   } else {
-    __ movl(rax, value);
+    __ movptr(rax, value);
   }
 }
 
 
 void TemplateTable::lconst(int value) {
   transition(vtos, ltos);
   if (value == 0) {
-    __ xorl(rax, rax);
+    __ xorptr(rax, rax);
   } else {
-    __ movl(rax, value);
+    __ movptr(rax, value);
   }
   assert(value >= 0, "check this code");
-  __ xorl(rdx, rdx);
+  __ xorptr(rdx, rdx);
 }
 
 
 void TemplateTable::fconst(int value) {
   transition(vtos, ftos);

@@ -224,11 +295,11 @@
 
 
 void TemplateTable::sipush() {
   transition(vtos, itos);
   __ load_unsigned_word(rax, at_bcp(1));
-  __ bswap(rax);
+  __ bswapl(rax);
   __ sarl(rax, 16);
 }
 
 void TemplateTable::ldc(bool wide) {
   transition(vtos, vtos);

@@ -242,11 +313,11 @@
   __ get_cpool_and_tags(rcx, rax);
   const int base_offset = constantPoolOopDesc::header_size() * wordSize;
   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
 
   // get type
-  __ xorl(rdx, rdx);
+  __ xorptr(rdx, rdx);
   __ movb(rdx, Address(rax, rbx, Address::times_1, tags_offset));
 
   // unresolved string - get the resolved string
   __ cmpl(rdx, JVM_CONSTANT_UnresolvedString);
   __ jccb(Assembler::equal, call_ldc);

@@ -272,11 +343,11 @@
 
   __ bind(notClass);
   __ cmpl(rdx, JVM_CONSTANT_Float);
   __ jccb(Assembler::notEqual, notFloat);
   // ftos
-  __ fld_s(    Address(rcx, rbx, Address::times_4, base_offset));
+  __ fld_s(    Address(rcx, rbx, Address::times_ptr, base_offset));
   __ push(ftos);
   __ jmp(Done);
 
   __ bind(notFloat);
 #ifdef ASSERT

@@ -289,17 +360,18 @@
     __ bind(L);
   }
 #endif
   Label isOop;
   // atos and itos
-  __ movl(rax, Address(rcx, rbx, Address::times_4, base_offset));
   // String is the only oop type we will see here
   __ cmpl(rdx, JVM_CONSTANT_String);
   __ jccb(Assembler::equal, isOop);
+  __ movl(rax, Address(rcx, rbx, Address::times_ptr, base_offset));
   __ push(itos);
   __ jmp(Done);
   __ bind(isOop);
+  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset));
   __ push(atos);
 
   if (VerifyOops) {
     __ verify_oop(rax);
   }

@@ -317,28 +389,28 @@
 
   // get type
   __ cmpb(Address(rax, rbx, Address::times_1, tags_offset), JVM_CONSTANT_Double);
   __ jccb(Assembler::notEqual, Long);
   // dtos
-  __ fld_d(    Address(rcx, rbx, Address::times_4, base_offset));
+  __ fld_d(    Address(rcx, rbx, Address::times_ptr, base_offset));
   __ push(dtos);
   __ jmpb(Done);
 
   __ bind(Long);
   // ltos
-  __ movl(rax, Address(rcx, rbx, Address::times_4, base_offset + 0 * wordSize));
-  __ movl(rdx, Address(rcx, rbx, Address::times_4, base_offset + 1 * wordSize));
+  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset + 0 * wordSize));
+  NOT_LP64(__ movptr(rdx, Address(rcx, rbx, Address::times_ptr, base_offset + 1 * wordSize)));
 
   __ push(ltos);
 
   __ bind(Done);
 }
 
 
 void TemplateTable::locals_index(Register reg, int offset) {
   __ load_unsigned_byte(reg, at_bcp(offset));
-  __ negl(reg); 
+  __ negptr(reg);
 }
 
 
 void TemplateTable::iload() {
   transition(vtos, itos);

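The NOT_LP64/LP64_ONLY wrappers appearing from here on come from HotSpot's
shared macros; a simplified sketch of their effect:

    #ifdef _LP64
    #define LP64_ONLY(code) code
    #define NOT_LP64(code)
    #else
    #define LP64_ONLY(code)
    #define NOT_LP64(code) code
    #endif

So NOT_LP64(__ movl(rdx, haddress(rbx))) emits the high-word load of a long
only in the 32-bit build, where longs live in the rdx:rax pair; a 64-bit
build moves the whole value with the single movptr.
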
@@ -400,12 +472,12 @@
 
 
 void TemplateTable::lload() {
   transition(vtos, ltos);
   locals_index(rbx);
-  __ movl(rax, laddress(rbx));
-  __ movl(rdx, haddress(rbx));
+  __ movptr(rax, laddress(rbx));
+  NOT_LP64(__ movl(rdx, haddress(rbx)));
   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 }
 
 
 void TemplateTable::fload() {

@@ -422,34 +494,34 @@
   if (TaggedStackInterpreter) {
     // Get double out of locals array, onto temp stack and load with
     // float instruction into ST0
     __ movl(rax, laddress(rbx));
     __ movl(rdx, haddress(rbx));
-    __ pushl(rdx);  // push hi first
-    __ pushl(rax);
+    __ push(rdx);  // push hi first
+    __ push(rax);
     __ fld_d(Address(rsp, 0));
-    __ addl(rsp, 2*wordSize);
+    __ addptr(rsp, 2*wordSize);
     debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   } else {
     __ fld_d(daddress(rbx));
   }
 }
 
 
 void TemplateTable::aload() {
   transition(vtos, atos);
   locals_index(rbx);
-  __ movl(rax, iaddress(rbx));
+  __ movptr(rax, aaddress(rbx));
   debug_only(__ verify_local_tag(frame::TagReference, rbx));
 }
 
 
 void TemplateTable::locals_index_wide(Register reg) {
   __ movl(reg, at_bcp(2));
-  __ bswap(reg);
+  __ bswapl(reg);
   __ shrl(reg, 16);
-  __ negl(reg); 
+  __ negptr(reg);
 }
 
 
 void TemplateTable::wide_iload() {
   transition(vtos, itos);

@@ -460,12 +532,12 @@
 
 
 void TemplateTable::wide_lload() {
   transition(vtos, ltos);
   locals_index_wide(rbx);
-  __ movl(rax, laddress(rbx));
-  __ movl(rdx, haddress(rbx));
+  __ movptr(rax, laddress(rbx));
+  NOT_LP64(__ movl(rdx, haddress(rbx)));
   debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
 }
 
 
 void TemplateTable::wide_fload() {

@@ -482,12 +554,12 @@
   if (TaggedStackInterpreter) {
     // Get double out of locals array, onto temp stack and load with
     // float instruction into ST0
     __ movl(rax, laddress(rbx));
     __ movl(rdx, haddress(rbx));
-    __ pushl(rdx);  // push hi first
-    __ pushl(rax);
+    __ push(rdx);  // push hi first
+    __ push(rax);
     __ fld_d(Address(rsp, 0));
     __ addl(rsp, 2*wordSize);
     debug_only(__ verify_local_tag(frame::TagCategory2, rbx));
   } else {
     __ fld_d(daddress(rbx));

@@ -496,11 +568,11 @@
 
 
 void TemplateTable::wide_aload() {
   transition(vtos, atos);
   locals_index_wide(rbx);
-  __ movl(rax, iaddress(rbx));
+  __ movptr(rax, aaddress(rbx));
   debug_only(__ verify_local_tag(frame::TagReference, rbx));
 }
 
 void TemplateTable::index_check(Register array, Register index) {
   // Pop ptr into array

@@ -510,16 +582,17 @@
 
 void TemplateTable::index_check_without_pop(Register array, Register index) {
   // destroys rbx,
   // check array
   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
+  LP64_ONLY(__ movslq(index, index));
   // check index
   __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
   if (index != rbx) {
     // ??? convention: move aberrant index into rbx, for exception message
     assert(rbx != array, "different registers");
-    __ movl(rbx, index);
+    __ mov(rbx, index);
   }
   __ jump_cc(Assembler::aboveEqual,
              ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
 }
 

@@ -536,14 +609,14 @@
 void TemplateTable::laload() {
   transition(itos, ltos);
   // rax,: index
   // rdx: array
   index_check(rdx, rax);
-  __ movl(rbx, rax);
+  __ mov(rbx, rax);
   // rbx,: index
-  __ movl(rax, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize));
-  __ movl(rdx, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize));
+  __ movptr(rax, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize));
+  NOT_LP64(__ movl(rdx, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize)));
 }
 
 
 void TemplateTable::faload() {
   transition(itos, ftos);

@@ -566,33 +639,33 @@
 void TemplateTable::aaload() {
   transition(itos, atos);
   // rdx: array
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
-  __ movl(rax, Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ movptr(rax, Address(rdx, rax, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 }
 
 
 void TemplateTable::baload() {
   transition(itos, itos);
   // rdx: array
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
   // can do better code for P5 - fix this at some point
   __ load_signed_byte(rbx, Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
-  __ movl(rax, rbx);
+  __ mov(rax, rbx);
 }
 
 
 void TemplateTable::caload() {
   transition(itos, itos);
   // rdx: array
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
   // can do better code for P5 - may want to improve this at some point
   __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
-  __ movl(rax, rbx);
+  __ mov(rax, rbx);
 }
 
 // iload followed by caload frequent pair
 void TemplateTable::fast_icaload() {
   transition(vtos, itos);

@@ -603,21 +676,21 @@
 
   // rdx: array
   index_check(rdx, rax);
   // rax,: index
   __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
-  __ movl(rax, rbx);
+  __ mov(rax, rbx);
 }
 
 void TemplateTable::saload() {
   transition(itos, itos);
   // rdx: array
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
   // can do better code for P5 - may want to improve this at some point
   __ load_signed_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
-  __ movl(rax, rbx);
+  __ mov(rax, rbx);
 }
 
 
 void TemplateTable::iload(int n) {
   transition(vtos, itos);

@@ -626,12 +699,12 @@
 }
 
 
 void TemplateTable::lload(int n) {
   transition(vtos, ltos);
-  __ movl(rax, laddress(n));
-  __ movl(rdx, haddress(n));
+  __ movptr(rax, laddress(n));
+  NOT_LP64(__ movptr(rdx, haddress(n)));
   debug_only(__ verify_local_tag(frame::TagCategory2, n));
 }
 
 
 void TemplateTable::fload(int n) {

@@ -646,24 +719,24 @@
   if (TaggedStackInterpreter) {
     // Get double out of locals array, onto temp stack and load with
     // float instruction into ST0
     __ movl(rax, laddress(n));
     __ movl(rdx, haddress(n));
-    __ pushl(rdx);  // push hi first
-    __ pushl(rax);
+    __ push(rdx);  // push hi first
+    __ push(rax);
     __ fld_d(Address(rsp, 0));
-    __ addl(rsp, 2*wordSize);  // reset rsp
+    __ addptr(rsp, 2*wordSize);  // reset rsp
     debug_only(__ verify_local_tag(frame::TagCategory2, n));
   } else {
     __ fld_d(daddress(n));
   }
 }
 
 
 void TemplateTable::aload(int n) {
   transition(vtos, atos);
-  __ movl(rax, aaddress(n));
+  __ movptr(rax, aaddress(n));
   debug_only(__ verify_local_tag(frame::TagReference, n));
 }
 
 
 void TemplateTable::aload_0() {

@@ -741,12 +814,12 @@
 
 
 void TemplateTable::lstore() {
   transition(ltos, vtos);
   locals_index(rbx);
-  __ movl(laddress(rbx), rax);
-  __ movl(haddress(rbx), rdx);
+  __ movptr(laddress(rbx), rax);
+  NOT_LP64(__ movptr(haddress(rbx), rdx));
   __ tag_local(frame::TagCategory2, rbx);
 }
 
 
 void TemplateTable::fstore() {

@@ -760,16 +833,16 @@
 void TemplateTable::dstore() {
   transition(dtos, vtos);
   locals_index(rbx);
   if (TaggedStackInterpreter) {
     // Store double on stack and reload into locals nonadjacently
-    __ subl(rsp, 2 * wordSize);
+    __ subptr(rsp, 2 * wordSize);
     __ fstp_d(Address(rsp, 0));
-    __ popl(rax);
-    __ popl(rdx);
-    __ movl(laddress(rbx), rax);
-    __ movl(haddress(rbx), rdx);
+    __ pop(rax);
+    __ pop(rdx);
+    __ movptr(laddress(rbx), rax);
+    __ movptr(haddress(rbx), rdx);
     __ tag_local(frame::TagCategory2, rbx);
   } else {
     __ fstp_d(daddress(rbx));
   }
 }

@@ -777,11 +850,11 @@
 
 void TemplateTable::astore() {
   transition(vtos, vtos);
   __ pop_ptr(rax, rdx);   // will need to pop tag too
   locals_index(rbx);
-  __ movl(aaddress(rbx), rax);
+  __ movptr(aaddress(rbx), rax);
   __ tag_local(rdx, rbx);    // store the same tag in the local; it may be a returnAddress
 }
 
 
 void TemplateTable::wide_istore() {

@@ -795,12 +868,12 @@
 
 void TemplateTable::wide_lstore() {
   transition(vtos, vtos);
   __ pop_l(rax, rdx);
   locals_index_wide(rbx);
-  __ movl(laddress(rbx), rax);
-  __ movl(haddress(rbx), rdx);
+  __ movptr(laddress(rbx), rax);
+  NOT_LP64(__ movl(haddress(rbx), rdx));
   __ tag_local(frame::TagCategory2, rbx);
 }
 
 
 void TemplateTable::wide_fstore() {

@@ -815,11 +888,11 @@
 
 void TemplateTable::wide_astore() {
   transition(vtos, vtos);
   __ pop_ptr(rax, rdx);
   locals_index_wide(rbx);
-  __ movl(aaddress(rbx), rax);
+  __ movptr(aaddress(rbx), rax);
   __ tag_local(rdx, rbx);
 }
 
 
 void TemplateTable::iastore() {

@@ -839,12 +912,12 @@
   // rax,: low(value)
   // rcx: array
   // rdx: high(value)
   index_check(rcx, rbx);  // prefer index in rbx,
   // rbx,: index
-  __ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize), rax);
-  __ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize), rdx);
+  __ movptr(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize), rax);
+  NOT_LP64(__ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize), rdx));
 }
 
 
 void TemplateTable::fastore() {
   transition(ftos, vtos);

@@ -870,25 +943,27 @@
 
 void TemplateTable::aastore() {
   Label is_null, ok_is_subtype, done;
   transition(vtos, vtos);
   // stack: ..., array, index, value
-  __ movl(rax, at_tos());     // Value
+  __ movptr(rax, at_tos());     // Value
   __ movl(rcx, at_tos_p1());  // Index
-  __ movl(rdx, at_tos_p2());  // Array
+  __ movptr(rdx, at_tos_p2());  // Array
+
+  Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   index_check_without_pop(rdx, rcx);      // kills rbx,
   // do array store check - check for NULL value first
-  __ testl(rax, rax);
+  __ testptr(rax, rax);
   __ jcc(Assembler::zero, is_null);
 
   // Move subklass into EBX
-  __ movl(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
+  __ movptr(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
   // Move superklass into EAX
-  __ movl(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
-  __ movl(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
-  // Compress array+index*4+12 into a single register.  Frees ECX.
-  __ leal(rdx, Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+  __ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
+  __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
+  // Compress array+index*wordSize+12 into a single register.  Frees ECX.
+  __ lea(rdx, element_address);
 
   // Generate subtype check.  Blows ECX.  Resets EDI to locals.
   // Superklass in EAX.  Subklass in EBX.
   __ gen_subtype_check( rbx, ok_is_subtype );
 

@@ -896,23 +971,28 @@
   // object is at TOS
   __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
 
   // Come here on success
   __ bind(ok_is_subtype);
-  __ movl(rax, at_rsp());     // Value
-  __ movl(Address(rdx, 0), rax);
-  __ store_check(rdx);
-  __ jmpb(done);
+
+  // Get the value to store
+  __ movptr(rax, at_rsp());
+  // and store it with appropriate barrier
+  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
+
+  __ jmp(done);
 
   // Have a NULL in EAX, EDX=array, ECX=index.  Store NULL at ary[idx]
   __ bind(is_null);
   __ profile_null_seen(rbx);
-  __ movl(Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax);
+
+  // Store NULL (noreg means NULL to do_oop_store)
+  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
 
   // Pop stack arguments
   __ bind(done);
-  __ addl(rsp, 3 * Interpreter::stackElementSize());
+  __ addptr(rsp, 3 * Interpreter::stackElementSize());
 }
 
 
 void TemplateTable::bastore() {
   transition(itos, vtos);

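Two details worth noting in the rewritten aastore: element_address is built
before index_check runs, which is safe because an Address is only a
descriptor and is not dereferenced until do_oop_store uses it; and the NULL
path deliberately has no post-barrier, since storing NULL creates no new
reference for the collector to track:

    if (val == noreg) {
      __ movl(Address(rdx, 0), NULL_WORD);
      // No post barrier for NULL
    }

Under G1 only the pre-barrier still runs for a NULL store, because it must
record the value being overwritten, not the one being written.
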
@@ -948,12 +1028,12 @@
 }
 
 
 void TemplateTable::lstore(int n) {
   transition(ltos, vtos);
-  __ movl(laddress(n), rax);
-  __ movl(haddress(n), rdx);
+  __ movptr(laddress(n), rax);
+  NOT_LP64(__ movptr(haddress(n), rdx));
   __ tag_local(frame::TagCategory2, n);
 }
 
 
 void TemplateTable::fstore(int n) {

@@ -964,14 +1044,14 @@
 
 
 void TemplateTable::dstore(int n) {
   transition(dtos, vtos);
   if (TaggedStackInterpreter) {
-    __ subl(rsp, 2 * wordSize);
+    __ subptr(rsp, 2 * wordSize);
     __ fstp_d(Address(rsp, 0));
-    __ popl(rax);
-    __ popl(rdx);
+    __ pop(rax);
+    __ pop(rdx);
     __ movl(laddress(n), rax);
     __ movl(haddress(n), rdx);
     __ tag_local(frame::TagCategory2, n);
   } else {
     __ fstp_d(daddress(n));

@@ -980,24 +1060,24 @@
 
 
 void TemplateTable::astore(int n) {
   transition(vtos, vtos);
   __ pop_ptr(rax, rdx);
-  __ movl(aaddress(n), rax);
+  __ movptr(aaddress(n), rax);
   __ tag_local(rdx, n);
 }
 
 
 void TemplateTable::pop() {
   transition(vtos, vtos);
-  __ addl(rsp, Interpreter::stackElementSize());
+  __ addptr(rsp, Interpreter::stackElementSize());
 }
 
 
 void TemplateTable::pop2() {
   transition(vtos, vtos);
-  __ addl(rsp, 2*Interpreter::stackElementSize());
+  __ addptr(rsp, 2*Interpreter::stackElementSize());
 }
 
 
 void TemplateTable::dup() {
   transition(vtos, vtos);

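pop and pop2 now adjust rsp with addptr and Interpreter::stackElementSize(),
which also absorbs the tagged-stack difference; conceptually (a sketch,
assuming the tagged interpreter keeps a one-word type tag beside each value):

    static int stackElementSize() {
      return TaggedStackInterpreter ? 2 * wordSize : wordSize;
    }
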
@@ -1100,18 +1180,18 @@
 
 void TemplateTable::iop2(Operation op) {
   transition(itos, itos);
   switch (op) {
     case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
-    case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
+    case sub  : __ mov(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
     case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
     case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
     case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
     case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
-    case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
-    case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
-    case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
+    case shl  : __ mov(rcx, rax); __ pop_i(rax); __ shll (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
+    case shr  : __ mov(rcx, rax); __ pop_i(rax); __ sarl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
+    case ushr : __ mov(rcx, rax); __ pop_i(rax); __ shrl (rax);      break; // implicit masking of lower 5 bits by Intel shift instr.
     default   : ShouldNotReachHere();
   }
 }
 
 

@@ -1119,22 +1199,22 @@
   transition(ltos, ltos);
   __ pop_l(rbx, rcx);
   switch (op) {
     case add : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
     case sub : __ subl(rbx, rax); __ sbbl(rcx, rdx);
-               __ movl(rax, rbx); __ movl(rdx, rcx); break;
+               __ mov(rax, rbx); __ mov(rdx, rcx); break;
     case _and: __ andl(rax, rbx); __ andl(rdx, rcx); break;
     case _or : __ orl (rax, rbx); __ orl (rdx, rcx); break;
     case _xor: __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
     default : ShouldNotReachHere();
   }
 }
 
 
 void TemplateTable::idiv() {
   transition(itos, itos);
-  __ movl(rcx, rax);
+  __ mov(rcx, rax);
   __ pop_i(rax);
   // Note: could xor rax, and rcx and compare with (-1 ^ min_int). If
   //       they are not equal, one could do a normal division (no correction
   //       needed), which may speed up this implementation for the common case.
   //       (see also JVM spec., p.243 & p.271)

@@ -1142,56 +1222,56 @@
 }
 
 
 void TemplateTable::irem() {
   transition(itos, itos);
-  __ movl(rcx, rax);
+  __ mov(rcx, rax);
   __ pop_i(rax);
   // Note: could xor rax, and rcx and compare with (-1 ^ min_int). If
   //       they are not equal, one could do a normal division (no correction
   //       needed), which may speed up this implementation for the common case.
   //       (see also JVM spec., p.243 & p.271)
   __ corrected_idivl(rcx);
-  __ movl(rax, rdx);
+  __ mov(rax, rdx);
 }
 
 
 void TemplateTable::lmul() {
   transition(ltos, ltos);
   __ pop_l(rbx, rcx);
-  __ pushl(rcx); __ pushl(rbx);
-  __ pushl(rdx); __ pushl(rax);
+  __ push(rcx); __ push(rbx);
+  __ push(rdx); __ push(rax);
   __ lmul(2 * wordSize, 0);
-  __ addl(rsp, 4 * wordSize);  // take off temporaries
+  __ addptr(rsp, 4 * wordSize);  // take off temporaries
 }
 
 
 void TemplateTable::ldiv() {
   transition(ltos, ltos);
   __ pop_l(rbx, rcx);
-  __ pushl(rcx); __ pushl(rbx);
-  __ pushl(rdx); __ pushl(rax);
+  __ push(rcx); __ push(rbx);
+  __ push(rdx); __ push(rax);
   // check if y = 0
   __ orl(rax, rdx);
   __ jump_cc(Assembler::zero,
              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
-  __ addl(rsp, 4 * wordSize);  // take off temporaries
+  __ addptr(rsp, 4 * wordSize);  // take off temporaries
 }
 
 
 void TemplateTable::lrem() {
   transition(ltos, ltos);
   __ pop_l(rbx, rcx);
-  __ pushl(rcx); __ pushl(rbx);
-  __ pushl(rdx); __ pushl(rax);
+  __ push(rcx); __ push(rbx);
+  __ push(rdx); __ push(rax);
   // check if y = 0
   __ orl(rax, rdx);
   __ jump_cc(Assembler::zero,
              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
-  __ addl(rsp, 4 * wordSize);
+  __ addptr(rsp, 4 * wordSize);
 }
 
 
 void TemplateTable::lshl() {
   transition(itos, ltos);

@@ -1201,19 +1281,19 @@
 }
 
 
 void TemplateTable::lshr() {
   transition(itos, ltos);
-  __ movl(rcx, rax);                             // get shift count
+  __ mov(rcx, rax);                              // get shift count
   __ pop_l(rax, rdx);                            // get shift value
   __ lshr(rdx, rax, true);
 }
 
 
 void TemplateTable::lushr() {
   transition(itos, ltos);
-  __ movl(rcx, rax);                             // get shift count
+  __ mov(rcx, rax);                              // get shift count
   __ pop_l(rax, rdx);                            // get shift value
   __ lshr(rdx, rax);
 }
 
 

@@ -1227,11 +1307,11 @@
     case div: __ fdivr_s(at_rsp());                break;
     case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
     default : ShouldNotReachHere();
   }
   __ f2ieee();
-  __ popl(rax);  // pop float thing off
+  __ pop(rax);  // pop float thing off
 }
 
 
 void TemplateTable::dop2(Operation op) {
   transition(dtos, dtos);

@@ -1281,12 +1361,12 @@
     case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
     default : ShouldNotReachHere();
   }
   __ d2ieee();
   // Pop double precision number from rsp.
-  __ popl(rax);
-  __ popl(rdx);
+  __ pop(rax);
+  __ pop(rdx);
 }
 
 
 void TemplateTable::ineg() {
   transition(itos, itos);

@@ -1322,11 +1402,11 @@
 
 void TemplateTable::wide_iinc() {
   transition(vtos, vtos);
   __ movl(rdx, at_bcp(4));                       // get constant
   locals_index_wide(rbx);
-  __ bswap(rdx);                                 // swap bytes & sign-extend constant
+  __ bswapl(rdx);                                 // swap bytes & sign-extend constant
   __ sarl(rdx, 16);
   __ addl(iaddress(rbx), rdx);
   // Note: should probably use only one movl to get both
   //       the index and the constant -> fix this
 }

@@ -1376,88 +1456,91 @@
     transition(tos_in, tos_out);
   }
 #endif // ASSERT
 
   // Conversion
-  // (Note: use pushl(rcx)/popl(rcx) for 1/2-word stack-ptr manipulation)
+  // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation)
   switch (bytecode()) {
     case Bytecodes::_i2l:
       __ extend_sign(rdx, rax);
       break;
     case Bytecodes::_i2f:
-      __ pushl(rax);         // store int on tos
+      __ push(rax);          // store int on tos
       __ fild_s(at_rsp());   // load int to ST0
       __ f2ieee();           // truncate to float size
-      __ popl(rcx);          // adjust rsp
+      __ pop(rcx);           // adjust rsp
       break;
     case Bytecodes::_i2d:
-      __ pushl(rax);         // add one slot for d2ieee()
-      __ pushl(rax);         // store int on tos
+      __ push(rax);          // add one slot for d2ieee()
+      __ push(rax);          // store int on tos
       __ fild_s(at_rsp());   // load int to ST0
       __ d2ieee();           // truncate to double size
-      __ popl(rcx);          // adjust rsp
-      __ popl(rcx);
+      __ pop(rcx);           // adjust rsp
+      __ pop(rcx);
       break;
     case Bytecodes::_i2b:
       __ shll(rax, 24);      // truncate upper 24 bits
       __ sarl(rax, 24);      // and sign-extend byte
+      LP64_ONLY(__ movsbl(rax, rax));
       break;
     case Bytecodes::_i2c:
       __ andl(rax, 0xFFFF);  // truncate upper 16 bits
+      LP64_ONLY(__ movzwl(rax, rax));
       break;
     case Bytecodes::_i2s:
       __ shll(rax, 16);      // truncate upper 16 bits
       __ sarl(rax, 16);      // and sign-extend short
+      LP64_ONLY(__ movswl(rax, rax));
       break;
     case Bytecodes::_l2i:
       /* nothing to do */
       break;
     case Bytecodes::_l2f:
-      __ pushl(rdx);         // store long on tos
-      __ pushl(rax);
+      __ push(rdx);          // store long on tos
+      __ push(rax);
       __ fild_d(at_rsp());   // load long to ST0
       __ f2ieee();           // truncate to float size
-      __ popl(rcx);          // adjust rsp
-      __ popl(rcx);
+      __ pop(rcx);           // adjust rsp
+      __ pop(rcx);
       break;
     case Bytecodes::_l2d:
-      __ pushl(rdx);         // store long on tos
-      __ pushl(rax);
+      __ push(rdx);          // store long on tos
+      __ push(rax);
       __ fild_d(at_rsp());   // load long to ST0
       __ d2ieee();           // truncate to double size
-      __ popl(rcx);          // adjust rsp
-      __ popl(rcx);
+      __ pop(rcx);           // adjust rsp
+      __ pop(rcx);
       break;
     case Bytecodes::_f2i:
-      __ pushl(rcx);         // reserve space for argument
+      __ push(rcx);          // reserve space for argument
       __ fstp_s(at_rsp());   // pass float argument on stack
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
       break;
     case Bytecodes::_f2l:
-      __ pushl(rcx);         // reserve space for argument
+      __ push(rcx);          // reserve space for argument
       __ fstp_s(at_rsp());   // pass float argument on stack
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
       break;
     case Bytecodes::_f2d:
       /* nothing to do */
       break;
     case Bytecodes::_d2i:
-      __ pushl(rcx);         // reserve space for argument
-      __ pushl(rcx);
+      __ push(rcx);          // reserve space for argument
+      __ push(rcx);
       __ fstp_d(at_rsp());   // pass double argument on stack
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
       break;
     case Bytecodes::_d2l:
-      __ pushl(rcx);         // reserve space for argument
-      __ pushl(rcx);
+      __ push(rcx);          // reserve space for argument
+      __ push(rcx);
       __ fstp_d(at_rsp());   // pass double argument on stack
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
       break;
     case Bytecodes::_d2f:
-      __ pushl(rcx);         // reserve space for f2ieee()
+      __ push(rcx);          // reserve space for f2ieee()
       __ f2ieee();           // truncate to float size
-      __ popl(rcx);          // adjust rsp
+      __ pop(rcx);           // adjust rsp
       break;
     default             :
       ShouldNotReachHere();
   }
 }

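The shift pairs in the conversion code implement Java's narrow-then-widen
semantics; restated as plain C++ over 32-bit ints:

    int i2b(int x) { return (signed char)x;    }  // sign-extend low byte
    int i2c(int x) { return (unsigned short)x; }  // zero-extend low 16 bits
    int i2s(int x) { return (short)x;          }  // sign-extend low 16 bits

The LP64_ONLY(movsbl/movzwl/movswl) additions perform the same extension with
one dedicated instruction when this shared template code is built for 64-bit.
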
@@ -1466,24 +1549,24 @@
 void TemplateTable::lcmp() {
   transition(ltos, itos);
   // y = rdx:rax
   __ pop_l(rbx, rcx);             // get x = rcx:rbx
   __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
-  __ movl(rax, rcx);
+  __ mov(rax, rcx);
 }
 
 
 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
   if (is_float) {
     __ pop_ftos_to_rsp();
     __ fld_s(at_rsp());
   } else {
     __ pop_dtos_to_rsp();
     __ fld_d(at_rsp());
-    __ popl(rdx);
+    __ pop(rdx);
   }
-  __ popl(rcx);
+  __ pop(rcx);
   __ fcmp2int(rax, unordered_result < 0);
 }
 
 
 void TemplateTable::branch(bool is_jsr, bool is_wide) {

@@ -1494,36 +1577,38 @@
   const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() + InvocationCounter::counter_offset();
   const int method_offset = frame::interpreter_frame_method_offset * wordSize;
 
   // Load up EDX with the branch displacement
   __ movl(rdx, at_bcp(1));
-  __ bswap(rdx);
+  __ bswapl(rdx);
   if (!is_wide) __ sarl(rdx, 16);
+  LP64_ONLY(__ movslq(rdx, rdx));
+
 
   // Handle all the JSR stuff here, then exit.
   // It's much shorter and cleaner than intermingling with the
   // non-JSR normal-branch stuff occurring below.
   if (is_jsr) {
     // Pre-load the next target bytecode into EBX
     __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1, 0));
 
     // compute return address as bci in rax,
-    __ leal(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
-    __ subl(rax, Address(rcx, methodOopDesc::const_offset()));
-    // Adjust the bcp in ESI by the displacement in EDX
-    __ addl(rsi, rdx);
+    __ lea(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
+    __ subptr(rax, Address(rcx, methodOopDesc::const_offset()));
+    // Adjust the bcp in RSI by the displacement in EDX
+    __ addptr(rsi, rdx);
     // Push return address
     __ push_i(rax);
     // jsr returns vtos
     __ dispatch_only_noverify(vtos);
     return;
   }
 
   // Normal (non-jsr) branch handling
 
-  // Adjust the bcp in ESI by the displacement in EDX
-  __ addl(rsi, rdx);
+  // Adjust the bcp in RSI by the displacement in EDX
+  __ addptr(rsi, rdx);
 
   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
   Label backedge_counter_overflow;
   Label profile_method;
   Label dispatch;

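What the displacement load at the top of branch() computes, restated in C++
(a sketch; bcp points at the branch opcode and offsets are stored big-endian
in the bytecode stream):

    #include <stdint.h>
    int32_t branch_offset(const uint8_t* bcp, bool wide) {
      uint32_t be = ((uint32_t)bcp[1] << 24) | ((uint32_t)bcp[2] << 16) |
                    ((uint32_t)bcp[3] << 8)  |  (uint32_t)bcp[4];
      return wide ? (int32_t)be : (int32_t)be >> 16;  // sarl(rdx, 16)
    }

movl+bswapl loads four bytes and reverses them; in the narrow case the
arithmetic right shift drops the two extra bytes while sign-extending the
16-bit offset, and the new LP64_ONLY(movslq) widens the signed 32-bit result
before it is added to the pointer-sized bcp.
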
@@ -1538,11 +1623,11 @@
     __ testl(rdx, rdx);             // check if forward or backward branch
     __ jcc(Assembler::positive, dispatch); // count only if backward branch
 
     // increment counter 
     __ movl(rax, Address(rcx, be_offset));        // load backedge counter
-    __ increment(rax, InvocationCounter::count_increment); // increment counter
+    __ incrementl(rax, InvocationCounter::count_increment); // increment counter
     __ movl(Address(rcx, be_offset), rax);        // store counter
 
     __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
     __ andl(rax, InvocationCounter::count_mask_value);     // and the status bits
     __ addl(rax, Address(rcx, be_offset));        // add both counters

@@ -1566,11 +1651,11 @@
         // methodDataOop, whose value does not get reset on the call to
         // frequency_counter_overflow().  To avoid excessive calls to the overflow
         // routine while the method is being compiled, add a second test to make 
         // sure the overflow function is called only once every overflow_frequency.
         const int overflow_frequency = 1024;
-        __ andl(rbx, overflow_frequency-1);
+        __ andptr(rbx, overflow_frequency-1);
         __ jcc(Assembler::zero, backedge_counter_overflow);
 
       }
     } else {
       if (UseOnStackReplacement) {

@@ -1597,59 +1682,59 @@
     if (ProfileInterpreter) {
       // Out-of-line code to allocate method data oop.
       __ bind(profile_method);
       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method), rsi);
       __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode
-      __ movl(rcx, Address(rbp, method_offset));
-      __ movl(rcx, Address(rcx, in_bytes(methodOopDesc::method_data_offset())));
-      __ movl(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
+      __ movptr(rcx, Address(rbp, method_offset));
+      __ movptr(rcx, Address(rcx, in_bytes(methodOopDesc::method_data_offset())));
+      __ movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
       __ test_method_data_pointer(rcx, dispatch);
       // offset non-null mdp by MDO::data_offset() + IR::profile_method()
-      __ addl(rcx, in_bytes(methodDataOopDesc::data_offset()));
-      __ addl(rcx, rax);
-      __ movl(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
+      __ addptr(rcx, in_bytes(methodDataOopDesc::data_offset()));
+      __ addptr(rcx, rax);
+      __ movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), rcx);
       __ jmp(dispatch);
     }
 
     if (UseOnStackReplacement) {
 
       // invocation counter overflow
       __ bind(backedge_counter_overflow);
-      __ negl(rdx);
-      __ addl(rdx, rsi);        // branch bcp
+      __ negptr(rdx);
+      __ addptr(rdx, rsi);        // branch bcp
       call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rdx);
       __ load_unsigned_byte(rbx, Address(rsi, 0));  // restore target bytecode
 
       // rax,: osr nmethod (osr ok) or NULL (osr not possible)
       // rbx,: target bytecode
       // rdx: scratch
       // rdi: locals pointer
       // rsi: bcp
-      __ testl(rax, rax);                        // test result
+      __ testptr(rax, rax);                      // test result
       __ jcc(Assembler::zero, dispatch);         // no osr if null
       // nmethod may have been invalidated (VM may block upon call_VM return)
       __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
       __ cmpl(rcx, InvalidOSREntryBci);
       __ jcc(Assembler::equal, dispatch);
       
       // We have the address of an on stack replacement routine in rax,        
       // We need to prepare to execute the OSR method. First we must
       // migrate the locals and monitors off of the stack.
 
-      __ movl(rsi, rax);                             // save the nmethod
+      __ mov(rbx, rax);                             // save the nmethod
 
       const Register thread = rcx;
       __ get_thread(thread);
       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
       // rax, is OSR buffer, move it to expected parameter location
-      __ movl(rcx, rax);
+      __ mov(rcx, rax);
 
       // pop the interpreter frame
-      __ movl(rdx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
+      __ movptr(rdx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
       __ leave();                                // remove frame anchor
-      __ popl(rdi);                              // get return address
-      __ movl(rsp, rdx);                         // set sp to sender sp
+      __ pop(rdi);                               // get return address
+      __ mov(rsp, rdx);                          // set sp to sender sp
 
 
       Label skip;
       Label chkint;
 

@@ -1664,36 +1749,36 @@
 
       __ cmp32(rdi, ExternalAddress(StubRoutines::_call_stub_return_address));
       __ jcc(Assembler::notEqual, chkint);
 
       // yes adjust to the specialized call stub  return.
-      assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL, "must be set");
-      __ lea(rdi, ExternalAddress(StubRoutines::i486::get_call_stub_compiled_return()));
+      assert(StubRoutines::x86::get_call_stub_compiled_return() != NULL, "must be set");
+      __ lea(rdi, ExternalAddress(StubRoutines::x86::get_call_stub_compiled_return()));
       __ jmp(skip);
 
       __ bind(chkint);
 
       // Are we returning to the interpreter? Look for sentinel
 
-      __ cmpl(Address(rdi, -8), Interpreter::return_sentinel);
+      __ cmpl(Address(rdi, -2*wordSize), Interpreter::return_sentinel);
       __ jcc(Assembler::notEqual, skip);
 
       // Adjust to compiled return back to interpreter
 
-      __ movl(rdi, Address(rdi, -4));
+      __ movptr(rdi, Address(rdi, -wordSize));
       __ bind(skip);
 
       // Align stack pointer for compiled code (note that caller is
       // responsible for undoing this fixup by remembering the old SP
       // in an rbp,-relative location)
-      __ andl(rsp, -(StackAlignmentInBytes));
+      __ andptr(rsp, -(StackAlignmentInBytes));
 
       // push the (possibly adjusted) return address
-      __ pushl(rdi);
+      __ push(rdi);
 
       // and begin the OSR nmethod
-      __ jmp(Address(rsi, nmethod::osr_entry_point_offset()));
+      __ jmp(Address(rbx, nmethod::osr_entry_point_offset()));
     }
   }
 }
 
 

@@ -1724,11 +1809,11 @@
 
 void TemplateTable::if_nullcmp(Condition cc) {
   transition(atos, vtos);
   // assume branch is more often taken than not (loops use backward branches)
   Label not_taken;
-  __ testl(rax, rax);
+  __ testptr(rax, rax);
   __ jcc(j_not(cc), not_taken);
   branch(false, false);
   __ bind(not_taken);
   __ profile_not_taken_branch(rax);
 }

@@ -1737,68 +1822,68 @@
 void TemplateTable::if_acmp(Condition cc) {
   transition(atos, vtos);
   // assume branch is more often taken than not (loops use backward branches)
   Label not_taken;
   __ pop_ptr(rdx);
-  __ cmpl(rdx, rax);
+  __ cmpptr(rdx, rax);
   __ jcc(j_not(cc), not_taken);
   branch(false, false);
   __ bind(not_taken);
   __ profile_not_taken_branch(rax);
 }
 
 
 void TemplateTable::ret() {
   transition(vtos, vtos);
   locals_index(rbx);
-  __ movl(rbx, iaddress(rbx));                   // get return bci, compute return bcp
+  __ movptr(rbx, iaddress(rbx));                   // get return bci, compute return bcp
   __ profile_ret(rbx, rcx);
   __ get_method(rax);
-  __ movl(rsi, Address(rax, methodOopDesc::const_offset()));
-  __ leal(rsi, Address(rsi, rbx, Address::times_1,
+  __ movptr(rsi, Address(rax, methodOopDesc::const_offset()));
+  __ lea(rsi, Address(rsi, rbx, Address::times_1,
                        constMethodOopDesc::codes_offset()));
   __ dispatch_next(vtos);
 }
 
 
 void TemplateTable::wide_ret() {
   transition(vtos, vtos);
   locals_index_wide(rbx);
-  __ movl(rbx, iaddress(rbx));                   // get return bci, compute return bcp
+  __ movptr(rbx, iaddress(rbx));                   // get return bci, compute return bcp
   __ profile_ret(rbx, rcx);
   __ get_method(rax);
-  __ movl(rsi, Address(rax, methodOopDesc::const_offset()));
-  __ leal(rsi, Address(rsi, rbx, Address::times_1, constMethodOopDesc::codes_offset()));
+  __ movptr(rsi, Address(rax, methodOopDesc::const_offset()));
+  __ lea(rsi, Address(rsi, rbx, Address::times_1, constMethodOopDesc::codes_offset()));
   __ dispatch_next(vtos);
 }
 
 
 void TemplateTable::tableswitch() {
   Label default_case, continue_execution;
   transition(itos, vtos);
   // align rsi
-  __ leal(rbx, at_bcp(wordSize));
-  __ andl(rbx, -wordSize);
+  __ lea(rbx, at_bcp(wordSize));
+  __ andptr(rbx, -wordSize);
   // load lo & hi
   __ movl(rcx, Address(rbx, 1 * wordSize));
   __ movl(rdx, Address(rbx, 2 * wordSize));
-  __ bswap(rcx);
-  __ bswap(rdx);
+  __ bswapl(rcx);
+  __ bswapl(rdx);
   // check against lo & hi
   __ cmpl(rax, rcx);
   __ jccb(Assembler::less, default_case);
   __ cmpl(rax, rdx);
   __ jccb(Assembler::greater, default_case);
   // lookup dispatch offset
   __ subl(rax, rcx);
-  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * wordSize));
+  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
   __ profile_switch_case(rax, rbx, rcx);
   // continue execution
   __ bind(continue_execution);
-  __ bswap(rdx);
+  __ bswapl(rdx);
   __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
-  __ addl(rsi, rdx);
+  __ addptr(rsi, rdx);
   __ dispatch_only(vtos);
   // handle default
   __ bind(default_case);
   __ profile_switch_default(rax);
   __ movl(rdx, Address(rbx, 0));

@@ -1813,25 +1898,25 @@
 
 
 void TemplateTable::fast_linearswitch() {
   transition(itos, vtos);
   Label loop_entry, loop, found, continue_execution;  
-  // bswap rax, so we can avoid bswapping the table entries
-  __ bswap(rax);
+  // bswapl rax, so we can avoid bswapping the table entries
+  __ bswapl(rax);
   // align rsi
-  __ leal(rbx, at_bcp(wordSize));                // btw: should be able to get rid of this instruction (change offsets below)
-  __ andl(rbx, -wordSize);
+  __ lea(rbx, at_bcp(wordSize));                // btw: should be able to get rid of this instruction (change offsets below)
+  __ andptr(rbx, -wordSize);
   // set counter
   __ movl(rcx, Address(rbx, wordSize));  
-  __ bswap(rcx);
+  __ bswapl(rcx);
   __ jmpb(loop_entry);
   // table search
   __ bind(loop);
   __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * wordSize));
   __ jccb(Assembler::equal, found);
   __ bind(loop_entry);
-  __ decrement(rcx);
+  __ decrementl(rcx);
   __ jcc(Assembler::greaterEqual, loop);
   // default case
   __ profile_switch_default(rax);
   __ movl(rdx, Address(rbx, 0));
   __ jmpb(continue_execution);

@@ -1839,13 +1924,13 @@
   __ bind(found);
   __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * wordSize));
   __ profile_switch_case(rcx, rax, rbx);
   // continue execution
   __ bind(continue_execution);  
-  __ bswap(rdx);
+  __ bswapl(rdx);
   __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1));
-  __ addl(rsi, rdx);
+  __ addptr(rsi, rdx);
   __ dispatch_only(vtos);
 }
 
 
 void TemplateTable::fast_binaryswitch() {

@@ -1883,17 +1968,17 @@
   const Register h     = rdi;                    // needs to be restored
   const Register temp  = rsi;
   // setup array
   __ save_bcp();
 
-  __ leal(array, at_bcp(3*wordSize));            // btw: should be able to get rid of this instruction (change offsets below)
-  __ andl(array, -wordSize);
+  __ lea(array, at_bcp(3*wordSize));             // btw: should be able to get rid of this instruction (change offsets below)
+  __ andptr(array, -wordSize);
   // initialize i & j
   __ xorl(i, i);                                 // i = 0;
   __ movl(j, Address(array, -wordSize));         // j = length(array);    
   // Convert j into native byte ordering
-  __ bswap(j);
+  __ bswapl(j);
   // and start
   Label entry;
   __ jmp(entry);
 
   // binary search loop

@@ -1907,22 +1992,22 @@
     // } else {
     //   i = h;
     // }
     // Convert array[h].match to native byte-ordering before compare
     __ movl(temp, Address(array, h, Address::times_8, 0*wordSize));
-    __ bswap(temp);
+    __ bswapl(temp);
     __ cmpl(key, temp);
     if (VM_Version::supports_cmov()) {
       __ cmovl(Assembler::less        , j, h);   // j = h if (key <  array[h].fast_match())
       __ cmovl(Assembler::greaterEqual, i, h);   // i = h if (key >= array[h].fast_match())
     } else {
       Label set_i, end_of_if;
       __ jccb(Assembler::greaterEqual, set_i);    // {
-      __ movl(j, h);                             //   j = h;
+      __ mov(j, h);                                //   j = h;
       __ jmp(end_of_if);                         // }
       __ bind(set_i);                            // else {
-      __ movl(i, h);                             //   i = h;
+      __ mov(i, h);                                //   i = h;
       __ bind(end_of_if);                        // }
     }
     // while (i+1 < j)
     __ bind(entry);
     __ leal(h, Address(i, 1));                   // i+1

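The loop above is the classic half-open binary search over sorted
(match, offset) pairs; in C++ terms, following the pseudocode comments in the
surrounding code (entry layout assumed: two 4-byte big-endian words per pair):

    while (i + 1 < j) {
      int h = (i + j) >> 1;                      // i < h < j
      if (key < byteswap(array[h].match)) j = h;
      else                                i = h;
    }
    // result index is i; the code below re-checks array[i].match == key

Here byteswap stands in for the bswapl that converts each stored match to
native byte order before the compare.
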
@@ -1932,46 +2017,48 @@
 
   // end of binary search, result index is i (must check again!)
   Label default_case;
   // Convert array[i].match to native byte-ordering before compare
   __ movl(temp, Address(array, i, Address::times_8, 0*wordSize));
-  __ bswap(temp);
+  __ bswapl(temp);
   __ cmpl(key, temp);
   __ jcc(Assembler::notEqual, default_case);
 
   // entry found -> j = offset
   __ movl(j , Address(array, i, Address::times_8, 1*wordSize));
   __ profile_switch_case(i, key, array);
-  __ bswap(j);
+  __ bswapl(j);
+  LP64_ONLY(__ movslq(j, j));
   __ restore_bcp();
   __ restore_locals();                           // restore rdi
   __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
   
-  __ addl(rsi, j);
+  __ addptr(rsi, j);
   __ dispatch_only(vtos);
 
   // default case -> j = default offset
   __ bind(default_case);
   __ profile_switch_default(i);
   __ movl(j, Address(array, -2*wordSize));
-  __ bswap(j);
+  __ bswapl(j);
+  LP64_ONLY(__ movslq(j, j));
   __ restore_bcp();
   __ restore_locals();                           // restore rdi
   __ load_unsigned_byte(rbx, Address(rsi, j, Address::times_1));
-  __ addl(rsi, j);
+  __ addptr(rsi, j);
   __ dispatch_only(vtos);
 }
 
 
 void TemplateTable::_return(TosState state) {
   transition(state, state);
   assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
 
   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
     assert(state == vtos, "only valid state");
-    __ movl(rax, aaddress(0));
-    __ movl(rdi, Address(rax, oopDesc::klass_offset_in_bytes()));
+    __ movptr(rax, aaddress(0));
+    __ movptr(rdi, Address(rax, oopDesc::klass_offset_in_bytes()));
     __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
     Label skip_register_finalizer;
     __ jcc(Assembler::zero, skip_register_finalizer);
 

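The LP64_ONLY(movslq(j, j)) additions matter because switch offsets are
signed 32-bit values and the following addptr(rsi, j) consumes the whole
register on a 64-bit build. A 32-bit movl zero-extends, which corrupts
backward (negative) offsets:

    // offset -8 loaded via movl:  j = 0x00000000FFFFFFF8  (huge positive)
    // after movslq(j, j):         j = 0xFFFFFFFFFFFFFFF8  (-8, correct)
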
@@ -2008,14 +2095,14 @@
 // previous memory refs).  Requirements (2) and (3) require some barriers
 // before volatile stores and after volatile loads.  These nearly cover
 // requirement (1) but miss the volatile-store-volatile-load case.  This final
 // case is placed after volatile-stores although it could just as well go
 // before volatile-loads.
-void TemplateTable::volatile_barrier( ) {
+void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits order_constraint) {
   // Helper function to insert a is-volatile test and memory barrier
   if( !os::is_MP() ) return;    // Not needed on single CPU
-  __ membar();
+  __ membar(order_constraint);
 }
 
 void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) {
   assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
 

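volatile_barrier now takes an explicit Membar_mask_bits argument, so each
call site names exactly the orderings it needs instead of always paying for a
full fence. The bits are OR-ed together, as the volatile-store sites in the
putfield hunks below do:

    volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
                                                 Assembler::StoreStore));
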
@@ -2024,14 +2111,17 @@
   assert_different_registers(Rcache, index, temp);
 
   const int shift_count = (1 + byte_no)*BitsPerByte;
   Label resolved;
   __ get_cache_and_index_at_bcp(Rcache, index, 1);
-  __ movl(temp, Address(Rcache, index, Address::times_4, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+  __ movl(temp, Address(Rcache,
+                          index,
+                          Address::times_ptr,
+                          constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
   __ shrl(temp, shift_count);
   // have we resolved this bytecode? 
-  __ andl(temp, 0xFF);
+  __ andptr(temp, 0xFF);
   __ cmpl(temp, (int)bytecode());
   __ jcc(Assembler::equal, resolved);
 
   // resolve first time through
   address entry;

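Address::times_ptr, used for all the constant-pool-cache indexing in these
hunks, is a platform alias for the scale factor (a sketch of how the
assembler presumably defines it):

    // among the Address scale factors:
    //   times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)

Cache-entry fields are pointer-sized words, so indexing must scale with the
word size, while genuinely 32-bit loads such as the flags word keep movl.
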
@@ -2063,19 +2153,19 @@
                                               bool is_static = false) {
   assert_different_registers(cache, index, flags, off);
 
   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
   // Field offset
-  __ movl(off, Address(cache, index, Address::times_4, 
+  __ movptr(off, Address(cache, index, Address::times_ptr,
            in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())));
   // Flags    
-  __ movl(flags, Address(cache, index, Address::times_4,
+  __ movl(flags, Address(cache, index, Address::times_ptr,
            in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())));
 
   // klass     overwrite register
   if (is_static) {
-    __ movl(obj, Address(cache, index, Address::times_4,
+    __ movptr(obj, Address(cache, index, Address::times_ptr,
              in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())));
   }
 }
 
 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,

@@ -2105,16 +2195,15 @@
   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::f2_offset());
 
   resolve_cache_and_index(byte_no, cache, index);
 
-  assert(wordSize == 4, "adjust code below");
-  __ movl(method, Address(cache, index, Address::times_4, method_offset));
+  __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
   if (itable_index != noreg) {
-    __ movl(itable_index, Address(cache, index, Address::times_4, index_offset));
+    __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
   }
-  __ movl(flags , Address(cache, index, Address::times_4, flags_offset ));
+  __ movl(flags , Address(cache, index, Address::times_ptr, flags_offset ));
 }
 
 
 // The registers cache and index expected to be set before call.
 // Correct values of the cache and index registers are preserved.

@@ -2130,15 +2219,15 @@
     __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
     __ testl(rax,rax);
     __ jcc(Assembler::zero, L1);
 
     // cache entry pointer
-    __ addl(cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
+    __ addptr(cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
     __ shll(index, LogBytesPerWord);
-    __ addl(cache, index);
+    __ addptr(cache, index);
     if (is_static) {
-      __ movl(rax, 0);      // NULL object reference
+      __ xorptr(rax, rax);      // NULL object reference
     } else {
       __ pop(atos);         // Get the object
       __ verify_oop(rax);
       __ push(atos);        // Restore stack state
     }

@@ -2178,11 +2267,11 @@
   Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
 
   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
   assert(btos == 0, "change code, btos != 0");
   // btos
-  __ andl(flags, 0x0f);
+  __ andptr(flags, 0x0f);
   __ jcc(Assembler::notZero, notByte);
 
   __ load_signed_byte(rax, lo ); 
   __ push(btos);
   // Rewrite bytecode to be faster

@@ -2246,14 +2335,14 @@
   __ jcc(Assembler::notEqual, notLong);
 
   // Generate code as if volatile.  There just aren't enough registers to
   // save that information and this code is faster than the test.
   __ fild_d(lo);                // Must load atomically
-  __ subl(rsp,2*wordSize);      // Make space for store
+  __ subptr(rsp,2*wordSize);    // Make space for store
   __ fistp_d(Address(rsp,0));
-  __ popl(rax);
-  __ popl(rdx);
+  __ pop(rax);
+  __ pop(rdx);
 
   __ push(ltos);
   // Don't rewrite to _fast_lgetfield for potential volatile case.
   __ jmp(Done);
 

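The x87 detour for the volatile long above is an atomicity idiom: two movl's
could tear a concurrently updated 64-bit field on 32-bit x86, while fild_d
and fistp_d each move all 8 bytes in a single memory access. The contract
being implemented, in modern C++ terms:

    #include <atomic>
    #include <cstdint>
    int64_t load_volatile_long(std::atomic<int64_t>* field) {
      return field->load();   // all 8 bytes must be read indivisibly
    }

The loaded value is then bounced through the expression stack (subptr,
fistp_d, pop, pop) to move it from ST0 into the rdx:rax pair that ltos
expects.
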
@@ -2320,43 +2409,43 @@
     // registers have to be correspondingly used after this line.
     __ get_cache_and_index_at_bcp(rax, rdx, 1);
 
     if (is_static) {
       // Life is simple.  Null out the object pointer.
-      __ xorl(rbx, rbx);
+      __ xorptr(rbx, rbx);
     } else {
       // Life is harder. The stack holds the value on top, followed by the object.
       // We don't know the size of the value, though; it could be one or two words
       // depending on its type. As a result, we must find the type to determine where
       // the object is.
       Label two_word, valsize_known;
-      __ movl(rcx, Address(rax, rdx, Address::times_4, in_bytes(cp_base_offset +
+      __ movl(rcx, Address(rax, rdx, Address::times_ptr, in_bytes(cp_base_offset +
                                    ConstantPoolCacheEntry::flags_offset())));
-      __ movl(rbx, rsp);
+      __ mov(rbx, rsp);
       __ shrl(rcx, ConstantPoolCacheEntry::tosBits);
       // Make sure we don't need to mask rcx for tosBits after the above shift
       ConstantPoolCacheEntry::verify_tosBits();
       __ cmpl(rcx, ltos);
       __ jccb(Assembler::equal, two_word);
       __ cmpl(rcx, dtos);
       __ jccb(Assembler::equal, two_word);
-      __ addl(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
+      __ addptr(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
       __ jmpb(valsize_known);
 
       __ bind(two_word);
-      __ addl(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
+      __ addptr(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
     
       __ bind(valsize_known);
       // setup object pointer
-      __ movl(rbx, Address(rbx, 0));
+      __ movptr(rbx, Address(rbx, 0));
     }
     // cache entry pointer
-    __ addl(rax, in_bytes(cp_base_offset));
+    __ addptr(rax, in_bytes(cp_base_offset));
     __ shll(rdx, LogBytesPerWord);
-    __ addl(rax, rdx);
+    __ addptr(rax, rdx);
     // object (tos)
-    __ movl(rcx, rsp);
+    __ mov(rcx, rsp);
     // rbx,: object pointer set up above (NULL if static)
     // rax,: cache entry pointer
     // rcx: jvalue object on the stack
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
                rbx, rax, rcx);

@@ -2427,15 +2516,16 @@
   __ jcc(Assembler::notEqual, notObj);
 
   __ pop(atos);
   if (!is_static) pop_and_check_object(obj);
 
-  __ movl(lo, rax );
-  __ store_check(obj, lo);  // Need to mark card
+  do_oop_store(_masm, lo, rax, _bs->kind(), false);
+
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
   }
+
   __ jmp(Done);
 
   __ bind(notObj);
   // ctos
   __ cmpl(flags, ctos );

@@ -2473,25 +2563,27 @@
 
   __ pop(ltos);  // overwrites rdx, do this after testing volatile.
   if (!is_static) pop_and_check_object(obj);
   
   // Replace with real volatile test
-  __ pushl(rdx);
-  __ pushl(rax);                // Must update atomically with FIST
+  __ push(rdx);
+  __ push(rax);                 // Must update atomically with FIST
   __ fild_d(Address(rsp,0));    // So load into FPU register
   __ fistp_d(lo);               // and put into memory atomically
-  __ addl(rsp,2*wordSize);
-  volatile_barrier();
+  __ addptr(rsp, 2*wordSize);
+  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
+                                               Assembler::StoreStore));
   // Don't rewrite volatile version
   __ jmp(notVolatile);
 
   __ bind(notVolatileLong);
 
   __ pop(ltos);  // overwrites rdx
   if (!is_static) pop_and_check_object(obj);
-  __ movl(hi, rdx);
-  __ movl(lo, rax);
+  NOT_LP64(__ movptr(hi, rdx));
+  __ movptr(lo, rax);
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_lputfield, rcx, rbx);
   }
   __ jmp(notVolatile);
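   // Note: the explicit Membar_mask_bits(StoreLoad | StoreStore) arguments
   // above request exactly the fences a volatile store needs: earlier
   // stores may not pass it (StoreStore) and later loads may not pass it
   // (StoreLoad). On x86 only the StoreLoad part costs a real instruction.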
 

@@ -2528,11 +2620,12 @@
   __ bind(Done);
 
   // Check for volatile store
   __ testl(rdx, rdx);
   __ jcc(Assembler::zero, notVolatile);
-  volatile_barrier( );
+  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
+                                               Assembler::StoreStore));
   __ bind(notVolatile);
 }
 
 
 void TemplateTable::putfield(int byte_no) {

@@ -2553,46 +2646,53 @@
     __ testl(rcx,rcx);
     __ jcc(Assembler::zero, L2);
     __ pop_ptr(rbx);               // copy the object pointer from tos
     __ verify_oop(rbx);
     __ push_ptr(rbx);              // put the object pointer back on tos
-    __ subl(rsp, sizeof(jvalue));  // add space for a jvalue object      
-    __ movl(rcx, rsp);
+    __ subptr(rsp, sizeof(jvalue));  // add space for a jvalue object
+    __ mov(rcx, rsp);
     __ push_ptr(rbx);                 // save object pointer so we can steal rbx,
-    __ movl(rbx, 0);
+    __ xorptr(rbx, rbx);
     const Address lo_value(rcx, rbx, Address::times_1, 0*wordSize);
     const Address hi_value(rcx, rbx, Address::times_1, 1*wordSize);
     switch (bytecode()) {          // load values into the jvalue object
     case Bytecodes::_fast_bputfield: __ movb(lo_value, rax); break;
     case Bytecodes::_fast_sputfield: __ movw(lo_value, rax); break;
     case Bytecodes::_fast_cputfield: __ movw(lo_value, rax); break;
     case Bytecodes::_fast_iputfield: __ movl(lo_value, rax);                         break;
-    case Bytecodes::_fast_lputfield: __ movl(hi_value, rdx); __ movl(lo_value, rax); break;
+    case Bytecodes::_fast_lputfield:
+      NOT_LP64(__ movptr(hi_value, rdx));
+      __ movptr(lo_value, rax);
+      break;
+
     // need to call fld_s() after fstp_s() to restore the value for below
     case Bytecodes::_fast_fputfield: __ fstp_s(lo_value); __ fld_s(lo_value);        break;
+
     // need to call fld_d() after fstp_d() to restore the value for below
     case Bytecodes::_fast_dputfield: __ fstp_d(lo_value); __ fld_d(lo_value);        break;
+
     // since rcx is not an object we don't call store_check() here
-    case Bytecodes::_fast_aputfield: __ movl(lo_value, rax);                         break;
+    case Bytecodes::_fast_aputfield: __ movptr(lo_value, rax);                       break;
+
     default:  ShouldNotReachHere();
     }
     __ pop_ptr(rbx);  // restore copy of object pointer
 
     // Save rax, and sometimes rdx because call_VM() will clobber them,
     // then use them for JVM/DI purposes
-    __ pushl(rax);
-    if (bytecode() == Bytecodes::_fast_lputfield) __ pushl(rdx);
+    __ push(rax);
+    if (bytecode() == Bytecodes::_fast_lputfield) __ push(rdx);
     // access constant pool cache entry
     __ get_cache_entry_pointer_at_bcp(rax, rdx, 1);
     __ verify_oop(rbx);
     // rbx,: object pointer copied above
     // rax,: cache entry pointer
     // rcx: jvalue object on the stack
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx);
-    if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);  // restore high value
-    __ popl(rax);     // restore lower value   
-    __ addl(rsp, sizeof(jvalue));  // release jvalue object space
+    if (bytecode() == Bytecodes::_fast_lputfield) __ pop(rdx);  // restore high value
+    __ pop(rax);     // restore lower value
+    __ addptr(rsp, sizeof(jvalue));  // release jvalue object space
     __ bind(L2);
   }
 }
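 // Note: in rough pseudocode the JVMTI hook above does
 //
 //   if (field_modification_count == 0) goto L2;   // the rcx test
 //   jvalue* v = (jvalue*)(rsp -= sizeof(jvalue)); // scratch on the stack
 //   copy the tos value into *v;                   // width per bytecode()
 //   InterpreterRuntime::post_field_modification(obj, cache_entry, v);
 //   rsp += sizeof(jvalue);                        // release scratch space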
 
 void TemplateTable::fast_storefield(TosState state) {

@@ -2604,16 +2704,16 @@
 
   // access constant pool cache
   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
 
   // test for volatile with rdx but rdx is tos register for lputfield.
-  if (bytecode() == Bytecodes::_fast_lputfield) __ pushl(rdx);
-  __ movl(rdx, Address(rcx, rbx, Address::times_4, in_bytes(base +
+  if (bytecode() == Bytecodes::_fast_lputfield) __ push(rdx);
+  __ movl(rdx, Address(rcx, rbx, Address::times_ptr, in_bytes(base +
                        ConstantPoolCacheEntry::flags_offset())));
 
   // replace index with field offset from cache entry
-  __ movl(rbx, Address(rcx, rbx, Address::times_4, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
+  __ movptr(rbx, Address(rcx, rbx, Address::times_ptr, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
 
   // Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
   // volatile_barrier( );
 
   Label notVolatile, Done;

@@ -2621,11 +2721,11 @@
   __ andl(rdx, 0x1);
   // Check for volatile store
   __ testl(rdx, rdx);
   __ jcc(Assembler::zero, notVolatile);
 
-  if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);
+  if (bytecode() == Bytecodes::_fast_lputfield) __ pop(rdx);
 
   // Get object from stack
   pop_and_check_object(rcx);
 
   // field addresses

@@ -2636,40 +2736,54 @@
   switch (bytecode()) {
     case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
     case Bytecodes::_fast_sputfield: // fall through
     case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
     case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
-    case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax);        break;
+    case Bytecodes::_fast_lputfield:
+      NOT_LP64(__ movptr(hi, rdx));
+      __ movptr(lo, rax);
+      break;
     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
-    case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break;
+    case Bytecodes::_fast_aputfield: {
+      do_oop_store(_masm, lo, rax, _bs->kind(), false);
+      break;
+    }
     default:
       ShouldNotReachHere();
   }
 
   Label done;
-  volatile_barrier( );
-  __ jmpb(done);
+  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
+                                               Assembler::StoreStore));
+  // Barriers are so large that short branch doesn't reach!
+  __ jmp(done);
 
   // Same code as above, but don't need rdx to test for volatile.
   __ bind(notVolatile);
 
-  if (bytecode() == Bytecodes::_fast_lputfield) __ popl(rdx);
+  if (bytecode() == Bytecodes::_fast_lputfield) __ pop(rdx);
 
   // Get object from stack
   pop_and_check_object(rcx);
 
   // access field
   switch (bytecode()) {
     case Bytecodes::_fast_bputfield: __ movb(lo, rax); break;
     case Bytecodes::_fast_sputfield: // fall through
     case Bytecodes::_fast_cputfield: __ movw(lo, rax); break;
     case Bytecodes::_fast_iputfield: __ movl(lo, rax); break;
-    case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax);        break;
+    case Bytecodes::_fast_lputfield:
+      NOT_LP64(__ movptr(hi, rdx));
+      __ movptr(lo, rax);
+      break;
     case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
     case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
-    case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break;
+    case Bytecodes::_fast_aputfield: {
+      do_oop_store(_masm, lo, rax, _bs->kind(), false);
+      break;
+    }
     default:
       ShouldNotReachHere();
   }
   __ bind(done);
 }

@@ -2698,11 +2812,14 @@
   }
 
   // access constant pool cache
   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
   // replace index with field offset from cache entry
-  __ movl(rbx, Address(rcx, rbx, Address::times_4, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
+  __ movptr(rbx, Address(rcx,
+                         rbx,
+                         Address::times_ptr,
+                         in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
 
 
   // rax,: object
   __ verify_oop(rax);
   __ null_check(rax);

@@ -2710,18 +2827,18 @@
   const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
   const Address hi = Address(rax, rbx, Address::times_1, 1*wordSize);
 
   // access field
   switch (bytecode()) {
-    case Bytecodes::_fast_bgetfield: __ movsxb(rax, lo );                 break;
+    case Bytecodes::_fast_bgetfield: __ movsbl(rax, lo );                 break;
     case Bytecodes::_fast_sgetfield: __ load_signed_word(rax, lo );       break;
     case Bytecodes::_fast_cgetfield: __ load_unsigned_word(rax, lo );     break;
     case Bytecodes::_fast_igetfield: __ movl(rax, lo);                    break;
     case Bytecodes::_fast_lgetfield: __ stop("should not be rewritten");  break;
     case Bytecodes::_fast_fgetfield: __ fld_s(lo);                        break;
     case Bytecodes::_fast_dgetfield: __ fld_d(lo);                        break;
-    case Bytecodes::_fast_agetfield: __ movl(rax, lo); __ verify_oop(rax); break;
+    case Bytecodes::_fast_agetfield: __ movptr(rax, lo); __ verify_oop(rax); break;
     default:
       ShouldNotReachHere();
   }
 
   // Doug Lea believes this is not needed with current Sparcs(TSO) and Intel(PSO)

@@ -2729,23 +2846,26 @@
 }
 
 void TemplateTable::fast_xaccess(TosState state) {
   transition(vtos, state);
   // get receiver
-  __ movl(rax, aaddress(0));
+  __ movptr(rax, aaddress(0));
   debug_only(__ verify_local_tag(frame::TagReference, 0));
   // access constant pool cache
   __ get_cache_and_index_at_bcp(rcx, rdx, 2);
-  __ movl(rbx, Address(rcx, rdx, Address::times_4, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
+  __ movptr(rbx, Address(rcx,
+                         rdx,
+                         Address::times_ptr,
+                         in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset())));
   // make sure exception is reported in correct bcp range (getfield is next instruction)
   __ increment(rsi);
   __ null_check(rax);
   const Address lo = Address(rax, rbx, Address::times_1, 0*wordSize);
   if (state == itos) {
     __ movl(rax, lo);
   } else if (state == atos) {
-    __ movl(rax, lo);
+    __ movptr(rax, lo);
     __ verify_oop(rax);
   } else if (state == ftos) {
     __ fld_s(lo);
   } else {
     ShouldNotReachHere();

@@ -2785,42 +2905,41 @@
   // load receiver if needed (note: no return address pushed yet)
   if (load_receiver) {
     __ movl(recv, flags);
     __ andl(recv, 0xFF);
     // recv count is 0 based?
-    __ movl(recv, Address(rsp, recv, Interpreter::stackElementScale(), -Interpreter::expr_offset_in_bytes(1)));
+    __ movptr(recv, Address(rsp, recv, Interpreter::stackElementScale(), -Interpreter::expr_offset_in_bytes(1)));
     __ verify_oop(recv);
   }
 
   // do null check if needed
   if (receiver_null_check) {
     __ null_check(recv);
   }
 
   if (save_flags) {
-    __ movl(rsi, flags);
+    __ mov(rsi, flags);
   }
 
   // compute return type
   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
   // Make sure we don't need to mask flags for tosBits after the above shift
   ConstantPoolCacheEntry::verify_tosBits();
   // load return address
-  { const int table =
-      is_invokeinterface
-      ? (int)Interpreter::return_5_addrs_by_index_table()
-      : (int)Interpreter::return_3_addrs_by_index_table();
-    __ movl(flags, Address(noreg, flags, Address::times_4, table));
+  {
+    ExternalAddress table(is_invokeinterface ? (address)Interpreter::return_5_addrs_by_index_table() :
+                                               (address)Interpreter::return_3_addrs_by_index_table());
+    __ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr)));
   }
 
   // push return address
-  __ pushl(flags);
+  __ push(flags);
 
   // Restore flag value from the constant pool cache, and restore rsi
   // for later null checks.  rsi is the bytecode pointer
   if (save_flags) {
-    __ movl(flags, rsi);
+    __ mov(flags, rsi);
     __ restore_bcp();
   }
 }
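 // Note: the ArrayAddress form above expresses "load table[tos_state]"
 // without assuming the table's address fits in a 32-bit displacement,
 // which the old (int)-cast form did. Conceptually:
 //
 //   flags = return_addrs_by_index_table[tos_state];  // one entry per TosState
 //   push(flags);                                     // the call's return pc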
 
 

@@ -2853,20 +2972,20 @@
   __ bind(notFinal);
 
   // get receiver klass
   __ null_check(recv, oopDesc::klass_offset_in_bytes());
   // Keep recv in rcx; the callee expects it there
-  __ movl(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
+  __ movptr(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
   __ verify_oop(rax);
 
   // profile this call
   __ profile_virtual_call(rax, rdi, rdx);
 
   // get target methodOop & entry point
   const int base = instanceKlass::vtable_start_offset() * wordSize;    
   assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
-  __ movl(method, Address(rax, index, Address::times_4, base + vtableEntry::method_offset_in_bytes()));  
+  __ movptr(method, Address(rax, index, Address::times_ptr, base + vtableEntry::method_offset_in_bytes()));
   __ jump_from_interpreted(method, rdx);
 }
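 // Note: the dispatch above is the classic vtable lookup; in rough
 // pseudocode (vtable() standing in for the table embedded after the
 // instanceKlass):
 //
 //   klass  = receiver->klass();               // null-checked load
 //   method = klass->vtable()[index].method(); // movptr with times_ptr
 //   goto interpreted entry of method;         // jump_from_interpreted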
 
 
 void TemplateTable::invokevirtual(int byte_no) {

@@ -2928,76 +3047,76 @@
   invokevirtual_helper(rbx, rcx, rdx);
   __ bind(notMethod);
 
   // Get receiver klass into rdx - also a null check
   __ restore_locals();  // restore rdi
-  __ movl(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
+  __ movptr(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
   __ verify_oop(rdx);
 
   // profile this call
   __ profile_virtual_call(rdx, rsi, rdi);
 
-  __ movl(rdi, rdx); // Save klassOop in rdi
+  __ mov(rdi, rdx); // Save klassOop in rdi
 
   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
   const int base = instanceKlass::vtable_start_offset() * wordSize;    
-  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
+  assert(vtableEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below");
   __ movl(rsi, Address(rdx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable
-  __ leal(rdx, Address(rdx, rsi, Address::times_4, base));
+  __ lea(rdx, Address(rdx, rsi, Address::times_4, base));
   if (HeapWordsPerLong > 1) {
     // Round up to align_object_offset boundary
     __ round_to(rdx, BytesPerLong);
   }
 
   Label entry, search, interface_ok;
   
   __ jmpb(entry);   
   __ bind(search);
-  __ addl(rdx, itableOffsetEntry::size() * wordSize);
+  __ addptr(rdx, itableOffsetEntry::size() * wordSize);
   
   __ bind(entry);
 
   // Check that the entry is non-null.  A null entry means that the receiver
   // class doesn't implement the interface, and wasn't the same as the
   // receiver class checked when the interface was resolved.
-  __ pushl(rdx);
-  __ movl(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testl(rdx, rdx);
+  __ push(rdx);
+  __ movptr(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
+  __ testptr(rdx, rdx);
   __ jcc(Assembler::notZero, interface_ok);
   // throw exception
-  __ popl(rdx);          // pop saved register first.
-  __ popl(rbx);          // pop return address (pushed by prepare_invoke)
+  __ pop(rdx);           // pop saved register first.
+  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
   __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                    InterpreterRuntime::throw_IncompatibleClassChangeError));
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
   __ bind(interface_ok);
 
-    __ popl(rdx);
+    __ pop(rdx);
 
-    __ cmpl(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
+    __ cmpptr(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
     __ jcc(Assembler::notEqual, search);
         
     __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));      
-    __ addl(rdx, rdi); // Add offset to klassOop
-    assert(itableMethodEntry::size() * wordSize == 4, "adjust the scaling in the code below");
-    __ movl(rbx, Address(rdx, rbx, Address::times_4));
+    __ addptr(rdx, rdi); // Add offset to klassOop
+    assert(itableMethodEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below");
+    __ movptr(rbx, Address(rdx, rbx, Address::times_ptr));
     // rbx,: methodOop to call
     // rcx: receiver
     // Check for abstract method error
     // Note: This should be done more efficiently via a throw_abstract_method_error
     //       interpreter entry point and a conditional jump to it in case of a null
     //       method.
     { Label L;
-      __ testl(rbx, rbx);
+      __ testptr(rbx, rbx);
       __ jcc(Assembler::notZero, L);
       // throw exception
           // note: must restore interpreter registers to canonical
           //       state for exception handling to work correctly!
-          __ popl(rbx);          // pop return address (pushed by prepare_invoke)
+          __ pop(rbx);           // pop return address (pushed by prepare_invoke)
           __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
           __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
       // the call_VM checks for exception, so we should never return here.
       __ should_not_reach_here();
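       // Note: the itable walk above amounts to (rough pseudocode)
       //
       //   for (e = first_offset_entry; ; e++) {
       //     if (e->interface() == NULL) throw IncompatibleClassChangeError;
       //     if (e->interface() == resolved_interface) break;
       //   }
       //   method = *(methodOop*)(klass + e->offset() + itable_index * wordSize);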

@@ -3020,16 +3139,14 @@
   Label done;
   Label initialize_header;
   Label initialize_object;  // including clearing the fields
   Label allocate_shared;
 
-  ExternalAddress heap_top((address)Universe::heap()->top_addr());
-
   __ get_cpool_and_tags(rcx, rax);
   // get instanceKlass
-  __ movl(rcx, Address(rcx, rdx, Address::times_4, sizeof(constantPoolOopDesc)));
-  __ pushl(rcx);  // save the contexts of klass for initializing the header
+  __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(constantPoolOopDesc)));
+  __ push(rcx);  // save the contents of klass for initializing the header
 
   // make sure the class we're about to instantiate has been resolved. 
   // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
   __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);

@@ -3058,15 +3175,15 @@
 
   if (UseTLAB) {
     const Register thread = rcx;
 
     __ get_thread(thread);
-    __ movl(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
-    __ leal(rbx, Address(rax, rdx, Address::times_1));
-    __ cmpl(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
+    __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
+    __ lea(rbx, Address(rax, rdx, Address::times_1));
+    __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
     __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
-    __ movl(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
+    __ movptr(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
     if (ZeroTLAB) {
       // the fields have been already cleared
       __ jmp(initialize_header);
     } else {
       // initialize both the header and fields

@@ -3078,26 +3195,27 @@
   //
   // rdx: instance size in bytes
   if (allow_shared_alloc) {
     __ bind(allocate_shared);
 
+    ExternalAddress heap_top((address)Universe::heap()->top_addr());
+
     Label retry;
     __ bind(retry);
-    __ mov32(rax, heap_top);
-    __ leal(rbx, Address(rax, rdx, Address::times_1));
-    __ cmp32(rbx, ExternalAddress((address)Universe::heap()->end_addr()));
+    __ movptr(rax, heap_top);
+    __ lea(rbx, Address(rax, rdx, Address::times_1));
+    __ cmpptr(rbx, ExternalAddress((address)Universe::heap()->end_addr()));
     __ jcc(Assembler::above, slow_case);
 
     // Compare rax, with the top addr, and if still equal, store the new
     // top addr in rbx, at the address of the top addr pointer. Sets ZF if was
     // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
     //
     // rax,: object begin
     // rbx,: object end
     // rdx: instance size in bytes
-    if (os::is_MP()) __ lock();
-    __ cmpxchgptr(rbx, heap_top);
+    __ locked_cmpxchgptr(rbx, heap_top);
 
     // if someone beat us on the allocation, try again, otherwise continue 
     __ jcc(Assembler::notEqual, retry);
   }
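   // Note: the two fast paths above are, in rough pseudocode,
   //
   //   // TLAB: thread-private, so an unsynchronized bump suffices
   //   obj = thread->tlab_top;
   //   if (obj + size > thread->tlab_end) goto allocate_shared_or_slow;
   //   thread->tlab_top = obj + size;
   //
   //   // shared eden: contended, so CAS the new top and retry on failure
   //   retry: obj = *heap_top;  end = obj + size;
   //   if (end > *heap_end) goto slow_case;
   //   if (!cas(heap_top, obj, end)) goto retry;   // locked_cmpxchgptr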
 

@@ -3125,28 +3243,28 @@
 #endif
 
     // initialize remaining object fields: rdx was a multiple of 8
     { Label loop;
     __ bind(loop);
-    __ movl(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
-    __ movl(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx);
+    __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
+    NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
     __ decrement(rdx);
     __ jcc(Assembler::notZero, loop);
     }
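     // Note: rdx indexes 8-byte chunks here (hence the times_8 scaling),
     // so on 32-bit each pass clears two 4-byte words while on LP64 the
     // single 8-byte movptr suffices; that is what the NOT_LP64 guard on
     // the second store expresses.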
 
     // initialize object header only.
     __ bind(initialize_header);
     if (UseBiasedLocking) {
-      __ popl(rcx);   // get saved klass back in the register.
-      __ movl(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
-      __ movl(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
-    } else {
-      __ movl(Address(rax, oopDesc::mark_offset_in_bytes ()),
-              (int)markOopDesc::prototype()); // header
-      __ popl(rcx);   // get saved klass back in the register.
+      __ pop(rcx);   // get saved klass back in the register.
+      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
+    } else {
+      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
+                (int32_t)markOopDesc::prototype()); // header
+      __ pop(rcx);   // get saved klass back in the register.
     }
-    __ movl(Address(rax, oopDesc::klass_offset_in_bytes()), rcx);  // klass
+    __ movptr(Address(rax, oopDesc::klass_offset_in_bytes()), rcx);  // klass
 
     {
       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
       // Trigger dtrace event for fastpath
       __ push(atos);

@@ -3158,11 +3276,11 @@
     __ jmp(done);
   }
 
   // slow case
   __ bind(slow_case);
-  __ popl(rcx);   // restore stack pointer to what it was when we came in.
+  __ pop(rcx);   // restore stack pointer to what it was when we came in.
   __ get_constant_pool(rax);
   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rax, rdx);
 
   // continue

@@ -3195,11 +3313,11 @@
 
 
 void TemplateTable::checkcast() {
   transition(atos, atos);
   Label done, is_null, ok_is_subtype, quicked, resolved;
-  __ testl(rax, rax);   // Object is in EAX
+  __ testptr(rax, rax);   // Object is in EAX
   __ jcc(Assembler::zero, is_null);
 
   // Get cpool & tags index
   __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index

@@ -3212,28 +3330,28 @@
   __ pop_ptr(rdx);
   __ jmpb(resolved);
 
   // Get superklass in EAX and subklass in EBX
   __ bind(quicked);
-  __ movl(rdx, rax);          // Save object in EDX; EAX needed for subtype check
-  __ movl(rax, Address(rcx, rbx, Address::times_4, sizeof(constantPoolOopDesc)));
+  __ mov(rdx, rax);          // Save object in EDX; EAX needed for subtype check
+  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc)));
 
   __ bind(resolved);
-  __ movl(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
+  __ movptr(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
 
   // Generate subtype check.  Blows ECX.  Resets EDI.  Object in EDX.
   // Superklass in EAX.  Subklass in EBX.
   __ gen_subtype_check( rbx, ok_is_subtype );
 
   // Come here on failure
-  __ pushl(rdx);
+  __ push(rdx);
   // object is at TOS
   __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
 
   // Come here on success
   __ bind(ok_is_subtype);
-  __ movl(rax,rdx);           // Restore object in EDX
+  __ mov(rax, rdx);          // Restore object from EDX
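   // Note: overall the generated checkcast behaves like (pseudocode)
   //
   //   if (obj == NULL) goto done;               // null passes any cast
   //   super = resolved klass from the constant pool (quicked or via VM);
   //   if (!obj->klass()->is_subtype_of(super))  // gen_subtype_check
   //     throw ClassCastException;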
 
   // Collect counts on whether this check-cast sees NULLs a lot or not.
   if (ProfileInterpreter) {
     __ jmp(done);
     __ bind(is_null);

@@ -3246,11 +3364,11 @@
 
 
 void TemplateTable::instanceof() {
   transition(atos, itos);
   Label done, is_null, ok_is_subtype, quicked, resolved;
-  __ testl(rax, rax);
+  __ testptr(rax, rax);
   __ jcc(Assembler::zero, is_null);
 
   // Get cpool & tags index
   __ get_cpool_and_tags(rcx, rdx); // ECX=cpool, EDX=tags array
   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // EBX=index

@@ -3259,17 +3377,17 @@
   __ jcc(Assembler::equal, quicked);
 
   __ push(atos);
   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
   __ pop_ptr(rdx);
-  __ movl(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
+  __ movptr(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
   __ jmp(resolved);
 
   // Get superklass in EAX and subklass in EDX
   __ bind(quicked);
-  __ movl(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
-  __ movl(rax, Address(rcx, rbx, Address::times_4, sizeof(constantPoolOopDesc)));
+  __ movptr(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
+  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc)));
 
   __ bind(resolved);
 
   // Generate subtype check.  Blows ECX.  Resets EDI.
   // Superklass in EAX.  Subklass in EDX.

@@ -3307,11 +3425,11 @@
   transition(vtos, vtos);
 
   // get the unpatched byte code
   __ get_method(rcx);
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), rcx, rsi);
-  __ movl(rbx, rax);
+  __ mov(rbx, rax);
 
   // post the breakpoint event
   __ get_method(rcx);
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), rcx, rsi);
 

@@ -3363,54 +3481,54 @@
   // initialize entry pointer
   __ xorl(rdx, rdx);                             // points to free slot or NULL
 
   // find a free slot in the monitor block (result in rdx)
   { Label entry, loop, exit;
-    __ movl(rcx, monitor_block_top);             // points to current entry, starting with top-most entry
-    __ leal(rbx, monitor_block_bot);             // points to word before bottom of monitor block
+    __ movptr(rcx, monitor_block_top);            // points to current entry, starting with top-most entry
+    __ lea(rbx, monitor_block_bot);               // points to word before bottom of monitor block
     __ jmpb(entry);
 
     __ bind(loop);
-    __ cmpl(Address(rcx, BasicObjectLock::obj_offset_in_bytes()), NULL_WORD);  // check if current entry is used
+    __ cmpptr(Address(rcx, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD);  // check if current entry is used
 
 // TODO - need new func here - kbt
     if (VM_Version::supports_cmov()) {
-      __ cmovl(Assembler::equal, rdx, rcx);      // if not used then remember entry in rdx
+      __ cmov(Assembler::equal, rdx, rcx);       // if not used then remember entry in rdx
     } else {
       Label L;
       __ jccb(Assembler::notEqual, L);
-      __ movl(rdx, rcx);                         // if not used then remember entry in rdx
+      __ mov(rdx, rcx);                          // if not used then remember entry in rdx
       __ bind(L);
     }
-    __ cmpl(rax, Address(rcx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
+    __ cmpptr(rax, Address(rcx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
     __ jccb(Assembler::equal, exit);              // if same object then stop searching
-    __ addl(rcx, entry_size);                    // otherwise advance to next entry
+    __ addptr(rcx, entry_size);                  // otherwise advance to next entry
     __ bind(entry);
-    __ cmpl(rcx, rbx);                           // check if bottom reached
+    __ cmpptr(rcx, rbx);                         // check if bottom reached
     __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
     __ bind(exit);
   }
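   // Note: the scan above is, in rough pseudocode,
   //
   //   free = NULL;
   //   for (e = monitor_block_top; e != monitor_block_bot; e += entry_size) {
   //     if (e->obj == NULL)     free = e;   // remember a free slot (cmov)
   //     if (e->obj == lock_obj) break;      // existing entry: recursive enter
   //   }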
 
-  __ testl(rdx, rdx);                            // check if a slot has been found
+  __ testptr(rdx, rdx);                          // check if a slot has been found
   __ jccb(Assembler::notZero, allocated);         // if found, continue with that one
 
   // allocate one if there's no free slot
   { Label entry, loop;
     // 1. compute new pointers                   // rsp: old expression stack top
-    __ movl(rdx, monitor_block_bot);             // rdx: old expression stack bottom
-    __ subl(rsp, entry_size);                    // move expression stack top
-    __ subl(rdx, entry_size);                    // move expression stack bottom
-    __ movl(rcx, rsp);                           // set start value for copy loop
-    __ movl(monitor_block_bot, rdx);             // set new monitor block top
+    __ movptr(rdx, monitor_block_bot);           // rdx: old expression stack bottom
+    __ subptr(rsp, entry_size);                  // move expression stack top
+    __ subptr(rdx, entry_size);                  // move expression stack bottom
+    __ mov(rcx, rsp);                            // set start value for copy loop
+    __ movptr(monitor_block_bot, rdx);           // set new monitor block top
     __ jmp(entry);
     // 2. move expression stack contents
     __ bind(loop);
-    __ movl(rbx, Address(rcx, entry_size));      // load expression stack word from old location
-    __ movl(Address(rcx, 0), rbx);               // and store it at new location
-    __ addl(rcx, wordSize);                      // advance to next word
+    __ movptr(rbx, Address(rcx, entry_size));    // load expression stack word from old location
+    __ movptr(Address(rcx, 0), rbx);             // and store it at new location
+    __ addptr(rcx, wordSize);                    // advance to next word
     __ bind(entry);
-    __ cmpl(rcx, rdx);                           // check if bottom reached
+    __ cmpptr(rcx, rdx);                         // check if bottom reached
     __ jcc(Assembler::notEqual, loop);           // if not at bottom then copy next word
   }
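   // Note: with no free slot the monitor area grows down into the
   // expression stack, so live stack words slide down first (pseudocode):
   //
   //   rsp -= entry_size;  bot -= entry_size;   // new extents
   //   for (p = rsp; p != bot; p += wordSize)
   //     *p = *(p + entry_size);                // shift stack word down
   //   new_slot = bot;                          // rdx points at it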
   
   // call run-time routine
   // rdx: points to monitor entry

@@ -3418,11 +3536,11 @@
 
   // Increment bcp to point to the next bytecode, so exception handling for asynchronous exceptions works correctly.
   // The object has already been popped from the stack, so the expression stack looks correct.
   __ increment(rsi);
 
-  __ movl(Address(rdx, BasicObjectLock::obj_offset_in_bytes()), rax);     // store object  
+  __ movptr(Address(rdx, BasicObjectLock::obj_offset_in_bytes()), rax);     // store object
   __ lock_object(rdx);  
 
   // check to make sure this monitor doesn't cause stack overflow after locking
   __ save_bcp();  // in case of exception
   __ generate_stack_overflow_check(0);

@@ -3443,20 +3561,20 @@
   const int entry_size =         (     frame::interpreter_frame_monitor_size()           * wordSize);
   Label found;
 
   // find matching slot
   { Label entry, loop;
-    __ movl(rdx, monitor_block_top);             // points to current entry, starting with top-most entry
-    __ leal(rbx, monitor_block_bot);             // points to word before bottom of monitor block
+    __ movptr(rdx, monitor_block_top);           // points to current entry, starting with top-most entry
+    __ lea(rbx, monitor_block_bot);              // points to word before bottom of monitor block
     __ jmpb(entry);
 
     __ bind(loop);
-    __ cmpl(rax, Address(rdx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
+    __ cmpptr(rax, Address(rdx, BasicObjectLock::obj_offset_in_bytes()));   // check if current entry is for same object
     __ jcc(Assembler::equal, found);             // if same object then stop searching
-    __ addl(rdx, entry_size);                    // otherwise advance to next entry
+    __ addptr(rdx, entry_size);                  // otherwise advance to next entry
     __ bind(entry);
-    __ cmpl(rdx, rbx);                           // check if bottom reached
+    __ cmpptr(rdx, rbx);                         // check if bottom reached
     __ jcc(Assembler::notEqual, loop);           // if not at bottom then check this entry
   }
 
   // error handling. Unlocking was not block-structured
   Label end;

@@ -3477,11 +3595,12 @@
 // Wide instructions
 
 void TemplateTable::wide() {
   transition(vtos, vtos);
   __ load_unsigned_byte(rbx, at_bcp(1));
-  __ jmp(Address(noreg, rbx, Address::times_4, int(Interpreter::_wentry_point)));
+  ExternalAddress wtable((address)Interpreter::_wentry_point);
+  __ jump(ArrayAddress(wtable, Address(noreg, rbx, Address::times_ptr)));
   // Note: the rsi increment step is part of the individual wide bytecode implementations
 }
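 // Note: wide() is a computed dispatch through a second entry-point table,
 // conceptually
 //
 //   goto Interpreter::_wentry_point[bytecode_at(bcp + 1)];   // rbx holds it
 //
 // The ArrayAddress form keeps this correct even when the table's address
 // does not fit in a 32-bit displacement.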
 
 
 //----------------------------------------------------------------------------------------------------

@@ -3491,12 +3610,12 @@
   transition(vtos, atos);
   __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
   // last dim is on top of stack; we want address of first one:
   // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordSize
   // (the trailing wordSize subtraction makes it point to the beginning of the array)
-  __ leal(  rax, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
+  __ lea(rax, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rax);     // pass in rax,
   __ load_unsigned_byte(rbx, at_bcp(3));
-  __ leal(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
+  __ lea(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
 }
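 // Note: the lea above computes, in effect,
 //
 //   first_addr = rsp + ndims * stackElementSize - wordSize;
 //
 // i.e. the address of the first dimension word, matching the comment in
 // the code; the trailing lea then pops all ndims counts in one step.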
 
 #endif /* !CC_INTERP */