# HG changeset patch
# User goetz
# Date 1429094696 -7200
# Node ID aff04f621af28240e37cab0ffa7fd090b5112e6b
# Parent  364dd48a2c48ace207c0aa886033a1bce448f1e6
8077838: Recent developments for ppc.
Reviewed-by: kvn

diff --git a/src/cpu/ppc/vm/assembler_ppc.cpp b/src/cpu/ppc/vm/assembler_ppc.cpp
--- a/src/cpu/ppc/vm/assembler_ppc.cpp
+++ b/src/cpu/ppc/vm/assembler_ppc.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,8 +85,7 @@
 }
 
 // Low-level andi-one-instruction-macro.
-void Assembler::andi(Register a, Register s, const int ui16) {
-  assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
+void Assembler::andi(Register a, Register s, const long ui16) {
   if (is_power_of_2_long(((jlong) ui16)+1)) {
     // pow2minus1
     clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
@@ -97,6 +96,7 @@
     // negpow2
     clrrdi(a, s, log2_long((jlong)-ui16));
   } else {
+    assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
     andi_(a, s, ui16);
   }
 }
@@ -356,7 +356,6 @@
 // 16 bit immediate offset.
 int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
   // Avoid accidentally trying to use R0 for indexed addressing.
-  assert(d != R0, "R0 not allowed");
   assert_different_registers(d, tmp);
 
   short xa, xb, xc, xd; // Four 16-bit chunks of const.
@@ -370,6 +369,58 @@
     return 0;
   }
 
+  int retval = 0;
+  if (return_simm16_rest) {
+    retval = xd;
+    x = rem << 16;
+    xd = 0;
+  }
+
+  if (d == R0) { // Can't use addi.
+    if (is_simm(x, 32)) { // opt 2: simm32
+      lis(d, x >> 16);
+      if (xd) ori(d, d, (unsigned short)xd);
+    } else {
+      // 64-bit value: x = xa xb xc xd
+      xa = (x >> 48) & 0xffff;
+      xb = (x >> 32) & 0xffff;
+      xc = (x >> 16) & 0xffff;
+      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
+      if (tmp == noreg || (xc == 0 && xd == 0)) {
+        if (xa_loaded) {
+          lis(d, xa);
+          if (xb) { ori(d, d, (unsigned short)xb); }
+        } else {
+          li(d, xb);
+        }
+        sldi(d, d, 32);
+        if (xc) { oris(d, d, (unsigned short)xc); }
+        if (xd) { ori( d, d, (unsigned short)xd); }
+      } else {
+        // Exploit instruction level parallelism if we have a tmp register.
+        bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
+        if (xa_loaded) {
+          lis(tmp, xa);
+        }
+        if (xc_loaded) {
+          lis(d, xc);
+        }
+        if (xa_loaded) {
+          if (xb) { ori(tmp, tmp, (unsigned short)xb); }
+        } else {
+          li(tmp, xb);
+        }
+        if (xc_loaded) {
+          if (xd) { ori(d, d, (unsigned short)xd); }
+        } else {
+          li(d, xd);
+        }
+        insrdi(d, tmp, 32, 0);
+      }
+    }
+    return retval;
+  }
+
   xc = rem & 0xFFFF; // Next 16-bit chunk.
   rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
 
@@ -377,28 +428,27 @@
     lis(d, xc);
   } else { // High 32 bits needed.
 
-    if (tmp != noreg) { // opt 3: We have a temp reg.
+    if (tmp != noreg  && (int)x != 0) { // opt 3: We have a temp reg.
       // No carry propagation between xc and higher chunks here (use logical instructions).
       xa = (x >> 48) & 0xffff;
       xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
-      bool load_xa = (xa != 0) || (xb < 0);
+      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
       bool return_xd = false;
 
-      if (load_xa) { lis(tmp, xa); }
+      if (xa_loaded) { lis(tmp, xa); }
       if (xc) { lis(d, xc); }
-      if (load_xa) {
+      if (xa_loaded) {
         if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
       } else {
-        li(tmp, xb); // non-negative
+        li(tmp, xb);
       }
       if (xc) {
-        if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
-        else if (xd) { addi(d, d, xd); }
+        if (xd) { addi(d, d, xd); }
       } else {
         li(d, xd);
       }
       insrdi(d, tmp, 32, 0);
-      return return_xd ? xd : 0; // non-negative
+      return retval;
     }
 
     xb = rem & 0xFFFF; // Next 16-bit chunk.
@@ -417,11 +467,51 @@
     if (xc) { addis(d, d, xc); }
   }
 
-  // opt 5: Return offset to be inserted into following instruction.
-  if (return_simm16_rest) return xd;
+  if (xd) { addi(d, d, xd); }
+  return retval;
+}
 
-  if (xd) { addi(d, d, xd); }
-  return 0;
+// We emit only one addition to s to optimize latency.
+int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
+  assert(s != R0 && s != tmp, "unsupported");
+  long rem = x;
+
+  // Case 1: Can use mr or addi.
+  short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
+  rem = (rem >> 16) + ((unsigned short)xd >> 15);
+  if (rem == 0) {
+    if (xd == 0) {
+      if (d != s) { mr(d, s); }
+      return 0;
+    }
+    if (return_simm16_rest) {
+      return xd;
+    }
+    addi(d, s, xd);
+    return 0;
+  }
+
+  // Case 2: Can use addis.
+  if (xd == 0) {
+    short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
+    rem = (rem >> 16) + ((unsigned short)xd >> 15);
+    if (rem == 0) {
+      addis(d, s, xc);
+      return 0;
+    }
+  }
+
+  // Other cases: load & add.
+  Register tmp1 = tmp,
+           tmp2 = noreg;
+  if ((d != tmp) && (d != s)) {
+    // Can use d.
+    tmp1 = d;
+    tmp2 = tmp;
+  }
+  int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
+  add(d, tmp1, s);
+  return simm16_rest;
 }
 
 #ifndef PRODUCT
diff --git a/src/cpu/ppc/vm/assembler_ppc.hpp b/src/cpu/ppc/vm/assembler_ppc.hpp
--- a/src/cpu/ppc/vm/assembler_ppc.hpp
+++ b/src/cpu/ppc/vm/assembler_ppc.hpp
@@ -224,10 +224,13 @@
     ADDIS_OPCODE  = (15u << OPCODE_SHIFT),
     ADDIC__OPCODE = (13u << OPCODE_SHIFT),
     ADDE_OPCODE   = (31u << OPCODE_SHIFT | 138u << 1),
+    ADDME_OPCODE  = (31u << OPCODE_SHIFT | 234u << 1),
+    ADDZE_OPCODE  = (31u << OPCODE_SHIFT | 202u << 1),
     SUBF_OPCODE   = (31u << OPCODE_SHIFT |  40u << 1),
     SUBFC_OPCODE  = (31u << OPCODE_SHIFT |   8u << 1),
     SUBFE_OPCODE  = (31u << OPCODE_SHIFT | 136u << 1),
     SUBFIC_OPCODE = (8u  << OPCODE_SHIFT),
+    SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1),
     SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
     DIVW_OPCODE   = (31u << OPCODE_SHIFT | 491u << 1),
     MULLW_OPCODE  = (31u << OPCODE_SHIFT | 235u << 1),
@@ -657,6 +660,9 @@
     SYNC_OPCODE    = (31u << OPCODE_SHIFT |  598u << 1),
     EIEIO_OPCODE   = (31u << OPCODE_SHIFT |  854u << 1),
 
+    // Wait instructions for polling.
+    WAIT_OPCODE    = (31u << OPCODE_SHIFT |   62u << 1),
+
     // Trap instructions
     TDI_OPCODE     = (2u  << OPCODE_SHIFT),
     TWI_OPCODE     = (3u  << OPCODE_SHIFT),
@@ -666,8 +672,10 @@
     // Atomics.
     LWARX_OPCODE   = (31u << OPCODE_SHIFT |   20u << 1),
     LDARX_OPCODE   = (31u << OPCODE_SHIFT |   84u << 1),
+    LQARX_OPCODE   = (31u << OPCODE_SHIFT |  276u << 1),
     STWCX_OPCODE   = (31u << OPCODE_SHIFT |  150u << 1),
-    STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1)
+    STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1),
+    STQCX_OPCODE   = (31u << OPCODE_SHIFT |  182u << 1)
 
   };
 
@@ -1171,6 +1179,14 @@
   inline void adde_(  Register d, Register a, Register b);
   inline void subfe(  Register d, Register a, Register b);
   inline void subfe_( Register d, Register a, Register b);
+  inline void addme(  Register d, Register a);
+  inline void addme_( Register d, Register a);
+  inline void subfme( Register d, Register a);
+  inline void subfme_(Register d, Register a);
+  inline void addze(  Register d, Register a);
+  inline void addze_( Register d, Register a);
+  inline void subfze( Register d, Register a);
+  inline void subfze_(Register d, Register a);
   inline void neg(    Register d, Register a);
   inline void neg_(   Register d, Register a);
   inline void mulli(  Register d, Register a, int si16);
@@ -1189,6 +1205,38 @@
   inline void divw(   Register d, Register a, Register b);
   inline void divw_(  Register d, Register a, Register b);
 
+  // Fixed-Point Arithmetic Instructions with Overflow detection
+  inline void addo(    Register d, Register a, Register b);
+  inline void addo_(   Register d, Register a, Register b);
+  inline void subfo(   Register d, Register a, Register b);
+  inline void subfo_(  Register d, Register a, Register b);
+  inline void addco(   Register d, Register a, Register b);
+  inline void addco_(  Register d, Register a, Register b);
+  inline void subfco(  Register d, Register a, Register b);
+  inline void subfco_( Register d, Register a, Register b);
+  inline void addeo(   Register d, Register a, Register b);
+  inline void addeo_(  Register d, Register a, Register b);
+  inline void subfeo(  Register d, Register a, Register b);
+  inline void subfeo_( Register d, Register a, Register b);
+  inline void addmeo(  Register d, Register a);
+  inline void addmeo_( Register d, Register a);
+  inline void subfmeo( Register d, Register a);
+  inline void subfmeo_(Register d, Register a);
+  inline void addzeo(  Register d, Register a);
+  inline void addzeo_( Register d, Register a);
+  inline void subfzeo( Register d, Register a);
+  inline void subfzeo_(Register d, Register a);
+  inline void nego(    Register d, Register a);
+  inline void nego_(   Register d, Register a);
+  inline void mulldo(  Register d, Register a, Register b);
+  inline void mulldo_( Register d, Register a, Register b);
+  inline void mullwo(  Register d, Register a, Register b);
+  inline void mullwo_( Register d, Register a, Register b);
+  inline void divdo(   Register d, Register a, Register b);
+  inline void divdo_(  Register d, Register a, Register b);
+  inline void divwo(   Register d, Register a, Register b);
+  inline void divwo_(  Register d, Register a, Register b);
+
   // extended mnemonics
   inline void li(   Register d, int si16);
   inline void lis(  Register d, int si16);
@@ -1303,7 +1351,7 @@
   inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
 
   // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
-         void andi(   Register a, Register s, int ui16);   // optimized version
+         void andi(   Register a, Register s, long ui16);   // optimized version
   inline void andi_(  Register a, Register s, int ui16);
   inline void andis_( Register a, Register s, int ui16);
   inline void ori(    Register a, Register s, int ui16);
@@ -1688,14 +1736,21 @@
   inline void isync();
   inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
 
+  // Wait instructions for polling. Attention: May result in SIGILL.
+  inline void wait();
+  inline void waitrsv(); // >=Power7
+
   // atomics
   inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
+  inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   inline bool lxarx_hint_exclusive_access();
   inline void lwarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void ldarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
+  inline void lqarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void stwcx_( Register s, Register a, Register b);
   inline void stdcx_( Register s, Register a, Register b);
+  inline void stqcx_( Register s, Register a, Register b);
 
   // Instructions for adjusting thread priority for simultaneous
   // multithreading (SMT) on Power5.
@@ -2054,10 +2109,13 @@
   // Atomics: use ra0mem to disallow R0 as base.
   inline void lwarx_unchecked(Register d, Register b, int eh1);
   inline void ldarx_unchecked(Register d, Register b, int eh1);
+  inline void lqarx_unchecked(Register d, Register b, int eh1);
   inline void lwarx( Register d, Register b, bool hint_exclusive_access);
   inline void ldarx( Register d, Register b, bool hint_exclusive_access);
+  inline void lqarx( Register d, Register b, bool hint_exclusive_access);
   inline void stwcx_(Register s, Register b);
   inline void stdcx_(Register s, Register b);
+  inline void stqcx_(Register s, Register b);
   inline void lfs(   FloatRegister d, int si16);
   inline void lfsx(  FloatRegister d, Register b);
   inline void lfd(   FloatRegister d, int si16);
@@ -2120,6 +2178,20 @@
     return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
   }
 
+  // If return_simm16_rest, the return value needs to get added afterwards.
+         int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false);
+  inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
+    return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
+  }
+
+  // If return_simm16_rest, the return value needs to get added afterwards.
+  inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) {
+    return add_const_optimized(d, s, -x, tmp, return_simm16_rest);
+  }
+  inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
+    return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
+  }
+
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef CHECK_DELAY
diff --git a/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/src/cpu/ppc/vm/assembler_ppc.inline.hpp
--- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp
+++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp
@@ -100,6 +100,14 @@
 inline void Assembler::adde_(  Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
 inline void Assembler::subfe(  Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
 inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::addme(  Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::addme_( Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::subfme( Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::subfme_(Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::addze(  Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::addze_( Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::subfze( Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::subfze_(Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(0) | rc(1)); }
 inline void Assembler::neg(    Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(0)); }
 inline void Assembler::neg_(   Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(1)); }
 inline void Assembler::mulli(  Register d, Register a, int si16)   { emit_int32(MULLI_OPCODE  | rt(d) | ra(a) | simm(si16, 16)); }
@@ -118,6 +126,38 @@
 inline void Assembler::divw(   Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
 inline void Assembler::divw_(  Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
 
+// Fixed-Point Arithmetic Instructions with Overflow detection
+inline void Assembler::addo(    Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addo_(   Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfo(   Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfo_(  Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addco(   Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addco_(  Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfco(  Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfco_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addeo(   Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addeo_(  Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfeo(  Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addmeo(  Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::addmeo_( Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::subfmeo( Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::subfmeo_(Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::addzeo(  Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::addzeo_( Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::subfzeo( Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::subfzeo_(Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::nego(    Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::nego_(   Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::mulldo(  Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mulldo_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::mullwo(  Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divdo(   Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divdo_(  Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divwo(   Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divwo_(  Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+
 // extended mnemonics
 inline void Assembler::li(   Register d, int si16)             { Assembler::addi_r0ok( d, R0, si16); }
 inline void Assembler::lis(  Register d, int si16)             { Assembler::addis_r0ok(d, R0, si16); }
@@ -540,15 +580,22 @@
 inline void Assembler::isync()     { emit_int32( ISYNC_OPCODE); }
 inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
 
+// Wait instructions for polling.
+inline void Assembler::wait()    { emit_int32( WAIT_OPCODE); }
+inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7
+
 // atomics
 // Use ra0mem to disallow R0 as base.
 inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline bool Assembler::lxarx_hint_exclusive_access()                                          { return VM_Version::has_lxarxeh(); }
 inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::stwcx_(Register s, Register a, Register b)                             { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register a, Register b)                             { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register a, Register b)                             { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 
 // Instructions for adjusting thread priority
 // for simultaneous multithreading (SMT) on POWER5.
@@ -873,10 +920,13 @@
 // ra0 version
 inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::stwcx_(Register s, Register b)                            { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register b)                            { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register b)                            { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 
 // ra0 version
 inline void Assembler::lfs( FloatRegister d, int si16)   { emit_int32( LFS_OPCODE  | frt(d) | simm(si16,16)); }
diff --git a/src/cpu/ppc/vm/c2_globals_ppc.hpp b/src/cpu/ppc/vm/c2_globals_ppc.hpp
--- a/src/cpu/ppc/vm/c2_globals_ppc.hpp
+++ b/src/cpu/ppc/vm/c2_globals_ppc.hpp
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@
 define_pd_global(intx, FLOATPRESSURE,                28);
 define_pd_global(intx, FreqInlineSize,               175);
 define_pd_global(intx, MinJumpTableSize,             10);
-define_pd_global(intx, INTPRESSURE,                  25);
+define_pd_global(intx, INTPRESSURE,                  26);
 define_pd_global(intx, InteriorEntryAlignment,       16);
 define_pd_global(size_t, NewSizeThreadIncrease,      ScaleForWordSize(4*K));
 define_pd_global(intx, RegisterCostAreaRatio,        16000);
diff --git a/src/cpu/ppc/vm/globals_ppc.hpp b/src/cpu/ppc/vm/globals_ppc.hpp
--- a/src/cpu/ppc/vm/globals_ppc.hpp
+++ b/src/cpu/ppc/vm/globals_ppc.hpp
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,7 +58,7 @@
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
 
-define_pd_global(uintx, TypeProfileLevel, 0);
+define_pd_global(uintx, TypeProfileLevel, 111);
 
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct)  \
@@ -71,14 +71,26 @@
                                                                             \
   product(uintx, PowerArchitecturePPC64, 0,                                 \
           "CPU Version: x for PowerX. Currently recognizes Power5 to "      \
-          "Power7. Default is 0. CPUs newer than Power7 will be "           \
-          "recognized as Power7.")                                          \
+          "Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
                                                                             \
   /* Reoptimize code-sequences of calls at runtime, e.g. replace an */      \
   /* indirect call by a direct call.                                */      \
   product(bool, ReoptimizeCallSequences, true,                              \
           "Reoptimize code-sequences of calls at runtime.")                 \
                                                                             \
+  /* Power 8: Configure Data Stream Control Register. */                    \
+  product(uint64_t,DSCR_PPC64, (uintx)-1,                                   \
+          "Power8 or later: Specify encoded value for Data Stream Control " \
+          "Register")                                                       \
+  product(uint64_t,DSCR_DPFD_PPC64, 8,                                      \
+          "Power8 or later: DPFD (default prefetch depth) value of the "    \
+          "Data Stream Control Register."                                   \
+          " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch")    \
+  product(uint64_t,DSCR_URG_PPC64, 8,                                       \
+          "Power8 or later: URG (depth attainment urgency) value of the "   \
+          "Data Stream Control Register."                                   \
+          " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch")    \
+                                                                            \
   product(bool, UseLoadInstructionsForStackBangingPPC64, false,             \
           "Use load instructions for stack banging.")                       \
                                                                             \
@@ -121,6 +133,41 @@
                                                                             \
   product(bool, ZapMemory, false, "Write 0x0101... to empty memory."        \
           " Use this to ease debugging.")                                   \
-
+                                                                            \
+  /* Use Restricted Transactional Memory for lock eliding */                \
+  product(bool, UseRTMLocking, false,                                       \
+          "Enable RTM lock eliding for inflated locks in compiled code")    \
+                                                                            \
+  experimental(bool, UseRTMForStackLocks, false,                            \
+          "Enable RTM lock eliding for stack locks in compiled code")       \
+                                                                            \
+  product(bool, UseRTMDeopt, false,                                         \
+          "Perform deopt and recompilation based on RTM abort ratio")       \
+                                                                            \
+  product(uintx, RTMRetryCount, 5,                                          \
+          "Number of RTM retries on lock abort or busy")                    \
+                                                                            \
+  experimental(intx, RTMSpinLoopCount, 100,                                 \
+          "Spin count for lock to become free before RTM retry")            \
+                                                                            \
+  experimental(intx, RTMAbortThreshold, 1000,                               \
+          "Calculate abort ratio after this number of aborts")              \
+                                                                            \
+  experimental(intx, RTMLockingThreshold, 10000,                            \
+          "Lock count at which to do RTM lock eliding without "             \
+          "abort ratio calculation")                                        \
+                                                                            \
+  experimental(intx, RTMAbortRatio, 50,                                     \
+          "Lock abort ratio at which to stop use RTM lock eliding")         \
+                                                                            \
+  experimental(intx, RTMTotalCountIncrRate, 64,                             \
+          "Increment total RTM attempted lock count once every n times")    \
+                                                                            \
+  experimental(intx, RTMLockingCalculationDelay, 0,                         \
+          "Number of milliseconds to wait before start calculating aborts " \
+          "for RTM locking")                                                \
+                                                                            \
+  experimental(bool, UseRTMXendForLockBusy, true,                           \
+          "Use RTM Xend instead of Xabort when lock busy")                  \
 
 #endif // CPU_PPC_VM_GLOBALS_PPC_HPP
diff --git a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp
@@ -446,7 +446,7 @@
 }
 
 // Load object from cpool->resolved_references(index).
-void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
+void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) {
   assert_different_registers(result, index);
   get_constant_pool(result);
 
@@ -469,7 +469,7 @@
 #endif
   // Add in the index.
   add(result, tmp, result);
-  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
+  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null);
 }
 
 // Generate a subtype check: branch to ok_is_subtype if sub_klass is
@@ -876,7 +876,6 @@
     // If condition is true we are done and hence we can store 0 in the displaced
     // header indicating it is a recursive lock.
     bne(CCR0, slow_case);
-    release();
     std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
         BasicLock::displaced_header_offset_in_bytes(), monitor);
     b(done);
@@ -1861,7 +1860,7 @@
     const Register mdp = tmp1;
     add(mdp, tmp1, R28_mdx);
 
-    // Pffset of the current profile entry to update.
+    // Offset of the current profile entry to update.
     const Register entry_offset = tmp2;
     // entry_offset = array len in number of cells
     ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp);
diff --git a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,7 +85,7 @@
                          Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);
 
   // Load object from cpool->resolved_references(index).
-  void load_resolved_reference_at_index(Register result, Register index);
+  void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL);
 
   void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1);
   void load_receiver(Register Rparam_count, Register Rrecv_dst);
diff --git a/src/cpu/ppc/vm/interpreter_ppc.hpp b/src/cpu/ppc/vm/interpreter_ppc.hpp
--- a/src/cpu/ppc/vm/interpreter_ppc.hpp
+++ b/src/cpu/ppc/vm/interpreter_ppc.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,4 +47,4 @@
   }
 #endif
 
-#endif // CPU_PPC_VM_INTERPRETER_PPC_PP
+#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP
diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.cpp b/src/cpu/ppc/vm/macroAssembler_ppc.cpp
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1455,7 +1455,7 @@
 // Several special cases exist to avoid that unnecessary information is generated.
 //
 void MacroAssembler::cmpxchgd(ConditionRegister flag,
-                              Register dest_current_value, Register compare_value, Register exchange_value,
+                              Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
                               Register addr_base, int semantics, bool cmpxchgx_hint,
                               Register int_flag_success, Label* failed_ext, bool contention_hint) {
   Label retry;
@@ -1465,7 +1465,7 @@
 
   // Save one branch if result is returned via register and result register is different from the other ones.
   bool use_result_reg    = (int_flag_success!=noreg);
-  bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
+  bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
                             int_flag_success!=exchange_value && int_flag_success!=addr_base);
   assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
 
@@ -1481,7 +1481,7 @@
   // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
   if (contention_hint) { // Don't try to reserve if cmp fails.
     ld(dest_current_value, 0, addr_base);
-    cmpd(flag, dest_current_value, compare_value);
+    cmpd(flag, compare_value, dest_current_value);
     bne(flag, failed);
   }
 
@@ -1489,7 +1489,7 @@
   bind(retry);
 
   ldarx(dest_current_value, addr_base, cmpxchgx_hint);
-  cmpd(flag, dest_current_value, compare_value);
+  cmpd(flag, compare_value, dest_current_value);
   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
     bne_predict_not_taken(flag, failed);
   } else {
@@ -1873,7 +1873,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
            /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
            /*where=*/obj_reg,
@@ -1909,7 +1908,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
                  /*where=*/obj_reg,
@@ -1946,7 +1944,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
                  /*where=*/obj_reg,
@@ -1987,9 +1984,371 @@
   beq(cr_reg, done);
 }
 
+// TM on PPC64.
+void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
+  Label retry;
+  bind(retry);
+  ldarx(result, addr, /*hint*/ false);
+  addi(result, result, simm16);
+  stdcx_(result, addr);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
+  } else {
+    bne(                  CCR0, retry); // stXcx_ sets CCR0
+  }
+}
+
+void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
+  Label retry;
+  bind(retry);
+  lwarx(result, addr, /*hint*/ false);
+  ori(result, result, uimm16);
+  stwcx_(result, addr);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
+  } else {
+    bne(                  CCR0, retry); // stXcx_ sets CCR0
+  }
+}
+
+#if INCLUDE_RTM_OPT
+
+// Update rtm_counters based on abort status
+// input: abort_status
+//        rtm_counters (RTMLockingCounters*)
+void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
+  // Mapping to keep PreciseRTMLockingStatistics similar to x86.
+  // x86 ppc (! means inverted, ? means not the same)
+  //  0   31  Set if abort caused by XABORT instruction.
+  //  1  ! 7  If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
+  //  2   13  Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
+  //  3   10  Set if an internal buffer overflowed.
+  //  4  ?12  Set if a debug breakpoint was hit.
+  //  5  ?32  Set if an abort occurred during execution of a nested transaction.
+  const  int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
+                                 Assembler::tm_failure_persistent, // inverted: transient
+                                 Assembler::tm_trans_cf,
+                                 Assembler::tm_footprint_of,
+                                 Assembler::tm_non_trans_cf,
+                                 Assembler::tm_suspended};
+  const bool tm_failure_inv[] = {false, true, false, false, false, false};
+  assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
+
+  const Register addr_Reg = R0;
+  // Keep track of offset to where rtm_counters_Reg had pointed to.
+  int counters_offs = RTMLockingCounters::abort_count_offset();
+  addi(addr_Reg, rtm_counters_Reg, counters_offs);
+  const Register temp_Reg = rtm_counters_Reg;
+
+  //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+  ldx(temp_Reg, addr_Reg);
+  addi(temp_Reg, temp_Reg, 1);
+  stdx(temp_Reg, addr_Reg);
+
+  if (PrintPreciseRTMLockingStatistics) {
+    int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;
+
+    //mftexasr(abort_status); done by caller
+    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
+      counters_offs += counters_offs_delta;
+      li(temp_Reg, counters_offs_delta); // can't use addi with R0
+      add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
+      counters_offs_delta = sizeof(uintx);
+
+      Label check_abort;
+      rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0);
+      if (tm_failure_inv[i]) {
+        bne(CCR0, check_abort);
+      } else {
+        beq(CCR0, check_abort);
+      }
+      //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+      ldx(temp_Reg, addr_Reg);
+      addi(temp_Reg, temp_Reg, 1);
+      stdx(temp_Reg, addr_Reg);
+      bind(check_abort);
+    }
+  }
+  li(temp_Reg, -counters_offs); // can't use addi with R0
+  add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
+}
+
+// Branch if (random & (count-1) != 0), count is 2^n
+// tmp and CR0 are killed
+void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
+  mftb(tmp);
+  andi_(tmp, tmp, count-1);
+  bne(CCR0, brLabel);
+}
+
+// Perform abort ratio calculation, set no_rtm bit if high ratio.
+// input:  rtm_counters_Reg (RTMLockingCounters* address) - KILLED
+void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
+                                                 RTMLockingCounters* rtm_counters,
+                                                 Metadata* method_data) {
+  Label L_done, L_check_always_rtm1, L_check_always_rtm2;
+
+  if (RTMLockingCalculationDelay > 0) {
+    // Delay calculation.
+    ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
+    cmpdi(CCR0, rtm_counters_Reg, 0);
+    beq(CCR0, L_done);
+    load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+  }
+  // Abort ratio calculation only if abort_count > RTMAbortThreshold.
+  //   Aborted transactions = abort_count * 100
+  //   All transactions = total_count *  RTMTotalCountIncrRate
+  //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
+  ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
+  cmpdi(CCR0, R0, RTMAbortThreshold);
+  blt(CCR0, L_check_always_rtm2);
+  mulli(R0, R0, 100);
+
+  const Register tmpReg = rtm_counters_Reg;
+  ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+  mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
+  mulli(tmpReg, tmpReg, RTMAbortRatio);
+  cmpd(CCR0, R0, tmpReg);
+  blt(CCR0, L_check_always_rtm1); // jump to reload
+  if (method_data != NULL) {
+    // Set rtm_state to "no rtm" in MDO.
+    // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
+    // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
+    load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+    atomic_ori_int(R0, tmpReg, NoRTM);
+  }
+  b(L_done);
+
+  bind(L_check_always_rtm1);
+  load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+  bind(L_check_always_rtm2);
+  ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+  cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
+  blt(CCR0, L_done);
+  if (method_data != NULL) {
+    // Set rtm_state to "always rtm" in MDO.
+    // Not using a metadata relocation. See above.
+    load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+    atomic_ori_int(R0, tmpReg, UseRTM);
+  }
+  bind(L_done);
+}
+
+// Update counters and perform abort ratio calculation.
+// input: abort_status_Reg
+void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data,
+                                   bool profile_rtm) {
+
+  assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+  // Update rtm counters based on state at abort.
+  // Reads abort_status_Reg, updates flags.
+  assert_different_registers(abort_status_Reg, temp_Reg);
+  load_const_optimized(temp_Reg, (address)rtm_counters, R0);
+  rtm_counters_update(abort_status_Reg, temp_Reg);
+  if (profile_rtm) {
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
+  }
+}
+
+// Retry on abort if abort's status indicates non-persistent failure.
+// inputs: retry_count_Reg
+//       : abort_status_Reg
+// output: retry_count_Reg decremented by 1
+void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
+                                             Label& retryLabel, Label* checkRetry) {
+  Label doneRetry;
+  rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
+  bne(CCR0, doneRetry);
+  if (checkRetry) { bind(*checkRetry); }
+  addic_(retry_count_Reg, retry_count_Reg, -1);
+  blt(CCR0, doneRetry);
+  smt_yield(); // Can't use wait(). No permission (SIGILL).
+  b(retryLabel);
+  bind(doneRetry);
+}
+
+// Spin and retry if lock is busy.
+// inputs: box_Reg (monitor address)
+//       : retry_count_Reg
+// output: retry_count_Reg decremented by 1
+// CTR is killed
+void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
+  Label SpinLoop, doneRetry;
+  addic_(retry_count_Reg, retry_count_Reg, -1);
+  blt(CCR0, doneRetry);
+  li(R0, RTMSpinLoopCount);
+  mtctr(R0);
+
+  bind(SpinLoop);
+  smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
+  bdz(retryLabel);
+  ld(R0, 0, owner_addr_Reg);
+  cmpdi(CCR0, R0, 0);
+  bne(CCR0, SpinLoop);
+  b(retryLabel);
+
+  bind(doneRetry);
+}
+
+// Use RTM for normal stack locks.
+// Input: objReg (object to lock)
+void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
+                                       Register obj, Register mark_word, Register tmp,
+                                       Register retry_on_abort_count_Reg,
+                                       RTMLockingCounters* stack_rtm_counters,
+                                       Metadata* method_data, bool profile_rtm,
+                                       Label& DONE_LABEL, Label& IsInflated) {
+  assert(UseRTMForStackLocks, "why call this otherwise?");
+  assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+
+  if (RTMRetryCount > 0) {
+    load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  andi_(R0, mark_word, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+  bne(CCR0, IsInflated);
+
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
+    load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
+    //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
+    ldx(mark_word, tmp);
+    addi(mark_word, mark_word, 1);
+    stdx(mark_word, tmp);
+    bind(L_noincrement);
+  }
+  tbegin_();
+  beq(CCR0, L_on_abort);
+  ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);         // Reload in transaction, conflicts need to be tracked.
+  andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+  cmpwi(flag, R0, markOopDesc::unlocked_value);                // bits = 001 unlocked
+  beq(flag, DONE_LABEL);                                       // all done if unlocked
+
+  if (UseRTMXendForLockBusy) {
+    tend_();
+    b(L_decrement_retry);
+  } else {
+    tabort_();
+  }
+  bind(L_on_abort);
+  const Register abort_status_Reg = tmp;
+  mftexasr(abort_status_Reg);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
+  }
+  ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
+  if (RTMRetryCount > 0) {
+    // Retry on lock abort if abort status is not permanent.
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
+  } else {
+    bind(L_decrement_retry);
+  }
+}
+
+// Use RTM for inflating locks
+// inputs: obj       (object to lock)
+//         mark_word (current header - KILLED)
+//         boxReg    (on-stack box address (displaced header location) - KILLED)
+void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
+                                          Register obj, Register mark_word, Register boxReg,
+                                          Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
+                                          RTMLockingCounters* rtm_counters,
+                                          Metadata* method_data, bool profile_rtm,
+                                          Label& DONE_LABEL) {
+  assert(UseRTMLocking, "why call this otherwise?");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+  // Clean monitor_value bit to get valid pointer.
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
+  std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
+  const Register tmpReg = boxReg;
+  const Register owner_addr_Reg = mark_word;
+  addi(owner_addr_Reg, mark_word, owner_offset);
+
+  if (RTMRetryCount > 0) {
+    load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy.
+    load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
+    bind(L_rtm_retry);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
+    //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
+    ldx(tmpReg, R0);
+    addi(tmpReg, tmpReg, 1);
+    stdx(tmpReg, R0);
+    bind(L_noincrement);
+  }
+  tbegin_();
+  beq(CCR0, L_on_abort);
+  // We don't reload mark word. Will only be reset at safepoint.
+  ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
+  cmpdi(flag, R0, 0);
+  beq(flag, DONE_LABEL);
+
+  if (UseRTMXendForLockBusy) {
+    tend_();
+    b(L_decrement_retry);
+  } else {
+    tabort_();
+  }
+  bind(L_on_abort);
+  const Register abort_status_Reg = tmpReg;
+  mftexasr(abort_status_Reg);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
+    // Restore owner_addr_Reg
+    ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
+#ifdef ASSERT
+    andi_(R0, mark_word, markOopDesc::monitor_value);
+    asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
+#endif
+    addi(owner_addr_Reg, mark_word, owner_offset);
+  }
+  if (RTMRetryCount > 0) {
+    // Retry on lock abort if abort status is not permanent.
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+
+  // Appears unlocked - try to swing _owner from null to non-null.
+  cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
+           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+           MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
+
+  if (RTMRetryCount > 0) {
+    // success done else retry
+    b(DONE_LABEL);
+    bind(L_decrement_retry);
+    // Spin and retry if lock is busy.
+    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
+  } else {
+    bind(L_decrement_retry);
+  }
+}
+
+#endif //  INCLUDE_RTM_OPT
+
 // "The box" is the space on the stack where we copy the object mark.
 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
-                                               Register temp, Register displaced_header, Register current_header) {
+                                               Register temp, Register displaced_header, Register current_header,
+                                               bool try_bias,
+                                               RTMLockingCounters* rtm_counters,
+                                               RTMLockingCounters* stack_rtm_counters,
+                                               Metadata* method_data,
+                                               bool use_rtm, bool profile_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
   assert(flag != CCR0, "bad condition register");
   Label cont;
@@ -2006,10 +2365,18 @@
     return;
   }
 
-  if (UseBiasedLocking) {
+  if (try_bias) {
     biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
+                      stack_rtm_counters, method_data, profile_rtm,
+                      cont, object_has_monitor);
+  }
+#endif // INCLUDE_RTM_OPT
+
   // Handle existing monitor.
   if ((EmitSync & 0x02) == 0) {
     // The object has an existing monitor iff (mark & monitor_value) != 0.
@@ -2066,14 +2433,22 @@
     bind(object_has_monitor);
     // The object's monitor m is unlocked iff m->owner == NULL,
     // otherwise m->owner may contain a thread or a stack address.
-    //
+
+#if INCLUDE_RTM_OPT
+    // Use the same RTM locking code in 32- and 64-bit VM.
+    if (use_rtm) {
+      rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
+                           rtm_counters, method_data, profile_rtm, cont);
+    } else {
+#endif // INCLUDE_RTM_OPT
+
     // Try to CAS m->owner from NULL to current thread.
     addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
     li(displaced_header, 0);
     // CmpxchgX sets flag to cmpX(current, displaced).
     cmpxchgd(/*flag=*/flag,
              /*current_value=*/current_header,
-             /*compare_value=*/displaced_header,
+             /*compare_value=*/(intptr_t)0,
              /*exchange_value=*/R16_thread,
              /*where=*/temp,
              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
@@ -2095,6 +2470,10 @@
     //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
     //                           "monitor->OwnerIsThread shouldn't be 0", -1);
 #   endif
+
+#if INCLUDE_RTM_OPT
+    } // use_rtm()
+#endif
   }
 
   bind(cont);
@@ -2103,7 +2482,8 @@
 }
 
 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
-                                                 Register temp, Register displaced_header, Register current_header) {
+                                                 Register temp, Register displaced_header, Register current_header,
+                                                 bool try_bias, bool use_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
   assert(flag != CCR0, "bad condition register");
   Label cont;
@@ -2115,10 +2495,24 @@
     return;
   }
 
-  if (UseBiasedLocking) {
+  if (try_bias) {
     biased_locking_exit(flag, oop, current_header, cont);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+    Label L_regular_unlock;
+    ld(current_header, oopDesc::mark_offset_in_bytes(), oop);         // fetch markword
+    andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+    cmpwi(flag, R0, markOopDesc::unlocked_value);                     // bits = 001 unlocked
+    bne(flag, L_regular_unlock);                                      // else RegularLock
+    tend_();                                                          // otherwise end...
+    b(cont);                                                          // ... and we're done
+    bind(L_regular_unlock);
+  }
+#endif
+
   // Find the lock address and load the displaced header from the stack.
   ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
 
@@ -2129,13 +2523,12 @@
   // Handle existing monitor.
   if ((EmitSync & 0x02) == 0) {
     // The object has an existing monitor iff (mark & monitor_value) != 0.
+    RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
     ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
-    andi(temp, current_header, markOopDesc::monitor_value);
-    cmpdi(flag, temp, 0);
-    bne(flag, object_has_monitor);
+    andi_(R0, current_header, markOopDesc::monitor_value);
+    bne(CCR0, object_has_monitor);
   }
 
-
   // Check if it is still a light weight lock, this is is true if we see
   // the stack address of the basicLock in the markOop of the object.
   // Cmpxchg sets flag to cmpd(current_header, box).
@@ -2158,6 +2551,20 @@
     bind(object_has_monitor);
     addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
     ld(temp,             ObjectMonitor::owner_offset_in_bytes(), current_header);
+
+    // It's inflated.
+#if INCLUDE_RTM_OPT
+    if (use_rtm) {
+      Label L_regular_inflated_unlock;
+      // Clean monitor_value bit to get valid pointer
+      cmpdi(flag, temp, 0);
+      bne(flag, L_regular_inflated_unlock);
+      tend_();
+      b(cont);
+      bind(L_regular_inflated_unlock);
+    }
+#endif
+
     ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
     xorr(temp, R16_thread, temp);      // Will be 0 if we are the owner.
     orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
@@ -2441,6 +2848,8 @@
   //   oop_result
   //   R16_thread->in_bytes(JavaThread::vm_result_offset())
 
+  verify_thread();
+
   ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
   li(R0, 0);
   std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
@@ -2462,26 +2871,24 @@
   std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
 }
 
-
-void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
   Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
   if (Universe::narrow_klass_base() != 0) {
     // Use dst as temp if it is free.
-    load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
-    sub(dst, current, R0);
+    sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0);
     current = dst;
   }
   if (Universe::narrow_klass_shift() != 0) {
     srdi(dst, current, Universe::narrow_klass_shift());
     current = dst;
   }
-  mr_if_needed(dst, current); // Move may be required.
+  return current;
 }
 
 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
   if (UseCompressedClassPointers) {
-    encode_klass_not_null(ck, klass);
-    stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
+    Register compressedKlass = encode_klass_not_null(ck, klass);
+    stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
   } else {
     std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
   }
@@ -2514,8 +2921,7 @@
     sldi(shifted_src, src, Universe::narrow_klass_shift());
   }
   if (Universe::narrow_klass_base() != 0) {
-    load_const(R0, Universe::narrow_klass_base());
-    add(dst, shifted_src, R0);
+    add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);
   }
 }
 
diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.hpp b/src/cpu/ppc/vm/macroAssembler_ppc.hpp
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 #define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
 
 #include "asm/assembler.hpp"
+#include "runtime/rtmLocking.hpp"
 #include "utilities/macros.hpp"
 
 // MacroAssembler extends Assembler by a few frequently used macros.
@@ -432,8 +433,8 @@
                 int semantics, bool cmpxchgx_hint = false,
                 Register int_flag_success = noreg, bool contention_hint = false);
   void cmpxchgd(ConditionRegister flag,
-                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
-                int semantics, bool cmpxchgx_hint = false,
+                Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
+                Register addr_base, int semantics, bool cmpxchgx_hint = false,
                 Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);
 
   // interface method calling
@@ -506,8 +507,42 @@
   // biased locking exit case failed.
   void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
 
-  void compiler_fast_lock_object(  ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
-  void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
+  void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
+  void atomic_ori_int(Register addr, Register result, int uimm16);
+
+#if INCLUDE_RTM_OPT
+  void rtm_counters_update(Register abort_status, Register rtm_counters);
+  void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
+  void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data);
+  void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
+  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
+                               Label& retryLabel, Label* checkRetry = NULL);
+  void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
+  void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
+                         Register retry_on_abort_count,
+                         RTMLockingCounters* stack_rtm_counters,
+                         Metadata* method_data, bool profile_rtm,
+                         Label& DONE_LABEL, Label& IsInflated);
+  void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
+                            Register retry_on_busy_count, Register retry_on_abort_count,
+                            RTMLockingCounters* rtm_counters,
+                            Metadata* method_data, bool profile_rtm,
+                            Label& DONE_LABEL);
+#endif
+
+  void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
+                                 Register tmp1, Register tmp2, Register tmp3,
+                                 bool try_bias = UseBiasedLocking,
+                                 RTMLockingCounters* rtm_counters = NULL,
+                                 RTMLockingCounters* stack_rtm_counters = NULL,
+                                 Metadata* method_data = NULL,
+                                 bool use_rtm = false, bool profile_rtm = false);
+
+  void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
+                                   Register tmp1, Register tmp2, Register tmp3,
+                                   bool try_bias = UseBiasedLocking, bool use_rtm = false);
 
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp1, Register tmp2);
@@ -576,7 +611,7 @@
                                       Register tmp = noreg);
 
   // Null allowed.
-  inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg);
+  inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL);
 
   // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
   // src == d allowed.
@@ -593,7 +628,7 @@
   void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
   static int instr_size_for_decode_klass_not_null();
   void decode_klass_not_null(Register dst, Register src = noreg);
-  void encode_klass_not_null(Register dst, Register src = noreg);
+  Register encode_klass_not_null(Register dst, Register src = noreg);
 
   // Load common heap base into register.
   void reinit_heapbase(Register d, Register tmp = noreg);
diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp b/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp
--- a/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -333,19 +333,29 @@
   }
 }
 
-inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) {
+inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) {
   if (UseCompressedOops) {
     lwz(d, offs, s1);
-    decode_heap_oop(d);
+    if (is_null != NULL) {
+      cmpwi(CCR0, d, 0);
+      beq(CCR0, *is_null);
+      decode_heap_oop_not_null(d);
+    } else {
+      decode_heap_oop(d);
+    }
   } else {
     ld(d, offs, s1);
+    if (is_null != NULL) {
+      cmpdi(CCR0, d, 0);
+      beq(CCR0, *is_null);
+    }
   }
 }
 
 inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) {
   Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided.
   if (Universe::narrow_oop_base_overlaps()) {
-    sub(d, current, R30);
+    sub_const_optimized(d, current, Universe::narrow_oop_base(), R0);
     current = d;
   }
   if (Universe::narrow_oop_shift() != 0) {
@@ -358,7 +368,7 @@
 inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
   if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
       Universe::narrow_oop_shift() != 0) {
-    mr(d, R30);
+    load_const_optimized(d, Universe::narrow_oop_base(), R0);
     rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift());
     return d;
   }
@@ -369,7 +379,7 @@
     current = d;
   }
   if (Universe::narrow_oop_base() != NULL) {
-    add(d, current, R30);
+    add_const_optimized(d, current, Universe::narrow_oop_base(), R0);
     current = d;
   }
   return current; // Decoded oop is in this register.
@@ -377,11 +387,19 @@
 
 inline void MacroAssembler::decode_heap_oop(Register d) {
   Label isNull;
+  bool use_isel = false;
   if (Universe::narrow_oop_base() != NULL) {
     cmpwi(CCR0, d, 0);
-    beq(CCR0, isNull);
+    if (VM_Version::has_isel()) {
+      use_isel = true;
+    } else {
+      beq(CCR0, isNull);
+    }
   }
   decode_heap_oop_not_null(d);
+  if (use_isel) {
+    isel_0(d, CCR0, Assembler::equal);
+  }
   bind(isNull);
 }
 
diff --git a/src/cpu/ppc/vm/methodHandles_ppc.hpp b/src/cpu/ppc/vm/methodHandles_ppc.hpp
--- a/src/cpu/ppc/vm/methodHandles_ppc.hpp
+++ b/src/cpu/ppc/vm/methodHandles_ppc.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,9 +27,6 @@
 // These definitions are inlined into class MethodHandles.
 
 // Adapters
-//static unsigned int adapter_code_size() {
-//  return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0);
-//}
 enum /* platform_dependent_constants */ {
   adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
 };
@@ -45,7 +42,9 @@
 
   static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
                                    Register temp_reg, Register temp2_reg) {
-    Unimplemented();
+    verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
+                 temp_reg, temp2_reg,
+                 "reference is a MH");
   }
 
   static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
diff --git a/src/cpu/ppc/vm/ppc.ad b/src/cpu/ppc/vm/ppc.ad
--- a/src/cpu/ppc/vm/ppc.ad
+++ b/src/cpu/ppc/vm/ppc.ad
@@ -447,8 +447,8 @@
   R26,
   R27,
   R28,
-/*R29*/             // global TOC
-/*R30*/             // Narrow Oop Base
+/*R29,*/             // global TOC
+  R30,
   R31
 );
 
@@ -484,58 +484,11 @@
   R26,
   R27,
   R28,
-/*R29*/
-/*R30*/             // Narrow Oop Base
+/*R29,*/
+  R30,
   R31
 );
 
-// Complement-required-in-pipeline operands for narrow oops.
-reg_class bits32_reg_ro_not_complement (
-/*R0*/     // R0
-  R1,      // SP
-  R2,      // TOC
-  R3,
-  R4,
-  R5,
-  R6,
-  R7,
-  R8,
-  R9,
-  R10,
-  R11,
-  R12,
-/*R13,*/   // system thread id
-  R14,
-  R15,
-  R16,    // R16_thread
-  R17,
-  R18,
-  R19,
-  R20,
-  R21,
-  R22,
-/*R23,
-  R24,
-  R25,
-  R26,
-  R27,
-  R28,*/
-/*R29,*/ // TODO: let allocator handle TOC!!
-/*R30,*/
-  R31
-);
-
-// Complement-required-in-pipeline operands for narrow oops.
-// See 64-bit declaration.
-reg_class bits32_reg_ro_complement (
-  R23,
-  R24,
-  R25,
-  R26,
-  R27,
-  R28
-);
-
 reg_class rscratch1_bits32_reg(R11);
 reg_class rscratch2_bits32_reg(R12);
 reg_class rarg1_bits32_reg(R3);
@@ -591,8 +544,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -629,8 +582,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -667,8 +620,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -704,64 +657,11 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
+/*R29_H, R29,*/ // TODO: let allocator handle TOC!!
+  R30_H, R30,
   R31_H, R31
 );
 
-// Complement-required-in-pipeline operands.
-reg_class bits64_reg_ro_not_complement (
-/*R0_H,  R0*/     // R0
-  R1_H,  R1,      // SP
-  R2_H,  R2,      // TOC
-  R3_H,  R3,
-  R4_H,  R4,
-  R5_H,  R5,
-  R6_H,  R6,
-  R7_H,  R7,
-  R8_H,  R8,
-  R9_H,  R9,
-  R10_H, R10,
-  R11_H, R11,
-  R12_H, R12,
-/*R13_H, R13*/   // system thread id
-  R14_H, R14,
-  R15_H, R15,
-  R16_H, R16,    // R16_thread
-  R17_H, R17,
-  R18_H, R18,
-  R19_H, R19,
-  R20_H, R20,
-  R21_H, R21,
-  R22_H, R22,
-/*R23_H, R23,
-  R24_H, R24,
-  R25_H, R25,
-  R26_H, R26,
-  R27_H, R27,
-  R28_H, R28,*/
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
-  R31_H, R31
-);
-
-// Complement-required-in-pipeline operands.
-// This register mask is used for the trap instructions that implement
-// the null checks on AIX. The trap instruction first computes the
-// complement of the value it shall trap on. Because of this, the
-// instruction can not be scheduled in the same cycle as an other
-// instruction reading the normal value of the same register. So we
-// force the value to check into 'bits64_reg_ro_not_complement'
-// and then copy it to 'bits64_reg_ro_complement' for the trap.
-reg_class bits64_reg_ro_complement (
-  R23_H, R23,
-  R24_H, R24,
-  R25_H, R25,
-  R26_H, R26,
-  R27_H, R27,
-  R28_H, R28
-);
-
 
 // ----------------------------
 // Special Class for Condition Code Flags Register
@@ -777,6 +677,17 @@
   CCR7
 );
 
+reg_class int_flags_ro(
+  CCR0,
+  CCR1,
+  CCR2,
+  CCR3,
+  CCR4,
+  CCR5,
+  CCR6,
+  CCR7
+);
+
 reg_class int_flags_CR0(CCR0);
 reg_class int_flags_CR1(CCR1);
 reg_class int_flags_CR6(CCR6);
@@ -2876,7 +2787,7 @@
 
   // Use release_store for card-marking to ensure that previous
   // oop-stores are visible before the card-mark change.
-  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{
+  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // FIXME: Implement this as a cmove and use a fixed condition code
     // register which is written on every transition to compiled code,
@@ -2897,8 +2808,8 @@
     // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the
     // StoreStore barrier conditionally.
     __ lwz(R0, 0, $releaseFieldAddr$$Register);
-    __ cmpwi(CCR0, R0, 0);
-    __ beq_predict_taken(CCR0, skip_storestore);
+    __ cmpwi($crx$$CondRegister, R0, 0);
+    __ beq_predict_taken($crx$$CondRegister, skip_storestore);
 #endif
     __ li(R0, 0);
     __ membar(Assembler::StoreStore);
@@ -3108,7 +3019,7 @@
     nodes->push(n2);
   %}
 
-  enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{
+  enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3123,7 +3034,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{
+  enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3269,7 +3180,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{
+  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3281,7 +3192,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
 
     MacroAssembler _masm(&cbuf);
@@ -3309,7 +3220,7 @@
           l);
   %}
 
-  enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // The scheduler doesn't know about branch shortening, so we set the opcode
     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -3341,7 +3252,7 @@
   %}
 
   // Branch used with Power6 scheduling (can be shortened without changing the node).
-  enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc_short_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // The scheduler doesn't know about branch shortening, so we set the opcode
     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -4700,6 +4611,15 @@
   interface(REG_INTER);
 %}
 
+operand flagsRegSrc() %{
+  constraint(ALLOC_IN_RC(int_flags_ro));
+  match(RegFlags);
+  match(flagsReg);
+  match(flagsRegCR0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Condition Code Flag Register CR0
 operand flagsRegCR0() %{
   constraint(ALLOC_IN_RC(int_flags_CR0));
@@ -4783,6 +4703,13 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits32_reg_ro));
   match(DecodeN reg);
+  format %{ "$reg" %}
+  interface(REG_INTER)
+%}
+
+operand iRegN2P_klass(iRegNsrc reg) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits32_reg_ro));
   match(DecodeNKlass reg);
   format %{ "$reg" %}
   interface(REG_INTER)
@@ -4839,6 +4766,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(DecodeN reg);
+  op_cost(100);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indirectNarrow_klass(iRegNsrc reg) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(DecodeNKlass reg);
   op_cost(100);
   format %{ "[$reg]" %}
@@ -4855,6 +4795,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeN reg) offset);
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeNKlass reg) offset);
   op_cost(100);
   format %{ "[$reg + $offset]" %}
@@ -4871,6 +4824,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeN reg) offset);
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeNKlass reg) offset);
   op_cost(100);
   format %{ "[$reg + $offset]" %}
@@ -4998,9 +4964,9 @@
 // encoding and format. The classic case of this is memory operands.
 // Indirect is not included since its use is limited to Compare & Swap.
 
-opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
+opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
 // Memory operand where offsets are 4-aligned. Required for ld, std.
-opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
+opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
 opclass indirectMemory(indirect, indirectNarrow);
 
 // Special opclass for I and ConvL2I.
@@ -5009,7 +4975,7 @@
 // Operand classes to match encode and decode. iRegN_P2N is only used
 // for storeN. I have never seen an encode node elsewhere.
 opclass iRegN_P2N(iRegNsrc, iRegP2N);
-opclass iRegP_N2P(iRegPsrc, iRegN2P);
+opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
 
 //----------PIPELINE-----------------------------------------------------------
 
@@ -5593,6 +5559,19 @@
   ins_pipe(pipe_class_memory);
 %}
 
+instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
+  match(Set dst (DecodeNKlass (LoadNKlass mem)));
+  // SAPJVM GL 2014-05-21 Differs.
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 &&
+            _kids[0]->_leaf->as_Load()->is_unordered());
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
+  size(4);
+  ins_encode( enc_lwz(dst, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
 // Load Pointer
 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
   match(Set dst (LoadP mem));
@@ -5669,8 +5648,9 @@
 %}
 
 // Load Float acquire.
-instruct loadF_ac(regF dst, memory mem) %{
+instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
   match(Set dst (LoadF mem));
+  effect(TEMP cr0);
   ins_cost(3*MEMORY_REF_COST);
 
   format %{ "LFS     $dst, $mem \t// acquire\n\t"
@@ -5705,8 +5685,9 @@
 %}
 
 // Load Double - aligned acquire.
-instruct loadD_ac(regD dst, memory mem) %{
+instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
   match(Set dst (LoadD mem));
+  effect(TEMP cr0);
   ins_cost(3*MEMORY_REF_COST);
 
   format %{ "LFD     $dst, $mem \t// acquire\n\t"
@@ -6034,11 +6015,10 @@
 instruct loadBase(iRegLdst dst) %{
   effect(DEF dst);
 
-  format %{ "MR      $dst, r30_heapbase" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_or);
-    __ mr($dst$$Register, R30);
+  format %{ "LoadConst $dst, heapbase" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ load_const_optimized($dst$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -6114,7 +6094,7 @@
   effect(TEMP src2);
   ins_cost(DEFAULT_COST);
 
-  format %{ "ORI    $dst, $src1, $src2 \t// narrow klass lo" %}
+  format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_ori);
@@ -6563,8 +6543,9 @@
 // do a releasing store. For this, it gets the address of
 // CMSCollectorCardTableModRefBSExt::_requires_release as input.
 // (Using releaseFieldAddr in the match rule is a hack.)
-instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
+instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
   match(Set mem (StoreCM mem releaseFieldAddr));
+  effect(TEMP crx);
   predicate(false);
   ins_cost(MEMORY_REF_COST);
 
@@ -6572,7 +6553,7 @@
   ins_cannot_rematerialize(true);
 
   format %{ "STB     #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
-  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
+  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr, crx) );
   ins_pipe(pipe_class_memory);
 %}
 
@@ -6589,8 +6570,9 @@
   expand %{
     immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
     iRegLdst releaseFieldAddress;
+    flagsReg crx;
     loadConL_Ex(releaseFieldAddress, baseImm);
-    storeCM_CMS(mem, releaseFieldAddress);
+    storeCM_CMS(mem, releaseFieldAddress, crx);
   %}
 %}
 
@@ -6639,39 +6621,34 @@
   predicate(false);
 
   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
-    __ subf($dst$$Register, R30, $src$$Register);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ sub_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // Conditional sub base.
-instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   match(Set dst (EncodeP (Binary crx src1)));
   predicate(false);
 
-  ins_variable_size_depending_on_alignment(true);
-
   format %{ "BEQ     $crx, done\n\t"
-            "SUB     $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
+            "SUB     $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
             "done:" %}
-  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     Label done;
     __ beq($crx$$CondRegister, done);
-    __ subf($dst$$Register, R30, $src1$$Register);
-    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ sub_const_optimized($dst$$Register, $src1$$Register, Universe::narrow_oop_base(), R0);
     __ bind(done);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // Power 7 can use isel instruction
-instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   match(Set dst (EncodeP (Binary crx src1)));
   predicate(false);
@@ -6777,42 +6754,37 @@
   match(Set dst (DecodeN src));
   predicate(false);
 
-  format %{ "ADD     $dst, $src, R30 \t// DecodeN, add oop base" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_add);
-    __ add($dst$$Register, $src$$Register, R30);
+  format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // conditianal add base for expand
-instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   // NOTICE that the rule is nonsense - we just have to make sure that:
   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
-  match(Set dst (DecodeN (Binary crx src1)));
+  match(Set dst (DecodeN (Binary crx src)));
   predicate(false);
 
-  ins_variable_size_depending_on_alignment(true);
-
   format %{ "BEQ     $crx, done\n\t"
-            "ADD     $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
+            "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
             "done:" %}
-  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     Label done;
     __ beq($crx$$CondRegister, done);
-    __ add($dst$$Register, $src1$$Register, R30);
-    // TODO PPC port  __ endgroup_if_needed(_size == 12);
+    __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
     __ bind(done);
   %}
   ins_pipe(pipe_class_default);
 %}
 
-instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   // NOTICE that the rule is nonsense - we just have to make sure that:
   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
@@ -6888,7 +6860,7 @@
             Universe::narrow_oop_base_disjoint());
   ins_cost(DEFAULT_COST);
 
-  format %{ "MOV     $dst, R30 \t\n"
+  format %{ "MOV     $dst, heapbase \t\n"
             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
   postalloc_expand %{
     loadBaseNode *n1 = new loadBaseNode();
@@ -6946,7 +6918,7 @@
 
     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
     ra_->set_oop(n_cond_set, true);
-    
+
     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -7303,7 +7275,7 @@
 //----------Conditional Move---------------------------------------------------
 
 // Cmove using isel.
-instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7321,7 +7293,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7335,7 +7307,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
+instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7349,7 +7321,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7367,7 +7339,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7381,7 +7353,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
+instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7395,7 +7367,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7414,7 +7386,7 @@
 %}
 
 // Conditional move for RegN. Only cmov(reg, reg).
-instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7428,7 +7400,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
+instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7442,7 +7414,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
+instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7460,7 +7432,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{
+instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7474,7 +7446,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{
+instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7487,7 +7459,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{
+instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7509,7 +7481,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{
+instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7542,8 +7514,9 @@
 // Mem_ptr must be a memory operand, else this node does not get
 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 // can be rematerialized which leads to errors.
-instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
+instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{
   match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
@@ -7560,16 +7533,16 @@
 // Mem_ptr must be a memory operand, else this node does not get
 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 // can be rematerialized which leads to errors.
-instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
-  match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
-  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
-                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
-                noreg, NULL, true);
-  %}
-  ins_pipe(pipe_class_default);
+instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
+  match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
+  ins_cost(2*MEMORY_REF_COST);
+
+  format %{ "STDCX_  if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_stdcx_);
+    __ stdcx_($newVal$$Register, $mem_ptr$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
 %}
 
 // Implement LoadPLocked. Must be ordered against changes of the memory location
@@ -7577,13 +7550,14 @@
 // Don't know whether this is ever used.
 instruct loadPLocked(iRegPdst dst, memory mem) %{
   match(Set dst (LoadPLocked mem));
-  ins_cost(MEMORY_REF_COST);
-
-  format %{ "LD      $dst, $mem \t// loadPLocked\n\t"
-            "TWI     $dst\n\t"
-            "ISYNC" %}
-  size(12);
-  ins_encode( enc_ld_ac(dst, mem) );
+  ins_cost(2*MEMORY_REF_COST);
+
+  format %{ "LDARX   $dst, $mem \t// loadPLocked\n\t" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_ldarx);
+    __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
+  %}
   ins_pipe(pipe_class_memory);
 %}
 
@@ -7593,8 +7567,9 @@
 // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))"  cannot be
 // matched.
 
-instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
+instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7607,8 +7582,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
+instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7621,8 +7597,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
+instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7635,8 +7612,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
+instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7649,48 +7627,54 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddI mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddL mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetI mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetL mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
+instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetP mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
+instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetN mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
@@ -7898,18 +7882,8 @@
 %}
 
 // Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
-// so this rule seems to be unused.
-instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
-  match(Set dst (SubI src1 src2));
-  format %{ "SUBI    $dst, $src1, $src2" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
-    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
-  %}
-  ins_pipe(pipe_class_default);
-%}
+// Immediate Subtraction: The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
+// Don't try to use addi with - $src2$$constant since it can overflow when $src2$$constant == minI16.
 
 // SubI from constant (using subfic).
 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
@@ -7989,22 +7963,6 @@
   ins_pipe(pipe_class_default);
 %}
 
-// Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
-// so this rule seems to be unused.
-// No constant pool entries required.
-instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
-  match(Set dst (SubL src1 src2));
-
-  format %{ "SUBI    $dst, $src1, $src2 \t// long" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
-    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
-  %}
-  ins_pipe(pipe_class_default);
-%}
-
 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 // positive longs and 0xF...F for negative ones.
 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
@@ -8165,7 +8123,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{
+instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
   effect(USE_DEF dst, USE src1, USE crx);
   predicate(false);
 
@@ -8228,7 +8186,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{
+instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
   effect(USE_DEF dst, USE src1, USE crx);
   predicate(false);
 
@@ -8281,7 +8239,7 @@
 %}
 
 // Long Remainder with registers
-instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
   match(Set dst (ModL src1 src2));
   ins_cost(10*DEFAULT_COST);
 
@@ -9011,7 +8969,6 @@
 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
   match(Set dst (AndL src1 src2));
   effect(KILL cr0);
-  ins_cost(DEFAULT_COST);
 
   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
   size(4);
@@ -9803,7 +9760,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE src);
   predicate(false);
@@ -9817,7 +9774,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
   predicate(false);
@@ -9972,7 +9929,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE src);
   predicate(false);
@@ -9986,7 +9943,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
   predicate(false);
@@ -10255,7 +10212,6 @@
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
-    // FIXME: avoid andi_ ?
     __ andi_(R0, $src1$$Register, $src2$$constant);
   %}
   ins_pipe(pipe_class_compare);
@@ -10302,13 +10258,12 @@
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
-    // FIXME: avoid andi_ ?
     __ andi_(R0, $src1$$Register, $src2$$constant);
   %}
   ins_pipe(pipe_class_compare);
 %}
 
-instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsRegSrc crx) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx);
   predicate(false);
@@ -10332,7 +10287,7 @@
   ins_pipe(pipe_class_compare);
 %}
 
-instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsRegSrc crx) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx);
   predicate(false);
@@ -10622,8 +10577,9 @@
 //----------Float Compares----------------------------------------------------
 
 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
+  // Needs matchrule, see cmpDUnordered.
+  match(Set crx (CmpF src1 src2)); 
   // no match-rule, false predicate
-  effect(DEF crx, USE src1, USE src2);
   predicate(false);
 
   format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10731,8 +10687,14 @@
 %}
 
 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
-  // no match-rule, false predicate
-  effect(DEF crx, USE src1, USE src2);
+  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the 
+  // node right before the conditional move using it. 
+  // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
+  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
+  // crashed in register allocation where the flags Reg between cmpDUnoredered and a
+  // conditional move was supposed to be spilled.
+  match(Set crx (CmpD src1 src2)); 
+  // False predicate, shall not be matched.
   predicate(false);
 
   format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10830,7 +10792,7 @@
 %}
 
 // Conditional Near Branch
-instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchConFar'.
   match(If cmp crx);
   effect(USE lbl);
@@ -10853,7 +10815,7 @@
 // expensive.
 //
 // Conditional Far Branch
-instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchCon'.
   match(If cmp crx);
   effect(USE crx, USE lbl);
@@ -10871,7 +10833,7 @@
 %}
 
 // Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConSched(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchCon'.
   match(If cmp crx);
   effect(USE crx, USE lbl);
@@ -10890,7 +10852,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   ins_cost(BRANCH_COST);
@@ -10904,7 +10866,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   predicate(!false /* TODO: PPC port HB_Schedule */);
@@ -10920,7 +10882,7 @@
 %}
 
 // Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndSched(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   predicate(false /* TODO: PPC port HB_Schedule */);
@@ -10969,13 +10931,14 @@
 instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastLock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
-  // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking);
+  predicate(/*(!UseNewFastLockPPC64 || UseBiasedLocking) &&*/ !Compile::current()->use_rtm());
 
   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
-                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                 UseBiasedLocking && !UseOptoBiasInlining); // SAPJVM MD 2014-11-06 UseOptoBiasInlining
     // If locking was successfull, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_locking_Java for the case where crx is 'NE'.
@@ -10983,15 +10946,58 @@
   ins_pipe(pipe_class_compare);
 %}
 
+// Separate version for TM. Use bound register for box to enable USE_KILL.
+instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  match(Set crx (FastLock oop box));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
+  predicate(Compile::current()->use_rtm());
+
+  format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                 /*Biased Locking*/ false,
+                                 _rtm_counters, _stack_rtm_counters,
+                                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
+                                 /*TM*/ true, ra_->C->profile_rtm());
+    // If locking was successfull, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_locking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
 instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastUnlock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  predicate(!Compile::current()->use_rtm());
 
   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
-                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                   UseBiasedLocking && !UseOptoBiasInlining,
+                                   false);
+    // If unlocking was successfull, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  match(Set crx (FastUnlock oop box));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  predicate(Compile::current()->use_rtm());
+
+  format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2 (TM)" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                   /*Biased Locking*/ false, /*TM*/ true);
     // If unlocking was successfull, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
@@ -11658,6 +11664,66 @@
   ins_pipe(pipe_class_default);
 %}
 
+
+//----------Overflow Math Instructions-----------------------------------------
+
+// Note that we have to make sure that XER.SO is reset before using overflow instructions.
+// Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
+// Seems like only Long intrinsincs have an advantage. (The only expensive one is OverflowMulL.)
+
+instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowAddL op1 op2));
+
+  format %{ "add_    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ addo_(R0, $op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowSubL op1 op2));
+
+  format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ subfo_(R0, $op2$$Register, $op1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
+  match(Set cr0 (OverflowSubL zero op2));
+
+  format %{ "nego_   R0, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ nego_(R0, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowMulL op1 op2));
+
+  format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ mulldo_(R0, $op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+
 // ============================================================================
 // Safepoint Instruction
 
diff --git a/src/cpu/ppc/vm/register_definitions_ppc.cpp b/src/cpu/ppc/vm/register_definitions_ppc.cpp
--- a/src/cpu/ppc/vm/register_definitions_ppc.cpp
+++ b/src/cpu/ppc/vm/register_definitions_ppc.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,19 +23,10 @@
  *
  */
 
-// make sure the defines don't screw up the declarations later on in this file
+// Make sure the defines don't screw up the declarations later on in this file.
 #define DONT_USE_REGISTER_DEFINES
 
-#include "precompiled.hpp"
-#include "asm/macroAssembler.hpp"
 #include "asm/register.hpp"
-#include "register_ppc.hpp"
-#ifdef TARGET_ARCH_MODEL_ppc_32
-# include "interp_masm_ppc_32.hpp"
-#endif
-#ifdef TARGET_ARCH_MODEL_ppc_64
-# include "interp_masm_ppc_64.hpp"
-#endif
 
 REGISTER_DEFINITION(Register, noreg);
 
diff --git a/src/cpu/ppc/vm/relocInfo_ppc.cpp b/src/cpu/ppc/vm/relocInfo_ppc.cpp
--- a/src/cpu/ppc/vm/relocInfo_ppc.cpp
+++ b/src/cpu/ppc/vm/relocInfo_ppc.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,14 +25,12 @@
 
 #include "precompiled.hpp"
 #include "asm/assembler.inline.hpp"
-#include "assembler_ppc.inline.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_ppc.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/safepoint.hpp"
 
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
-  bool copy_back_to_oop_pool = true;  // TODO: PPC port
   // The following comment is from the declaration of DataRelocation:
   //
   //  "The "o" (displacement) argument is relevant only to split relocations
diff --git a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp
+++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
+#include "frame_ppc.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "oops/compiledICHolder.hpp"
@@ -194,8 +195,8 @@
   RegisterSaver_LiveIntReg(   R27 ),
   RegisterSaver_LiveIntReg(   R28 ),
   RegisterSaver_LiveIntReg(   R29 ),
-  RegisterSaver_LiveIntReg(   R31 ),
-  RegisterSaver_LiveIntReg(   R30 ), // r30 must be the last register
+  RegisterSaver_LiveIntReg(   R30 ),
+  RegisterSaver_LiveIntReg(   R31 ), // must be the last register (see save/restore functions below)
 };
 
 OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
@@ -229,29 +230,30 @@
 
   BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
 
-  // Save r30 in the last slot of the not yet pushed frame so that we
+  // Save r31 in the last slot of the not yet pushed frame so that we
   // can use it as scratch reg.
-  __ std(R30, -reg_size, R1_SP);
+  __ std(R31, -reg_size, R1_SP);
   assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
          "consistency check");
 
   // save the flags
   // Do the save_LR_CR by hand and adjust the return pc if requested.
-  __ mfcr(R30);
-  __ std(R30, _abi(cr), R1_SP);
+  __ mfcr(R31);
+  __ std(R31, _abi(cr), R1_SP);
   switch (return_pc_location) {
-    case return_pc_is_lr:    __ mflr(R30);           break;
-    case return_pc_is_r4:    __ mr(R30, R4);     break;
+    case return_pc_is_lr:    __ mflr(R31);           break;
+    case return_pc_is_r4:    __ mr(R31, R4);     break;
     case return_pc_is_thread_saved_exception_pc:
-                                 __ ld(R30, thread_(saved_exception_pc)); break;
+                             __ ld(R31, thread_(saved_exception_pc)); break;
     default: ShouldNotReachHere();
   }
-  if (return_pc_adjustment != 0)
-    __ addi(R30, R30, return_pc_adjustment);
-  __ std(R30, _abi(lr), R1_SP);
+  if (return_pc_adjustment != 0) {
+    __ addi(R31, R31, return_pc_adjustment);
+  }
+  __ std(R31, _abi(lr), R1_SP);
 
   // push a new frame
-  __ push_frame(frame_size_in_bytes, R30);
+  __ push_frame(frame_size_in_bytes, R31);
 
   // save all registers (ints and floats)
   offset = register_save_offset;
@@ -261,7 +263,7 @@
 
     switch (reg_type) {
       case RegisterSaver::int_reg: {
-        if (reg_num != 30) { // We spilled R30 right at the beginning.
+        if (reg_num != 31) { // We spilled R31 right at the beginning.
           __ std(as_Register(reg_num), offset, R1_SP);
         }
         break;
@@ -272,8 +274,8 @@
       }
       case RegisterSaver::special_reg: {
         if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
-          __ mfctr(R30);
-          __ std(R30, offset, R1_SP);
+          __ mfctr(R31);
+          __ std(R31, offset, R1_SP);
         } else {
           Unimplemented();
         }
@@ -321,7 +323,7 @@
 
     switch (reg_type) {
       case RegisterSaver::int_reg: {
-        if (reg_num != 30) // R30 restored at the end, it's the tmp reg!
+        if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
           __ ld(as_Register(reg_num), offset, R1_SP);
         break;
       }
@@ -332,8 +334,8 @@
       case RegisterSaver::special_reg: {
         if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
           if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
-            __ ld(R30, offset, R1_SP);
-            __ mtctr(R30);
+            __ ld(R31, offset, R1_SP);
+            __ mtctr(R31);
           }
         } else {
           Unimplemented();
@@ -350,10 +352,10 @@
   __ pop_frame();
 
   // restore the flags
-  __ restore_LR_CR(R30);
+  __ restore_LR_CR(R31);
 
   // restore scratch register's value
-  __ ld(R30, -reg_size, R1_SP);
+  __ ld(R31, -reg_size, R1_SP);
 
   BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
 }
@@ -2021,6 +2023,8 @@
   __ push_frame(frame_size_in_bytes, r_temp_1);          // Push the c2n adapter's frame.
   frame_done_pc = (intptr_t)__ pc();
 
+  __ verify_thread();
+
   // Native nmethod wrappers never take possesion of the oop arguments.
   // So the caller will gc the arguments.
   // The only thing we need an oopMap for is if the call is static.
@@ -2594,7 +2598,7 @@
 }
 
 uint SharedRuntime::out_preserve_stack_slots() {
-#ifdef COMPILER2
+#if defined(COMPILER1) || defined(COMPILER2)
   return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
 #else
   return 0;
@@ -2868,11 +2872,6 @@
   __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
   __ BIND(skip_restore_excp);
 
-  // reload narrro_oop_base
-  if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
-    __ load_const_optimized(R30, Universe::narrow_oop_base());
-  }
-
   __ pop_frame();
 
   // stack: (deoptee, optional i2c, caller of deoptee, ...).
diff --git a/src/cpu/ppc/vm/stubGenerator_ppc.cpp b/src/cpu/ppc/vm/stubGenerator_ppc.cpp
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp
+++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp
@@ -261,9 +261,6 @@
       // global toc register
       __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
 
-      // Load narrow oop base.
-      __ reinit_heapbase(R30, R11_scratch1);
-
       // Remember the senderSP so we interpreter can pop c2i arguments off of the stack
       // when called via a c2i.
 
@@ -418,6 +415,23 @@
   // or native call stub.  The pending exception in Thread is
   // converted into a Java-level exception.
   //
+  // Read:
+  //
+  //   LR:     The pc the runtime library callee wants to return to.
+  //           Since the exception occurred in the callee, the return pc
+  //           from the point of view of Java is the exception pc.
+  //   thread: Needed for method handles.
+  //
+  // Invalidate:
+  //
+  //   volatile registers (except below).
+  //
+  // Update:
+  //
+  //   R4_ARG2: exception
+  //
+  // (LR is unchanged and is live out).
+  //
   address generate_forward_exception() {
     StubCodeMark mark(this, "StubRoutines", "forward_exception");
     address start = __ pc();
@@ -1256,9 +1270,9 @@
     Register tmp3 = R8_ARG6;
 
 #if defined(ABI_ELFv2)
-     address nooverlap_target = aligned ?
-       StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
-       StubRoutines::jbyte_disjoint_arraycopy();
+    address nooverlap_target = aligned ?
+      StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
+      StubRoutines::jbyte_disjoint_arraycopy();
 #else
     address nooverlap_target = aligned ?
       ((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
diff --git a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -264,11 +264,11 @@
       __ cmpdi(CCR0, Rmdo, 0);
       __ beq(CCR0, no_mdo);
 
-      // Increment invocation counter in the MDO.
-      const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
-      __ lwz(Rscratch2, mdo_ic_offs, Rmdo);
+      // Increment backedge counter in the MDO.
+      const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+      __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
       __ addi(Rscratch2, Rscratch2, increment);
-      __ stw(Rscratch2, mdo_ic_offs, Rmdo);
+      __ stw(Rscratch2, mdo_bc_offs, Rmdo);
       __ load_const_optimized(Rscratch1, mask, R0);
       __ and_(Rscratch1, Rscratch2, Rscratch1);
       __ bne(CCR0, done);
@@ -276,12 +276,12 @@
     }
 
     // Increment counter in MethodCounters*.
-    const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+    const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
     __ bind(no_mdo);
     __ get_method_counters(R19_method, R3_counters, done);
-    __ lwz(Rscratch2, mo_ic_offs, R3_counters);
+    __ lwz(Rscratch2, mo_bc_offs, R3_counters);
     __ addi(Rscratch2, Rscratch2, increment);
-    __ stw(Rscratch2, mo_ic_offs, R3_counters);
+    __ stw(Rscratch2, mo_bc_offs, R3_counters);
     __ load_const_optimized(Rscratch1, mask, R0);
     __ and_(Rscratch1, Rscratch2, Rscratch1);
     __ beq(CCR0, *overflow);
@@ -611,12 +611,7 @@
 // For others we can use a normal (native) entry.
 
 inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
-  // Provide math entry with debugging on demand.
-  // Note: Debugging changes which code will get executed:
-  // Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call.
-  // Not debugging and enabled InlineIntrinics: processor instruction will get used.
-  // Result might differ slightly due to rounding etc.
-  if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry.
+  if (!InlineIntrinsics) return false;
 
   return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
           (kind==Interpreter::java_lang_math_abs));
@@ -628,15 +623,8 @@
     return Interpreter::entry_for_kind(Interpreter::zerolocals);
   }
 
-  Label Lslow_path;
-  const Register Rjvmti_mode = R11_scratch1;
   address entry = __ pc();
 
-  // Provide math entry with debugging on demand.
-  __ lwz(Rjvmti_mode, thread_(interp_only_mode));
-  __ cmpwi(CCR0, Rjvmti_mode, 0);
-  __ bne(CCR0, Lslow_path); // jvmti_mode!=0
-
   __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
 
   // Pop c2i arguments (if any) off when we return.
@@ -659,9 +647,6 @@
   // And we're done.
   __ blr();
 
-  // Provide slow path for JVMTI case.
-  __ bind(Lslow_path);
-  __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);
   __ flush();
 
   return entry;
diff --git a/src/cpu/ppc/vm/templateInterpreter_ppc.hpp b/src/cpu/ppc/vm/templateInterpreter_ppc.hpp
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.hpp
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
   // Run with +PrintInterpreter to get the VM to print out the size.
   // Max size with JVMTI
 
-  const static int InterpreterCodeSize = 210*K;
+  const static int InterpreterCodeSize = 230*K;
 
 #endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
 
diff --git a/src/cpu/ppc/vm/templateTable_ppc_64.cpp b/src/cpu/ppc/vm/templateTable_ppc_64.cpp
--- a/src/cpu/ppc/vm/templateTable_ppc_64.cpp
+++ b/src/cpu/ppc/vm/templateTable_ppc_64.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -375,23 +375,22 @@
 
   int index_size = wide ? sizeof(u2) : sizeof(u1);
   const Register Rscratch = R11_scratch1;
-  Label resolved;
+  Label is_null;
 
   // We are resolved if the resolved reference cache entry contains a
   // non-null object (CallSite, etc.)
   __ get_cache_index_at_bcp(Rscratch, 1, index_size);  // Load index.
-  __ load_resolved_reference_at_index(R17_tos, Rscratch);
-  __ cmpdi(CCR0, R17_tos, 0);
-  __ bne(CCR0, resolved);
+  __ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null);
+  __ verify_oop(R17_tos);
+  __ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));
+
+  __ bind(is_null);
   __ load_const_optimized(R3_ARG1, (int)bytecode());
 
   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 
   // First time invocation - must resolve first.
   __ call_VM(R17_tos, entry, R3_ARG1);
-
-  __ align(32, 12);
-  __ bind(resolved);
   __ verify_oop(R17_tos);
 }
 
@@ -3795,9 +3794,9 @@
   transition(atos, itos);
 
   Label Ldone, Lis_null, Lquicked, Lresolved;
-  Register Roffset         = R5_ARG3,
+  Register Roffset         = R6_ARG4,
            RobjKlass       = R4_ARG2,
-           RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register.
+           RspecifiedKlass = R5_ARG3,
            Rcpool          = R11_scratch1,
            Rtags           = R12_scratch2;
 
diff --git a/src/cpu/ppc/vm/vm_version_ppc.cpp b/src/cpu/ppc/vm/vm_version_ppc.cpp
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp
@@ -32,12 +32,13 @@
 #include "runtime/os.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "utilities/defaultStream.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "vm_version_ppc.hpp"
 
 # include <sys/sysinfo.h>
 
 int VM_Version::_features = VM_Version::unknown_m;
-int VM_Version::_measured_cache_line_size = 128; // default value
+int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
 const char* VM_Version::_features_str = "";
 bool VM_Version::_is_determine_features_test_running = false;
 
@@ -55,7 +56,9 @@
 
   // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
   if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
-    if (VM_Version::has_popcntw()) {
+    if (VM_Version::has_lqarx()) {
+      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
+    } else if (VM_Version::has_popcntw()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
     } else if (VM_Version::has_cmpb()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
@@ -66,8 +69,14 @@
     }
   }
   guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
-            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
-            "PowerArchitecturePPC64 should be 0, 5, 6 or 7");
+            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
+            PowerArchitecturePPC64 == 8,
+            "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
+
+  // Power 8: Configure Data Stream Control Register.
+  if (PowerArchitecturePPC64 >= 8) {
+    config_dscr();
+  }
 
   if (!UseSIGTRAP) {
     MSG(TrapBasedICMissChecks);
@@ -97,7 +106,7 @@
   // Create and print feature-string.
   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
   jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_fsqrt()   ? " fsqrt"   : ""),
                (has_isel()    ? " isel"    : ""),
                (has_lxarxeh() ? " lxarxeh" : ""),
@@ -106,11 +115,17 @@
                (has_popcntb() ? " popcntb" : ""),
                (has_popcntw() ? " popcntw" : ""),
                (has_fcfids()  ? " fcfids"  : ""),
-               (has_vand()    ? " vand"    : "")
+               (has_vand()    ? " vand"    : ""),
+               (has_lqarx()   ? " lqarx"   : ""),
+               (has_vcipher() ? " vcipher" : ""),
+               (has_vpmsumb() ? " vpmsumb" : ""),
+               (has_tcheck()  ? " tcheck"  : "")
                // Make sure number of %s matches num_features!
               );
   _features_str = os::strdup(buf);
-  NOT_PRODUCT(if (Verbose) print_features(););
+  if (Verbose) {
+    print_features();
+  }
 
   // PPC64 supports 8-byte compare-exchange operations (see
   // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
@@ -171,6 +186,58 @@
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
+  // Adjust RTM (Restricted Transactional Memory) flags.
+  if (!has_tcheck() && UseRTMLocking) {
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    // VM_Version_init() is executed after UseBiasedLocking is used
+    // in Thread::allocate().
+    vm_exit_during_initialization("RTM instructions are not available on this CPU");
+  }
+
+  if (UseRTMLocking) {
+#if INCLUDE_RTM_OPT
+    if (!UnlockExperimentalVMOptions) {
+      vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. "
+                                    "It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
+    } else {
+      warning("UseRTMLocking is only available as experimental option on this platform.");
+    }
+    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
+      // RTM locking should be used only for applications with
+      // high lock contention. For now we do not use it by default.
+      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
+    }
+    if (!is_power_of_2(RTMTotalCountIncrRate)) {
+      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
+      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
+    }
+    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
+      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
+      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
+    }
+    FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM.
+    guarantee(RTMSpinLoopCount > 0, "unsupported");
+#else
+    // Only C2 does RTM locking optimization.
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
+#endif
+  } else { // !UseRTMLocking
+    if (UseRTMForStackLocks) {
+      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
+        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
+      }
+      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
+    }
+    if (UseRTMDeopt) {
+      FLAG_SET_DEFAULT(UseRTMDeopt, false);
+    }
+    if (PrintPreciseRTMLockingStatistics) {
+      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
+    }
+  }
 
   // This machine does not allow unaligned memory accesses
   if (UseUnalignedAccesses) {
@@ -180,6 +247,27 @@
   }
 }
 
+bool VM_Version::use_biased_locking() {
+#if INCLUDE_RTM_OPT
+  // RTM locking is most useful when there is high lock contention and
+  // low data contention. With high lock contention the lock is usually
+  // inflated and biased locking is not suitable for that case.
+  // RTM locking code requires that biased locking is off.
+  // Note: we can't switch off UseBiasedLocking in get_processor_features()
+  // because it is used by Thread::allocate() which is called before
+  // VM_Version::initialize().
+  if (UseRTMLocking && UseBiasedLocking) {
+    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
+      FLAG_SET_DEFAULT(UseBiasedLocking, false);
+    } else {
+      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
+      UseBiasedLocking = false;
+    }
+  }
+#endif
+  return UseBiasedLocking;
+}
+
 void VM_Version::print_features() {
   tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
 }
@@ -443,16 +531,19 @@
   // Don't use R0 in ldarx.
   // Keep R3_ARG1 unmodified, it contains &field (see below).
   // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
-  a->fsqrt(F3, F4);                            // code[0] -> fsqrt_m
-  a->fsqrts(F3, F4);                           // code[1] -> fsqrts_m
-  a->isel(R7, R5, R6, 0);                      // code[2] -> isel_m
-  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
-  a->cmpb(R7, R5, R6);                         // code[4] -> bcmp
-  //a->mftgpr(R7, F3);                         // code[5] -> mftgpr
-  a->popcntb(R7, R5);                          // code[6] -> popcntb
-  a->popcntw(R7, R5);                          // code[7] -> popcntw
-  a->fcfids(F3, F4);                           // code[8] -> fcfids
-  a->vand(VR0, VR0, VR0);                      // code[9] -> vand
+  a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
+  a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
+  a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
+  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
+  a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
+  a->popcntb(R7, R5);                          // code[5]  -> popcntb
+  a->popcntw(R7, R5);                          // code[6]  -> popcntw
+  a->fcfids(F3, F4);                           // code[7]  -> fcfids
+  a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
+  a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
+  a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
+  a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
+  a->tcheck(0);                                // code[12] -> tcheck
   a->blr();
 
   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -491,11 +582,14 @@
   if (code[feature_cntr++]) features |= isel_m;
   if (code[feature_cntr++]) features |= lxarxeh_m;
   if (code[feature_cntr++]) features |= cmpb_m;
-  //if(code[feature_cntr++])features |= mftgpr_m;
   if (code[feature_cntr++]) features |= popcntb_m;
   if (code[feature_cntr++]) features |= popcntw_m;
   if (code[feature_cntr++]) features |= fcfids_m;
   if (code[feature_cntr++]) features |= vand_m;
+  if (code[feature_cntr++]) features |= lqarx_m;
+  if (code[feature_cntr++]) features |= vcipher_m;
+  if (code[feature_cntr++]) features |= vpmsumb_m;
+  if (code[feature_cntr++]) features |= tcheck_m;
 
   // Print the detection code.
   if (PrintAssembly) {
@@ -507,6 +601,69 @@
   _features = features;
 }
 
+// Power 8: Configure Data Stream Control Register.
+void VM_Version::config_dscr() {
+  assert(has_tcheck(), "Only execute on Power 8 or later!");
+
+  // 7 InstWords for each call (function descriptor + blr instruction).
+  const int code_size = (2+2*7)*BytesPerInstWord;
+
+  // Allocate space for the code.
+  ResourceMark rm;
+  CodeBuffer cb("config_dscr", code_size, 0);
+  MacroAssembler* a = new MacroAssembler(&cb);
+
+  // Emit code.
+  uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd();
+  uint32_t *code = (uint32_t *)a->pc();
+  a->mfdscr(R3);
+  a->blr();
+
+  void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd();
+  a->mtdscr(R3);
+  a->blr();
+
+  uint32_t *code_end = (uint32_t *)a->pc();
+  a->flush();
+
+  // Print the detection code.
+  if (PrintAssembly) {
+    ttyLocker ttyl;
+    tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code);
+    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
+  }
+
+  // Apply the configuration if needed.
+  uint64_t dscr_val = (*get_dscr)();
+  if (Verbose) {
+    tty->print_cr("dscr value was 0x%lx" , dscr_val);
+  }
+  bool change_requested = false;
+  if (DSCR_PPC64 != (uintx)-1) {
+    dscr_val = DSCR_PPC64;
+    change_requested = true;
+  }
+  if (DSCR_DPFD_PPC64 <= 7) {
+    uint64_t mask = 0x7;
+    if ((dscr_val & mask) != DSCR_DPFD_PPC64) {
+      dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64);
+      change_requested = true;
+    }
+  }
+  if (DSCR_URG_PPC64 <= 7) {
+    uint64_t mask = 0x7 << 6;
+    if ((dscr_val & mask) != DSCR_DPFD_PPC64 << 6) {
+      dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
+      change_requested = true;
+    }
+  }
+  if (change_requested) {
+    (*set_dscr)(dscr_val);
+    if (Verbose) {
+      tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)());
+    }
+  }
+}
 
 static int saved_features = 0;
 
diff --git a/src/cpu/ppc/vm/vm_version_ppc.hpp b/src/cpu/ppc/vm/vm_version_ppc.hpp
--- a/src/cpu/ppc/vm/vm_version_ppc.hpp
+++ b/src/cpu/ppc/vm/vm_version_ppc.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,10 @@
     popcntw,
     fcfids,
     vand,
-    dcba,
+    lqarx,
+    vcipher,
+    vpmsumb,
+    tcheck,
     num_features // last entry to count features
   };
   enum Feature_Flag_Set {
@@ -55,7 +58,10 @@
     popcntw_m             = (1 << popcntw),
     fcfids_m              = (1 << fcfids ),
     vand_m                = (1 << vand   ),
-    dcba_m                = (1 << dcba   ),
+    lqarx_m               = (1 << lqarx  ),
+    vcipher_m             = (1 << vcipher),
+    vpmsumb_m             = (1 << vpmsumb),
+    tcheck_m              = (1 << tcheck ),
     all_features_m        = -1
   };
   static int  _features;
@@ -65,12 +71,16 @@
 
   static void print_features();
   static void determine_features(); // also measures cache line size
+  static void config_dscr(); // Power 8: Configure Data Stream Control Register.
   static void determine_section_size();
   static void power6_micro_bench();
 public:
   // Initialization
   static void initialize();
 
+  // Override Abstract_VM_Version implementation
+  static bool use_biased_locking();
+
   static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
   // CPU instruction support
   static bool has_fsqrt()   { return (_features & fsqrt_m) != 0; }
@@ -82,7 +92,10 @@
   static bool has_popcntw() { return (_features & popcntw_m) != 0; }
   static bool has_fcfids()  { return (_features & fcfids_m) != 0; }
   static bool has_vand()    { return (_features & vand_m) != 0; }
-  static bool has_dcba()    { return (_features & dcba_m) != 0; }
+  static bool has_lqarx()   { return (_features & lqarx_m) != 0; }
+  static bool has_vcipher() { return (_features & vcipher_m) != 0; }
+  static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
+  static bool has_tcheck()  { return (_features & tcheck_m) != 0; }
 
   static const char* cpu_features() { return _features_str; }
 
diff --git a/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp b/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp
--- a/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp
+++ b/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,6 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "code/vtableStubs.hpp"
 #include "interp_masm_ppc_64.hpp"