--- old/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	2014-08-22 15:27:32.040109035 -0700
+++ new/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	2014-08-22 15:27:31.948109038 -0700
@@ -4480,31 +4480,40 @@
 }
 
 HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose,
-                                                  size_t word_size) {
+                                                  size_t word_size,
+                                                  oop const old,
+                                                  uint age) {
+
+  HeapWord* result = NULL;
+
   if (purpose == GCAllocForSurvived) {
-    HeapWord* result = survivor_attempt_allocation(word_size);
+    result = survivor_attempt_allocation(word_size);
     if (result != NULL) {
-      return result;
+      _g1h->_gc_tracer_stw->report_promotion_to_new_plab(old, age, false, word_size);
     } else {
       // Let's try to allocate in the old gen in case we can fit the
       // object there.
-      return old_attempt_allocation(word_size);
+      result = old_attempt_allocation(word_size);
+      if (result != NULL) {
+        _g1h->_gc_tracer_stw->report_promotion_to_new_plab(old, age, true, word_size);
+      }
     }
   } else {
     assert(purpose == GCAllocForTenured, "sanity");
-    HeapWord* result = old_attempt_allocation(word_size);
+    result = old_attempt_allocation(word_size);
     if (result != NULL) {
-      return result;
+      _g1h->_gc_tracer_stw->report_promotion_to_new_plab(old, age, true, word_size);
    } else {
       // Let's try to allocate in the survivors in case we can fit the
       // object there.
-      return survivor_attempt_allocation(word_size);
+      result = survivor_attempt_allocation(word_size);
+      if (result != NULL) {
+        _g1h->_gc_tracer_stw->report_promotion_to_new_plab(old, age, false, word_size);
+      }
     }
   }
 
-  ShouldNotReachHere();
-  // Trying to keep some compilers happy.
-  return NULL;
+  return result;
 }
 
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
--- old/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	2014-08-22 15:27:32.432109022 -0700
+++ new/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	2014-08-22 15:27:32.348109024 -0700
@@ -611,7 +611,8 @@
   // allocation region, either by picking one or expanding the
   // heap, and then allocate a block of the given size. The block
   // may not be a humongous - it must fit into a single heap region.
-  HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size);
+  HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size,
+                                   oop const old, uint age);
 
   HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose,
                                     HeapRegion*    alloc_region,
--- old/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	2014-08-22 15:27:32.760109010 -0700
+++ new/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	2014-08-22 15:27:32.676109013 -0700
@@ -160,11 +160,11 @@
          (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1h->g1_policy();
   markOop m = old->mark();
-  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
+  uint age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
                                            : m->age();
   GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                              word_sz);
-  HeapWord* obj_ptr = allocate(alloc_purpose, word_sz);
+  HeapWord* obj_ptr = allocate(alloc_purpose, old, age);
 #ifndef PRODUCT
   // Should this evacuation fail?
   if (_g1h->evacuation_should_fail()) {
@@ -252,15 +252,17 @@
   return obj;
 }
 
-HeapWord* G1ParScanThreadState::allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
+HeapWord* G1ParScanThreadState::allocate_slow(GCAllocPurpose purpose,
+                                              oop const old, uint age) {
   HeapWord* obj = NULL;
+  size_t word_sz = old->size();
   size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
   if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
     G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
     add_to_alloc_buffer_waste(alloc_buf->words_remaining());
     alloc_buf->retire(false /* end_of_gc */, false /* retain */);
 
-    HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size);
+    HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size, old, age);
     if (buf == NULL) {
       return NULL; // Let caller handle allocation failure.
     }
@@ -271,7 +273,7 @@
     obj = alloc_buf->allocate(word_sz);
     assert(obj != NULL, "buffer was definitely big enough...");
   } else {
-    obj = _g1h->par_allocate_during_gc(purpose, word_sz);
+    obj = _g1h->par_allocate_during_gc(purpose, word_sz, old, age);
   }
   return obj;
 }
@@ -287,8 +289,9 @@
   }
 }
 
-HeapWord* G1ParScanThreadState::allocate(GCAllocPurpose purpose, size_t word_sz) {
+HeapWord* G1ParScanThreadState::allocate(GCAllocPurpose purpose, oop const old, uint age) {
   HeapWord* obj = NULL;
+  size_t word_sz = old->size();
   if (purpose == GCAllocForSurvived) {
     obj = alloc_buffer(GCAllocForSurvived)->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
   } else {
@@ -297,7 +300,7 @@
   if (obj != NULL) {
     return obj;
   }
-  return allocate_slow(purpose, word_sz);
+  return allocate_slow(purpose, old, age);
 }
 
 void G1ParScanThreadState::retire_alloc_buffers() {
--- old/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	2014-08-22 15:27:33.068108999 -0700
+++ new/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	2014-08-22 15:27:32.984109002 -0700
@@ -128,8 +128,8 @@
 
  private:
 
-  inline HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz);
-  inline HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz);
+  inline HeapWord* allocate(GCAllocPurpose purpose, oop const old, uint age);
+  inline HeapWord* allocate_slow(GCAllocPurpose purpose, oop const old, uint age);
   inline void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz);
 
  public:
--- old/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	2014-08-22 15:27:33.376108989 -0700
+++ new/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	2014-08-22 15:27:33.292108992 -0700
@@ -69,12 +69,14 @@
                                        ObjToScanQueueSet* work_queue_set_,
                                        Stack<oop, mtGC>* overflow_stacks_,
                                        size_t desired_plab_sz_,
+                                       ParNewTracer* gc_tracer,
                                        ParallelTaskTerminator& term_) :
   _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
   _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
   _overflow_stack(overflow_stacks_ ? overflow_stacks_ + thread_num_ : NULL),
   _ageTable(false), // false ==> not the global age table, no perf data.
   _to_space_alloc_buffer(desired_plab_sz_),
+  _gc_tracer(gc_tracer),
   _to_space_closure(gen_, this), _old_gen_closure(gen_, this),
   _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this),
   _older_gen_closure(gen_, this),
@@ -222,7 +224,7 @@
   assert(young_gen()->overflow_list() == NULL, "Error");
 }
 
-HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
+HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz, oop const old, uint age) {
 
   // Otherwise, if the object is small enough, try to reallocate the
   // buffer.
@@ -252,6 +254,7 @@
       plab->set_word_size(buf_size);
       plab->set_buf(buf_space);
       record_survivor_plab(buf_space, buf_size);
+      gc_tracer()->report_promotion_to_new_plab(old, age, false, buf_size);
       obj = plab->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
       // Note that we cannot compare buf_size < word_sz below
       // because of AlignmentReserve (see ParGCAllocBuffer::allocate()).
@@ -267,6 +270,7 @@
 
   } else {
     // Too large; allocate the object individually.
+    gc_tracer()->report_promotion_to_new_plab(old, age, false, old->size());
     obj = sp->par_allocate(word_sz);
   }
 }
@@ -303,6 +307,7 @@
                         ObjToScanQueueSet& queue_set,
                         Stack<oop, mtGC>* overflow_stacks_,
                         size_t desired_plab_sz,
+                        ParNewTracer* gc_tracer,
                         ParallelTaskTerminator& term);
 
   ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }
@@ -337,7 +342,8 @@
   int num_threads, Space& to_space, ParNewGeneration& gen,
   Generation& old_gen, ObjToScanQueueSet& queue_set,
   Stack<oop, mtGC>* overflow_stacks,
-  size_t desired_plab_sz, ParallelTaskTerminator& term)
+  size_t desired_plab_sz,
+  ParNewTracer* gc_tracer, ParallelTaskTerminator& term)
   : ResourceArray(sizeof(ParScanThreadState), num_threads),
     _gen(gen), _next_gen(old_gen), _term(term)
 {
@@ -348,7 +354,7 @@
   for (int i = 0; i < num_threads; ++i) {
     new ((ParScanThreadState*)_data + i)
         ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set,
-                           overflow_stacks, desired_plab_sz, term);
+                           overflow_stacks, desired_plab_sz, gc_tracer, term);
   }
 }
 
@@ -980,7 +986,7 @@
   ParallelTaskTerminator _term(n_workers, task_queues());
   ParScanThreadStateSet thread_state_set(workers->active_workers(),
                                          *to(), *this, *_next_gen, *task_queues(),
-                                         _overflow_stacks, desired_plab_sz(), _term);
+                                         _overflow_stacks, desired_plab_sz(), &gc_tracer, _term);
 
   ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set);
   gch->set_par_threads(n_workers);
@@ -1180,7 +1186,7 @@
 
   // Try allocating obj in to-space (unless too old)
   if (dummyOld.age() < tenuring_threshold()) {
-    new_obj = (oop)par_scan_state->alloc_in_to_space(sz);
+    new_obj = (oop)par_scan_state->alloc_in_to_space(sz, old, dummyOld.age());
     if (new_obj == NULL) {
       set_survivor_overflow(true);
     }
@@ -1307,7 +1313,7 @@
 
   // Try allocating obj in to-space (unless too old)
   if (dummyOld.age() < tenuring_threshold()) {
-    new_obj = (oop)par_scan_state->alloc_in_to_space(sz);
+    new_obj = (oop)par_scan_state->alloc_in_to_space(sz, old, dummyOld.age());
     if (new_obj == NULL) {
       set_survivor_overflow(true);
     }
--- old/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	2014-08-22 15:27:33.708108977 -0700
+++ new/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	2014-08-22 15:27:33.624108980 -0700
@@ -95,6 +95,9 @@
 
   HeapWord *_young_old_boundary;
 
+  ParNewTracer* _gc_tracer;
+  ParNewTracer* gc_tracer() { return _gc_tracer; }
+
   int _hash_seed;
   int _thread_num;
   ageTable _ageTable;
@@ -132,6 +135,7 @@
                      ObjToScanQueueSet* work_queue_set_,
                      Stack<oop, mtGC>* overflow_stacks_,
                      size_t desired_plab_sz_,
+                     ParNewTracer* gc_tracer,
                     ParallelTaskTerminator& term_);
 
  public:
@@ -165,12 +169,12 @@
   int thread_num() { return _thread_num; }
 
   // Allocate a to-space block of size "sz", or else return NULL.
-  HeapWord* alloc_in_to_space_slow(size_t word_sz);
+  HeapWord* alloc_in_to_space_slow(size_t word_sz, oop const old, uint age);
 
-  HeapWord* alloc_in_to_space(size_t word_sz) {
+  HeapWord* alloc_in_to_space(size_t word_sz, oop const old, uint age) {
     HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
     if (obj != NULL) return obj;
-    else return alloc_in_to_space_slow(word_sz);
+    else return alloc_in_to_space_slow(word_sz, old, age);
   }
 
   HeapWord* young_old_boundary() { return _young_old_boundary; }
--- old/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	2014-08-22 15:27:34.016108966 -0700
+++ new/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	2014-08-22 15:27:33.932108969 -0700
@@ -84,12 +84,11 @@
   if (!test_mark->is_marked()) {
     bool new_obj_is_tenured = false;
     size_t new_obj_size = o->size();
+    // Find the object's age, MT safe.
+    uint age = (test_mark->has_displaced_mark_helper() /* o->has_displaced_mark() */) ?
+        test_mark->displaced_mark_helper()->age() : test_mark->age();
 
     if (!promote_immediately) {
-      // Find the objects age, MT safe.
-      uint age = (test_mark->has_displaced_mark_helper() /* o->has_displaced_mark() */) ?
-        test_mark->displaced_mark_helper()->age() : test_mark->age();
-
       // Try allocating obj in to-space (unless too old)
       if (age < PSScavenge::tenuring_threshold()) {
         new_obj = (oop) _young_lab.allocate(new_obj_size);
@@ -97,6 +96,7 @@
           // Do we allocate directly, or flush and refill?
           if (new_obj_size > (YoungPLABSize / 2)) {
             // Allocate this object directly
+            PSScavenge::_gc_tracer.report_promotion_to_new_plab(o, age, false, new_obj_size);
             new_obj = (oop)young_space()->cas_allocate(new_obj_size);
           } else {
             // Flush and fill
@@ -106,6 +106,7 @@
             if (lab_base != NULL) {
               _young_lab.initialize(MemRegion(lab_base, YoungPLABSize));
               // Try the young lab allocation again.
+              PSScavenge::_gc_tracer.report_promotion_to_new_plab(o, age, false, _young_lab.capacity());
               new_obj = (oop) _young_lab.allocate(new_obj_size);
             } else {
               _young_gen_is_full = true;
@@ -131,6 +132,7 @@
           // Do we allocate directly, or flush and refill?
           if (new_obj_size > (OldPLABSize / 2)) {
             // Allocate this object directly
+            PSScavenge::_gc_tracer.report_promotion_to_new_plab(o, age, true, new_obj_size);
             new_obj = (oop)old_gen()->cas_allocate(new_obj_size);
           } else {
             // Flush and fill
@@ -146,6 +148,7 @@
               }
 #endif
               _old_lab.initialize(MemRegion(lab_base, OldPLABSize));
+              PSScavenge::_gc_tracer.report_promotion_to_new_plab(o, age, true, _old_lab.capacity());
               // Try the old lab allocation again.
               new_obj = (oop) _old_lab.allocate(new_obj_size);
             }
--- old/src/share/vm/gc_implementation/shared/gcTrace.cpp	2014-08-22 15:27:34.324108956 -0700
+++ new/src/share/vm/gc_implementation/shared/gcTrace.cpp	2014-08-22 15:27:34.240108959 -0700
@@ -31,9 +31,11 @@
 #include "gc_implementation/shared/objectCountEventSender.hpp"
 #include "memory/heapInspection.hpp"
 #include "memory/referenceProcessorStats.hpp"
+#include "runtime/handles.hpp"
 #include "runtime/os.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ticks.inline.hpp"
+#include "trace/tracing.hpp"
 
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/g1/evacuationInfo.hpp"
@@ -172,6 +174,32 @@
   _tenuring_threshold = tenuring_threshold;
 }
 
+void YoungGCTracer::report_promotion_to_new_plab(oop const old, uint age, bool tenured, size_t plab_size) {
+
+  EventPromotionSample event;
+
+  if (event.should_commit()) {
+    event.set_gcId(_shared_gc_info.gc_id().id());
+    event.set_class(KlassHandle(old->klass())());
+    event.set_objectSize((size_t)old->size());
+    event.set_tenured(tenured);
+    event.set_age(age);
+
+    // If the PLAB size is the same as the object size, the copy was done
+    // directly to the heap instead of into a newly allocated PLAB.
+    if ((size_t)old->size() == plab_size) {
+      event.set_directAllocation(true);
+      // Set plabSize to 0, as it would otherwise be a random value.
+      event.set_plabSize(0);
+    } else {
+      event.set_directAllocation(false);
+      event.set_plabSize(plab_size);
+    }
+
+    event.commit();
+  }
+}
+
 void OldGCTracer::report_gc_end_impl(const Ticks& timestamp, TimePartitions* time_partitions) {
   assert_set_gc_id();
--- old/src/share/vm/gc_implementation/shared/gcTrace.hpp	2014-08-22 15:27:34.632108945 -0700
+++ new/src/share/vm/gc_implementation/shared/gcTrace.hpp	2014-08-22 15:27:34.548108948 -0700
@@ -157,6 +157,21 @@
   void report_promotion_failed(const PromotionFailedInfo& pf_info);
   void report_tenuring_threshold(const uint tenuring_threshold);
 
+  /*
+   * Generates and commits a PromotionSample event if the trace event is enabled.
+   *
+   * The object age is always passed in explicitly, since the mark word cannot
+   * be trusted to hold it at this stage.
+   *
+   * tenured should be true if the object has been promoted to the old
+   * space during this GC; if the object is copied to survivor space
+   * from young space or survivor space (aging), tenured should be false.
+   *
+   * If plab_size is the same as the object size, the assumption is that the
+   * object was allocated directly instead of being copied into a PLAB.
+   */
+  void report_promotion_to_new_plab(oop const old, uint age, bool tenured, size_t plab_size);
+
  private:
   void send_young_gc_event() const;
   void send_promotion_failed_event(const PromotionFailedInfo& pf_info) const;
--- old/src/share/vm/trace/trace.xml	2014-08-22 15:27:34.940108934 -0700
+++ new/src/share/vm/trace/trace.xml	2014-08-22 15:27:34.856108937 -0700
@@ -425,6 +425,18 @@
+    <event id="PromotionSample" path="vm/gc/detailed/promotion_sample" label="Promotion Sample"
+           description="Promotion of an object" has_thread="true" has_stacktrace="false"
+           is_instant="true">
+      <value type="UINT" field="gcId" label="GC ID" relation="GC_ID"/>
+      <value type="CLASS" field="class" label="Class"/>
+      <value type="BYTES64" field="objectSize" label="Object Size"/>
+      <value type="BOOLEAN" field="tenured" label="Tenured"/>
+      <value type="UINT" field="age" label="Age"/>
+      <value type="BOOLEAN" field="directAllocation" label="Direct Allocation"/>
+      <value type="BYTES64" field="plabSize" label="PLAB Size"/>
+    </event>
+
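
A minimal usage sketch (not part of the patch) of the hook this change introduces. The helper name report_copy_destination and its parameters are hypothetical; report_promotion_to_new_plab and its direct-allocation convention come from the gcTrace changes above, where passing the object's own size as plab_size marks the sample as a direct allocation.

#include "gc_implementation/shared/gcTrace.hpp"
#include "oops/oop.hpp"

// Hypothetical helper, sketched for illustration only: report where a
// just-copied object landed, following the convention this patch establishes.
static void report_copy_destination(YoungGCTracer* tracer, oop obj, uint age,
                                    bool tenured, bool copied_into_new_plab,
                                    size_t new_plab_word_size) {
  if (copied_into_new_plab) {
    // A fresh PLAB was carved out for the copy; report its capacity so the
    // event records the new PLAB size.
    tracer->report_promotion_to_new_plab(obj, age, tenured, new_plab_word_size);
  } else {
    // The object was allocated directly in the target space. Passing the
    // object size as plab_size makes the tracer set directAllocation to true
    // and zero out plabSize.
    tracer->report_promotion_to_new_plab(obj, age, tenured, (size_t)obj->size());
  }
}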