package org.openjdk;

import org.openjdk.jmh.annotations.*;

import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark that times {@link Long#compare(long, long)} for every
 * combination of literal-constant and instance-field operands.
 *
 * <p>Run configuration, as declared by the annotations below: 5 warmup and
 * 5 measurement iterations of 1 second each, 5 forks, average-time mode,
 * results reported in nanoseconds, with {@code Scope.Benchmark} state.
 *
 * <p>Every benchmark method is annotated with
 * {@code CompilerControl.Mode.DONT_INLINE} and returns the comparison result
 * to the caller rather than discarding it.
 *
 * <p>The block comment at the end of the class records the scores and the
 * disassembly that were observed for each benchmark.
 */
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class PredictedCompareTo {

    // Operands for the "field" variants. They are assigned in setup() to the
    // same values (1 and 2) that the "const" variants use as literals.
    // NOTE(review): presumably left non-final so the JIT cannot treat them as
    // compile-time constants -- confirm.
    private long a, b;

    /** Initializes the operand fields: {@code a = 1}, {@code b = 2}. */
    @Setup
    public void setup() {
        a = 1L;
        b = 2L;
    }

    /**
     * Compares two literal constants.
     *
     * @return the result of {@code Long.compare(1L, 2L)}, which is negative
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public int const_const() {
        return Long.compare(1L, 2L);
    }

    /**
     * Compares field {@code a} (left operand) against the literal {@code 2L}.
     *
     * @return the result of {@code Long.compare(a, 2L)}; negative for the
     *         value assigned in {@link #setup()}
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public int field_const() {
        return Long.compare(a, 2L);
    }

    /**
     * Compares the literal {@code 1L} (left operand) against field {@code b}.
     *
     * @return the result of {@code Long.compare(1L, b)}; negative for the
     *         value assigned in {@link #setup()}
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public int const_field() {
        return Long.compare(1L, b);
    }

    /**
     * Compares field {@code a} against field {@code b}.
     *
     * @return the result of {@code Long.compare(a, b)}; negative for the
     *         values assigned in {@link #setup()}
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public int field_field() {
        return Long.compare(a, b);
    }

    /*
    Recorded results. Each section lists the JMH scores for the "Baseline" and
    "Patched" runs, followed by the annotated disassembly of the benchmark
    method in each run.

    NOTE(review): the change that separates "Patched" from "Baseline" is not
    identified in this file. The column layout of the listings below was
    reconstructed; for rows that show a single percentage, the column it
    originally belonged to is unknown. The Baseline listings of the field
    variants stop at the first return; the block targeted by their conditional
    jump is not shown.

    === const_const

    Benchmark                       Mode  Cnt  Score   Error  Units
    # Baseline
    PredictedCompareTo.const_const  avgt   25  2.875 ± 0.002  ns/op
    # Patched
    PredictedCompareTo.const_const  avgt   25  2.876 ± 0.002  ns/op

    Baseline:
     6.22%   6.28%    0x00007f1e155f7700: sub $0x18,%rsp
     2.36%   2.00%    0x00007f1e155f7707: mov %rbp,0x10(%rsp)
     0.01%            0x00007f1e155f770c: mov $0xffffffff,%eax
     6.18%   5.75%    0x00007f1e155f7711: add $0x10,%rsp
                      0x00007f1e155f7715: pop %rbp
    17.83%  23.77%    0x00007f1e155f7716: test %eax,0x12efd8e4(%rip)
     0.01%            0x00007f1e155f771c: retq

    Patched:
     6.68%   7.26%    0x00007f5ffc323200: sub $0x18,%rsp
     2.08%   2.24%    0x00007f5ffc323207: mov %rbp,0x10(%rsp)
     0.01%            0x00007f5ffc32320c: mov $0xffffffff,%eax
     6.83%   5.67%    0x00007f5ffc323211: add $0x10,%rsp
                      0x00007f5ffc323215: pop %rbp
    17.16%  23.31%    0x00007f5ffc323216: test %eax,0x11e1ede4(%rip)
                      0x00007f5ffc32321c: retq

    The generated code is the same.

    === const_field

    Benchmark                       Mode  Cnt  Score   Error  Units
    # Baseline
    PredictedCompareTo.const_field  avgt   25  3.215 ± 0.007  ns/op
    # Patched
    PredictedCompareTo.const_field  avgt   25  3.395 ± 0.006  ns/op

    Baseline:
     0.01%            0x00007f86455f6208: sub $0x20,%rsp
     1.45%   1.87%    0x00007f86455f620c: mov 0x18(%rsi),%r10
     6.22%   5.80%    0x00007f86455f6210: cmp $0x1,%r10
     0.01%          ╭ 0x00007f86455f6214: jle 0x00007f86455f6227
                    │ 0x00007f86455f6216: mov $0xffffffff,%eax
     1.82%   2.01%  │ 0x00007f86455f621b: add $0x20,%rsp
                    │ 0x00007f86455f621f: pop %rbp
     8.36%  10.26%  │ 0x00007f86455f6220: test %eax,0x12393dda(%rip)
     0.03%   0.03%  │ 0x00007f86455f6226: retq

    Patched:
     6.14%   0.53%    0x00007fad44b85000: sub $0x18,%rsp
     1.19%   1.15%    0x00007fad44b85007: mov %rbp,0x10(%rsp)
                      0x00007fad44b8500c: mov 0x18(%rsi),%r10
     6.09%   6.86%    0x00007fad44b85010: mov $0x1,%r11d
                      0x00007fad44b85016: cmp %r10,%r11   <---- why not $0x1 exactly?
     1.38%   1.11%    0x00007fad44b85019: mov $0xffffffff,%eax
                    ╭ 0x00007fad44b8501e: jl 0x00007fad44b85026
                    │ 0x00007fad44b85020: setne %al
                    │ 0x00007fad44b85023: movzbl %al,%eax
     6.00%   8.08%  ↘ 0x00007fad44b85026: add $0x10,%rsp
                      0x00007fad44b8502a: pop %rbp
    17.13%  23.78%    0x00007fad44b8502b: test %eax,0x11e93fcf(%rip)
                      0x00007fad44b85031: retq

    The patched version has more instructions on the hot path, which explains
    part of the performance degradation. The baseline version also enjoys the
    branch profiling information to lay out the code on a straight path.

    === field_const

    Benchmark                       Mode  Cnt  Score   Error  Units
    # Baseline
    PredictedCompareTo.field_const  avgt   25  3.207 ± 0.006  ns/op
    # Patched
    PredictedCompareTo.field_const  avgt   25  3.394 ± 0.005  ns/op

    Baseline:
     0.03%            0x00007f4aa95f9a88: sub $0x20,%rsp
     1.89%   1.91%    0x00007f4aa95f9a8c: mov 0x10(%rsi),%r10
     5.55%   5.71%    0x00007f4aa95f9a90: cmp $0x2,%r10
                    ╭ 0x00007f4aa95f9a94: jge 0x00007f4aa95f9aa7
                    │ 0x00007f4aa95f9a96: mov $0xffffffff,%eax
     1.90%   2.40%  │ 0x00007f4aa95f9a9b: add $0x20,%rsp
                    │ 0x00007f4aa95f9a9f: pop %rbp
     8.96%   9.92%  │ 0x00007f4aa95f9aa0: test %eax,0x1474f55a(%rip)
     0.03%          │ 0x00007f4aa95f9aa6: retq

    Patched:
     5.98%   0.55%    0x00007fe0e096a000: sub $0x18,%rsp
     1.51%   1.22%    0x00007fe0e096a007: mov %rbp,0x10(%rsp)
                      0x00007fe0e096a00c: mov 0x10(%rsi),%r10
     5.50%   6.46%    0x00007fe0e096a010: mov $0x2,%r11d
                      0x00007fe0e096a016: cmp %r11,%r10   <---- why not $0x2 exactly?
     1.34%   1.46%    0x00007fe0e096a019: mov $0xffffffff,%eax
     0.01%          ╭ 0x00007fe0e096a01e: jl 0x00007fe0e096a026
                    │ 0x00007fe0e096a020: setne %al
                    │ 0x00007fe0e096a023: movzbl %al,%eax
     5.57%   7.81%  ↘ 0x00007fe0e096a026: add $0x10,%rsp
                      0x00007fe0e096a02a: pop %rbp
    17.89%  23.36%    0x00007fe0e096a02b: test %eax,0x11f44fcf(%rip)
                      0x00007fe0e096a031: retq

    The patched version has more instructions on the hot path, which explains
    part of the performance degradation. The baseline version also enjoys the
    branch profiling information to lay out the code on a straight path.

    === field_field

    Benchmark                       Mode  Cnt  Score   Error  Units
    # Baseline
    PredictedCompareTo.field_field  avgt   25  3.182 ± 0.008  ns/op
    # Patched
    PredictedCompareTo.field_field  avgt   25  3.128 ± 0.001  ns/op

    Baseline:
     1.71%   0.86%    0x00007fd328b84d88: sub $0x20,%rsp
     1.19%   0.91%    0x00007fd328b84d8c: mov 0x18(%rsi),%r10
     4.85%   4.10%    0x00007fd328b84d90: mov 0x10(%rsi),%r11
     1.56%   0.75%    0x00007fd328b84d94: cmp %r10,%r11
                    ╭ 0x00007fd328b84d97: jge 0x00007fd328b84daa
     1.01%   1.01%  │ 0x00007fd328b84d99: mov $0xffffffff,%eax
     0.37%   0.46%  │ 0x00007fd328b84d9e: add $0x20,%rsp
     4.53%   4.70%  │ 0x00007fd328b84da2: pop %rbp
     1.34%   0.35%  │ 0x00007fd328b84da3: test %eax,0x11e36257(%rip)
     0.81%   1.36%  │ 0x00007fd328b84da9: retq

    Patched:
     3.73%   3.11%    0x00007f54f15f7680: sub $0x18,%rsp
     4.62%   4.97%    0x00007f54f15f7687: mov %rbp,0x10(%rsp)
                      0x00007f54f15f768c: mov 0x18(%rsi),%r10
     3.96%   3.62%    0x00007f54f15f7690: mov 0x10(%rsi),%r11
                      0x00007f54f15f7694: cmp %r10,%r11
     4.34%   3.80%    0x00007f54f15f7697: mov $0xffffffff,%eax
                    ╭ 0x00007f54f15f769c: jl 0x00007f54f15f76a4
                    │ 0x00007f54f15f769e: setne %al
                    │ 0x00007f54f15f76a1: movzbl %al,%eax
     3.66%   4.14%  ↘ 0x00007f54f15f76a4: add $0x10,%rsp
                      0x00007f54f15f76a8: pop %rbp
     9.41%   8.72%    0x00007f54f15f76a9: test %eax,0x1453c951(%rip)
                      0x00007f54f15f76af: retq
    */
}