package org.openjdk;

import org.openjdk.jmh.annotations.*;

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Random;
import java.util.concurrent.TimeUnit;

/**
 * JMH throughput benchmark for JCA message digests (SHA-256 by default).
 *
 * <p>Each invocation resets the digest, feeds it a {@code size}-byte buffer and
 * finalizes the hash. The buffer is refilled with random bytes before every
 * iteration. Reference measurements and their analysis are recorded in the
 * comment at the end of this class.
 */
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(value = 3)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Thread)
public class SHA256Bench {

    /** Digest instance under test; created once in {@link #setup()}. */
    private MessageDigest messageDigest;

    /** Input buffer of {@link #size} bytes that gets hashed on every invocation. */
    private byte[] payload;

    /** JCA algorithm name passed to {@link MessageDigest#getInstance(String)}. */
    @Param({"SHA-256"})
    String algo;

    /** Length of the hashed input, in bytes. */
    @Param({"16", "64", "256", "1024", "8192"})
    int size;

    /**
     * Creates the digest for {@link #algo} and allocates the input buffer.
     *
     * @throws NoSuchAlgorithmException if no provider supplies {@link #algo}
     */
    @Setup(Level.Trial)
    public void setup() throws NoSuchAlgorithmException {
        messageDigest = MessageDigest.getInstance(algo);
        payload = new byte[size];
    }

    /** Overwrites the input buffer with fresh random bytes before each iteration. */
    @Setup(Level.Iteration)
    public void shake() {
        Random random = new Random();
        random.nextBytes(payload);
    }

    /**
     * Hashes the whole input buffer once.
     *
     * @return the computed digest, returned so that JMH consumes the result
     */
    @Benchmark
    public byte[] test() {
        messageDigest.reset();
        messageDigest.update(payload);
        byte[] hash = messageDigest.digest();
        return hash;
    }

    /*
        i7-4790K (Haswell) 4.0 GHz, Linux x86_64

        Reference point -- OpenSSL:

          $ openssl speed sha256
          Doing sha256 for 3s on 16 size blocks: 15846169 sha256's in 3.00s
          Doing sha256 for 3s on 64 size blocks: 8707343 sha256's in 3.00s
          Doing sha256 for 3s on 256 size blocks: 4293698 sha256's in 3.00s
          Doing sha256 for 3s on 1024 size blocks: 1351880 sha256's in 3.00s
          Doing sha256 for 3s on 8192 size blocks: 182838 sha256's in 3.00s
          OpenSSL 1.0.2g  1 Mar 2016
          built on: reproducible build, date unspecified
          options:bn(64,64) rc4(16x,int) des(idx,cisc,16,int) aes(partial) blowfish(idx)
          The 'numbers' are in 1000s of bytes per second processed.
          type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
          sha256           84512.90k   185756.65k   366395.56k   461441.71k   499269.63k

        Note that OpenSSL computes the number of rounds per _three seconds_.
        Normalized to ops/sec, OpenSSL does:

            16-size: 5282056 ops/sec
            64-size: 2902447 ops/sec
           256-size: 1431232 ops/sec
          1024-size:  450626 ops/sec
          8192-size:   60946 ops/sec

        Now, to the JCA digests:

        JDK 8u111:

          Benchmark          (algo)  (size)   Mode  Cnt        Score        Error  Units
          SHA256Bench.test  SHA-256      16  thrpt   15  2734800.908 ±  81373.885  ops/s
          SHA256Bench.test  SHA-256      64  thrpt   15  1496200.202 ±  23691.981  ops/s
          SHA256Bench.test  SHA-256     256  thrpt   15   623124.510 ±   3054.716  ops/s
          SHA256Bench.test  SHA-256    1024  thrpt   15   187464.852 ±     97.242  ops/s
          SHA256Bench.test  SHA-256    8192  thrpt   15    24757.347 ±    568.135  ops/s

        JDK 9b149:

          Benchmark          (algo)  (size)   Mode  Cnt        Score        Error  Units
          SHA256Bench.test  SHA-256      16  thrpt   15  4570336.758 ± 117494.786  ops/s
          SHA256Bench.test  SHA-256      64  thrpt   15  2453077.909 ±  21029.204  ops/s
          SHA256Bench.test  SHA-256     256  thrpt   15  1135474.577 ±   3667.487  ops/s
          SHA256Bench.test  SHA-256    1024  thrpt   15   379731.026 ±   1125.489  ops/s
          SHA256Bench.test  SHA-256    8192  thrpt   15    53049.187 ±    215.312  ops/s

        So, JDK 8u111 is roughly 0.40x-0.51x of OpenSSL perf, and JDK 9b149 is roughly
        0.79x-0.87x of OpenSSL perf (0.86x-0.87x at the 16-byte and 8192-byte ends).

        The cause for JDK improvement is SHA-256 intrinsics, implemented as
          https://bugs.openjdk.java.net/browse/JDK-8154495
        (note there is a HW accelerated version with Intel SHA --
          https://bugs.openjdk.java.net/browse/JDK-8150767
        -- not available for the CPU under test).

        Turning that off with -XX:+UnlockDiagnosticVMOptions -XX:-UseSHA256Intrinsics
        returns performance to JDK 8 levels:

          Benchmark          (algo)  (size)   Mode  Cnt        Score        Error  Units
          SHA256Bench.test  SHA-256      16  thrpt   15  2926996.463 ±  15871.951  ops/s
          SHA256Bench.test  SHA-256      64  thrpt   15  1558658.097 ±   9394.061  ops/s
          SHA256Bench.test  SHA-256     256  thrpt   15   623332.276 ±   3285.403  ops/s
          SHA256Bench.test  SHA-256    1024  thrpt   15   183552.956 ±   5838.000  ops/s
          SHA256Bench.test  SHA-256    8192  thrpt   15    24755.823 ±     22.784  ops/s

        A brief look into generated code reveals JDK 9 code indeed uses AVX2 for
        computing, and perfnorm suggests we are already near the best achievable
        cycles-per-instruction (CPI) for this kind of workload. Maybe a little shorter
        insn stream in AVX2 SHA-256 implementation would close the remaining gap
        against OpenSSL:

          Benchmark                                (algo)  (size)   Mode  Cnt       Score      Error  Units
          SHA256Bench.test                        SHA-256    8192  thrpt    5   52971.711 ± 3268.504  ops/s
          SHA256Bench.test:CPI                    SHA-256    8192  thrpt            0.287              #/op
          SHA256Bench.test:L1-dcache-load-misses  SHA-256    8192  thrpt            8.193              #/op
          SHA256Bench.test:L1-dcache-loads        SHA-256    8192  thrpt        11469.457              #/op
          SHA256Bench.test:L1-dcache-stores       SHA-256    8192  thrpt         2354.057              #/op
          SHA256Bench.test:L1-icache-load-misses  SHA-256    8192  thrpt            0.548              #/op
          SHA256Bench.test:LLC-load-misses        SHA-256    8192  thrpt            0.114              #/op
          SHA256Bench.test:LLC-loads              SHA-256    8192  thrpt            0.928              #/op
          SHA256Bench.test:LLC-store-misses       SHA-256    8192  thrpt            0.036              #/op
          SHA256Bench.test:LLC-stores             SHA-256    8192  thrpt            0.074              #/op
          SHA256Bench.test:branch-misses          SHA-256    8192  thrpt           13.905              #/op
          SHA256Bench.test:branches               SHA-256    8192  thrpt         1718.328              #/op
          SHA256Bench.test:cycles                 SHA-256    8192  thrpt        74933.608              #/op
          SHA256Bench.test:dTLB-load-misses       SHA-256    8192  thrpt            0.031              #/op
          SHA256Bench.test:dTLB-loads             SHA-256    8192  thrpt        11487.683              #/op
          SHA256Bench.test:dTLB-store-misses      SHA-256    8192  thrpt            0.018              #/op
          SHA256Bench.test:dTLB-stores            SHA-256    8192  thrpt         2360.639              #/op
          SHA256Bench.test:iTLB-load-misses       SHA-256    8192  thrpt            0.073              #/op
          SHA256Bench.test:iTLB-loads             SHA-256    8192  thrpt            0.021              #/op
          SHA256Bench.test:instructions           SHA-256    8192  thrpt       261543.268              #/op
     */
}