= ConstructBench /** JMH benchmark: times building a String from a char[] of size chars in which at most one char (the one at index first) is above 0xFF. */ @State(Scope.Benchmark) @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(3) public class ConstructBench { /* Length of the source char array. */ @Param({"256"}) int size; /* Index of the single char overwritten with 0xFF43; when first equals size (256) nothing is overwritten and every char stays below 0x7F. */ @Param({"0", "1", "16", "64", "256"}) int first; private char[] src; /** Fills src with pseudo-random one-byte chars and plants 0xFF43 at index first when that index is in range. */ @Setup public void setup() { /* Seed is read from the seed system property, defaulting to 1234567890. */ long seed = Integer.getInteger("seed", 1234567890); Random r = new Random(seed); char[] base = new char[size]; for (int c = 0; c < size; c++) { /* nextInt(Byte.MAX_VALUE) returns 0..126, so each char fits in the low byte. */ base[c] = (char) ((char)r.nextInt(Byte.MAX_VALUE) & 0x00FF); } src = Arrays.copyOf(base, base.length); /* Only plant the wide char when the index is inside the array. */ if (first < src.length) { src[first] = 0xFF43; } } /** Measured operation: constructs a new String from src. DONT_INLINE requests that this method not be inlined. */ @Benchmark @CompilerControl(CompilerControl.Mode.DONT_INLINE) public String test() { return new String(src); } } == i7 Haswell Benchmark (first) (size) Mode Cnt Score Error Units # C2, before patch ConstructBench.test 0 256 avgt 15 77.627 ± 2.816 ns/op ConstructBench.test 1 256 avgt 15 75.774 ± 0.703 ns/op ConstructBench.test 16 256 avgt 15 77.545 ± 1.107 ns/op ConstructBench.test 64 256 avgt 15 79.654 ± 0.970 ns/op ConstructBench.test 256 256 avgt 15 30.310 ± 0.433 ns/op # C2, after patch ConstructBench.test 0 256 avgt 15 80.797 ± 2.909 ns/op ConstructBench.test 1 256 avgt 15 78.656 ± 3.406 ns/op ConstructBench.test 16 256 avgt 15 77.074 ± 1.052 ns/op ConstructBench.test 64 256 avgt 15 79.956 ± 1.470 ns/op ConstructBench.test 256 256 avgt 15 30.429 ± 0.444 ns/op # C1, before patch ConstructBench.test 0 256 avgt 15 262.773 ± 2.212 ns/op ConstructBench.test 1 256 avgt 15 266.390 ± 2.185 ns/op ConstructBench.test 16 256 avgt 15 279.047 ± 2.307 ns/op ConstructBench.test 64 256 avgt 15 346.797 ± 3.725 ns/op ConstructBench.test 256 256 avgt 15 355.006 ± 2.171 ns/op # C1, after patch ConstructBench.test 0 256 avgt 15 232.017 ± 2.404 ns/op ConstructBench.test 1 256 avgt 15 237.653 ± 3.621 ns/op ConstructBench.test 16 256 avgt 15 253.138 ± 4.423 ns/op ConstructBench.test 64 256 avgt 
15 297.838 ± 3.171 ns/op ConstructBench.test 256 256 avgt 15 291.175 ± 3.025 ns/op C2 generated code is the same before/after: it is covered by intrinsics. C1 generated code: Before: 0.87% 0.70% ↗ 0x00007f3fbce6ee98: mov %rdx,%rdi 13.73% 16.82% │ 0x00007f3fbce6ee9b: inc %edi 1.21% 0.97% │ 0x00007f3fbce6ee9d: movslq %edx,%rbx 0.79% 0.25% │ 0x00007f3fbce6eea0: cmp 0xc(%rsi),%edx │ 0x00007f3fbce6eea3: jae 0x00007f3fbce6ef17 1.62% 1.08% │ 0x00007f3fbce6eea9: movzwl 0x10(%rsi,%rbx,2),%ebx 14.40% 15.97% │ 0x00007f3fbce6eeae: mov %rbx,%rdx ; <--- 0.98% 0.72% │ 0x00007f3fbce6eeb1: shr $0x8,%edx ; <--- 0.98% 1.02% │ 0x00007f3fbce6eeb4: cmp $0x0,%edx ; <--- │ 0x00007f3fbce6eeb7: jne 0x00007f3fbce6eeed 14.05% 17.56% │ 0x00007f3fbce6eebd: mov %r8,%rdx 0.71% 0.53% │ 0x00007f3fbce6eec0: inc %edx 1.27% 0.85% │ 0x00007f3fbce6eec2: movslq %r8d,%r11 0.64% 0.51% │ 0x00007f3fbce6eec5: cmp 0xc(%rcx),%r8d │ 0x00007f3fbce6eec9: jae 0x00007f3fbce6ef2a 14.46% 17.39% │ 0x00007f3fbce6eecf: mov %bl,0x10(%rcx,%r11,1) 1.41% 1.33% │ 0x00007f3fbce6eed4: inc %eax 0.52% 0.64% │ 0x00007f3fbce6eed6: test %eax,0x196b5124(%rip) 1.12% 0.61% │ 0x00007f3fbce6eedc: mov %rdx,%r8 13.40% 10.10% │ 0x00007f3fbce6eedf: mov %rdi,%rdx 0.77% 0.70% │ 0x00007f3fbce6eee2: cmp %r9d,%eax │ 0x00007f3fbce6eee5: jge 0x00007f3fbce6eefe 0.77% 0.42% ╰ 0x00007f3fbce6eeeb: jmp 0x00007f3fbce6ee98 After: 4.75% 4.49% ↗ 0x00007f3999f38398: movslq %edx,%rdi 6.00% 6.18% │ 0x00007f3999f3839b: cmp 0xc(%rsi),%edx 7.73% 7.85% │ 0x00007f3999f383a4: movzwl 0x10(%rsi,%rdi,2),%edi 8.07% 9.11% │ 0x00007f3999f383a9: cmp $0xff,%edi │ 0x00007f3999f383af: jg 0x00007f3999f383df 7.78% 8.42% │ 0x00007f3999f383b5: movslq %r8d,%rbx 5.23% 5.49% │ 0x00007f3999f383b8: cmp 0xc(%rcx),%r8d │ 0x00007f3999f383bc: jae 0x00007f3999f3841c 6.44% 7.17% │ 0x00007f3999f383c2: mov %dil,0x10(%rcx,%rbx,1) 7.69% 7.87% │ 0x00007f3999f383c7: inc %r8d 5.92% 7.94% │ 0x00007f3999f383ca: inc %edx 4.43% 4.76% │ 0x00007f3999f383cc: inc %eax 5.92% 6.53% │ 0x00007f3999f383ce: 
test %eax,0x19793c2c(%rip) 7.61% 7.95% │ 0x00007f3999f383d4: cmp %r9d,%eax │ 0x00007f3999f383d7: jge 0x00007f3999f383f0 6.81% 6.42% ╰ 0x00007f3999f383dd: jmp 0x00007f3999f38398 = ScanBench /** JMH benchmark comparing three ways to detect a char above 0xFF while scanning a char[]: c > 0xFF (test1), (c >>> 8) != 0 (test2) and (c & 0xFF00) != 0 (test3). */ @State(Scope.Benchmark) @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(3) public class ScanBench { /* Length of the scanned char array. */ @Param({"512"}) int size; /* Index of the single char overwritten with 0xFF43; -1 means no char is overwritten, so the scan runs to the end of the array. */ @Param({"-1", "0", "1", "2", "4", "8", "16", "32", "64", "128", "256"}) int first; private char[] src; /** Fills src with pseudo-random one-byte chars, plants 0xFF43 at index first when first is non-negative, then checks that the three predicates agree. */ @Setup public void setup() { /* Seed is read from the seed system property, defaulting to 1234567890. */ long seed = Integer.getInteger("seed", 1234567890); Random r = new Random(seed); char[] base = new char[size]; for (int c = 0; c < size; c++) { /* nextInt(Byte.MAX_VALUE) returns 0..126, so each char fits in the low byte. */ base[c] = (char) ((char)r.nextInt(Byte.MAX_VALUE) & 0x00FF); } src = Arrays.copyOf(base, base.length); if (first >= 0) { src[first] = 0xFF43; } /* Sanity check: the three predicates used by test1, test2 and test3 must give the same answer for every char value visited. NOTE(review): the loop condition stops before Character.MAX_VALUE, so 0xFFFF itself is never checked. */ for (char c = Character.MIN_VALUE; c < Character.MAX_VALUE; c++) { int t = c; boolean t1 = t > 0xFF; boolean t2 = (t >>> 8) != 0; boolean t3 = (t & 0xFF00) != 0; if (t1 != t2) throw new IllegalStateException("t1 != t2"); if (t1 != t3) throw new IllegalStateException("t1 != t3"); } } /** Scans src using a comparison against 0xFF; returns null at the first char above 0xFF, otherwise returns src. */ @Benchmark @CompilerControl(CompilerControl.Mode.DONT_INLINE) public char[] test1() { char[] src = this.src; for (char c : src) { if (c > 0xFF) return null; } return src; } /** Scans src using an unsigned shift by 8; returns null at the first char whose high byte is non-zero, otherwise returns src. */ @Benchmark @CompilerControl(CompilerControl.Mode.DONT_INLINE) public char[] test2() { char[] src = this.src; for (char c : src) { if ((c >>> 8) != 0) return null; } return src; } /** Scans src using a mask with 0xFF00; returns null at the first char whose high byte is non-zero, otherwise returns src. */ @Benchmark @CompilerControl(CompilerControl.Mode.DONT_INLINE) public char[] test3() { char[] src = this.src; for (char c : src) { if ((c & 0xFF00) != 0) return null; } return src; } } == Atom === C1: Benchmark (first) (size) Mode Cnt Score Error Units ScanBench.test1 -1 512 avgt 15 2298.066 ± 2.325 ns/op ScanBench.test1 0 512 avgt 15 37.824 ± 0.024 ns/op ScanBench.test1 1 512 avgt 15 41.636 ± 0.034 ns/op ScanBench.test1 2 512 avgt 15 45.413 ± 0.040 ns/op 
ScanBench.test1 4 512 avgt 15 52.983 ± 0.028 ns/op ScanBench.test1 8 512 avgt 15 76.936 ± 0.049 ns/op ScanBench.test1 16 512 avgt 15 109.751 ± 0.090 ns/op ScanBench.test1 32 512 avgt 15 180.391 ± 0.150 ns/op ScanBench.test1 64 512 avgt 15 321.659 ± 0.195 ns/op ScanBench.test1 128 512 avgt 15 604.086 ± 0.437 ns/op ScanBench.test1 256 512 avgt 15 1169.456 ± 1.563 ns/op ScanBench.test2 -1 512 avgt 15 2304.609 ± 2.085 ns/op ScanBench.test2 0 512 avgt 15 38.438 ± 0.021 ns/op ScanBench.test2 1 512 avgt 15 42.867 ± 0.020 ns/op ScanBench.test2 2 512 avgt 15 47.285 ± 0.033 ns/op ScanBench.test2 4 512 avgt 15 56.119 ± 0.051 ns/op ScanBench.test2 8 512 avgt 15 82.574 ± 0.078 ns/op ScanBench.test2 16 512 avgt 15 117.876 ± 0.050 ns/op ScanBench.test2 32 512 avgt 15 188.577 ± 0.208 ns/op ScanBench.test2 64 512 avgt 15 329.736 ± 0.243 ns/op ScanBench.test2 128 512 avgt 15 612.332 ± 0.665 ns/op ScanBench.test2 256 512 avgt 15 1177.385 ± 1.092 ns/op ScanBench.test3 -1 512 avgt 15 2619.872 ± 3.663 ns/op ScanBench.test3 0 512 avgt 15 38.448 ± 0.019 ns/op ScanBench.test3 1 512 avgt 15 42.882 ± 0.035 ns/op ScanBench.test3 2 512 avgt 15 47.292 ± 0.031 ns/op ScanBench.test3 4 512 avgt 15 56.141 ± 0.033 ns/op ScanBench.test3 8 512 avgt 15 82.032 ± 0.129 ns/op ScanBench.test3 16 512 avgt 15 120.441 ± 0.069 ns/op ScanBench.test3 32 512 avgt 15 201.249 ± 0.258 ns/op ScanBench.test3 64 512 avgt 15 362.703 ± 0.260 ns/op ScanBench.test3 128 512 avgt 15 685.620 ± 0.455 ns/op ScanBench.test3 256 512 avgt 15 1331.644 ± 0.854 ns/op === C2 Benchmark (first) (size) Mode Cnt Score Error Units ScanBench.test1 -1 512 avgt 15 1198.498 ± 3.921 ns/op ScanBench.test1 0 512 avgt 15 34.050 ± 0.024 ns/op ScanBench.test1 1 512 avgt 15 43.530 ± 0.079 ns/op ScanBench.test1 2 512 avgt 15 47.923 ± 0.046 ns/op ScanBench.test1 4 512 avgt 15 47.508 ± 3.294 ns/op ScanBench.test1 8 512 avgt 15 55.502 ± 0.039 ns/op ScanBench.test1 16 512 avgt 15 83.480 ± 5.394 ns/op ScanBench.test1 32 512 avgt 15 111.594 ± 0.050 ns/op 
ScanBench.test1 64 512 avgt 15 182.173 ± 0.071 ns/op ScanBench.test1 128 512 avgt 15 303.205 ± 0.179 ns/op ScanBench.test1 256 512 avgt 15 565.962 ± 0.778 ns/op ScanBench.test2 -1 512 avgt 15 1191.342 ± 0.858 ns/op ScanBench.test2 0 512 avgt 15 34.657 ± 0.015 ns/op ScanBench.test2 1 512 avgt 15 44.756 ± 0.061 ns/op ScanBench.test2 2 512 avgt 15 49.787 ± 0.024 ns/op ScanBench.test2 4 512 avgt 15 48.945 ± 0.324 ns/op ScanBench.test2 8 512 avgt 15 72.478 ± 5.125 ns/op ScanBench.test2 16 512 avgt 15 92.212 ± 1.460 ns/op ScanBench.test2 32 512 avgt 15 127.963 ± 0.129 ns/op ScanBench.test2 64 512 avgt 15 209.058 ± 0.719 ns/op ScanBench.test2 128 512 avgt 15 331.321 ± 2.593 ns/op ScanBench.test2 256 512 avgt 15 611.992 ± 0.168 ns/op ScanBench.test3 -1 512 avgt 15 1204.217 ± 3.044 ns/op ScanBench.test3 0 512 avgt 15 34.654 ± 0.014 ns/op ScanBench.test3 1 512 avgt 15 43.494 ± 0.040 ns/op ScanBench.test3 2 512 avgt 15 47.893 ± 0.032 ns/op ScanBench.test3 4 512 avgt 15 46.235 ± 0.664 ns/op ScanBench.test3 8 512 avgt 15 55.054 ± 0.661 ns/op ScanBench.test3 16 512 avgt 15 81.329 ± 2.846 ns/op ScanBench.test3 32 512 avgt 15 112.454 ± 0.699 ns/op ScanBench.test3 64 512 avgt 15 182.208 ± 0.167 ns/op ScanBench.test3 128 512 avgt 15 303.185 ± 0.292 ns/op ScanBench.test3 256 512 avgt 15 565.695 ± 0.686 ns/op == i7 Haswell === C1 Benchmark (first) (size) Mode Cnt Score Error Units ScanBench.test1 -1 512 avgt 9 238.740 ± 3.355 ns/op ScanBench.test1 0 512 avgt 9 4.096 ± 0.010 ns/op ScanBench.test1 1 512 avgt 9 4.754 ± 0.004 ns/op ScanBench.test1 2 512 avgt 9 5.680 ± 0.053 ns/op ScanBench.test1 4 512 avgt 9 6.813 ± 0.264 ns/op ScanBench.test1 8 512 avgt 9 8.795 ± 0.080 ns/op ScanBench.test1 16 512 avgt 9 12.909 ± 0.380 ns/op ScanBench.test1 32 512 avgt 9 20.854 ± 0.222 ns/op ScanBench.test1 64 512 avgt 9 43.036 ± 0.070 ns/op ScanBench.test1 128 512 avgt 9 73.216 ± 0.877 ns/op ScanBench.test1 256 512 avgt 9 129.960 ± 0.720 ns/op ScanBench.test2 -1 512 avgt 9 271.169 ± 0.437 ns/op 
ScanBench.test2 0 512 avgt 9 4.170 ± 0.026 ns/op ScanBench.test2 1 512 avgt 9 4.830 ± 0.275 ns/op ScanBench.test2 2 512 avgt 9 5.649 ± 0.042 ns/op ScanBench.test2 4 512 avgt 9 6.771 ± 0.019 ns/op ScanBench.test2 8 512 avgt 9 8.948 ± 0.638 ns/op ScanBench.test2 16 512 avgt 9 12.774 ± 0.018 ns/op ScanBench.test2 32 512 avgt 9 20.839 ± 0.214 ns/op ScanBench.test2 64 512 avgt 9 43.737 ± 0.062 ns/op ScanBench.test2 128 512 avgt 9 76.163 ± 0.132 ns/op ScanBench.test2 256 512 avgt 9 140.645 ± 0.283 ns/op ScanBench.test3 -1 512 avgt 9 269.901 ± 0.518 ns/op ScanBench.test3 0 512 avgt 9 4.015 ± 0.004 ns/op ScanBench.test3 1 512 avgt 9 4.758 ± 0.008 ns/op ScanBench.test3 2 512 avgt 9 5.653 ± 0.065 ns/op ScanBench.test3 4 512 avgt 9 6.770 ± 0.060 ns/op ScanBench.test3 8 512 avgt 9 8.995 ± 0.203 ns/op ScanBench.test3 16 512 avgt 9 12.993 ± 0.004 ns/op ScanBench.test3 32 512 avgt 9 21.272 ± 0.249 ns/op ScanBench.test3 64 512 avgt 9 44.073 ± 0.355 ns/op ScanBench.test3 128 512 avgt 9 76.448 ± 2.283 ns/op ScanBench.test3 256 512 avgt 9 142.029 ± 5.541 ns/op === C2 Benchmark (first) (size) Mode Cnt Score Error Units ScanBench.test1 -1 512 avgt 9 98.064 ± 0.220 ns/op ScanBench.test1 0 512 avgt 9 3.445 ± 0.032 ns/op ScanBench.test1 1 512 avgt 9 3.878 ± 0.024 ns/op ScanBench.test1 2 512 avgt 9 4.697 ± 0.187 ns/op ScanBench.test1 4 512 avgt 9 5.366 ± 0.125 ns/op ScanBench.test1 8 512 avgt 9 5.502 ± 0.070 ns/op ScanBench.test1 16 512 avgt 9 6.931 ± 0.101 ns/op ScanBench.test1 32 512 avgt 9 9.863 ± 0.128 ns/op ScanBench.test1 64 512 avgt 9 19.368 ± 5.249 ns/op ScanBench.test1 128 512 avgt 9 30.787 ± 2.935 ns/op ScanBench.test1 256 512 avgt 9 51.864 ± 0.141 ns/op ScanBench.test2 -1 512 avgt 9 148.420 ± 0.466 ns/op ScanBench.test2 0 512 avgt 9 3.502 ± 0.008 ns/op ScanBench.test2 1 512 avgt 9 4.080 ± 0.101 ns/op ScanBench.test2 2 512 avgt 9 4.799 ± 0.703 ns/op ScanBench.test2 4 512 avgt 9 4.995 ± 0.224 ns/op ScanBench.test2 8 512 avgt 9 6.181 ± 0.059 ns/op ScanBench.test2 16 512 avgt 9 
8.057 ± 0.046 ns/op ScanBench.test2 32 512 avgt 9 12.094 ± 0.159 ns/op ScanBench.test2 64 512 avgt 9 21.243 ± 1.351 ns/op ScanBench.test2 128 512 avgt 9 36.635 ± 0.052 ns/op ScanBench.test2 256 512 avgt 9 70.728 ± 0.540 ns/op ScanBench.test3 -1 512 avgt 9 124.693 ± 0.348 ns/op ScanBench.test3 0 512 avgt 9 3.517 ± 0.009 ns/op ScanBench.test3 1 512 avgt 9 4.018 ± 0.056 ns/op ScanBench.test3 2 512 avgt 9 4.782 ± 0.016 ns/op ScanBench.test3 4 512 avgt 9 5.279 ± 0.240 ns/op ScanBench.test3 8 512 avgt 9 6.356 ± 0.194 ns/op ScanBench.test3 16 512 avgt 9 9.587 ± 1.980 ns/op ScanBench.test3 32 512 avgt 9 13.937 ± 0.210 ns/op ScanBench.test3 64 512 avgt 9 25.030 ± 3.792 ns/op ScanBench.test3 128 512 avgt 9 40.081 ± 0.795 ns/op ScanBench.test3 256 512 avgt 9 76.145 ± 0.555 ns/op ----------- C2: 0x00007f0af19f1926: movzwl 0x12(%rcx,%r11,2),%r9d 0x00007f0af19f192c: cmp $0xff,%r9d 0x00007f0af19f1933: jg 0x00007f0af19f19d5 0x00007f1b70d64c39: movzwl 0x12(%rcx,%r10,2),%r11d 0x00007f1b70d64c3f: shr $0x8,%r11d 0x00007f1b70d64c43: test %r11d,%r11d 0x00007f1b70d64c46: jne 0x00007f1b70d64cd5 0x00007f9b999f1ccc: movzwl 0x12(%rdi,%r10,2),%r8d 0x00007f9b999f1cd2: test $0xff00,%r8d 0x00007f9b999f1cd9: jne 0x00007f9b999f1d55 ----------- C1: 0x00007fb1f5b4fc43: movzwl 0x10(%rsi,%rbx,2),%ebx 0x00007fb1f5b4fc48: cmp $0xff,%ebx 0x00007fb1f5b4fc4e: jg 0x00007fb1f5b4fc66 0x00007f5fd10f08c3: movzwl 0x10(%rsi,%rbx,2),%ebx 0x00007f5fd10f08c8: shr $0x8,%ebx 0x00007f5fd10f08cb: cmp $0x0,%ebx 0x00007f8449300a43: movzwl 0x10(%rsi,%rbx,2),%ebx 0x00007f8449300a48: and $0xff00,%ebx 0x00007f8449300a4e: cmp $0x0,%ebx