# Background Running applications on QEMU full system emulation may be very slow. We can also verify OpenJDK SVE support on QEMU user mode emulation, which is relatively faster and easier to use. # QEMU setup A QEMU version newer than 4.0 is preferred. It can be installed directly like: $ apt install qemu-user # On Debian/Ubuntu $ dnf install qemu-user-binfmt # On Fedora Or build QEMU from source code, e.g.: $ wget https://download.qemu.org/qemu-4.2.0.tar.xz $ tar Jxf qemu-4.2.0.tar.xz && cd qemu-4.2.0 $ ./configure --target-list=aarch64-linux-user --prefix=$INSTALL $ make install -j16 # OpenJDK SVE user emulation support We need to disable SVE feature checking in the JVM and make the JVM work for different vector lengths. A simple patch [1] can be applied on top of [2]. The patch [1] just makes sure that the UseSVE option will not be disabled when no SVE feature is detected on the running host, and it also sets MaxVectorSize based on the current emulated SVE vector length. [1] http://cr.openjdk.java.net/~njian/8231441/user-emulation.patch [2] http://cr.openjdk.java.net/~njian/8231441/webrev.02/ # Running Java with SVE support on QEMU user mode emulation To run on a QEMU emulator with an SVE vector register size of 256 bits (2*128), simply run the command line below in an AArch64 Linux environment: $ qemu-aarch64 -cpu max,sve-max-vq=2 java -XX:UseSVE=1 -XX:CompileCommand=print,Simple.addVector Simple $ # (-XX:UseSVE=1 is required, as the JVM cannot detect the SVE feature and set this option automatically in user mode emulation.) 
The generated loop: ;; B30: # out( B30 B31 ) <- in( B29 B30 ) Loop( B30-B30 inner main of N206 strip mined) Freq: 4.37395e+06 0x0000004012868120: sbfiz x10, x20, #2, #32 0x0000004012868124: add x11, x16, x10 ;*iaload {reexecute=0 rethrow=0 return_oop=0} ; - Simple::addVector@16 (line 19) 0x0000004012868128: add x12, x15, x10 ;*iaload {reexecute=0 rethrow=0 return_oop=0} ; - Simple::addVector@13 (line 19) 0x000000401286812c: add x13, x11, #0x10 0x0000004012868130: add x14, x12, #0x10 0x0000004012868134: ld1w {z16.s}, p7/z, [x13] 0x0000004012868138: ld1w {z17.s}, p7/z, [x14] 0x000000401286813c: add z16.s, z16.s, z17.s 0x0000004012868140: add x10, x19, x10 0x0000004012868144: add x13, x10, #0x10 0x0000004012868148: st1w {z16.s}, p7, [x13] 0x000000401286814c: add x13, x12, #0x30 0x0000004012868150: add x14, x11, #0x30 0x0000004012868154: ld1w {z16.s}, p7/z, [x13] 0x0000004012868158: ld1w {z17.s}, p7/z, [x14] 0x000000401286815c: add z16.s, z17.s, z16.s 0x0000004012868160: add x13, x10, #0x30 0x0000004012868164: st1w {z16.s}, p7, [x13] 0x0000004012868168: add x13, x11, #0x50 0x000000401286816c: add x14, x12, #0x50 0x0000004012868170: ld1w {z16.s}, p7/z, [x13] 0x0000004012868174: ld1w {z17.s}, p7/z, [x14] 0x0000004012868178: add z16.s, z16.s, z17.s 0x000000401286817c: add x13, x10, #0x50 0x0000004012868180: st1w {z16.s}, p7, [x13] 0x0000004012868184: add x11, x11, #0x70 0x0000004012868188: add x12, x12, #0x70 0x000000401286818c: ld1w {z16.s}, p7/z, [x11] 0x0000004012868190: ld1w {z17.s}, p7/z, [x12] 0x0000004012868194: add z16.s, z16.s, z17.s 0x0000004012868198: add x10, x10, #0x70 0x000000401286819c: st1w {z16.s}, p7, [x10] ;*iastore {reexecute=0 rethrow=0 return_oop=0} ; - Simple::addVector@18 (line 19) 0x00000040128681a0: add w20, w20, #0x20 ;*iinc {reexecute=0 rethrow=0 return_oop=0} ; - Simple::addVector@19 (line 18) 0x00000040128681a4: cmp w20, w6 0x00000040128681a8: b.lt 0x0000004012868120 // b.tstop;*if_icmpge {reexecute=0 rethrow=0 return_oop=0} ; - 
Simple::addVector@6 (line 18)