1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 /**
  26  * @test
  27  * @bug 6340864
  28  * @summary Implement vectorization optimizations in hotspot-server
  29  *
  30  * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect
  31  */
  32 
  33 public class TestByteVect {
  34   private static final int ARRLEN = 997;  // length of the byte arrays; packed short/int/long arrays use ARRLEN/2, /4, /8
  35   private static final int ITERS  = 11000;  // iteration count of the warmup loop and of each timing loop
  36   private static final int ADD_INIT = 0;  // offset added to the index when filling the input array: a1[i] = (byte)(ADD_INIT+i)
  37   private static final int BIT_MASK = 0xB7;  // mask operand of the and/or/xor cases; stored narrowed to byte in a4
  38   private static final int VALUE = 3;  // operand of the add/sub/mul/div cases and distance of the plain shift cases; also passed negated
  39   private static final int SHIFT = 8;  // shift distance of the *_o / *_on shift cases (passed as SHIFT and -SHIFT)
  40 
  41   public static void main(String args[]) {
  42     System.out.println("Testing Byte vectors");
  43     int errn = test();
  44     if (errn > 0) {
  45       System.err.println("FAILED: " + errn + " errors");
  46       System.exit(97);
  47     }
  48     System.out.println("PASSED");
  49   }
  50 
  51   static int test() {
  52     byte[] a0 = new byte[ARRLEN];
  53     byte[] a1 = new byte[ARRLEN];
  54     byte[] a2 = new byte[ARRLEN];
  55     byte[] a3 = new byte[ARRLEN];
  56     byte[] a4 = new byte[ARRLEN];
  57     short[] p2 = new short[ARRLEN/2];
  58       int[] p4 = new   int[ARRLEN/4];
  59      long[] p8 = new  long[ARRLEN/8];
  60     // Initialize
  61     int gold_sum = 0;
  62     for (int i=0; i<ARRLEN; i++) {
  63       byte val = (byte)(ADD_INIT+i);
  64       gold_sum += val;
  65       a1[i] = val;
  66       a2[i] = (byte)VALUE;
  67       a3[i] = (byte)-VALUE;
  68       a4[i] = (byte)BIT_MASK;
  69     }
  70     System.out.println("Warmup");
  71     for (int i=0; i<ITERS; i++) {
  72       test_sum(a1);
  73       test_addc(a0, a1);
  74       test_addv(a0, a1, (byte)VALUE);
  75       test_adda(a0, a1, a2);
  76       test_subc(a0, a1);
  77       test_subv(a0, a1, (byte)VALUE);
  78       test_suba(a0, a1, a2);
  79       test_mulc(a0, a1);
  80       test_mulv(a0, a1, (byte)VALUE);
  81       test_mula(a0, a1, a2);
  82       test_divc(a0, a1);
  83       test_divv(a0, a1, (byte)VALUE);
  84       test_diva(a0, a1, a2);
  85       test_mulc_n(a0, a1);
  86       test_mulv(a0, a1, (byte)-VALUE);
  87       test_mula(a0, a1, a3);
  88       test_divc_n(a0, a1);
  89       test_divv(a0, a1, (byte)-VALUE);
  90       test_diva(a0, a1, a3);
  91       test_andc(a0, a1);
  92       test_andv(a0, a1, (byte)BIT_MASK);
  93       test_anda(a0, a1, a4);
  94       test_orc(a0, a1);
  95       test_orv(a0, a1, (byte)BIT_MASK);
  96       test_ora(a0, a1, a4);
  97       test_xorc(a0, a1);
  98       test_xorv(a0, a1, (byte)BIT_MASK);
  99       test_xora(a0, a1, a4);
 100       test_sllc(a0, a1);
 101       test_sllv(a0, a1, VALUE);
 102       test_srlc(a0, a1);
 103       test_srlv(a0, a1, VALUE);
 104       test_srac(a0, a1);
 105       test_srav(a0, a1, VALUE);
 106       test_sllc_n(a0, a1);
 107       test_sllv(a0, a1, -VALUE);
 108       test_srlc_n(a0, a1);
 109       test_srlv(a0, a1, -VALUE);
 110       test_srac_n(a0, a1);
 111       test_srav(a0, a1, -VALUE);
 112       test_sllc_o(a0, a1);
 113       test_sllv(a0, a1, SHIFT);
 114       test_srlc_o(a0, a1);
 115       test_srlv(a0, a1, SHIFT);
 116       test_srac_o(a0, a1);
 117       test_srav(a0, a1, SHIFT);
 118       test_sllc_on(a0, a1);
 119       test_sllv(a0, a1, -SHIFT);
 120       test_srlc_on(a0, a1);
 121       test_srlv(a0, a1, -SHIFT);
 122       test_srac_on(a0, a1);
 123       test_srav(a0, a1, -SHIFT);
 124       test_pack2(p2, a1);
 125       test_unpack2(a0, p2);
 126       test_pack2_swap(p2, a1);
 127       test_unpack2_swap(a0, p2);
 128       test_pack4(p4, a1);
 129       test_unpack4(a0, p4);
 130       test_pack4_swap(p4, a1);
 131       test_unpack4_swap(a0, p4);
 132       test_pack8(p8, a1);
 133       test_unpack8(a0, p8);
 134       test_pack8_swap(p8, a1);
 135       test_unpack8_swap(a0, p8);
 136     }
 137     // Test and verify results
 138     System.out.println("Verification");
 139     int errn = 0;
 140     {
 141       int sum = test_sum(a1);
 142       if (sum != gold_sum) {
 143         System.err.println("test_sum:  " + sum + " != " + gold_sum);
 144         errn++;
 145       }
 146 
 147       test_addc(a0, a1);
 148       for (int i=0; i<ARRLEN; i++) {
 149         errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 150       }
 151       test_addv(a0, a1, (byte)VALUE);
 152       for (int i=0; i<ARRLEN; i++) {
 153         errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 154       }
 155       test_adda(a0, a1, a2);
 156       for (int i=0; i<ARRLEN; i++) {
 157         errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 158       }
 159 
 160       test_subc(a0, a1);
 161       for (int i=0; i<ARRLEN; i++) {
 162         errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 163       }
 164       test_subv(a0, a1, (byte)VALUE);
 165       for (int i=0; i<ARRLEN; i++) {
 166         errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 167       }
 168       test_suba(a0, a1, a2);
 169       for (int i=0; i<ARRLEN; i++) {
 170         errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 171       }
 172 
 173       test_mulc(a0, a1);
 174       for (int i=0; i<ARRLEN; i++) {
 175         errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 176       }
 177       test_mulv(a0, a1, (byte)VALUE);
 178       for (int i=0; i<ARRLEN; i++) {
 179         errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 180       }
 181       test_mula(a0, a1, a2);
 182       for (int i=0; i<ARRLEN; i++) {
 183         errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 184       }
 185 
 186       test_divc(a0, a1);
 187       for (int i=0; i<ARRLEN; i++) {
 188         errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 189       }
 190       test_divv(a0, a1, (byte)VALUE);
 191       for (int i=0; i<ARRLEN; i++) {
 192         errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 193       }
 194       test_diva(a0, a1, a2);
 195       for (int i=0; i<ARRLEN; i++) {
 196         errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 197       }
 198 
 199       test_mulc_n(a0, a1);
 200       for (int i=0; i<ARRLEN; i++) {
 201         errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 202       }
 203       test_mulv(a0, a1, (byte)-VALUE);
 204       for (int i=0; i<ARRLEN; i++) {
 205         errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 206       }
 207       test_mula(a0, a1, a3);
 208       for (int i=0; i<ARRLEN; i++) {
 209         errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 210       }
 211 
 212       test_divc_n(a0, a1);
 213       for (int i=0; i<ARRLEN; i++) {
 214         errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 215       }
 216       test_divv(a0, a1, (byte)-VALUE);
 217       for (int i=0; i<ARRLEN; i++) {
 218         errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 219       }
 220       test_diva(a0, a1, a3);
 221       for (int i=0; i<ARRLEN; i++) {
 222         errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 223       }
 224 
 225       test_andc(a0, a1);
 226       for (int i=0; i<ARRLEN; i++) {
 227         errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 228       }
 229       test_andv(a0, a1, (byte)BIT_MASK);
 230       for (int i=0; i<ARRLEN; i++) {
 231         errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 232       }
 233       test_anda(a0, a1, a4);
 234       for (int i=0; i<ARRLEN; i++) {
 235         errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 236       }
 237 
 238       test_orc(a0, a1);
 239       for (int i=0; i<ARRLEN; i++) {
 240         errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 241       }
 242       test_orv(a0, a1, (byte)BIT_MASK);
 243       for (int i=0; i<ARRLEN; i++) {
 244         errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 245       }
 246       test_ora(a0, a1, a4);
 247       for (int i=0; i<ARRLEN; i++) {
 248         errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 249       }
 250 
 251       test_xorc(a0, a1);
 252       for (int i=0; i<ARRLEN; i++) {
 253         errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 254       }
 255       test_xorv(a0, a1, (byte)BIT_MASK);
 256       for (int i=0; i<ARRLEN; i++) {
 257         errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 258       }
 259       test_xora(a0, a1, a4);
 260       for (int i=0; i<ARRLEN; i++) {
 261         errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 262       }
 263 
 264       test_sllc(a0, a1);
 265       for (int i=0; i<ARRLEN; i++) {
 266         errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 267       }
 268       test_sllv(a0, a1, VALUE);
 269       for (int i=0; i<ARRLEN; i++) {
 270         errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 271       }
 272 
 273       test_srlc(a0, a1);
 274       for (int i=0; i<ARRLEN; i++) {
 275         errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 276       }
 277       test_srlv(a0, a1, VALUE);
 278       for (int i=0; i<ARRLEN; i++) {
 279         errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 280       }
 281 
 282       test_srac(a0, a1);
 283       for (int i=0; i<ARRLEN; i++) {
 284         errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 285       }
 286       test_srav(a0, a1, VALUE);
 287       for (int i=0; i<ARRLEN; i++) {
 288         errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 289       }
 290 
 291       test_sllc_n(a0, a1);
 292       for (int i=0; i<ARRLEN; i++) {
 293         errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 294       }
 295       test_sllv(a0, a1, -VALUE);
 296       for (int i=0; i<ARRLEN; i++) {
 297         errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 298       }
 299 
 300       test_srlc_n(a0, a1);
 301       for (int i=0; i<ARRLEN; i++) {
 302         errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 303       }
 304       test_srlv(a0, a1, -VALUE);
 305       for (int i=0; i<ARRLEN; i++) {
 306         errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 307       }
 308 
 309       test_srac_n(a0, a1);
 310       for (int i=0; i<ARRLEN; i++) {
 311         errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 312       }
 313       test_srav(a0, a1, -VALUE);
 314       for (int i=0; i<ARRLEN; i++) {
 315         errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 316       }
 317 
 318       test_sllc_o(a0, a1);
 319       for (int i=0; i<ARRLEN; i++) {
 320         errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 321       }
 322       test_sllv(a0, a1, SHIFT);
 323       for (int i=0; i<ARRLEN; i++) {
 324         errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 325       }
 326 
 327       test_srlc_o(a0, a1);
 328       for (int i=0; i<ARRLEN; i++) {
 329         errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 330       }
 331       test_srlv(a0, a1, SHIFT);
 332       for (int i=0; i<ARRLEN; i++) {
 333         errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 334       }
 335 
 336       test_srac_o(a0, a1);
 337       for (int i=0; i<ARRLEN; i++) {
 338         errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 339       }
 340       test_srav(a0, a1, SHIFT);
 341       for (int i=0; i<ARRLEN; i++) {
 342         errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 343       }
 344 
 345       test_sllc_on(a0, a1);
 346       for (int i=0; i<ARRLEN; i++) {
 347         errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 348       }
 349       test_sllv(a0, a1, -SHIFT);
 350       for (int i=0; i<ARRLEN; i++) {
 351         errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 352       }
 353 
 354       test_srlc_on(a0, a1);
 355       for (int i=0; i<ARRLEN; i++) {
 356         errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 357       }
 358       test_srlv(a0, a1, -SHIFT);
 359       for (int i=0; i<ARRLEN; i++) {
 360         errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 361       }
 362 
 363       test_srac_on(a0, a1);
 364       for (int i=0; i<ARRLEN; i++) {
 365         errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 366       }
 367       test_srav(a0, a1, -SHIFT);
 368       for (int i=0; i<ARRLEN; i++) {
 369         errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 370       }
 371 
 372       test_pack2(p2, a1);
 373       for (int i=0; i<ARRLEN/2; i++) {
 374         errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
 375       }
 376       for (int i=0; i<ARRLEN; i++) {
 377         a0[i] = -1;
 378       }
 379       test_unpack2(a0, p2);
 380       for (int i=0; i<(ARRLEN&(-2)); i++) {
 381         errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
 382       }
 383 
 384       test_pack2_swap(p2, a1);
 385       for (int i=0; i<ARRLEN/2; i++) {
 386         errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
 387       }
 388       for (int i=0; i<ARRLEN; i++) {
 389         a0[i] = -1;
 390       }
 391       test_unpack2_swap(a0, p2);
 392       for (int i=0; i<(ARRLEN&(-2)); i++) {
 393         errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 394       }
 395 
 396       test_pack4(p4, a1);
 397       for (int i=0; i<ARRLEN/4; i++) {
 398         errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
 399                                                  (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
 400                                                  (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
 401                                                  (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
 402       }
 403       for (int i=0; i<ARRLEN; i++) {
 404         a0[i] = -1;
 405       }
 406       test_unpack4(a0, p4);
 407       for (int i=0; i<(ARRLEN&(-4)); i++) {
 408         errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
 409       }
 410 
 411       test_pack4_swap(p4, a1);
 412       for (int i=0; i<ARRLEN/4; i++) {
 413         errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
 414                                                       (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
 415                                                       (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
 416                                                       (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
 417       }
 418       for (int i=0; i<ARRLEN; i++) {
 419         a0[i] = -1;
 420       }
 421       test_unpack4_swap(a0, p4);
 422       for (int i=0; i<(ARRLEN&(-4)); i++) {
 423         errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 424       }
 425 
 426       test_pack8(p8, a1);
 427       for (int i=0; i<ARRLEN/8; i++) {
 428         errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
 429                                                  (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
 430                                                  (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
 431                                                  (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
 432                                                  (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
 433                                                  (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
 434                                                  (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
 435                                                  (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
 436       }
 437       for (int i=0; i<ARRLEN; i++) {
 438         a0[i] = -1;
 439       }
 440       test_unpack8(a0, p8);
 441       for (int i=0; i<(ARRLEN&(-8)); i++) {
 442         errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
 443       }
 444 
 445       test_pack8_swap(p8, a1);
 446       for (int i=0; i<ARRLEN/8; i++) {
 447         errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
 448                                                       (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
 449                                                       (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
 450                                                       (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
 451                                                       (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
 452                                                       (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
 453                                                       (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
 454                                                       (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
 455       }
 456       for (int i=0; i<ARRLEN; i++) {
 457         a0[i] = -1;
 458       }
 459       test_unpack8_swap(a0, p8);
 460       for (int i=0; i<(ARRLEN&(-8)); i++) {
 461         errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 462       }
 463 
 464     }
 465 
 466     if (errn > 0)
 467       return errn;
 468 
 469     System.out.println("Time");
 470     long start, end;
 471 
 472     start = System.currentTimeMillis();
 473     for (int i=0; i<ITERS; i++) {
 474       test_sum(a1);
 475     }
 476     end = System.currentTimeMillis();
 477     System.out.println("test_sum: " + (end - start));
 478 
 479     start = System.currentTimeMillis();
 480     for (int i=0; i<ITERS; i++) {
 481       test_addc(a0, a1);
 482     }
 483     end = System.currentTimeMillis();
 484     System.out.println("test_addc: " + (end - start));
 485     start = System.currentTimeMillis();
 486     for (int i=0; i<ITERS; i++) {
 487       test_addv(a0, a1, (byte)VALUE);
 488     }
 489     end = System.currentTimeMillis();
 490     System.out.println("test_addv: " + (end - start));
 491     start = System.currentTimeMillis();
 492     for (int i=0; i<ITERS; i++) {
 493       test_adda(a0, a1, a2);
 494     }
 495     end = System.currentTimeMillis();
 496     System.out.println("test_adda: " + (end - start));
 497 
 498     start = System.currentTimeMillis();
 499     for (int i=0; i<ITERS; i++) {
 500       test_subc(a0, a1);
 501     }
 502     end = System.currentTimeMillis();
 503     System.out.println("test_subc: " + (end - start));
 504     start = System.currentTimeMillis();
 505     for (int i=0; i<ITERS; i++) {
 506       test_subv(a0, a1, (byte)VALUE);
 507     }
 508     end = System.currentTimeMillis();
 509     System.out.println("test_subv: " + (end - start));
 510     start = System.currentTimeMillis();
 511     for (int i=0; i<ITERS; i++) {
 512       test_suba(a0, a1, a2);
 513     }
 514     end = System.currentTimeMillis();
 515     System.out.println("test_suba: " + (end - start));
 516 
 517     start = System.currentTimeMillis();
 518     for (int i=0; i<ITERS; i++) {
 519       test_mulc(a0, a1);
 520     }
 521     end = System.currentTimeMillis();
 522     System.out.println("test_mulc: " + (end - start));
 523     start = System.currentTimeMillis();
 524     for (int i=0; i<ITERS; i++) {
 525       test_mulv(a0, a1, (byte)VALUE);
 526     }
 527     end = System.currentTimeMillis();
 528     System.out.println("test_mulv: " + (end - start));
 529     start = System.currentTimeMillis();
 530     for (int i=0; i<ITERS; i++) {
 531       test_mula(a0, a1, a2);
 532     }
 533     end = System.currentTimeMillis();
 534     System.out.println("test_mula: " + (end - start));
 535 
 536     start = System.currentTimeMillis();
 537     for (int i=0; i<ITERS; i++) {
 538       test_divc(a0, a1);
 539     }
 540     end = System.currentTimeMillis();
 541     System.out.println("test_divc: " + (end - start));
 542     start = System.currentTimeMillis();
 543     for (int i=0; i<ITERS; i++) {
 544       test_divv(a0, a1, (byte)VALUE);
 545     }
 546     end = System.currentTimeMillis();
 547     System.out.println("test_divv: " + (end - start));
 548     start = System.currentTimeMillis();
 549     for (int i=0; i<ITERS; i++) {
 550       test_diva(a0, a1, a2);
 551     }
 552     end = System.currentTimeMillis();
 553     System.out.println("test_diva: " + (end - start));
 554 
 555     start = System.currentTimeMillis();
 556     for (int i=0; i<ITERS; i++) {
 557       test_mulc_n(a0, a1);
 558     }
 559     end = System.currentTimeMillis();
 560     System.out.println("test_mulc_n: " + (end - start));
 561     start = System.currentTimeMillis();
 562     for (int i=0; i<ITERS; i++) {
 563       test_mulv(a0, a1, (byte)-VALUE);
 564     }
 565     end = System.currentTimeMillis();
 566     System.out.println("test_mulv_n: " + (end - start));
 567     start = System.currentTimeMillis();
 568     for (int i=0; i<ITERS; i++) {
 569       test_mula(a0, a1, a3);
 570     }
 571     end = System.currentTimeMillis();
 572     System.out.println("test_mula_n: " + (end - start));
 573 
 574     start = System.currentTimeMillis();
 575     for (int i=0; i<ITERS; i++) {
 576       test_divc_n(a0, a1);
 577     }
 578     end = System.currentTimeMillis();
 579     System.out.println("test_divc_n: " + (end - start));
 580     start = System.currentTimeMillis();
 581     for (int i=0; i<ITERS; i++) {
 582       test_divv(a0, a1, (byte)-VALUE);
 583     }
 584     end = System.currentTimeMillis();
 585     System.out.println("test_divv_n: " + (end - start));
 586     start = System.currentTimeMillis();
 587     for (int i=0; i<ITERS; i++) {
 588       test_diva(a0, a1, a3);
 589     }
 590     end = System.currentTimeMillis();
 591     System.out.println("test_diva_n: " + (end - start));
 592 
 593     start = System.currentTimeMillis();
 594     for (int i=0; i<ITERS; i++) {
 595       test_andc(a0, a1);
 596     }
 597     end = System.currentTimeMillis();
 598     System.out.println("test_andc: " + (end - start));
 599     start = System.currentTimeMillis();
 600     for (int i=0; i<ITERS; i++) {
 601       test_andv(a0, a1, (byte)BIT_MASK);
 602     }
 603     end = System.currentTimeMillis();
 604     System.out.println("test_andv: " + (end - start));
 605     start = System.currentTimeMillis();
 606     for (int i=0; i<ITERS; i++) {
 607       test_anda(a0, a1, a4);
 608     }
 609     end = System.currentTimeMillis();
 610     System.out.println("test_anda: " + (end - start));
 611 
 612     start = System.currentTimeMillis();
 613     for (int i=0; i<ITERS; i++) {
 614       test_orc(a0, a1);
 615     }
 616     end = System.currentTimeMillis();
 617     System.out.println("test_orc: " + (end - start));
 618     start = System.currentTimeMillis();
 619     for (int i=0; i<ITERS; i++) {
 620       test_orv(a0, a1, (byte)BIT_MASK);
 621     }
 622     end = System.currentTimeMillis();
 623     System.out.println("test_orv: " + (end - start));
 624     start = System.currentTimeMillis();
 625     for (int i=0; i<ITERS; i++) {
 626       test_ora(a0, a1, a4);
 627     }
 628     end = System.currentTimeMillis();
 629     System.out.println("test_ora: " + (end - start));
 630 
 631     start = System.currentTimeMillis();
 632     for (int i=0; i<ITERS; i++) {
 633       test_xorc(a0, a1);
 634     }
 635     end = System.currentTimeMillis();
 636     System.out.println("test_xorc: " + (end - start));
 637     start = System.currentTimeMillis();
 638     for (int i=0; i<ITERS; i++) {
 639       test_xorv(a0, a1, (byte)BIT_MASK);
 640     }
 641     end = System.currentTimeMillis();
 642     System.out.println("test_xorv: " + (end - start));
 643     start = System.currentTimeMillis();
 644     for (int i=0; i<ITERS; i++) {
 645       test_xora(a0, a1, a4);
 646     }
 647     end = System.currentTimeMillis();
 648     System.out.println("test_xora: " + (end - start));
 649 
 650     start = System.currentTimeMillis();
 651     for (int i=0; i<ITERS; i++) {
 652       test_sllc(a0, a1);
 653     }
 654     end = System.currentTimeMillis();
 655     System.out.println("test_sllc: " + (end - start));
 656     start = System.currentTimeMillis();
 657     for (int i=0; i<ITERS; i++) {
 658       test_sllv(a0, a1, VALUE);
 659     }
 660     end = System.currentTimeMillis();
 661     System.out.println("test_sllv: " + (end - start));
 662 
 663     start = System.currentTimeMillis();
 664     for (int i=0; i<ITERS; i++) {
 665       test_srlc(a0, a1);
 666     }
 667     end = System.currentTimeMillis();
 668     System.out.println("test_srlc: " + (end - start));
 669     start = System.currentTimeMillis();
 670     for (int i=0; i<ITERS; i++) {
 671       test_srlv(a0, a1, VALUE);
 672     }
 673     end = System.currentTimeMillis();
 674     System.out.println("test_srlv: " + (end - start));
 675 
 676     start = System.currentTimeMillis();
 677     for (int i=0; i<ITERS; i++) {
 678       test_srac(a0, a1);
 679     }
 680     end = System.currentTimeMillis();
 681     System.out.println("test_srac: " + (end - start));
 682     start = System.currentTimeMillis();
 683     for (int i=0; i<ITERS; i++) {
 684       test_srav(a0, a1, VALUE);
 685     }
 686     end = System.currentTimeMillis();
 687     System.out.println("test_srav: " + (end - start));
 688 
 689     start = System.currentTimeMillis();
 690     for (int i=0; i<ITERS; i++) {
 691       test_sllc_n(a0, a1);
 692     }
 693     end = System.currentTimeMillis();
 694     System.out.println("test_sllc_n: " + (end - start));
 695     start = System.currentTimeMillis();
 696     for (int i=0; i<ITERS; i++) {
 697       test_sllv(a0, a1, -VALUE);
 698     }
 699     end = System.currentTimeMillis();
 700     System.out.println("test_sllv_n: " + (end - start));
 701 
 702     start = System.currentTimeMillis();
 703     for (int i=0; i<ITERS; i++) {
 704       test_srlc_n(a0, a1);
 705     }
 706     end = System.currentTimeMillis();
 707     System.out.println("test_srlc_n: " + (end - start));
 708     start = System.currentTimeMillis();
 709     for (int i=0; i<ITERS; i++) {
 710       test_srlv(a0, a1, -VALUE);
 711     }
 712     end = System.currentTimeMillis();
 713     System.out.println("test_srlv_n: " + (end - start));
 714 
 715     start = System.currentTimeMillis();
 716     for (int i=0; i<ITERS; i++) {
 717       test_srac_n(a0, a1);
 718     }
 719     end = System.currentTimeMillis();
 720     System.out.println("test_srac_n: " + (end - start));
 721     start = System.currentTimeMillis();
 722     for (int i=0; i<ITERS; i++) {
 723       test_srav(a0, a1, -VALUE);
 724     }
 725     end = System.currentTimeMillis();
 726     System.out.println("test_srav_n: " + (end - start));
 727 
 728     start = System.currentTimeMillis();
 729     for (int i=0; i<ITERS; i++) {
 730       test_sllc_o(a0, a1);
 731     }
 732     end = System.currentTimeMillis();
 733     System.out.println("test_sllc_o: " + (end - start));
 734     start = System.currentTimeMillis();
 735     for (int i=0; i<ITERS; i++) {
 736       test_sllv(a0, a1, SHIFT);
 737     }
 738     end = System.currentTimeMillis();
 739     System.out.println("test_sllv_o: " + (end - start));
 740 
 741     start = System.currentTimeMillis();
 742     for (int i=0; i<ITERS; i++) {
 743       test_srlc_o(a0, a1);
 744     }
 745     end = System.currentTimeMillis();
 746     System.out.println("test_srlc_o: " + (end - start));
 747     start = System.currentTimeMillis();
 748     for (int i=0; i<ITERS; i++) {
 749       test_srlv(a0, a1, SHIFT);
 750     }
 751     end = System.currentTimeMillis();
 752     System.out.println("test_srlv_o: " + (end - start));
 753 
 754     start = System.currentTimeMillis();
 755     for (int i=0; i<ITERS; i++) {
 756       test_srac_o(a0, a1);
 757     }
 758     end = System.currentTimeMillis();
 759     System.out.println("test_srac_o: " + (end - start));
 760     start = System.currentTimeMillis();
 761     for (int i=0; i<ITERS; i++) {
 762       test_srav(a0, a1, SHIFT);
 763     }
 764     end = System.currentTimeMillis();
 765     System.out.println("test_srav_o: " + (end - start));
 766 
 767     start = System.currentTimeMillis();
 768     for (int i=0; i<ITERS; i++) {
 769       test_sllc_on(a0, a1);
 770     }
 771     end = System.currentTimeMillis();
 772     System.out.println("test_sllc_on: " + (end - start));
 773     start = System.currentTimeMillis();
 774     for (int i=0; i<ITERS; i++) {
 775       test_sllv(a0, a1, -SHIFT);
 776     }
 777     end = System.currentTimeMillis();
 778     System.out.println("test_sllv_on: " + (end - start));
 779 
 780     start = System.currentTimeMillis();
 781     for (int i=0; i<ITERS; i++) {
 782       test_srlc_on(a0, a1);
 783     }
 784     end = System.currentTimeMillis();
 785     System.out.println("test_srlc_on: " + (end - start));
 786     start = System.currentTimeMillis();
 787     for (int i=0; i<ITERS; i++) {
 788       test_srlv(a0, a1, -SHIFT);
 789     }
 790     end = System.currentTimeMillis();
 791     System.out.println("test_srlv_on: " + (end - start));
 792 
 793     start = System.currentTimeMillis();
 794     for (int i=0; i<ITERS; i++) {
 795       test_srac_on(a0, a1);
 796     }
 797     end = System.currentTimeMillis();
 798     System.out.println("test_srac_on: " + (end - start));
 799     start = System.currentTimeMillis();
 800     for (int i=0; i<ITERS; i++) {
 801       test_srav(a0, a1, -SHIFT);
 802     }
 803     end = System.currentTimeMillis();
 804     System.out.println("test_srav_on: " + (end - start));
 805 
 806     start = System.currentTimeMillis();
 807     for (int i=0; i<ITERS; i++) {
 808       test_pack2(p2, a1);
 809     }
 810     end = System.currentTimeMillis();
 811     System.out.println("test_pack2: " + (end - start));
 812     start = System.currentTimeMillis();
 813     for (int i=0; i<ITERS; i++) {
 814       test_unpack2(a0, p2);
 815     }
 816     end = System.currentTimeMillis();
 817     System.out.println("test_unpack2: " + (end - start));
 818     start = System.currentTimeMillis();
 819     for (int i=0; i<ITERS; i++) {
 820       test_pack2_swap(p2, a1);
 821     }
 822     end = System.currentTimeMillis();
 823     System.out.println("test_pack2_swap: " + (end - start));
 824     start = System.currentTimeMillis();
 825     for (int i=0; i<ITERS; i++) {
 826       test_unpack2_swap(a0, p2);
 827     }
 828     end = System.currentTimeMillis();
 829     System.out.println("test_unpack2_swap: " + (end - start));
 830 
 831     start = System.currentTimeMillis();
 832     for (int i=0; i<ITERS; i++) {
 833       test_pack4(p4, a1);
 834     }
 835     end = System.currentTimeMillis();
 836     System.out.println("test_pack4: " + (end - start));
 837     start = System.currentTimeMillis();
 838     for (int i=0; i<ITERS; i++) {
 839       test_unpack4(a0, p4);
 840     }
 841     end = System.currentTimeMillis();
 842     System.out.println("test_unpack4: " + (end - start));
 843     start = System.currentTimeMillis();
 844     for (int i=0; i<ITERS; i++) {
 845       test_pack4_swap(p4, a1);
 846     }
 847     end = System.currentTimeMillis();
 848     System.out.println("test_pack4_swap: " + (end - start));
 849     start = System.currentTimeMillis();
 850     for (int i=0; i<ITERS; i++) {
 851       test_unpack4_swap(a0, p4);
 852     }
 853     end = System.currentTimeMillis();
 854     System.out.println("test_unpack4_swap: " + (end - start));
 855 
 856     start = System.currentTimeMillis();
 857     for (int i=0; i<ITERS; i++) {
 858       test_pack8(p8, a1);
 859     }
 860     end = System.currentTimeMillis();
 861     System.out.println("test_pack8: " + (end - start));
 862     start = System.currentTimeMillis();
 863     for (int i=0; i<ITERS; i++) {
 864       test_unpack8(a0, p8);
 865     }
 866     end = System.currentTimeMillis();
 867     System.out.println("test_unpack8: " + (end - start));
 868     start = System.currentTimeMillis();
 869     for (int i=0; i<ITERS; i++) {
 870       test_pack8_swap(p8, a1);
 871     }
 872     end = System.currentTimeMillis();
 873     System.out.println("test_pack8_swap: " + (end - start));
 874     start = System.currentTimeMillis();
 875     for (int i=0; i<ITERS; i++) {
 876       test_unpack8_swap(a0, p8);
 877     }
 878     end = System.currentTimeMillis();
 879     System.out.println("test_unpack8_swap: " + (end - start));
 880 
 881     return errn;
 882   }
 883 
 884   static int test_sum(byte[] a1) {
 885     int sum = 0;
 886     for (int i = 0; i < a1.length; i+=1) {
 887       sum += a1[i];
 888     }
 889     return sum;
 890   }
 891 
 892   static void test_addc(byte[] a0, byte[] a1) {
 893     for (int i = 0; i < a0.length; i+=1) {
 894       a0[i] = (byte)(a1[i]+VALUE);
 895     }
 896   }
 897   static void test_addv(byte[] a0, byte[] a1, byte b) {
 898     for (int i = 0; i < a0.length; i+=1) {
 899       a0[i] = (byte)(a1[i]+b);
 900     }
 901   }
 902   static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
 903     for (int i = 0; i < a0.length; i+=1) {
 904       a0[i] = (byte)(a1[i]+a2[i]);
 905     }
 906   }
 907 
 908   static void test_subc(byte[] a0, byte[] a1) {
 909     for (int i = 0; i < a0.length; i+=1) {
 910       a0[i] = (byte)(a1[i]-VALUE);
 911     }
 912   }
 913   static void test_subv(byte[] a0, byte[] a1, byte b) {
 914     for (int i = 0; i < a0.length; i+=1) {
 915       a0[i] = (byte)(a1[i]-b);
 916     }
 917   }
 918   static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
 919     for (int i = 0; i < a0.length; i+=1) {
 920       a0[i] = (byte)(a1[i]-a2[i]);
 921     }
 922   }
 923 
 924   static void test_mulc(byte[] a0, byte[] a1) {
 925     for (int i = 0; i < a0.length; i+=1) {
 926       a0[i] = (byte)(a1[i]*VALUE);
 927     }
 928   }
 929   static void test_mulc_n(byte[] a0, byte[] a1) {
 930     for (int i = 0; i < a0.length; i+=1) {
 931       a0[i] = (byte)(a1[i]*(-VALUE));
 932     }
 933   }
 934   static void test_mulv(byte[] a0, byte[] a1, byte b) {
 935     for (int i = 0; i < a0.length; i+=1) {
 936       a0[i] = (byte)(a1[i]*b);
 937     }
 938   }
 939   static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
 940     for (int i = 0; i < a0.length; i+=1) {
 941       a0[i] = (byte)(a1[i]*a2[i]);
 942     }
 943   }
 944 
 945   static void test_divc(byte[] a0, byte[] a1) {
 946     for (int i = 0; i < a0.length; i+=1) {
 947       a0[i] = (byte)(a1[i]/VALUE);
 948     }
 949   }
 950   static void test_divc_n(byte[] a0, byte[] a1) {
 951     for (int i = 0; i < a0.length; i+=1) {
 952       a0[i] = (byte)(a1[i]/(-VALUE));
 953     }
 954   }
 955   static void test_divv(byte[] a0, byte[] a1, byte b) {
 956     for (int i = 0; i < a0.length; i+=1) {
 957       a0[i] = (byte)(a1[i]/b);
 958     }
 959   }
 960   static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
 961     for (int i = 0; i < a0.length; i+=1) {
 962       a0[i] = (byte)(a1[i]/a2[i]);
 963     }
 964   }
 965 
 966   static void test_andc(byte[] a0, byte[] a1) {
 967     for (int i = 0; i < a0.length; i+=1) {
 968       a0[i] = (byte)(a1[i]&BIT_MASK);
 969     }
 970   }
 971   static void test_andv(byte[] a0, byte[] a1, byte b) {
 972     for (int i = 0; i < a0.length; i+=1) {
 973       a0[i] = (byte)(a1[i]&b);
 974     }
 975   }
 976   static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
 977     for (int i = 0; i < a0.length; i+=1) {
 978       a0[i] = (byte)(a1[i]&a2[i]);
 979     }
 980   }
 981 
 982   static void test_orc(byte[] a0, byte[] a1) {
 983     for (int i = 0; i < a0.length; i+=1) {
 984       a0[i] = (byte)(a1[i]|BIT_MASK);
 985     }
 986   }
 987   static void test_orv(byte[] a0, byte[] a1, byte b) {
 988     for (int i = 0; i < a0.length; i+=1) {
 989       a0[i] = (byte)(a1[i]|b);
 990     }
 991   }
 992   static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
 993     for (int i = 0; i < a0.length; i+=1) {
 994       a0[i] = (byte)(a1[i]|a2[i]);
 995     }
 996   }
 997 
 998   static void test_xorc(byte[] a0, byte[] a1) {
 999     for (int i = 0; i < a0.length; i+=1) {
1000       a0[i] = (byte)(a1[i]^BIT_MASK);
1001     }
1002   }
1003   static void test_xorv(byte[] a0, byte[] a1, byte b) {
1004     for (int i = 0; i < a0.length; i+=1) {
1005       a0[i] = (byte)(a1[i]^b);
1006     }
1007   }
1008   static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
1009     for (int i = 0; i < a0.length; i+=1) {
1010       a0[i] = (byte)(a1[i]^a2[i]);
1011     }
1012   }
1013 
1014   static void test_sllc(byte[] a0, byte[] a1) {
1015     for (int i = 0; i < a0.length; i+=1) {
1016       a0[i] = (byte)(a1[i]<<VALUE);
1017     }
1018   }
1019   static void test_sllc_n(byte[] a0, byte[] a1) {
1020     for (int i = 0; i < a0.length; i+=1) {
1021       a0[i] = (byte)(a1[i]<<(-VALUE));
1022     }
1023   }
1024   static void test_sllc_o(byte[] a0, byte[] a1) {
1025     for (int i = 0; i < a0.length; i+=1) {
1026       a0[i] = (byte)(a1[i]<<SHIFT);
1027     }
1028   }
1029   static void test_sllc_on(byte[] a0, byte[] a1) {
1030     for (int i = 0; i < a0.length; i+=1) {
1031       a0[i] = (byte)(a1[i]<<(-SHIFT));
1032     }
1033   }
1034   static void test_sllv(byte[] a0, byte[] a1, int b) {
1035     for (int i = 0; i < a0.length; i+=1) {
1036       a0[i] = (byte)(a1[i]<<b);
1037     }
1038   }
1039 
1040   static void test_srlc(byte[] a0, byte[] a1) {
1041     for (int i = 0; i < a0.length; i+=1) {
1042       a0[i] = (byte)(a1[i]>>>VALUE);
1043     }
1044   }
1045   static void test_srlc_n(byte[] a0, byte[] a1) {
1046     for (int i = 0; i < a0.length; i+=1) {
1047       a0[i] = (byte)(a1[i]>>>(-VALUE));
1048     }
1049   }
1050   static void test_srlc_o(byte[] a0, byte[] a1) {
1051     for (int i = 0; i < a0.length; i+=1) {
1052       a0[i] = (byte)(a1[i]>>>SHIFT);
1053     }
1054   }
1055   static void test_srlc_on(byte[] a0, byte[] a1) {
1056     for (int i = 0; i < a0.length; i+=1) {
1057       a0[i] = (byte)(a1[i]>>>(-SHIFT));
1058     }
1059   }
1060   static void test_srlv(byte[] a0, byte[] a1, int b) {
1061     for (int i = 0; i < a0.length; i+=1) {
1062       a0[i] = (byte)(a1[i]>>>b);
1063     }
1064   }
1065 
1066   static void test_srac(byte[] a0, byte[] a1) {
1067     for (int i = 0; i < a0.length; i+=1) {
1068       a0[i] = (byte)(a1[i]>>VALUE);
1069     }
1070   }
1071   static void test_srac_n(byte[] a0, byte[] a1) {
1072     for (int i = 0; i < a0.length; i+=1) {
1073       a0[i] = (byte)(a1[i]>>(-VALUE));
1074     }
1075   }
1076   static void test_srac_o(byte[] a0, byte[] a1) {
1077     for (int i = 0; i < a0.length; i+=1) {
1078       a0[i] = (byte)(a1[i]>>SHIFT);
1079     }
1080   }
1081   static void test_srac_on(byte[] a0, byte[] a1) {
1082     for (int i = 0; i < a0.length; i+=1) {
1083       a0[i] = (byte)(a1[i]>>(-SHIFT));
1084     }
1085   }
1086   static void test_srav(byte[] a0, byte[] a1, int b) {
1087     for (int i = 0; i < a0.length; i+=1) {
1088       a0[i] = (byte)(a1[i]>>b);
1089     }
1090   }
1091 
1092   static void test_pack2(short[] p2, byte[] a1) {
1093     if (p2.length*2 > a1.length) return;
1094     for (int i = 0; i < p2.length; i+=1) {
1095       short l0 = (short)a1[i*2+0];
1096       short l1 = (short)a1[i*2+1];
1097       p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
1098     }
1099   }
1100   static void test_unpack2(byte[] a0, short[] p2) {
1101     if (p2.length*2 > a0.length) return;
1102     for (int i = 0; i < p2.length; i+=1) {
1103       short l = p2[i];
1104       a0[i*2+0] = (byte)(l & 0xFF);
1105       a0[i*2+1] = (byte)(l >> 8);
1106     }
1107   }
1108   static void test_pack2_swap(short[] p2, byte[] a1) {
1109     if (p2.length*2 > a1.length) return;
1110     for (int i = 0; i < p2.length; i+=1) {
1111       short l0 = (short)a1[i*2+0];
1112       short l1 = (short)a1[i*2+1];
1113       p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
1114     }
1115   }
1116   static void test_unpack2_swap(byte[] a0, short[] p2) {
1117     if (p2.length*2 > a0.length) return;
1118     for (int i = 0; i < p2.length; i+=1) {
1119       short l = p2[i];
1120       a0[i*2+0] = (byte)(l >> 8);
1121       a0[i*2+1] = (byte)(l & 0xFF);
1122     }
1123   }
1124 
1125   static void test_pack4(int[] p4, byte[] a1) {
1126     if (p4.length*4 > a1.length) return;
1127     for (int i = 0; i < p4.length; i+=1) {
1128       int l0 = (int)a1[i*4+0];
1129       int l1 = (int)a1[i*4+1];
1130       int l2 = (int)a1[i*4+2];
1131       int l3 = (int)a1[i*4+3];
1132       p4[i] = (l0 & 0xFF) |
1133              ((l1 & 0xFF) <<  8) |
1134              ((l2 & 0xFF) << 16) |
1135              ((l3 & 0xFF) << 24);
1136     }
1137   }
1138   static void test_unpack4(byte[] a0, int[] p4) {
1139     if (p4.length*4 > a0.length) return;
1140     for (int i = 0; i < p4.length; i+=1) {
1141       int l = p4[i];
1142       a0[i*4+0] = (byte)(l & 0xFF);
1143       a0[i*4+1] = (byte)(l >>  8);
1144       a0[i*4+2] = (byte)(l >> 16);
1145       a0[i*4+3] = (byte)(l >> 24);
1146     }
1147   }
1148   static void test_pack4_swap(int[] p4, byte[] a1) {
1149     if (p4.length*4 > a1.length) return;
1150     for (int i = 0; i < p4.length; i+=1) {
1151       int l0 = (int)a1[i*4+0];
1152       int l1 = (int)a1[i*4+1];
1153       int l2 = (int)a1[i*4+2];
1154       int l3 = (int)a1[i*4+3];
1155       p4[i] = (l3 & 0xFF) |
1156              ((l2 & 0xFF) <<  8) |
1157              ((l1 & 0xFF) << 16) |
1158              ((l0 & 0xFF) << 24);
1159     }
1160   }
1161   static void test_unpack4_swap(byte[] a0, int[] p4) {
1162     if (p4.length*4 > a0.length) return;
1163     for (int i = 0; i < p4.length; i+=1) {
1164       int l = p4[i];
1165       a0[i*4+0] = (byte)(l >> 24);
1166       a0[i*4+1] = (byte)(l >> 16);
1167       a0[i*4+2] = (byte)(l >>  8);
1168       a0[i*4+3] = (byte)(l & 0xFF);
1169     }
1170   }
1171 
1172   static void test_pack8(long[] p8, byte[] a1) {
1173     if (p8.length*8 > a1.length) return;
1174     for (int i = 0; i < p8.length; i+=1) {
1175       long l0 = (long)a1[i*8+0];
1176       long l1 = (long)a1[i*8+1];
1177       long l2 = (long)a1[i*8+2];
1178       long l3 = (long)a1[i*8+3];
1179       long l4 = (long)a1[i*8+4];
1180       long l5 = (long)a1[i*8+5];
1181       long l6 = (long)a1[i*8+6];
1182       long l7 = (long)a1[i*8+7];
1183       p8[i] = (l0 & 0xFFl) |
1184              ((l1 & 0xFFl) <<  8) |
1185              ((l2 & 0xFFl) << 16) |
1186              ((l3 & 0xFFl) << 24) |
1187              ((l4 & 0xFFl) << 32) |
1188              ((l5 & 0xFFl) << 40) |
1189              ((l6 & 0xFFl) << 48) |
1190              ((l7 & 0xFFl) << 56);
1191     }
1192   }
1193   static void test_unpack8(byte[] a0, long[] p8) {
1194     if (p8.length*8 > a0.length) return;
1195     for (int i = 0; i < p8.length; i+=1) {
1196       long l = p8[i];
1197       a0[i*8+0] = (byte)(l & 0xFFl);
1198       a0[i*8+1] = (byte)(l >>  8);
1199       a0[i*8+2] = (byte)(l >> 16);
1200       a0[i*8+3] = (byte)(l >> 24);
1201       a0[i*8+4] = (byte)(l >> 32);
1202       a0[i*8+5] = (byte)(l >> 40);
1203       a0[i*8+6] = (byte)(l >> 48);
1204       a0[i*8+7] = (byte)(l >> 56);
1205     }
1206   }
1207   static void test_pack8_swap(long[] p8, byte[] a1) {
1208     if (p8.length*8 > a1.length) return;
1209     for (int i = 0; i < p8.length; i+=1) {
1210       long l0 = (long)a1[i*8+0];
1211       long l1 = (long)a1[i*8+1];
1212       long l2 = (long)a1[i*8+2];
1213       long l3 = (long)a1[i*8+3];
1214       long l4 = (long)a1[i*8+4];
1215       long l5 = (long)a1[i*8+5];
1216       long l6 = (long)a1[i*8+6];
1217       long l7 = (long)a1[i*8+7];
1218       p8[i] = (l7 & 0xFFl) |
1219              ((l6 & 0xFFl) <<  8) |
1220              ((l5 & 0xFFl) << 16) |
1221              ((l4 & 0xFFl) << 24) |
1222              ((l3 & 0xFFl) << 32) |
1223              ((l2 & 0xFFl) << 40) |
1224              ((l1 & 0xFFl) << 48) |
1225              ((l0 & 0xFFl) << 56);
1226     }
1227   }
1228   static void test_unpack8_swap(byte[] a0, long[] p8) {
1229     if (p8.length*8 > a0.length) return;
1230     for (int i = 0; i < p8.length; i+=1) {
1231       long l = p8[i];
1232       a0[i*8+0] = (byte)(l >> 56);
1233       a0[i*8+1] = (byte)(l >> 48);
1234       a0[i*8+2] = (byte)(l >> 40);
1235       a0[i*8+3] = (byte)(l >> 32);
1236       a0[i*8+4] = (byte)(l >> 24);
1237       a0[i*8+5] = (byte)(l >> 16);
1238       a0[i*8+6] = (byte)(l >>  8);
1239       a0[i*8+7] = (byte)(l & 0xFFl);
1240     }
1241   }
1242 
1243   static int verify(String text, int i, byte elem, byte val) {
1244     if (elem != val) {
1245       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1246       return 1;
1247     }
1248     return 0;
1249   }
1250 
1251   static int verify(String text, int i, short elem, short val) {
1252     if (elem != val) {
1253       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1254       return 1;
1255     }
1256     return 0;
1257   }
1258 
1259   static int verify(String text, int i, int elem, int val) {
1260     if (elem != val) {
1261       System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
1262       return 1;
1263     }
1264     return 0;
1265   }
1266 
1267   static int verify(String text, int i, long elem, long val) {
1268     if (elem != val) {
1269       System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
1270       return 1;
1271     }
1272     return 0;
1273   }
1274 }