1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @bug 6340864
  27  * @summary Implement vectorization optimizations in hotspot-server
  28  *
  29  * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.c2.cr6340864.TestByteVect
  30  * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=8 compiler.c2.cr6340864.TestByteVect
  31  * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=16 compiler.c2.cr6340864.TestByteVect
  32  * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=32 compiler.c2.cr6340864.TestByteVect
  33  */
  34 
  35 package compiler.c2.cr6340864;
  36 
  37 public class TestByteVect {
  38   private static final int ARRLEN = 997;     // length of the byte[] work arrays a0..a4 in test(); the packed short/int/long arrays use ARRLEN/2, ARRLEN/4, ARRLEN/8
  39   private static final int ITERS  = 11000;   // iteration count of the warmup loop and of each timing loop in test()
  40   private static final int ADD_INIT = 63;    // a1[i] is initialized to (byte)(ADD_INIT+i); also the addend passed to the *_add shift tests
  41   private static final int BIT_MASK = 0xB7;  // fills a4 (narrowed to byte); operand of the and/or/xor tests and mask of the *_and shift tests
  42   private static final int VALUE = 3;        // fills a2 (and -VALUE fills a3); scalar operand of the add/sub/mul/div tests and shift count of the plain shift tests
  43   private static final int SHIFT = 8;        // shift count used by the *_o and *_on shift variants; NOTE(review): 8 equals the bit width of a byte, presumably chosen to shift every bit out -- confirm
  44 
  45   public static void main(String args[]) {
  46     System.out.println("Testing Byte vectors");
  47     int errn = test();
  48     if (errn > 0) {
  49       System.err.println("FAILED: " + errn + " errors");
  50       System.exit(97);
  51     }
  52     System.out.println("PASSED");
  53   }
  54 
  55   static int test() {
  56     byte[] a0 = new byte[ARRLEN];
  57     byte[] a1 = new byte[ARRLEN];
  58     byte[] a2 = new byte[ARRLEN];
  59     byte[] a3 = new byte[ARRLEN];
  60     byte[] a4 = new byte[ARRLEN];
  61     short[] p2 = new short[ARRLEN/2];
  62       int[] p4 = new   int[ARRLEN/4];
  63      long[] p8 = new  long[ARRLEN/8];
  64     // Initialize
  65     int gold_sum = 0;
  66     for (int i=0; i<ARRLEN; i++) {
  67       byte val = (byte)(ADD_INIT+i);
  68       gold_sum += val;
  69       a1[i] = val;
  70       a2[i] = (byte)VALUE;
  71       a3[i] = (byte)-VALUE;
  72       a4[i] = (byte)BIT_MASK;
  73     }
  74     System.out.println("Warmup");
  75     for (int i=0; i<ITERS; i++) {
  76       test_sum(a1);
  77       test_addc(a0, a1);
  78       test_addv(a0, a1, (byte)VALUE);
  79       test_adda(a0, a1, a2);
  80       test_subc(a0, a1);
  81       test_subv(a0, a1, (byte)VALUE);
  82       test_suba(a0, a1, a2);
  83 
  84       test_mulc(a0, a1);
  85       test_mulv(a0, a1, (byte)VALUE);
  86       test_mula(a0, a1, a2);
  87       test_divc(a0, a1);
  88       test_divv(a0, a1, (byte)VALUE);
  89       test_diva(a0, a1, a2);
  90       test_mulc_n(a0, a1);
  91       test_mulv(a0, a1, (byte)-VALUE);
  92       test_mula(a0, a1, a3);
  93       test_divc_n(a0, a1);
  94       test_divv(a0, a1, (byte)-VALUE);
  95       test_diva(a0, a1, a3);
  96 
  97       test_andc(a0, a1);
  98       test_andv(a0, a1, (byte)BIT_MASK);
  99       test_anda(a0, a1, a4);
 100       test_orc(a0, a1);
 101       test_orv(a0, a1, (byte)BIT_MASK);
 102       test_ora(a0, a1, a4);
 103       test_xorc(a0, a1);
 104       test_xorv(a0, a1, (byte)BIT_MASK);
 105       test_xora(a0, a1, a4);
 106 
 107       test_sllc(a0, a1);
 108       test_sllv(a0, a1, VALUE);
 109       test_srlc(a0, a1);
 110       test_srlv(a0, a1, VALUE);
 111       test_srac(a0, a1);
 112       test_srav(a0, a1, VALUE);
 113 
 114       test_sllc_n(a0, a1);
 115       test_sllv(a0, a1, -VALUE);
 116       test_srlc_n(a0, a1);
 117       test_srlv(a0, a1, -VALUE);
 118       test_srac_n(a0, a1);
 119       test_srav(a0, a1, -VALUE);
 120 
 121       test_sllc_o(a0, a1);
 122       test_sllv(a0, a1, SHIFT);
 123       test_srlc_o(a0, a1);
 124       test_srlv(a0, a1, SHIFT);
 125       test_srac_o(a0, a1);
 126       test_srav(a0, a1, SHIFT);
 127 
 128       test_sllc_on(a0, a1);
 129       test_sllv(a0, a1, -SHIFT);
 130       test_srlc_on(a0, a1);
 131       test_srlv(a0, a1, -SHIFT);
 132       test_srac_on(a0, a1);
 133       test_srav(a0, a1, -SHIFT);
 134 
 135       test_sllc_add(a0, a1);
 136       test_sllv_add(a0, a1, ADD_INIT);
 137       test_srlc_add(a0, a1);
 138       test_srlv_add(a0, a1, ADD_INIT);
 139       test_srac_add(a0, a1);
 140       test_srav_add(a0, a1, ADD_INIT);
 141 
 142       test_sllc_and(a0, a1);
 143       test_sllv_and(a0, a1, BIT_MASK);
 144       test_srlc_and(a0, a1);
 145       test_srlv_and(a0, a1, BIT_MASK);
 146       test_srac_and(a0, a1);
 147       test_srav_and(a0, a1, BIT_MASK);
 148 
 149       test_pack2(p2, a1);
 150       test_unpack2(a0, p2);
 151       test_pack2_swap(p2, a1);
 152       test_unpack2_swap(a0, p2);
 153       test_pack4(p4, a1);
 154       test_unpack4(a0, p4);
 155       test_pack4_swap(p4, a1);
 156       test_unpack4_swap(a0, p4);
 157       test_pack8(p8, a1);
 158       test_unpack8(a0, p8);
 159       test_pack8_swap(p8, a1);
 160       test_unpack8_swap(a0, p8);
 161     }
 162     // Test and verify results
 163     System.out.println("Verification");
 164     int errn = 0;
 165     {
 166       int sum = test_sum(a1);
 167       if (sum != gold_sum) {
 168         System.err.println("test_sum:  " + sum + " != " + gold_sum);
 169         errn++;
 170       }
 171 
 172       test_addc(a0, a1);
 173       for (int i=0; i<ARRLEN; i++) {
 174         errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 175       }
 176       test_addv(a0, a1, (byte)VALUE);
 177       for (int i=0; i<ARRLEN; i++) {
 178         errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 179       }
 180       test_adda(a0, a1, a2);
 181       for (int i=0; i<ARRLEN; i++) {
 182         errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 183       }
 184 
 185       test_subc(a0, a1);
 186       for (int i=0; i<ARRLEN; i++) {
 187         errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 188       }
 189       test_subv(a0, a1, (byte)VALUE);
 190       for (int i=0; i<ARRLEN; i++) {
 191         errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 192       }
 193       test_suba(a0, a1, a2);
 194       for (int i=0; i<ARRLEN; i++) {
 195         errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 196       }
 197 
 198       test_mulc(a0, a1);
 199       for (int i=0; i<ARRLEN; i++) {
 200         errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 201       }
 202       test_mulv(a0, a1, (byte)VALUE);
 203       for (int i=0; i<ARRLEN; i++) {
 204         errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 205       }
 206       test_mula(a0, a1, a2);
 207       for (int i=0; i<ARRLEN; i++) {
 208         errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 209       }
 210 
 211       test_divc(a0, a1);
 212       for (int i=0; i<ARRLEN; i++) {
 213         errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 214       }
 215       test_divv(a0, a1, (byte)VALUE);
 216       for (int i=0; i<ARRLEN; i++) {
 217         errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 218       }
 219       test_diva(a0, a1, a2);
 220       for (int i=0; i<ARRLEN; i++) {
 221         errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 222       }
 223 
 224       test_mulc_n(a0, a1);
 225       for (int i=0; i<ARRLEN; i++) {
 226         errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 227       }
 228       test_mulv(a0, a1, (byte)-VALUE);
 229       for (int i=0; i<ARRLEN; i++) {
 230         errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 231       }
 232       test_mula(a0, a1, a3);
 233       for (int i=0; i<ARRLEN; i++) {
 234         errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 235       }
 236 
 237       test_divc_n(a0, a1);
 238       for (int i=0; i<ARRLEN; i++) {
 239         errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 240       }
 241       test_divv(a0, a1, (byte)-VALUE);
 242       for (int i=0; i<ARRLEN; i++) {
 243         errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 244       }
 245       test_diva(a0, a1, a3);
 246       for (int i=0; i<ARRLEN; i++) {
 247         errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 248       }
 249 
 250       test_andc(a0, a1);
 251       for (int i=0; i<ARRLEN; i++) {
 252         errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 253       }
 254       test_andv(a0, a1, (byte)BIT_MASK);
 255       for (int i=0; i<ARRLEN; i++) {
 256         errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 257       }
 258       test_anda(a0, a1, a4);
 259       for (int i=0; i<ARRLEN; i++) {
 260         errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 261       }
 262 
 263       test_orc(a0, a1);
 264       for (int i=0; i<ARRLEN; i++) {
 265         errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 266       }
 267       test_orv(a0, a1, (byte)BIT_MASK);
 268       for (int i=0; i<ARRLEN; i++) {
 269         errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 270       }
 271       test_ora(a0, a1, a4);
 272       for (int i=0; i<ARRLEN; i++) {
 273         errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 274       }
 275 
 276       test_xorc(a0, a1);
 277       for (int i=0; i<ARRLEN; i++) {
 278         errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 279       }
 280       test_xorv(a0, a1, (byte)BIT_MASK);
 281       for (int i=0; i<ARRLEN; i++) {
 282         errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 283       }
 284       test_xora(a0, a1, a4);
 285       for (int i=0; i<ARRLEN; i++) {
 286         errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 287       }
 288 
 289       test_sllc(a0, a1);
 290       for (int i=0; i<ARRLEN; i++) {
 291         errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 292       }
 293       test_sllv(a0, a1, VALUE);
 294       for (int i=0; i<ARRLEN; i++) {
 295         errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 296       }
 297 
 298       test_srlc(a0, a1);
 299       for (int i=0; i<ARRLEN; i++) {
 300         errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 301       }
 302       test_srlv(a0, a1, VALUE);
 303       for (int i=0; i<ARRLEN; i++) {
 304         errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 305       }
 306 
 307       test_srac(a0, a1);
 308       for (int i=0; i<ARRLEN; i++) {
 309         errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 310       }
 311       test_srav(a0, a1, VALUE);
 312       for (int i=0; i<ARRLEN; i++) {
 313         errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 314       }
 315 
 316       test_sllc_n(a0, a1);
 317       for (int i=0; i<ARRLEN; i++) {
 318         errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 319       }
 320       test_sllv(a0, a1, -VALUE);
 321       for (int i=0; i<ARRLEN; i++) {
 322         errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 323       }
 324 
 325       test_srlc_n(a0, a1);
 326       for (int i=0; i<ARRLEN; i++) {
 327         errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 328       }
 329       test_srlv(a0, a1, -VALUE);
 330       for (int i=0; i<ARRLEN; i++) {
 331         errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 332       }
 333 
 334       test_srac_n(a0, a1);
 335       for (int i=0; i<ARRLEN; i++) {
 336         errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 337       }
 338       test_srav(a0, a1, -VALUE);
 339       for (int i=0; i<ARRLEN; i++) {
 340         errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 341       }
 342 
 343       test_sllc_o(a0, a1);
 344       for (int i=0; i<ARRLEN; i++) {
 345         errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 346       }
 347       test_sllv(a0, a1, SHIFT);
 348       for (int i=0; i<ARRLEN; i++) {
 349         errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 350       }
 351 
 352       test_srlc_o(a0, a1);
 353       for (int i=0; i<ARRLEN; i++) {
 354         errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 355       }
 356       test_srlv(a0, a1, SHIFT);
 357       for (int i=0; i<ARRLEN; i++) {
 358         errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 359       }
 360 
 361       test_srac_o(a0, a1);
 362       for (int i=0; i<ARRLEN; i++) {
 363         errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 364       }
 365       test_srav(a0, a1, SHIFT);
 366       for (int i=0; i<ARRLEN; i++) {
 367         errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 368       }
 369 
 370       test_sllc_on(a0, a1);
 371       for (int i=0; i<ARRLEN; i++) {
 372         errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 373       }
 374       test_sllv(a0, a1, -SHIFT);
 375       for (int i=0; i<ARRLEN; i++) {
 376         errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 377       }
 378 
 379       test_srlc_on(a0, a1);
 380       for (int i=0; i<ARRLEN; i++) {
 381         errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 382       }
 383       test_srlv(a0, a1, -SHIFT);
 384       for (int i=0; i<ARRLEN; i++) {
 385         errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 386       }
 387 
 388       test_srac_on(a0, a1);
 389       for (int i=0; i<ARRLEN; i++) {
 390         errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 391       }
 392       test_srav(a0, a1, -SHIFT);
 393       for (int i=0; i<ARRLEN; i++) {
 394         errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 395       }
 396 
 397       test_sllc_add(a0, a1);
 398       for (int i=0; i<ARRLEN; i++) {
 399         errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 400       }
 401       test_sllv_add(a0, a1, ADD_INIT);
 402       for (int i=0; i<ARRLEN; i++) {
 403         errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 404       }
 405 
 406       test_srlc_add(a0, a1);
 407       for (int i=0; i<ARRLEN; i++) {
 408         errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 409       }
 410       test_srlv_add(a0, a1, ADD_INIT);
 411       for (int i=0; i<ARRLEN; i++) {
 412         errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 413       }
 414 
 415       test_srac_add(a0, a1);
 416       for (int i=0; i<ARRLEN; i++) {
 417         errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 418       }
 419       test_srav_add(a0, a1, ADD_INIT);
 420       for (int i=0; i<ARRLEN; i++) {
 421         errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 422       }
 423 
 424       test_sllc_and(a0, a1);
 425       for (int i=0; i<ARRLEN; i++) {
 426         errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 427       }
 428       test_sllv_and(a0, a1, BIT_MASK);
 429       for (int i=0; i<ARRLEN; i++) {
 430         errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 431       }
 432 
 433       test_srlc_and(a0, a1);
 434       for (int i=0; i<ARRLEN; i++) {
 435         errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 436       }
 437       test_srlv_and(a0, a1, BIT_MASK);
 438       for (int i=0; i<ARRLEN; i++) {
 439         errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 440       }
 441 
 442       test_srac_and(a0, a1);
 443       for (int i=0; i<ARRLEN; i++) {
 444         errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 445       }
 446       test_srav_and(a0, a1, BIT_MASK);
 447       for (int i=0; i<ARRLEN; i++) {
 448         errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 449       }
 450 
 451       test_pack2(p2, a1);
 452       for (int i=0; i<ARRLEN/2; i++) {
 453         errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
 454       }
 455       for (int i=0; i<ARRLEN; i++) {
 456         a0[i] = -1;
 457       }
 458       test_unpack2(a0, p2);
 459       for (int i=0; i<(ARRLEN&(-2)); i++) {
 460         errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
 461       }
 462 
 463       test_pack2_swap(p2, a1);
 464       for (int i=0; i<ARRLEN/2; i++) {
 465         errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
 466       }
 467       for (int i=0; i<ARRLEN; i++) {
 468         a0[i] = -1;
 469       }
 470       test_unpack2_swap(a0, p2);
 471       for (int i=0; i<(ARRLEN&(-2)); i++) {
 472         errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 473       }
 474 
 475       test_pack4(p4, a1);
 476       for (int i=0; i<ARRLEN/4; i++) {
 477         errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
 478                                                  (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
 479                                                  (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
 480                                                  (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
 481       }
 482       for (int i=0; i<ARRLEN; i++) {
 483         a0[i] = -1;
 484       }
 485       test_unpack4(a0, p4);
 486       for (int i=0; i<(ARRLEN&(-4)); i++) {
 487         errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
 488       }
 489 
 490       test_pack4_swap(p4, a1);
 491       for (int i=0; i<ARRLEN/4; i++) {
 492         errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
 493                                                       (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
 494                                                       (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
 495                                                       (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
 496       }
 497       for (int i=0; i<ARRLEN; i++) {
 498         a0[i] = -1;
 499       }
 500       test_unpack4_swap(a0, p4);
 501       for (int i=0; i<(ARRLEN&(-4)); i++) {
 502         errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 503       }
 504 
 505       test_pack8(p8, a1);
 506       for (int i=0; i<ARRLEN/8; i++) {
 507         errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
 508                                                  (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
 509                                                  (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
 510                                                  (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
 511                                                  (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
 512                                                  (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
 513                                                  (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
 514                                                  (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
 515       }
 516       for (int i=0; i<ARRLEN; i++) {
 517         a0[i] = -1;
 518       }
 519       test_unpack8(a0, p8);
 520       for (int i=0; i<(ARRLEN&(-8)); i++) {
 521         errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
 522       }
 523 
 524       test_pack8_swap(p8, a1);
 525       for (int i=0; i<ARRLEN/8; i++) {
 526         errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
 527                                                       (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
 528                                                       (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
 529                                                       (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
 530                                                       (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
 531                                                       (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
 532                                                       (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
 533                                                       (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
 534       }
 535       for (int i=0; i<ARRLEN; i++) {
 536         a0[i] = -1;
 537       }
 538       test_unpack8_swap(a0, p8);
 539       for (int i=0; i<(ARRLEN&(-8)); i++) {
 540         errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 541       }
 542 
 543     }
 544 
 545     if (errn > 0)
 546       return errn;
 547 
 548     System.out.println("Time");
 549     long start, end;
 550 
 551     start = System.currentTimeMillis();
 552     for (int i=0; i<ITERS; i++) {
 553       test_sum(a1);
 554     }
 555     end = System.currentTimeMillis();
 556     System.out.println("test_sum: " + (end - start));
 557 
 558     start = System.currentTimeMillis();
 559     for (int i=0; i<ITERS; i++) {
 560       test_addc(a0, a1);
 561     }
 562     end = System.currentTimeMillis();
 563     System.out.println("test_addc: " + (end - start));
 564     start = System.currentTimeMillis();
 565     for (int i=0; i<ITERS; i++) {
 566       test_addv(a0, a1, (byte)VALUE);
 567     }
 568     end = System.currentTimeMillis();
 569     System.out.println("test_addv: " + (end - start));
 570     start = System.currentTimeMillis();
 571     for (int i=0; i<ITERS; i++) {
 572       test_adda(a0, a1, a2);
 573     }
 574     end = System.currentTimeMillis();
 575     System.out.println("test_adda: " + (end - start));
 576 
 577     start = System.currentTimeMillis();
 578     for (int i=0; i<ITERS; i++) {
 579       test_subc(a0, a1);
 580     }
 581     end = System.currentTimeMillis();
 582     System.out.println("test_subc: " + (end - start));
 583     start = System.currentTimeMillis();
 584     for (int i=0; i<ITERS; i++) {
 585       test_subv(a0, a1, (byte)VALUE);
 586     }
 587     end = System.currentTimeMillis();
 588     System.out.println("test_subv: " + (end - start));
 589     start = System.currentTimeMillis();
 590     for (int i=0; i<ITERS; i++) {
 591       test_suba(a0, a1, a2);
 592     }
 593     end = System.currentTimeMillis();
 594     System.out.println("test_suba: " + (end - start));
 595 
 596     start = System.currentTimeMillis();
 597     for (int i=0; i<ITERS; i++) {
 598       test_mulc(a0, a1);
 599     }
 600     end = System.currentTimeMillis();
 601     System.out.println("test_mulc: " + (end - start));
 602     start = System.currentTimeMillis();
 603     for (int i=0; i<ITERS; i++) {
 604       test_mulv(a0, a1, (byte)VALUE);
 605     }
 606     end = System.currentTimeMillis();
 607     System.out.println("test_mulv: " + (end - start));
 608     start = System.currentTimeMillis();
 609     for (int i=0; i<ITERS; i++) {
 610       test_mula(a0, a1, a2);
 611     }
 612     end = System.currentTimeMillis();
 613     System.out.println("test_mula: " + (end - start));
 614 
 615     start = System.currentTimeMillis();
 616     for (int i=0; i<ITERS; i++) {
 617       test_divc(a0, a1);
 618     }
 619     end = System.currentTimeMillis();
 620     System.out.println("test_divc: " + (end - start));
 621     start = System.currentTimeMillis();
 622     for (int i=0; i<ITERS; i++) {
 623       test_divv(a0, a1, (byte)VALUE);
 624     }
 625     end = System.currentTimeMillis();
 626     System.out.println("test_divv: " + (end - start));
 627     start = System.currentTimeMillis();
 628     for (int i=0; i<ITERS; i++) {
 629       test_diva(a0, a1, a2);
 630     }
 631     end = System.currentTimeMillis();
 632     System.out.println("test_diva: " + (end - start));
 633 
 634     start = System.currentTimeMillis();
 635     for (int i=0; i<ITERS; i++) {
 636       test_mulc_n(a0, a1);
 637     }
 638     end = System.currentTimeMillis();
 639     System.out.println("test_mulc_n: " + (end - start));
 640     start = System.currentTimeMillis();
 641     for (int i=0; i<ITERS; i++) {
 642       test_mulv(a0, a1, (byte)-VALUE);
 643     }
 644     end = System.currentTimeMillis();
 645     System.out.println("test_mulv_n: " + (end - start));
 646     start = System.currentTimeMillis();
 647     for (int i=0; i<ITERS; i++) {
 648       test_mula(a0, a1, a3);
 649     }
 650     end = System.currentTimeMillis();
 651     System.out.println("test_mula_n: " + (end - start));
 652 
 653     start = System.currentTimeMillis();
 654     for (int i=0; i<ITERS; i++) {
 655       test_divc_n(a0, a1);
 656     }
 657     end = System.currentTimeMillis();
 658     System.out.println("test_divc_n: " + (end - start));
 659     start = System.currentTimeMillis();
 660     for (int i=0; i<ITERS; i++) {
 661       test_divv(a0, a1, (byte)-VALUE);
 662     }
 663     end = System.currentTimeMillis();
 664     System.out.println("test_divv_n: " + (end - start));
 665     start = System.currentTimeMillis();
 666     for (int i=0; i<ITERS; i++) {
 667       test_diva(a0, a1, a3);
 668     }
 669     end = System.currentTimeMillis();
 670     System.out.println("test_diva_n: " + (end - start));
 671 
 672     start = System.currentTimeMillis();
 673     for (int i=0; i<ITERS; i++) {
 674       test_andc(a0, a1);
 675     }
 676     end = System.currentTimeMillis();
 677     System.out.println("test_andc: " + (end - start));
 678     start = System.currentTimeMillis();
 679     for (int i=0; i<ITERS; i++) {
 680       test_andv(a0, a1, (byte)BIT_MASK);
 681     }
 682     end = System.currentTimeMillis();
 683     System.out.println("test_andv: " + (end - start));
 684     start = System.currentTimeMillis();
 685     for (int i=0; i<ITERS; i++) {
 686       test_anda(a0, a1, a4);
 687     }
 688     end = System.currentTimeMillis();
 689     System.out.println("test_anda: " + (end - start));
 690 
 691     start = System.currentTimeMillis();
 692     for (int i=0; i<ITERS; i++) {
 693       test_orc(a0, a1);
 694     }
 695     end = System.currentTimeMillis();
 696     System.out.println("test_orc: " + (end - start));
 697     start = System.currentTimeMillis();
 698     for (int i=0; i<ITERS; i++) {
 699       test_orv(a0, a1, (byte)BIT_MASK);
 700     }
 701     end = System.currentTimeMillis();
 702     System.out.println("test_orv: " + (end - start));
 703     start = System.currentTimeMillis();
 704     for (int i=0; i<ITERS; i++) {
 705       test_ora(a0, a1, a4);
 706     }
 707     end = System.currentTimeMillis();
 708     System.out.println("test_ora: " + (end - start));
 709 
 710     start = System.currentTimeMillis();
 711     for (int i=0; i<ITERS; i++) {
 712       test_xorc(a0, a1);
 713     }
 714     end = System.currentTimeMillis();
 715     System.out.println("test_xorc: " + (end - start));
 716     start = System.currentTimeMillis();
 717     for (int i=0; i<ITERS; i++) {
 718       test_xorv(a0, a1, (byte)BIT_MASK);
 719     }
 720     end = System.currentTimeMillis();
 721     System.out.println("test_xorv: " + (end - start));
 722     start = System.currentTimeMillis();
 723     for (int i=0; i<ITERS; i++) {
 724       test_xora(a0, a1, a4);
 725     }
 726     end = System.currentTimeMillis();
 727     System.out.println("test_xora: " + (end - start));
 728 
 729     start = System.currentTimeMillis();
 730     for (int i=0; i<ITERS; i++) {
 731       test_sllc(a0, a1);
 732     }
 733     end = System.currentTimeMillis();
 734     System.out.println("test_sllc: " + (end - start));
 735     start = System.currentTimeMillis();
 736     for (int i=0; i<ITERS; i++) {
 737       test_sllv(a0, a1, VALUE);
 738     }
 739     end = System.currentTimeMillis();
 740     System.out.println("test_sllv: " + (end - start));
 741 
 742     start = System.currentTimeMillis();
 743     for (int i=0; i<ITERS; i++) {
 744       test_srlc(a0, a1);
 745     }
 746     end = System.currentTimeMillis();
 747     System.out.println("test_srlc: " + (end - start));
 748     start = System.currentTimeMillis();
 749     for (int i=0; i<ITERS; i++) {
 750       test_srlv(a0, a1, VALUE);
 751     }
 752     end = System.currentTimeMillis();
 753     System.out.println("test_srlv: " + (end - start));
 754 
 755     start = System.currentTimeMillis();
 756     for (int i=0; i<ITERS; i++) {
 757       test_srac(a0, a1);
 758     }
 759     end = System.currentTimeMillis();
 760     System.out.println("test_srac: " + (end - start));
 761     start = System.currentTimeMillis();
 762     for (int i=0; i<ITERS; i++) {
 763       test_srav(a0, a1, VALUE);
 764     }
 765     end = System.currentTimeMillis();
 766     System.out.println("test_srav: " + (end - start));
 767 
 768     start = System.currentTimeMillis();
 769     for (int i=0; i<ITERS; i++) {
 770       test_sllc_n(a0, a1);
 771     }
 772     end = System.currentTimeMillis();
 773     System.out.println("test_sllc_n: " + (end - start));
 774     start = System.currentTimeMillis();
 775     for (int i=0; i<ITERS; i++) {
 776       test_sllv(a0, a1, -VALUE);
 777     }
 778     end = System.currentTimeMillis();
 779     System.out.println("test_sllv_n: " + (end - start));
 780 
 781     start = System.currentTimeMillis();
 782     for (int i=0; i<ITERS; i++) {
 783       test_srlc_n(a0, a1);
 784     }
 785     end = System.currentTimeMillis();
 786     System.out.println("test_srlc_n: " + (end - start));
 787     start = System.currentTimeMillis();
 788     for (int i=0; i<ITERS; i++) {
 789       test_srlv(a0, a1, -VALUE);
 790     }
 791     end = System.currentTimeMillis();
 792     System.out.println("test_srlv_n: " + (end - start));
 793 
 794     start = System.currentTimeMillis();
 795     for (int i=0; i<ITERS; i++) {
 796       test_srac_n(a0, a1);
 797     }
 798     end = System.currentTimeMillis();
 799     System.out.println("test_srac_n: " + (end - start));
 800     start = System.currentTimeMillis();
 801     for (int i=0; i<ITERS; i++) {
 802       test_srav(a0, a1, -VALUE);
 803     }
 804     end = System.currentTimeMillis();
 805     System.out.println("test_srav_n: " + (end - start));
 806 
 807     start = System.currentTimeMillis();
 808     for (int i=0; i<ITERS; i++) {
 809       test_sllc_o(a0, a1);
 810     }
 811     end = System.currentTimeMillis();
 812     System.out.println("test_sllc_o: " + (end - start));
 813     start = System.currentTimeMillis();
 814     for (int i=0; i<ITERS; i++) {
 815       test_sllv(a0, a1, SHIFT);
 816     }
 817     end = System.currentTimeMillis();
 818     System.out.println("test_sllv_o: " + (end - start));
 819 
 820     start = System.currentTimeMillis();
 821     for (int i=0; i<ITERS; i++) {
 822       test_srlc_o(a0, a1);
 823     }
 824     end = System.currentTimeMillis();
 825     System.out.println("test_srlc_o: " + (end - start));
 826     start = System.currentTimeMillis();
 827     for (int i=0; i<ITERS; i++) {
 828       test_srlv(a0, a1, SHIFT);
 829     }
 830     end = System.currentTimeMillis();
 831     System.out.println("test_srlv_o: " + (end - start));
 832 
 833     start = System.currentTimeMillis();
 834     for (int i=0; i<ITERS; i++) {
 835       test_srac_o(a0, a1);
 836     }
 837     end = System.currentTimeMillis();
 838     System.out.println("test_srac_o: " + (end - start));
 839     start = System.currentTimeMillis();
 840     for (int i=0; i<ITERS; i++) {
 841       test_srav(a0, a1, SHIFT);
 842     }
 843     end = System.currentTimeMillis();
 844     System.out.println("test_srav_o: " + (end - start));
 845 
 846     start = System.currentTimeMillis();
 847     for (int i=0; i<ITERS; i++) {
 848       test_sllc_on(a0, a1);
 849     }
 850     end = System.currentTimeMillis();
 851     System.out.println("test_sllc_on: " + (end - start));
 852     start = System.currentTimeMillis();
 853     for (int i=0; i<ITERS; i++) {
 854       test_sllv(a0, a1, -SHIFT);
 855     }
 856     end = System.currentTimeMillis();
 857     System.out.println("test_sllv_on: " + (end - start));
 858 
 859     start = System.currentTimeMillis();
 860     for (int i=0; i<ITERS; i++) {
 861       test_srlc_on(a0, a1);
 862     }
 863     end = System.currentTimeMillis();
 864     System.out.println("test_srlc_on: " + (end - start));
 865     start = System.currentTimeMillis();
 866     for (int i=0; i<ITERS; i++) {
 867       test_srlv(a0, a1, -SHIFT);
 868     }
 869     end = System.currentTimeMillis();
 870     System.out.println("test_srlv_on: " + (end - start));
 871 
 872     start = System.currentTimeMillis();
 873     for (int i=0; i<ITERS; i++) {
 874       test_srac_on(a0, a1);
 875     }
 876     end = System.currentTimeMillis();
 877     System.out.println("test_srac_on: " + (end - start));
 878     start = System.currentTimeMillis();
 879     for (int i=0; i<ITERS; i++) {
 880       test_srav(a0, a1, -SHIFT);
 881     }
 882     end = System.currentTimeMillis();
 883     System.out.println("test_srav_on: " + (end - start));
 884 
 885     start = System.currentTimeMillis();
 886     for (int i=0; i<ITERS; i++) {
 887       test_sllc_add(a0, a1);
 888     }
 889     end = System.currentTimeMillis();
 890     System.out.println("test_sllc_add: " + (end - start));
 891     start = System.currentTimeMillis();
 892     for (int i=0; i<ITERS; i++) {
 893       test_sllv_add(a0, a1, ADD_INIT);
 894     }
 895     end = System.currentTimeMillis();
 896     System.out.println("test_sllv_add: " + (end - start));
 897 
 898     start = System.currentTimeMillis();
 899     for (int i=0; i<ITERS; i++) {
 900       test_srlc_add(a0, a1);
 901     }
 902     end = System.currentTimeMillis();
 903     System.out.println("test_srlc_add: " + (end - start));
 904     start = System.currentTimeMillis();
 905     for (int i=0; i<ITERS; i++) {
 906       test_srlv_add(a0, a1, ADD_INIT);
 907     }
 908     end = System.currentTimeMillis();
 909     System.out.println("test_srlv_add: " + (end - start));
 910 
 911     start = System.currentTimeMillis();
 912     for (int i=0; i<ITERS; i++) {
 913       test_srac_add(a0, a1);
 914     }
 915     end = System.currentTimeMillis();
 916     System.out.println("test_srac_add: " + (end - start));
 917     start = System.currentTimeMillis();
 918     for (int i=0; i<ITERS; i++) {
 919       test_srav_add(a0, a1, ADD_INIT);
 920     }
 921     end = System.currentTimeMillis();
 922     System.out.println("test_srav_add: " + (end - start));
 923 
 924     start = System.currentTimeMillis();
 925     for (int i=0; i<ITERS; i++) {
 926       test_sllc_and(a0, a1);
 927     }
 928     end = System.currentTimeMillis();
 929     System.out.println("test_sllc_and: " + (end - start));
 930     start = System.currentTimeMillis();
 931     for (int i=0; i<ITERS; i++) {
 932       test_sllv_and(a0, a1, BIT_MASK);
 933     }
 934     end = System.currentTimeMillis();
 935     System.out.println("test_sllv_and: " + (end - start));
 936 
 937     start = System.currentTimeMillis();
 938     for (int i=0; i<ITERS; i++) {
 939       test_srlc_and(a0, a1);
 940     }
 941     end = System.currentTimeMillis();
 942     System.out.println("test_srlc_and: " + (end - start));
 943     start = System.currentTimeMillis();
 944     for (int i=0; i<ITERS; i++) {
 945       test_srlv_and(a0, a1, BIT_MASK);
 946     }
 947     end = System.currentTimeMillis();
 948     System.out.println("test_srlv_and: " + (end - start));
 949 
 950     start = System.currentTimeMillis();
 951     for (int i=0; i<ITERS; i++) {
 952       test_srac_and(a0, a1);
 953     }
 954     end = System.currentTimeMillis();
 955     System.out.println("test_srac_and: " + (end - start));
 956     start = System.currentTimeMillis();
 957     for (int i=0; i<ITERS; i++) {
 958       test_srav_and(a0, a1, BIT_MASK);
 959     }
 960     end = System.currentTimeMillis();
 961     System.out.println("test_srav_and: " + (end - start));
 962 
 963     start = System.currentTimeMillis();
 964     for (int i=0; i<ITERS; i++) {
 965       test_pack2(p2, a1);
 966     }
 967     end = System.currentTimeMillis();
 968     System.out.println("test_pack2: " + (end - start));
 969     start = System.currentTimeMillis();
 970     for (int i=0; i<ITERS; i++) {
 971       test_unpack2(a0, p2);
 972     }
 973     end = System.currentTimeMillis();
 974     System.out.println("test_unpack2: " + (end - start));
 975     start = System.currentTimeMillis();
 976     for (int i=0; i<ITERS; i++) {
 977       test_pack2_swap(p2, a1);
 978     }
 979     end = System.currentTimeMillis();
 980     System.out.println("test_pack2_swap: " + (end - start));
 981     start = System.currentTimeMillis();
 982     for (int i=0; i<ITERS; i++) {
 983       test_unpack2_swap(a0, p2);
 984     }
 985     end = System.currentTimeMillis();
 986     System.out.println("test_unpack2_swap: " + (end - start));
 987 
 988     start = System.currentTimeMillis();
 989     for (int i=0; i<ITERS; i++) {
 990       test_pack4(p4, a1);
 991     }
 992     end = System.currentTimeMillis();
 993     System.out.println("test_pack4: " + (end - start));
 994     start = System.currentTimeMillis();
 995     for (int i=0; i<ITERS; i++) {
 996       test_unpack4(a0, p4);
 997     }
 998     end = System.currentTimeMillis();
 999     System.out.println("test_unpack4: " + (end - start));
1000     start = System.currentTimeMillis();
1001     for (int i=0; i<ITERS; i++) {
1002       test_pack4_swap(p4, a1);
1003     }
1004     end = System.currentTimeMillis();
1005     System.out.println("test_pack4_swap: " + (end - start));
1006     start = System.currentTimeMillis();
1007     for (int i=0; i<ITERS; i++) {
1008       test_unpack4_swap(a0, p4);
1009     }
1010     end = System.currentTimeMillis();
1011     System.out.println("test_unpack4_swap: " + (end - start));
1012 
1013     start = System.currentTimeMillis();
1014     for (int i=0; i<ITERS; i++) {
1015       test_pack8(p8, a1);
1016     }
1017     end = System.currentTimeMillis();
1018     System.out.println("test_pack8: " + (end - start));
1019     start = System.currentTimeMillis();
1020     for (int i=0; i<ITERS; i++) {
1021       test_unpack8(a0, p8);
1022     }
1023     end = System.currentTimeMillis();
1024     System.out.println("test_unpack8: " + (end - start));
1025     start = System.currentTimeMillis();
1026     for (int i=0; i<ITERS; i++) {
1027       test_pack8_swap(p8, a1);
1028     }
1029     end = System.currentTimeMillis();
1030     System.out.println("test_pack8_swap: " + (end - start));
1031     start = System.currentTimeMillis();
1032     for (int i=0; i<ITERS; i++) {
1033       test_unpack8_swap(a0, p8);
1034     }
1035     end = System.currentTimeMillis();
1036     System.out.println("test_unpack8_swap: " + (end - start));
1037 
1038     return errn;
1039   }
1040 
1041   static int test_sum(byte[] a1) {
1042     int sum = 0;
1043     for (int i = 0; i < a1.length; i+=1) {
1044       sum += a1[i];
1045     }
1046     return sum;
1047   }
1048 
1049   static void test_addc(byte[] a0, byte[] a1) {
1050     for (int i = 0; i < a0.length; i+=1) {
1051       a0[i] = (byte)(a1[i]+VALUE);
1052     }
1053   }
1054   static void test_addv(byte[] a0, byte[] a1, byte b) {
1055     for (int i = 0; i < a0.length; i+=1) {
1056       a0[i] = (byte)(a1[i]+b);
1057     }
1058   }
1059   static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
1060     for (int i = 0; i < a0.length; i+=1) {
1061       a0[i] = (byte)(a1[i]+a2[i]);
1062     }
1063   }
1064 
1065   static void test_subc(byte[] a0, byte[] a1) {
1066     for (int i = 0; i < a0.length; i+=1) {
1067       a0[i] = (byte)(a1[i]-VALUE);
1068     }
1069   }
1070   static void test_subv(byte[] a0, byte[] a1, byte b) {
1071     for (int i = 0; i < a0.length; i+=1) {
1072       a0[i] = (byte)(a1[i]-b);
1073     }
1074   }
1075   static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
1076     for (int i = 0; i < a0.length; i+=1) {
1077       a0[i] = (byte)(a1[i]-a2[i]);
1078     }
1079   }
1080 
1081   static void test_mulc(byte[] a0, byte[] a1) {
1082     for (int i = 0; i < a0.length; i+=1) {
1083       a0[i] = (byte)(a1[i]*VALUE);
1084     }
1085   }
1086   static void test_mulc_n(byte[] a0, byte[] a1) {
1087     for (int i = 0; i < a0.length; i+=1) {
1088       a0[i] = (byte)(a1[i]*(-VALUE));
1089     }
1090   }
1091   static void test_mulv(byte[] a0, byte[] a1, byte b) {
1092     for (int i = 0; i < a0.length; i+=1) {
1093       a0[i] = (byte)(a1[i]*b);
1094     }
1095   }
1096   static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
1097     for (int i = 0; i < a0.length; i+=1) {
1098       a0[i] = (byte)(a1[i]*a2[i]);
1099     }
1100   }
1101 
1102   static void test_divc(byte[] a0, byte[] a1) {
1103     for (int i = 0; i < a0.length; i+=1) {
1104       a0[i] = (byte)(a1[i]/VALUE);
1105     }
1106   }
1107   static void test_divc_n(byte[] a0, byte[] a1) {
1108     for (int i = 0; i < a0.length; i+=1) {
1109       a0[i] = (byte)(a1[i]/(-VALUE));
1110     }
1111   }
1112   static void test_divv(byte[] a0, byte[] a1, byte b) {
1113     for (int i = 0; i < a0.length; i+=1) {
1114       a0[i] = (byte)(a1[i]/b);
1115     }
1116   }
1117   static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
1118     for (int i = 0; i < a0.length; i+=1) {
1119       a0[i] = (byte)(a1[i]/a2[i]);
1120     }
1121   }
1122 
1123   static void test_andc(byte[] a0, byte[] a1) {
1124     for (int i = 0; i < a0.length; i+=1) {
1125       a0[i] = (byte)(a1[i]&BIT_MASK);
1126     }
1127   }
1128   static void test_andv(byte[] a0, byte[] a1, byte b) {
1129     for (int i = 0; i < a0.length; i+=1) {
1130       a0[i] = (byte)(a1[i]&b);
1131     }
1132   }
1133   static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
1134     for (int i = 0; i < a0.length; i+=1) {
1135       a0[i] = (byte)(a1[i]&a2[i]);
1136     }
1137   }
1138 
1139   static void test_orc(byte[] a0, byte[] a1) {
1140     for (int i = 0; i < a0.length; i+=1) {
1141       a0[i] = (byte)(a1[i]|BIT_MASK);
1142     }
1143   }
1144   static void test_orv(byte[] a0, byte[] a1, byte b) {
1145     for (int i = 0; i < a0.length; i+=1) {
1146       a0[i] = (byte)(a1[i]|b);
1147     }
1148   }
1149   static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
1150     for (int i = 0; i < a0.length; i+=1) {
1151       a0[i] = (byte)(a1[i]|a2[i]);
1152     }
1153   }
1154 
1155   static void test_xorc(byte[] a0, byte[] a1) {
1156     for (int i = 0; i < a0.length; i+=1) {
1157       a0[i] = (byte)(a1[i]^BIT_MASK);
1158     }
1159   }
1160   static void test_xorv(byte[] a0, byte[] a1, byte b) {
1161     for (int i = 0; i < a0.length; i+=1) {
1162       a0[i] = (byte)(a1[i]^b);
1163     }
1164   }
1165   static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
1166     for (int i = 0; i < a0.length; i+=1) {
1167       a0[i] = (byte)(a1[i]^a2[i]);
1168     }
1169   }
1170 
1171   static void test_sllc(byte[] a0, byte[] a1) {
1172     for (int i = 0; i < a0.length; i+=1) {
1173       a0[i] = (byte)(a1[i]<<VALUE);
1174     }
1175   }
1176   static void test_sllc_n(byte[] a0, byte[] a1) {
1177     for (int i = 0; i < a0.length; i+=1) {
1178       a0[i] = (byte)(a1[i]<<(-VALUE));
1179     }
1180   }
1181   static void test_sllc_o(byte[] a0, byte[] a1) {
1182     for (int i = 0; i < a0.length; i+=1) {
1183       a0[i] = (byte)(a1[i]<<SHIFT);
1184     }
1185   }
1186   static void test_sllc_on(byte[] a0, byte[] a1) {
1187     for (int i = 0; i < a0.length; i+=1) {
1188       a0[i] = (byte)(a1[i]<<(-SHIFT));
1189     }
1190   }
1191   static void test_sllv(byte[] a0, byte[] a1, int b) {
1192     for (int i = 0; i < a0.length; i+=1) {
1193       a0[i] = (byte)(a1[i]<<b);
1194     }
1195   }
1196   static void test_sllc_add(byte[] a0, byte[] a1) {
1197     for (int i = 0; i < a0.length; i+=1) {
1198       a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
1199     }
1200   }
1201   static void test_sllv_add(byte[] a0, byte[] a1, int b) {
1202     for (int i = 0; i < a0.length; i+=1) {
1203       a0[i] = (byte)((a1[i] + b)<<VALUE);
1204     }
1205   }
1206   static void test_sllc_and(byte[] a0, byte[] a1) {
1207     for (int i = 0; i < a0.length; i+=1) {
1208       a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
1209     }
1210   }
1211   static void test_sllv_and(byte[] a0, byte[] a1, int b) {
1212     for (int i = 0; i < a0.length; i+=1) {
1213       a0[i] = (byte)((a1[i] & b)<<VALUE);
1214     }
1215   }
1216 
1217   static void test_srlc(byte[] a0, byte[] a1) {
1218     for (int i = 0; i < a0.length; i+=1) {
1219       a0[i] = (byte)(a1[i]>>>VALUE);
1220     }
1221   }
1222   static void test_srlc_n(byte[] a0, byte[] a1) {
1223     for (int i = 0; i < a0.length; i+=1) {
1224       a0[i] = (byte)(a1[i]>>>(-VALUE));
1225     }
1226   }
1227   static void test_srlc_o(byte[] a0, byte[] a1) {
1228     for (int i = 0; i < a0.length; i+=1) {
1229       a0[i] = (byte)(a1[i]>>>SHIFT);
1230     }
1231   }
1232   static void test_srlc_on(byte[] a0, byte[] a1) {
1233     for (int i = 0; i < a0.length; i+=1) {
1234       a0[i] = (byte)(a1[i]>>>(-SHIFT));
1235     }
1236   }
1237   static void test_srlv(byte[] a0, byte[] a1, int b) {
1238     for (int i = 0; i < a0.length; i+=1) {
1239       a0[i] = (byte)(a1[i]>>>b);
1240     }
1241   }
1242   static void test_srlc_add(byte[] a0, byte[] a1) {
1243     for (int i = 0; i < a0.length; i+=1) {
1244       a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
1245     }
1246   }
1247   static void test_srlv_add(byte[] a0, byte[] a1, int b) {
1248     for (int i = 0; i < a0.length; i+=1) {
1249       a0[i] = (byte)((a1[i] + b)>>>VALUE);
1250     }
1251   }
1252   static void test_srlc_and(byte[] a0, byte[] a1) {
1253     for (int i = 0; i < a0.length; i+=1) {
1254       a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
1255     }
1256   }
1257   static void test_srlv_and(byte[] a0, byte[] a1, int b) {
1258     for (int i = 0; i < a0.length; i+=1) {
1259       a0[i] = (byte)((a1[i] & b)>>>VALUE);
1260     }
1261   }
1262 
1263   static void test_srac(byte[] a0, byte[] a1) {
1264     for (int i = 0; i < a0.length; i+=1) {
1265       a0[i] = (byte)(a1[i]>>VALUE);
1266     }
1267   }
1268   static void test_srac_n(byte[] a0, byte[] a1) {
1269     for (int i = 0; i < a0.length; i+=1) {
1270       a0[i] = (byte)(a1[i]>>(-VALUE));
1271     }
1272   }
1273   static void test_srac_o(byte[] a0, byte[] a1) {
1274     for (int i = 0; i < a0.length; i+=1) {
1275       a0[i] = (byte)(a1[i]>>SHIFT);
1276     }
1277   }
1278   static void test_srac_on(byte[] a0, byte[] a1) {
1279     for (int i = 0; i < a0.length; i+=1) {
1280       a0[i] = (byte)(a1[i]>>(-SHIFT));
1281     }
1282   }
1283   static void test_srav(byte[] a0, byte[] a1, int b) {
1284     for (int i = 0; i < a0.length; i+=1) {
1285       a0[i] = (byte)(a1[i]>>b);
1286     }
1287   }
1288   static void test_srac_add(byte[] a0, byte[] a1) {
1289     for (int i = 0; i < a0.length; i+=1) {
1290       a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
1291     }
1292   }
1293   static void test_srav_add(byte[] a0, byte[] a1, int b) {
1294     for (int i = 0; i < a0.length; i+=1) {
1295       a0[i] = (byte)((a1[i] + b)>>VALUE);
1296     }
1297   }
1298   static void test_srac_and(byte[] a0, byte[] a1) {
1299     for (int i = 0; i < a0.length; i+=1) {
1300       a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
1301     }
1302   }
1303   static void test_srav_and(byte[] a0, byte[] a1, int b) {
1304     for (int i = 0; i < a0.length; i+=1) {
1305       a0[i] = (byte)((a1[i] & b)>>VALUE);
1306     }
1307   }
1308 
1309   static void test_pack2(short[] p2, byte[] a1) {
1310     if (p2.length*2 > a1.length) return;
1311     for (int i = 0; i < p2.length; i+=1) {
1312       short l0 = (short)a1[i*2+0];
1313       short l1 = (short)a1[i*2+1];
1314       p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
1315     }
1316   }
1317   static void test_unpack2(byte[] a0, short[] p2) {
1318     if (p2.length*2 > a0.length) return;
1319     for (int i = 0; i < p2.length; i+=1) {
1320       short l = p2[i];
1321       a0[i*2+0] = (byte)(l & 0xFF);
1322       a0[i*2+1] = (byte)(l >> 8);
1323     }
1324   }
1325   static void test_pack2_swap(short[] p2, byte[] a1) {
1326     if (p2.length*2 > a1.length) return;
1327     for (int i = 0; i < p2.length; i+=1) {
1328       short l0 = (short)a1[i*2+0];
1329       short l1 = (short)a1[i*2+1];
1330       p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
1331     }
1332   }
1333   static void test_unpack2_swap(byte[] a0, short[] p2) {
1334     if (p2.length*2 > a0.length) return;
1335     for (int i = 0; i < p2.length; i+=1) {
1336       short l = p2[i];
1337       a0[i*2+0] = (byte)(l >> 8);
1338       a0[i*2+1] = (byte)(l & 0xFF);
1339     }
1340   }
1341 
1342   static void test_pack4(int[] p4, byte[] a1) {
1343     if (p4.length*4 > a1.length) return;
1344     for (int i = 0; i < p4.length; i+=1) {
1345       int l0 = (int)a1[i*4+0];
1346       int l1 = (int)a1[i*4+1];
1347       int l2 = (int)a1[i*4+2];
1348       int l3 = (int)a1[i*4+3];
1349       p4[i] = (l0 & 0xFF) |
1350              ((l1 & 0xFF) <<  8) |
1351              ((l2 & 0xFF) << 16) |
1352              ((l3 & 0xFF) << 24);
1353     }
1354   }
1355   static void test_unpack4(byte[] a0, int[] p4) {
1356     if (p4.length*4 > a0.length) return;
1357     for (int i = 0; i < p4.length; i+=1) {
1358       int l = p4[i];
1359       a0[i*4+0] = (byte)(l & 0xFF);
1360       a0[i*4+1] = (byte)(l >>  8);
1361       a0[i*4+2] = (byte)(l >> 16);
1362       a0[i*4+3] = (byte)(l >> 24);
1363     }
1364   }
1365   static void test_pack4_swap(int[] p4, byte[] a1) {
1366     if (p4.length*4 > a1.length) return;
1367     for (int i = 0; i < p4.length; i+=1) {
1368       int l0 = (int)a1[i*4+0];
1369       int l1 = (int)a1[i*4+1];
1370       int l2 = (int)a1[i*4+2];
1371       int l3 = (int)a1[i*4+3];
1372       p4[i] = (l3 & 0xFF) |
1373              ((l2 & 0xFF) <<  8) |
1374              ((l1 & 0xFF) << 16) |
1375              ((l0 & 0xFF) << 24);
1376     }
1377   }
1378   static void test_unpack4_swap(byte[] a0, int[] p4) {
1379     if (p4.length*4 > a0.length) return;
1380     for (int i = 0; i < p4.length; i+=1) {
1381       int l = p4[i];
1382       a0[i*4+0] = (byte)(l >> 24);
1383       a0[i*4+1] = (byte)(l >> 16);
1384       a0[i*4+2] = (byte)(l >>  8);
1385       a0[i*4+3] = (byte)(l & 0xFF);
1386     }
1387   }
1388 
1389   static void test_pack8(long[] p8, byte[] a1) {
1390     if (p8.length*8 > a1.length) return;
1391     for (int i = 0; i < p8.length; i+=1) {
1392       long l0 = (long)a1[i*8+0];
1393       long l1 = (long)a1[i*8+1];
1394       long l2 = (long)a1[i*8+2];
1395       long l3 = (long)a1[i*8+3];
1396       long l4 = (long)a1[i*8+4];
1397       long l5 = (long)a1[i*8+5];
1398       long l6 = (long)a1[i*8+6];
1399       long l7 = (long)a1[i*8+7];
1400       p8[i] = (l0 & 0xFFl) |
1401              ((l1 & 0xFFl) <<  8) |
1402              ((l2 & 0xFFl) << 16) |
1403              ((l3 & 0xFFl) << 24) |
1404              ((l4 & 0xFFl) << 32) |
1405              ((l5 & 0xFFl) << 40) |
1406              ((l6 & 0xFFl) << 48) |
1407              ((l7 & 0xFFl) << 56);
1408     }
1409   }
1410   static void test_unpack8(byte[] a0, long[] p8) {
1411     if (p8.length*8 > a0.length) return;
1412     for (int i = 0; i < p8.length; i+=1) {
1413       long l = p8[i];
1414       a0[i*8+0] = (byte)(l & 0xFFl);
1415       a0[i*8+1] = (byte)(l >>  8);
1416       a0[i*8+2] = (byte)(l >> 16);
1417       a0[i*8+3] = (byte)(l >> 24);
1418       a0[i*8+4] = (byte)(l >> 32);
1419       a0[i*8+5] = (byte)(l >> 40);
1420       a0[i*8+6] = (byte)(l >> 48);
1421       a0[i*8+7] = (byte)(l >> 56);
1422     }
1423   }
1424   static void test_pack8_swap(long[] p8, byte[] a1) {
1425     if (p8.length*8 > a1.length) return;
1426     for (int i = 0; i < p8.length; i+=1) {
1427       long l0 = (long)a1[i*8+0];
1428       long l1 = (long)a1[i*8+1];
1429       long l2 = (long)a1[i*8+2];
1430       long l3 = (long)a1[i*8+3];
1431       long l4 = (long)a1[i*8+4];
1432       long l5 = (long)a1[i*8+5];
1433       long l6 = (long)a1[i*8+6];
1434       long l7 = (long)a1[i*8+7];
1435       p8[i] = (l7 & 0xFFl) |
1436              ((l6 & 0xFFl) <<  8) |
1437              ((l5 & 0xFFl) << 16) |
1438              ((l4 & 0xFFl) << 24) |
1439              ((l3 & 0xFFl) << 32) |
1440              ((l2 & 0xFFl) << 40) |
1441              ((l1 & 0xFFl) << 48) |
1442              ((l0 & 0xFFl) << 56);
1443     }
1444   }
1445   static void test_unpack8_swap(byte[] a0, long[] p8) {
1446     if (p8.length*8 > a0.length) return;
1447     for (int i = 0; i < p8.length; i+=1) {
1448       long l = p8[i];
1449       a0[i*8+0] = (byte)(l >> 56);
1450       a0[i*8+1] = (byte)(l >> 48);
1451       a0[i*8+2] = (byte)(l >> 40);
1452       a0[i*8+3] = (byte)(l >> 32);
1453       a0[i*8+4] = (byte)(l >> 24);
1454       a0[i*8+5] = (byte)(l >> 16);
1455       a0[i*8+6] = (byte)(l >>  8);
1456       a0[i*8+7] = (byte)(l & 0xFFl);
1457     }
1458   }
1459 
1460   static int verify(String text, int i, byte elem, byte val) {
1461     if (elem != val) {
1462       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1463       return 1;
1464     }
1465     return 0;
1466   }
1467 
1468   static int verify(String text, int i, short elem, short val) {
1469     if (elem != val) {
1470       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1471       return 1;
1472     }
1473     return 0;
1474   }
1475 
1476   static int verify(String text, int i, int elem, int val) {
1477     if (elem != val) {
1478       System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
1479       return 1;
1480     }
1481     return 0;
1482   }
1483 
1484   static int verify(String text, int i, long elem, long val) {
1485     if (elem != val) {
1486       System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
1487       return 1;
1488     }
1489     return 0;
1490   }
1491 }