1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @bug 6340864
  27  * @summary Implement vectorization optimizations in hotspot-server
  28  *
  29  * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.c2.cr6340864.TestByteVect
  30  */
  31 
  32 package compiler.c2.cr6340864;
  33 
  34 public class TestByteVect {
  35   private static final int ARRLEN = 997;    // length of the byte arrays a0..a4 allocated in test(); the packed short/int/long arrays use ARRLEN/2, ARRLEN/4 and ARRLEN/8
  36   private static final int ITERS  = 11000;  // iteration count of the warmup loop and of each timing loop in test()
  37   private static final int ADD_INIT = 63;   // a1[i] is initialized to (byte)(ADD_INIT+i); also passed as the addend to the test_*_add kernels
  38   private static final int BIT_MASK = 0xB7; // fill value of a4 (as a byte) and the operand handed to the and/or/xor kernels
  39   private static final int VALUE = 3;       // fill value of a2 (negated in a3); scalar operand for the add/sub/mul/div and shift kernels
  40   private static final int SHIFT = 8;       // shift count used by the "_o"/"_on" shift variants; 8 equals the bit width of a byte
  41 
  42   public static void main(String args[]) {
  43     System.out.println("Testing Byte vectors");
  44     int errn = test();
  45     if (errn > 0) {
  46       System.err.println("FAILED: " + errn + " errors");
  47       System.exit(97);
  48     }
  49     System.out.println("PASSED");
  50   }
  51 
  52   static int test() {
  53     byte[] a0 = new byte[ARRLEN];
  54     byte[] a1 = new byte[ARRLEN];
  55     byte[] a2 = new byte[ARRLEN];
  56     byte[] a3 = new byte[ARRLEN];
  57     byte[] a4 = new byte[ARRLEN];
  58     short[] p2 = new short[ARRLEN/2];
  59       int[] p4 = new   int[ARRLEN/4];
  60      long[] p8 = new  long[ARRLEN/8];
  61     // Initialize
  62     int gold_sum = 0;
  63     for (int i=0; i<ARRLEN; i++) {
  64       byte val = (byte)(ADD_INIT+i);
  65       gold_sum += val;
  66       a1[i] = val;
  67       a2[i] = (byte)VALUE;
  68       a3[i] = (byte)-VALUE;
  69       a4[i] = (byte)BIT_MASK;
  70     }
  71     System.out.println("Warmup");
  72     for (int i=0; i<ITERS; i++) {
  73       test_sum(a1);
  74       test_addc(a0, a1);
  75       test_addv(a0, a1, (byte)VALUE);
  76       test_adda(a0, a1, a2);
  77       test_subc(a0, a1);
  78       test_subv(a0, a1, (byte)VALUE);
  79       test_suba(a0, a1, a2);
  80 
  81       test_mulc(a0, a1);
  82       test_mulv(a0, a1, (byte)VALUE);
  83       test_mula(a0, a1, a2);
  84       test_divc(a0, a1);
  85       test_divv(a0, a1, (byte)VALUE);
  86       test_diva(a0, a1, a2);
  87       test_mulc_n(a0, a1);
  88       test_mulv(a0, a1, (byte)-VALUE);
  89       test_mula(a0, a1, a3);
  90       test_divc_n(a0, a1);
  91       test_divv(a0, a1, (byte)-VALUE);
  92       test_diva(a0, a1, a3);
  93 
  94       test_andc(a0, a1);
  95       test_andv(a0, a1, (byte)BIT_MASK);
  96       test_anda(a0, a1, a4);
  97       test_orc(a0, a1);
  98       test_orv(a0, a1, (byte)BIT_MASK);
  99       test_ora(a0, a1, a4);
 100       test_xorc(a0, a1);
 101       test_xorv(a0, a1, (byte)BIT_MASK);
 102       test_xora(a0, a1, a4);
 103 
 104       test_sllc(a0, a1);
 105       test_sllv(a0, a1, VALUE);
 106       test_srlc(a0, a1);
 107       test_srlv(a0, a1, VALUE);
 108       test_srac(a0, a1);
 109       test_srav(a0, a1, VALUE);
 110 
 111       test_sllc_n(a0, a1);
 112       test_sllv(a0, a1, -VALUE);
 113       test_srlc_n(a0, a1);
 114       test_srlv(a0, a1, -VALUE);
 115       test_srac_n(a0, a1);
 116       test_srav(a0, a1, -VALUE);
 117 
 118       test_sllc_o(a0, a1);
 119       test_sllv(a0, a1, SHIFT);
 120       test_srlc_o(a0, a1);
 121       test_srlv(a0, a1, SHIFT);
 122       test_srac_o(a0, a1);
 123       test_srav(a0, a1, SHIFT);
 124 
 125       test_sllc_on(a0, a1);
 126       test_sllv(a0, a1, -SHIFT);
 127       test_srlc_on(a0, a1);
 128       test_srlv(a0, a1, -SHIFT);
 129       test_srac_on(a0, a1);
 130       test_srav(a0, a1, -SHIFT);
 131 
 132       test_sllc_add(a0, a1);
 133       test_sllv_add(a0, a1, ADD_INIT);
 134       test_srlc_add(a0, a1);
 135       test_srlv_add(a0, a1, ADD_INIT);
 136       test_srac_add(a0, a1);
 137       test_srav_add(a0, a1, ADD_INIT);
 138 
 139       test_sllc_and(a0, a1);
 140       test_sllv_and(a0, a1, BIT_MASK);
 141       test_srlc_and(a0, a1);
 142       test_srlv_and(a0, a1, BIT_MASK);
 143       test_srac_and(a0, a1);
 144       test_srav_and(a0, a1, BIT_MASK);
 145 
 146       test_pack2(p2, a1);
 147       test_unpack2(a0, p2);
 148       test_pack2_swap(p2, a1);
 149       test_unpack2_swap(a0, p2);
 150       test_pack4(p4, a1);
 151       test_unpack4(a0, p4);
 152       test_pack4_swap(p4, a1);
 153       test_unpack4_swap(a0, p4);
 154       test_pack8(p8, a1);
 155       test_unpack8(a0, p8);
 156       test_pack8_swap(p8, a1);
 157       test_unpack8_swap(a0, p8);
 158     }
 159     // Test and verify results
 160     System.out.println("Verification");
 161     int errn = 0;
 162     {
 163       int sum = test_sum(a1);
 164       if (sum != gold_sum) {
 165         System.err.println("test_sum:  " + sum + " != " + gold_sum);
 166         errn++;
 167       }
 168 
 169       test_addc(a0, a1);
 170       for (int i=0; i<ARRLEN; i++) {
 171         errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 172       }
 173       test_addv(a0, a1, (byte)VALUE);
 174       for (int i=0; i<ARRLEN; i++) {
 175         errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 176       }
 177       test_adda(a0, a1, a2);
 178       for (int i=0; i<ARRLEN; i++) {
 179         errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 180       }
 181 
 182       test_subc(a0, a1);
 183       for (int i=0; i<ARRLEN; i++) {
 184         errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 185       }
 186       test_subv(a0, a1, (byte)VALUE);
 187       for (int i=0; i<ARRLEN; i++) {
 188         errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 189       }
 190       test_suba(a0, a1, a2);
 191       for (int i=0; i<ARRLEN; i++) {
 192         errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 193       }
 194 
 195       test_mulc(a0, a1);
 196       for (int i=0; i<ARRLEN; i++) {
 197         errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 198       }
 199       test_mulv(a0, a1, (byte)VALUE);
 200       for (int i=0; i<ARRLEN; i++) {
 201         errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 202       }
 203       test_mula(a0, a1, a2);
 204       for (int i=0; i<ARRLEN; i++) {
 205         errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 206       }
 207 
 208       test_divc(a0, a1);
 209       for (int i=0; i<ARRLEN; i++) {
 210         errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 211       }
 212       test_divv(a0, a1, (byte)VALUE);
 213       for (int i=0; i<ARRLEN; i++) {
 214         errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 215       }
 216       test_diva(a0, a1, a2);
 217       for (int i=0; i<ARRLEN; i++) {
 218         errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 219       }
 220 
 221       test_mulc_n(a0, a1);
 222       for (int i=0; i<ARRLEN; i++) {
 223         errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 224       }
 225       test_mulv(a0, a1, (byte)-VALUE);
 226       for (int i=0; i<ARRLEN; i++) {
 227         errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 228       }
 229       test_mula(a0, a1, a3);
 230       for (int i=0; i<ARRLEN; i++) {
 231         errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 232       }
 233 
 234       test_divc_n(a0, a1);
 235       for (int i=0; i<ARRLEN; i++) {
 236         errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 237       }
 238       test_divv(a0, a1, (byte)-VALUE);
 239       for (int i=0; i<ARRLEN; i++) {
 240         errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 241       }
 242       test_diva(a0, a1, a3);
 243       for (int i=0; i<ARRLEN; i++) {
 244         errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 245       }
 246 
 247       test_andc(a0, a1);
 248       for (int i=0; i<ARRLEN; i++) {
 249         errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 250       }
 251       test_andv(a0, a1, (byte)BIT_MASK);
 252       for (int i=0; i<ARRLEN; i++) {
 253         errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 254       }
 255       test_anda(a0, a1, a4);
 256       for (int i=0; i<ARRLEN; i++) {
 257         errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 258       }
 259 
 260       test_orc(a0, a1);
 261       for (int i=0; i<ARRLEN; i++) {
 262         errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 263       }
 264       test_orv(a0, a1, (byte)BIT_MASK);
 265       for (int i=0; i<ARRLEN; i++) {
 266         errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 267       }
 268       test_ora(a0, a1, a4);
 269       for (int i=0; i<ARRLEN; i++) {
 270         errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 271       }
 272 
 273       test_xorc(a0, a1);
 274       for (int i=0; i<ARRLEN; i++) {
 275         errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 276       }
 277       test_xorv(a0, a1, (byte)BIT_MASK);
 278       for (int i=0; i<ARRLEN; i++) {
 279         errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 280       }
 281       test_xora(a0, a1, a4);
 282       for (int i=0; i<ARRLEN; i++) {
 283         errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 284       }
 285 
 286       test_sllc(a0, a1);
 287       for (int i=0; i<ARRLEN; i++) {
 288         errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 289       }
 290       test_sllv(a0, a1, VALUE);
 291       for (int i=0; i<ARRLEN; i++) {
 292         errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 293       }
 294 
 295       test_srlc(a0, a1);
 296       for (int i=0; i<ARRLEN; i++) {
 297         errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 298       }
 299       test_srlv(a0, a1, VALUE);
 300       for (int i=0; i<ARRLEN; i++) {
 301         errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 302       }
 303 
 304       test_srac(a0, a1);
 305       for (int i=0; i<ARRLEN; i++) {
 306         errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 307       }
 308       test_srav(a0, a1, VALUE);
 309       for (int i=0; i<ARRLEN; i++) {
 310         errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 311       }
 312 
 313       test_sllc_n(a0, a1);
 314       for (int i=0; i<ARRLEN; i++) {
 315         errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 316       }
 317       test_sllv(a0, a1, -VALUE);
 318       for (int i=0; i<ARRLEN; i++) {
 319         errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 320       }
 321 
 322       test_srlc_n(a0, a1);
 323       for (int i=0; i<ARRLEN; i++) {
 324         errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 325       }
 326       test_srlv(a0, a1, -VALUE);
 327       for (int i=0; i<ARRLEN; i++) {
 328         errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 329       }
 330 
 331       test_srac_n(a0, a1);
 332       for (int i=0; i<ARRLEN; i++) {
 333         errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 334       }
 335       test_srav(a0, a1, -VALUE);
 336       for (int i=0; i<ARRLEN; i++) {
 337         errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 338       }
 339 
 340       test_sllc_o(a0, a1);
 341       for (int i=0; i<ARRLEN; i++) {
 342         errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 343       }
 344       test_sllv(a0, a1, SHIFT);
 345       for (int i=0; i<ARRLEN; i++) {
 346         errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 347       }
 348 
 349       test_srlc_o(a0, a1);
 350       for (int i=0; i<ARRLEN; i++) {
 351         errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 352       }
 353       test_srlv(a0, a1, SHIFT);
 354       for (int i=0; i<ARRLEN; i++) {
 355         errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 356       }
 357 
 358       test_srac_o(a0, a1);
 359       for (int i=0; i<ARRLEN; i++) {
 360         errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 361       }
 362       test_srav(a0, a1, SHIFT);
 363       for (int i=0; i<ARRLEN; i++) {
 364         errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 365       }
 366 
 367       test_sllc_on(a0, a1);
 368       for (int i=0; i<ARRLEN; i++) {
 369         errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 370       }
 371       test_sllv(a0, a1, -SHIFT);
 372       for (int i=0; i<ARRLEN; i++) {
 373         errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 374       }
 375 
 376       test_srlc_on(a0, a1);
 377       for (int i=0; i<ARRLEN; i++) {
 378         errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 379       }
 380       test_srlv(a0, a1, -SHIFT);
 381       for (int i=0; i<ARRLEN; i++) {
 382         errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 383       }
 384 
 385       test_srac_on(a0, a1);
 386       for (int i=0; i<ARRLEN; i++) {
 387         errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 388       }
 389       test_srav(a0, a1, -SHIFT);
 390       for (int i=0; i<ARRLEN; i++) {
 391         errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 392       }
 393 
 394       test_sllc_add(a0, a1);
 395       for (int i=0; i<ARRLEN; i++) {
 396         errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 397       }
 398       test_sllv_add(a0, a1, ADD_INIT);
 399       for (int i=0; i<ARRLEN; i++) {
 400         errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 401       }
 402 
 403       test_srlc_add(a0, a1);
 404       for (int i=0; i<ARRLEN; i++) {
 405         errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 406       }
 407       test_srlv_add(a0, a1, ADD_INIT);
 408       for (int i=0; i<ARRLEN; i++) {
 409         errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 410       }
 411 
 412       test_srac_add(a0, a1);
 413       for (int i=0; i<ARRLEN; i++) {
 414         errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 415       }
 416       test_srav_add(a0, a1, ADD_INIT);
 417       for (int i=0; i<ARRLEN; i++) {
 418         errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 419       }
 420 
 421       test_sllc_and(a0, a1);
 422       for (int i=0; i<ARRLEN; i++) {
 423         errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 424       }
 425       test_sllv_and(a0, a1, BIT_MASK);
 426       for (int i=0; i<ARRLEN; i++) {
 427         errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 428       }
 429 
 430       test_srlc_and(a0, a1);
 431       for (int i=0; i<ARRLEN; i++) {
 432         errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 433       }
 434       test_srlv_and(a0, a1, BIT_MASK);
 435       for (int i=0; i<ARRLEN; i++) {
 436         errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 437       }
 438 
 439       test_srac_and(a0, a1);
 440       for (int i=0; i<ARRLEN; i++) {
 441         errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 442       }
 443       test_srav_and(a0, a1, BIT_MASK);
 444       for (int i=0; i<ARRLEN; i++) {
 445         errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 446       }
 447 
 448       test_pack2(p2, a1);
 449       for (int i=0; i<ARRLEN/2; i++) {
 450         errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
 451       }
 452       for (int i=0; i<ARRLEN; i++) {
 453         a0[i] = -1;
 454       }
 455       test_unpack2(a0, p2);
 456       for (int i=0; i<(ARRLEN&(-2)); i++) {
 457         errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
 458       }
 459 
 460       test_pack2_swap(p2, a1);
 461       for (int i=0; i<ARRLEN/2; i++) {
 462         errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
 463       }
 464       for (int i=0; i<ARRLEN; i++) {
 465         a0[i] = -1;
 466       }
 467       test_unpack2_swap(a0, p2);
 468       for (int i=0; i<(ARRLEN&(-2)); i++) {
 469         errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 470       }
 471 
 472       test_pack4(p4, a1);
 473       for (int i=0; i<ARRLEN/4; i++) {
 474         errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
 475                                                  (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
 476                                                  (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
 477                                                  (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
 478       }
 479       for (int i=0; i<ARRLEN; i++) {
 480         a0[i] = -1;
 481       }
 482       test_unpack4(a0, p4);
 483       for (int i=0; i<(ARRLEN&(-4)); i++) {
 484         errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
 485       }
 486 
 487       test_pack4_swap(p4, a1);
 488       for (int i=0; i<ARRLEN/4; i++) {
 489         errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
 490                                                       (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
 491                                                       (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
 492                                                       (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
 493       }
 494       for (int i=0; i<ARRLEN; i++) {
 495         a0[i] = -1;
 496       }
 497       test_unpack4_swap(a0, p4);
 498       for (int i=0; i<(ARRLEN&(-4)); i++) {
 499         errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 500       }
 501 
 502       test_pack8(p8, a1);
 503       for (int i=0; i<ARRLEN/8; i++) {
 504         errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
 505                                                  (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
 506                                                  (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
 507                                                  (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
 508                                                  (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
 509                                                  (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
 510                                                  (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
 511                                                  (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
 512       }
 513       for (int i=0; i<ARRLEN; i++) {
 514         a0[i] = -1;
 515       }
 516       test_unpack8(a0, p8);
 517       for (int i=0; i<(ARRLEN&(-8)); i++) {
 518         errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
 519       }
 520 
 521       test_pack8_swap(p8, a1);
 522       for (int i=0; i<ARRLEN/8; i++) {
 523         errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
 524                                                       (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
 525                                                       (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
 526                                                       (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
 527                                                       (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
 528                                                       (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
 529                                                       (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
 530                                                       (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
 531       }
 532       for (int i=0; i<ARRLEN; i++) {
 533         a0[i] = -1;
 534       }
 535       test_unpack8_swap(a0, p8);
 536       for (int i=0; i<(ARRLEN&(-8)); i++) {
 537         errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 538       }
 539 
 540     }
 541 
 542     if (errn > 0)
 543       return errn;
 544 
 545     System.out.println("Time");
 546     long start, end;
 547 
 548     start = System.currentTimeMillis();
 549     for (int i=0; i<ITERS; i++) {
 550       test_sum(a1);
 551     }
 552     end = System.currentTimeMillis();
 553     System.out.println("test_sum: " + (end - start));
 554 
 555     start = System.currentTimeMillis();
 556     for (int i=0; i<ITERS; i++) {
 557       test_addc(a0, a1);
 558     }
 559     end = System.currentTimeMillis();
 560     System.out.println("test_addc: " + (end - start));
 561     start = System.currentTimeMillis();
 562     for (int i=0; i<ITERS; i++) {
 563       test_addv(a0, a1, (byte)VALUE);
 564     }
 565     end = System.currentTimeMillis();
 566     System.out.println("test_addv: " + (end - start));
 567     start = System.currentTimeMillis();
 568     for (int i=0; i<ITERS; i++) {
 569       test_adda(a0, a1, a2);
 570     }
 571     end = System.currentTimeMillis();
 572     System.out.println("test_adda: " + (end - start));
 573 
 574     start = System.currentTimeMillis();
 575     for (int i=0; i<ITERS; i++) {
 576       test_subc(a0, a1);
 577     }
 578     end = System.currentTimeMillis();
 579     System.out.println("test_subc: " + (end - start));
 580     start = System.currentTimeMillis();
 581     for (int i=0; i<ITERS; i++) {
 582       test_subv(a0, a1, (byte)VALUE);
 583     }
 584     end = System.currentTimeMillis();
 585     System.out.println("test_subv: " + (end - start));
 586     start = System.currentTimeMillis();
 587     for (int i=0; i<ITERS; i++) {
 588       test_suba(a0, a1, a2);
 589     }
 590     end = System.currentTimeMillis();
 591     System.out.println("test_suba: " + (end - start));
 592 
 593     start = System.currentTimeMillis();
 594     for (int i=0; i<ITERS; i++) {
 595       test_mulc(a0, a1);
 596     }
 597     end = System.currentTimeMillis();
 598     System.out.println("test_mulc: " + (end - start));
 599     start = System.currentTimeMillis();
 600     for (int i=0; i<ITERS; i++) {
 601       test_mulv(a0, a1, (byte)VALUE);
 602     }
 603     end = System.currentTimeMillis();
 604     System.out.println("test_mulv: " + (end - start));
 605     start = System.currentTimeMillis();
 606     for (int i=0; i<ITERS; i++) {
 607       test_mula(a0, a1, a2);
 608     }
 609     end = System.currentTimeMillis();
 610     System.out.println("test_mula: " + (end - start));
 611 
 612     start = System.currentTimeMillis();
 613     for (int i=0; i<ITERS; i++) {
 614       test_divc(a0, a1);
 615     }
 616     end = System.currentTimeMillis();
 617     System.out.println("test_divc: " + (end - start));
 618     start = System.currentTimeMillis();
 619     for (int i=0; i<ITERS; i++) {
 620       test_divv(a0, a1, (byte)VALUE);
 621     }
 622     end = System.currentTimeMillis();
 623     System.out.println("test_divv: " + (end - start));
 624     start = System.currentTimeMillis();
 625     for (int i=0; i<ITERS; i++) {
 626       test_diva(a0, a1, a2);
 627     }
 628     end = System.currentTimeMillis();
 629     System.out.println("test_diva: " + (end - start));
 630 
 631     start = System.currentTimeMillis();
 632     for (int i=0; i<ITERS; i++) {
 633       test_mulc_n(a0, a1);
 634     }
 635     end = System.currentTimeMillis();
 636     System.out.println("test_mulc_n: " + (end - start));
 637     start = System.currentTimeMillis();
 638     for (int i=0; i<ITERS; i++) {
 639       test_mulv(a0, a1, (byte)-VALUE);
 640     }
 641     end = System.currentTimeMillis();
 642     System.out.println("test_mulv_n: " + (end - start));
 643     start = System.currentTimeMillis();
 644     for (int i=0; i<ITERS; i++) {
 645       test_mula(a0, a1, a3);
 646     }
 647     end = System.currentTimeMillis();
 648     System.out.println("test_mula_n: " + (end - start));
 649 
 650     start = System.currentTimeMillis();
 651     for (int i=0; i<ITERS; i++) {
 652       test_divc_n(a0, a1);
 653     }
 654     end = System.currentTimeMillis();
 655     System.out.println("test_divc_n: " + (end - start));
 656     start = System.currentTimeMillis();
 657     for (int i=0; i<ITERS; i++) {
 658       test_divv(a0, a1, (byte)-VALUE);
 659     }
 660     end = System.currentTimeMillis();
 661     System.out.println("test_divv_n: " + (end - start));
 662     start = System.currentTimeMillis();
 663     for (int i=0; i<ITERS; i++) {
 664       test_diva(a0, a1, a3);
 665     }
 666     end = System.currentTimeMillis();
 667     System.out.println("test_diva_n: " + (end - start));
 668 
 669     start = System.currentTimeMillis();
 670     for (int i=0; i<ITERS; i++) {
 671       test_andc(a0, a1);
 672     }
 673     end = System.currentTimeMillis();
 674     System.out.println("test_andc: " + (end - start));
 675     start = System.currentTimeMillis();
 676     for (int i=0; i<ITERS; i++) {
 677       test_andv(a0, a1, (byte)BIT_MASK);
 678     }
 679     end = System.currentTimeMillis();
 680     System.out.println("test_andv: " + (end - start));
 681     start = System.currentTimeMillis();
 682     for (int i=0; i<ITERS; i++) {
 683       test_anda(a0, a1, a4);
 684     }
 685     end = System.currentTimeMillis();
 686     System.out.println("test_anda: " + (end - start));
 687 
 688     start = System.currentTimeMillis();
 689     for (int i=0; i<ITERS; i++) {
 690       test_orc(a0, a1);
 691     }
 692     end = System.currentTimeMillis();
 693     System.out.println("test_orc: " + (end - start));
 694     start = System.currentTimeMillis();
 695     for (int i=0; i<ITERS; i++) {
 696       test_orv(a0, a1, (byte)BIT_MASK);
 697     }
 698     end = System.currentTimeMillis();
 699     System.out.println("test_orv: " + (end - start));
 700     start = System.currentTimeMillis();
 701     for (int i=0; i<ITERS; i++) {
 702       test_ora(a0, a1, a4);
 703     }
 704     end = System.currentTimeMillis();
 705     System.out.println("test_ora: " + (end - start));
 706 
 707     start = System.currentTimeMillis();
 708     for (int i=0; i<ITERS; i++) {
 709       test_xorc(a0, a1);
 710     }
 711     end = System.currentTimeMillis();
 712     System.out.println("test_xorc: " + (end - start));
 713     start = System.currentTimeMillis();
 714     for (int i=0; i<ITERS; i++) {
 715       test_xorv(a0, a1, (byte)BIT_MASK);
 716     }
 717     end = System.currentTimeMillis();
 718     System.out.println("test_xorv: " + (end - start));
 719     start = System.currentTimeMillis();
 720     for (int i=0; i<ITERS; i++) {
 721       test_xora(a0, a1, a4);
 722     }
 723     end = System.currentTimeMillis();
 724     System.out.println("test_xora: " + (end - start));
 725 
 726     start = System.currentTimeMillis();
 727     for (int i=0; i<ITERS; i++) {
 728       test_sllc(a0, a1);
 729     }
 730     end = System.currentTimeMillis();
 731     System.out.println("test_sllc: " + (end - start));
 732     start = System.currentTimeMillis();
 733     for (int i=0; i<ITERS; i++) {
 734       test_sllv(a0, a1, VALUE);
 735     }
 736     end = System.currentTimeMillis();
 737     System.out.println("test_sllv: " + (end - start));
 738 
 739     start = System.currentTimeMillis();
 740     for (int i=0; i<ITERS; i++) {
 741       test_srlc(a0, a1);
 742     }
 743     end = System.currentTimeMillis();
 744     System.out.println("test_srlc: " + (end - start));
 745     start = System.currentTimeMillis();
 746     for (int i=0; i<ITERS; i++) {
 747       test_srlv(a0, a1, VALUE);
 748     }
 749     end = System.currentTimeMillis();
 750     System.out.println("test_srlv: " + (end - start));
 751 
 752     start = System.currentTimeMillis();
 753     for (int i=0; i<ITERS; i++) {
 754       test_srac(a0, a1);
 755     }
 756     end = System.currentTimeMillis();
 757     System.out.println("test_srac: " + (end - start));
 758     start = System.currentTimeMillis();
 759     for (int i=0; i<ITERS; i++) {
 760       test_srav(a0, a1, VALUE);
 761     }
 762     end = System.currentTimeMillis();
 763     System.out.println("test_srav: " + (end - start));
 764 
 765     start = System.currentTimeMillis();
 766     for (int i=0; i<ITERS; i++) {
 767       test_sllc_n(a0, a1);
 768     }
 769     end = System.currentTimeMillis();
 770     System.out.println("test_sllc_n: " + (end - start));
 771     start = System.currentTimeMillis();
 772     for (int i=0; i<ITERS; i++) {
 773       test_sllv(a0, a1, -VALUE);
 774     }
 775     end = System.currentTimeMillis();
 776     System.out.println("test_sllv_n: " + (end - start));
 777 
 778     start = System.currentTimeMillis();
 779     for (int i=0; i<ITERS; i++) {
 780       test_srlc_n(a0, a1);
 781     }
 782     end = System.currentTimeMillis();
 783     System.out.println("test_srlc_n: " + (end - start));
 784     start = System.currentTimeMillis();
 785     for (int i=0; i<ITERS; i++) {
 786       test_srlv(a0, a1, -VALUE);
 787     }
 788     end = System.currentTimeMillis();
 789     System.out.println("test_srlv_n: " + (end - start));
 790 
 791     start = System.currentTimeMillis();
 792     for (int i=0; i<ITERS; i++) {
 793       test_srac_n(a0, a1);
 794     }
 795     end = System.currentTimeMillis();
 796     System.out.println("test_srac_n: " + (end - start));
 797     start = System.currentTimeMillis();
 798     for (int i=0; i<ITERS; i++) {
 799       test_srav(a0, a1, -VALUE);
 800     }
 801     end = System.currentTimeMillis();
 802     System.out.println("test_srav_n: " + (end - start));
 803 
 804     start = System.currentTimeMillis();
 805     for (int i=0; i<ITERS; i++) {
 806       test_sllc_o(a0, a1);
 807     }
 808     end = System.currentTimeMillis();
 809     System.out.println("test_sllc_o: " + (end - start));
 810     start = System.currentTimeMillis();
 811     for (int i=0; i<ITERS; i++) {
 812       test_sllv(a0, a1, SHIFT);
 813     }
 814     end = System.currentTimeMillis();
 815     System.out.println("test_sllv_o: " + (end - start));
 816 
 817     start = System.currentTimeMillis();
 818     for (int i=0; i<ITERS; i++) {
 819       test_srlc_o(a0, a1);
 820     }
 821     end = System.currentTimeMillis();
 822     System.out.println("test_srlc_o: " + (end - start));
 823     start = System.currentTimeMillis();
 824     for (int i=0; i<ITERS; i++) {
 825       test_srlv(a0, a1, SHIFT);
 826     }
 827     end = System.currentTimeMillis();
 828     System.out.println("test_srlv_o: " + (end - start));
 829 
 830     start = System.currentTimeMillis();
 831     for (int i=0; i<ITERS; i++) {
 832       test_srac_o(a0, a1);
 833     }
 834     end = System.currentTimeMillis();
 835     System.out.println("test_srac_o: " + (end - start));
 836     start = System.currentTimeMillis();
 837     for (int i=0; i<ITERS; i++) {
 838       test_srav(a0, a1, SHIFT);
 839     }
 840     end = System.currentTimeMillis();
 841     System.out.println("test_srav_o: " + (end - start));
 842 
 843     start = System.currentTimeMillis();
 844     for (int i=0; i<ITERS; i++) {
 845       test_sllc_on(a0, a1);
 846     }
 847     end = System.currentTimeMillis();
 848     System.out.println("test_sllc_on: " + (end - start));
 849     start = System.currentTimeMillis();
 850     for (int i=0; i<ITERS; i++) {
 851       test_sllv(a0, a1, -SHIFT);
 852     }
 853     end = System.currentTimeMillis();
 854     System.out.println("test_sllv_on: " + (end - start));
 855 
 856     start = System.currentTimeMillis();
 857     for (int i=0; i<ITERS; i++) {
 858       test_srlc_on(a0, a1);
 859     }
 860     end = System.currentTimeMillis();
 861     System.out.println("test_srlc_on: " + (end - start));
 862     start = System.currentTimeMillis();
 863     for (int i=0; i<ITERS; i++) {
 864       test_srlv(a0, a1, -SHIFT);
 865     }
 866     end = System.currentTimeMillis();
 867     System.out.println("test_srlv_on: " + (end - start));
 868 
 869     start = System.currentTimeMillis();
 870     for (int i=0; i<ITERS; i++) {
 871       test_srac_on(a0, a1);
 872     }
 873     end = System.currentTimeMillis();
 874     System.out.println("test_srac_on: " + (end - start));
 875     start = System.currentTimeMillis();
 876     for (int i=0; i<ITERS; i++) {
 877       test_srav(a0, a1, -SHIFT);
 878     }
 879     end = System.currentTimeMillis();
 880     System.out.println("test_srav_on: " + (end - start));
 881 
 882     start = System.currentTimeMillis();
 883     for (int i=0; i<ITERS; i++) {
 884       test_sllc_add(a0, a1);
 885     }
 886     end = System.currentTimeMillis();
 887     System.out.println("test_sllc_add: " + (end - start));
 888     start = System.currentTimeMillis();
 889     for (int i=0; i<ITERS; i++) {
 890       test_sllv_add(a0, a1, ADD_INIT);
 891     }
 892     end = System.currentTimeMillis();
 893     System.out.println("test_sllv_add: " + (end - start));
 894 
 895     start = System.currentTimeMillis();
 896     for (int i=0; i<ITERS; i++) {
 897       test_srlc_add(a0, a1);
 898     }
 899     end = System.currentTimeMillis();
 900     System.out.println("test_srlc_add: " + (end - start));
 901     start = System.currentTimeMillis();
 902     for (int i=0; i<ITERS; i++) {
 903       test_srlv_add(a0, a1, ADD_INIT);
 904     }
 905     end = System.currentTimeMillis();
 906     System.out.println("test_srlv_add: " + (end - start));
 907 
 908     start = System.currentTimeMillis();
 909     for (int i=0; i<ITERS; i++) {
 910       test_srac_add(a0, a1);
 911     }
 912     end = System.currentTimeMillis();
 913     System.out.println("test_srac_add: " + (end - start));
 914     start = System.currentTimeMillis();
 915     for (int i=0; i<ITERS; i++) {
 916       test_srav_add(a0, a1, ADD_INIT);
 917     }
 918     end = System.currentTimeMillis();
 919     System.out.println("test_srav_add: " + (end - start));
 920 
 921     start = System.currentTimeMillis();
 922     for (int i=0; i<ITERS; i++) {
 923       test_sllc_and(a0, a1);
 924     }
 925     end = System.currentTimeMillis();
 926     System.out.println("test_sllc_and: " + (end - start));
 927     start = System.currentTimeMillis();
 928     for (int i=0; i<ITERS; i++) {
 929       test_sllv_and(a0, a1, BIT_MASK);
 930     }
 931     end = System.currentTimeMillis();
 932     System.out.println("test_sllv_and: " + (end - start));
 933 
 934     start = System.currentTimeMillis();
 935     for (int i=0; i<ITERS; i++) {
 936       test_srlc_and(a0, a1);
 937     }
 938     end = System.currentTimeMillis();
 939     System.out.println("test_srlc_and: " + (end - start));
 940     start = System.currentTimeMillis();
 941     for (int i=0; i<ITERS; i++) {
 942       test_srlv_and(a0, a1, BIT_MASK);
 943     }
 944     end = System.currentTimeMillis();
 945     System.out.println("test_srlv_and: " + (end - start));
 946 
 947     start = System.currentTimeMillis();
 948     for (int i=0; i<ITERS; i++) {
 949       test_srac_and(a0, a1);
 950     }
 951     end = System.currentTimeMillis();
 952     System.out.println("test_srac_and: " + (end - start));
 953     start = System.currentTimeMillis();
 954     for (int i=0; i<ITERS; i++) {
 955       test_srav_and(a0, a1, BIT_MASK);
 956     }
 957     end = System.currentTimeMillis();
 958     System.out.println("test_srav_and: " + (end - start));
 959 
 960     start = System.currentTimeMillis();
 961     for (int i=0; i<ITERS; i++) {
 962       test_pack2(p2, a1);
 963     }
 964     end = System.currentTimeMillis();
 965     System.out.println("test_pack2: " + (end - start));
 966     start = System.currentTimeMillis();
 967     for (int i=0; i<ITERS; i++) {
 968       test_unpack2(a0, p2);
 969     }
 970     end = System.currentTimeMillis();
 971     System.out.println("test_unpack2: " + (end - start));
 972     start = System.currentTimeMillis();
 973     for (int i=0; i<ITERS; i++) {
 974       test_pack2_swap(p2, a1);
 975     }
 976     end = System.currentTimeMillis();
 977     System.out.println("test_pack2_swap: " + (end - start));
 978     start = System.currentTimeMillis();
 979     for (int i=0; i<ITERS; i++) {
 980       test_unpack2_swap(a0, p2);
 981     }
 982     end = System.currentTimeMillis();
 983     System.out.println("test_unpack2_swap: " + (end - start));
 984 
 985     start = System.currentTimeMillis();
 986     for (int i=0; i<ITERS; i++) {
 987       test_pack4(p4, a1);
 988     }
 989     end = System.currentTimeMillis();
 990     System.out.println("test_pack4: " + (end - start));
 991     start = System.currentTimeMillis();
 992     for (int i=0; i<ITERS; i++) {
 993       test_unpack4(a0, p4);
 994     }
 995     end = System.currentTimeMillis();
 996     System.out.println("test_unpack4: " + (end - start));
 997     start = System.currentTimeMillis();
 998     for (int i=0; i<ITERS; i++) {
 999       test_pack4_swap(p4, a1);
1000     }
1001     end = System.currentTimeMillis();
1002     System.out.println("test_pack4_swap: " + (end - start));
1003     start = System.currentTimeMillis();
1004     for (int i=0; i<ITERS; i++) {
1005       test_unpack4_swap(a0, p4);
1006     }
1007     end = System.currentTimeMillis();
1008     System.out.println("test_unpack4_swap: " + (end - start));
1009 
1010     start = System.currentTimeMillis();
1011     for (int i=0; i<ITERS; i++) {
1012       test_pack8(p8, a1);
1013     }
1014     end = System.currentTimeMillis();
1015     System.out.println("test_pack8: " + (end - start));
1016     start = System.currentTimeMillis();
1017     for (int i=0; i<ITERS; i++) {
1018       test_unpack8(a0, p8);
1019     }
1020     end = System.currentTimeMillis();
1021     System.out.println("test_unpack8: " + (end - start));
1022     start = System.currentTimeMillis();
1023     for (int i=0; i<ITERS; i++) {
1024       test_pack8_swap(p8, a1);
1025     }
1026     end = System.currentTimeMillis();
1027     System.out.println("test_pack8_swap: " + (end - start));
1028     start = System.currentTimeMillis();
1029     for (int i=0; i<ITERS; i++) {
1030       test_unpack8_swap(a0, p8);
1031     }
1032     end = System.currentTimeMillis();
1033     System.out.println("test_unpack8_swap: " + (end - start));
1034 
1035     return errn;
1036   }
1037 
1038   static int test_sum(byte[] a1) {
1039     int sum = 0;
1040     for (int i = 0; i < a1.length; i+=1) {
1041       sum += a1[i];
1042     }
1043     return sum;
1044   }
1045 
1046   static void test_addc(byte[] a0, byte[] a1) {
1047     for (int i = 0; i < a0.length; i+=1) {
1048       a0[i] = (byte)(a1[i]+VALUE);
1049     }
1050   }
1051   static void test_addv(byte[] a0, byte[] a1, byte b) {
1052     for (int i = 0; i < a0.length; i+=1) {
1053       a0[i] = (byte)(a1[i]+b);
1054     }
1055   }
1056   static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
1057     for (int i = 0; i < a0.length; i+=1) {
1058       a0[i] = (byte)(a1[i]+a2[i]);
1059     }
1060   }
1061 
1062   static void test_subc(byte[] a0, byte[] a1) {
1063     for (int i = 0; i < a0.length; i+=1) {
1064       a0[i] = (byte)(a1[i]-VALUE);
1065     }
1066   }
1067   static void test_subv(byte[] a0, byte[] a1, byte b) {
1068     for (int i = 0; i < a0.length; i+=1) {
1069       a0[i] = (byte)(a1[i]-b);
1070     }
1071   }
1072   static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
1073     for (int i = 0; i < a0.length; i+=1) {
1074       a0[i] = (byte)(a1[i]-a2[i]);
1075     }
1076   }
1077 
1078   static void test_mulc(byte[] a0, byte[] a1) {
1079     for (int i = 0; i < a0.length; i+=1) {
1080       a0[i] = (byte)(a1[i]*VALUE);
1081     }
1082   }
1083   static void test_mulc_n(byte[] a0, byte[] a1) {
1084     for (int i = 0; i < a0.length; i+=1) {
1085       a0[i] = (byte)(a1[i]*(-VALUE));
1086     }
1087   }
1088   static void test_mulv(byte[] a0, byte[] a1, byte b) {
1089     for (int i = 0; i < a0.length; i+=1) {
1090       a0[i] = (byte)(a1[i]*b);
1091     }
1092   }
1093   static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
1094     for (int i = 0; i < a0.length; i+=1) {
1095       a0[i] = (byte)(a1[i]*a2[i]);
1096     }
1097   }
1098 
1099   static void test_divc(byte[] a0, byte[] a1) {
1100     for (int i = 0; i < a0.length; i+=1) {
1101       a0[i] = (byte)(a1[i]/VALUE);
1102     }
1103   }
1104   static void test_divc_n(byte[] a0, byte[] a1) {
1105     for (int i = 0; i < a0.length; i+=1) {
1106       a0[i] = (byte)(a1[i]/(-VALUE));
1107     }
1108   }
1109   static void test_divv(byte[] a0, byte[] a1, byte b) {
1110     for (int i = 0; i < a0.length; i+=1) {
1111       a0[i] = (byte)(a1[i]/b);
1112     }
1113   }
1114   static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
1115     for (int i = 0; i < a0.length; i+=1) {
1116       a0[i] = (byte)(a1[i]/a2[i]);
1117     }
1118   }
1119 
1120   static void test_andc(byte[] a0, byte[] a1) {
1121     for (int i = 0; i < a0.length; i+=1) {
1122       a0[i] = (byte)(a1[i]&BIT_MASK);
1123     }
1124   }
1125   static void test_andv(byte[] a0, byte[] a1, byte b) {
1126     for (int i = 0; i < a0.length; i+=1) {
1127       a0[i] = (byte)(a1[i]&b);
1128     }
1129   }
1130   static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
1131     for (int i = 0; i < a0.length; i+=1) {
1132       a0[i] = (byte)(a1[i]&a2[i]);
1133     }
1134   }
1135 
1136   static void test_orc(byte[] a0, byte[] a1) {
1137     for (int i = 0; i < a0.length; i+=1) {
1138       a0[i] = (byte)(a1[i]|BIT_MASK);
1139     }
1140   }
1141   static void test_orv(byte[] a0, byte[] a1, byte b) {
1142     for (int i = 0; i < a0.length; i+=1) {
1143       a0[i] = (byte)(a1[i]|b);
1144     }
1145   }
1146   static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
1147     for (int i = 0; i < a0.length; i+=1) {
1148       a0[i] = (byte)(a1[i]|a2[i]);
1149     }
1150   }
1151 
1152   static void test_xorc(byte[] a0, byte[] a1) {
1153     for (int i = 0; i < a0.length; i+=1) {
1154       a0[i] = (byte)(a1[i]^BIT_MASK);
1155     }
1156   }
1157   static void test_xorv(byte[] a0, byte[] a1, byte b) {
1158     for (int i = 0; i < a0.length; i+=1) {
1159       a0[i] = (byte)(a1[i]^b);
1160     }
1161   }
1162   static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
1163     for (int i = 0; i < a0.length; i+=1) {
1164       a0[i] = (byte)(a1[i]^a2[i]);
1165     }
1166   }
1167 
1168   static void test_sllc(byte[] a0, byte[] a1) {
1169     for (int i = 0; i < a0.length; i+=1) {
1170       a0[i] = (byte)(a1[i]<<VALUE);
1171     }
1172   }
1173   static void test_sllc_n(byte[] a0, byte[] a1) {
1174     for (int i = 0; i < a0.length; i+=1) {
1175       a0[i] = (byte)(a1[i]<<(-VALUE));
1176     }
1177   }
1178   static void test_sllc_o(byte[] a0, byte[] a1) {
1179     for (int i = 0; i < a0.length; i+=1) {
1180       a0[i] = (byte)(a1[i]<<SHIFT);
1181     }
1182   }
1183   static void test_sllc_on(byte[] a0, byte[] a1) {
1184     for (int i = 0; i < a0.length; i+=1) {
1185       a0[i] = (byte)(a1[i]<<(-SHIFT));
1186     }
1187   }
1188   static void test_sllv(byte[] a0, byte[] a1, int b) {
1189     for (int i = 0; i < a0.length; i+=1) {
1190       a0[i] = (byte)(a1[i]<<b);
1191     }
1192   }
1193   static void test_sllc_add(byte[] a0, byte[] a1) {
1194     for (int i = 0; i < a0.length; i+=1) {
1195       a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
1196     }
1197   }
1198   static void test_sllv_add(byte[] a0, byte[] a1, int b) {
1199     for (int i = 0; i < a0.length; i+=1) {
1200       a0[i] = (byte)((a1[i] + b)<<VALUE);
1201     }
1202   }
1203   static void test_sllc_and(byte[] a0, byte[] a1) {
1204     for (int i = 0; i < a0.length; i+=1) {
1205       a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
1206     }
1207   }
1208   static void test_sllv_and(byte[] a0, byte[] a1, int b) {
1209     for (int i = 0; i < a0.length; i+=1) {
1210       a0[i] = (byte)((a1[i] & b)<<VALUE);
1211     }
1212   }
1213 
1214   static void test_srlc(byte[] a0, byte[] a1) {
1215     for (int i = 0; i < a0.length; i+=1) {
1216       a0[i] = (byte)(a1[i]>>>VALUE);
1217     }
1218   }
1219   static void test_srlc_n(byte[] a0, byte[] a1) {
1220     for (int i = 0; i < a0.length; i+=1) {
1221       a0[i] = (byte)(a1[i]>>>(-VALUE));
1222     }
1223   }
1224   static void test_srlc_o(byte[] a0, byte[] a1) {
1225     for (int i = 0; i < a0.length; i+=1) {
1226       a0[i] = (byte)(a1[i]>>>SHIFT);
1227     }
1228   }
1229   static void test_srlc_on(byte[] a0, byte[] a1) {
1230     for (int i = 0; i < a0.length; i+=1) {
1231       a0[i] = (byte)(a1[i]>>>(-SHIFT));
1232     }
1233   }
1234   static void test_srlv(byte[] a0, byte[] a1, int b) {
1235     for (int i = 0; i < a0.length; i+=1) {
1236       a0[i] = (byte)(a1[i]>>>b);
1237     }
1238   }
1239   static void test_srlc_add(byte[] a0, byte[] a1) {
1240     for (int i = 0; i < a0.length; i+=1) {
1241       a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
1242     }
1243   }
1244   static void test_srlv_add(byte[] a0, byte[] a1, int b) {
1245     for (int i = 0; i < a0.length; i+=1) {
1246       a0[i] = (byte)((a1[i] + b)>>>VALUE);
1247     }
1248   }
1249   static void test_srlc_and(byte[] a0, byte[] a1) {
1250     for (int i = 0; i < a0.length; i+=1) {
1251       a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
1252     }
1253   }
1254   static void test_srlv_and(byte[] a0, byte[] a1, int b) {
1255     for (int i = 0; i < a0.length; i+=1) {
1256       a0[i] = (byte)((a1[i] & b)>>>VALUE);
1257     }
1258   }
1259 
1260   static void test_srac(byte[] a0, byte[] a1) {
1261     for (int i = 0; i < a0.length; i+=1) {
1262       a0[i] = (byte)(a1[i]>>VALUE);
1263     }
1264   }
1265   static void test_srac_n(byte[] a0, byte[] a1) {
1266     for (int i = 0; i < a0.length; i+=1) {
1267       a0[i] = (byte)(a1[i]>>(-VALUE));
1268     }
1269   }
1270   static void test_srac_o(byte[] a0, byte[] a1) {
1271     for (int i = 0; i < a0.length; i+=1) {
1272       a0[i] = (byte)(a1[i]>>SHIFT);
1273     }
1274   }
1275   static void test_srac_on(byte[] a0, byte[] a1) {
1276     for (int i = 0; i < a0.length; i+=1) {
1277       a0[i] = (byte)(a1[i]>>(-SHIFT));
1278     }
1279   }
1280   static void test_srav(byte[] a0, byte[] a1, int b) {
1281     for (int i = 0; i < a0.length; i+=1) {
1282       a0[i] = (byte)(a1[i]>>b);
1283     }
1284   }
1285   static void test_srac_add(byte[] a0, byte[] a1) {
1286     for (int i = 0; i < a0.length; i+=1) {
1287       a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
1288     }
1289   }
1290   static void test_srav_add(byte[] a0, byte[] a1, int b) {
1291     for (int i = 0; i < a0.length; i+=1) {
1292       a0[i] = (byte)((a1[i] + b)>>VALUE);
1293     }
1294   }
1295   static void test_srac_and(byte[] a0, byte[] a1) {
1296     for (int i = 0; i < a0.length; i+=1) {
1297       a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
1298     }
1299   }
1300   static void test_srav_and(byte[] a0, byte[] a1, int b) {
1301     for (int i = 0; i < a0.length; i+=1) {
1302       a0[i] = (byte)((a1[i] & b)>>VALUE);
1303     }
1304   }
1305 
1306   static void test_pack2(short[] p2, byte[] a1) {
1307     if (p2.length*2 > a1.length) return;
1308     for (int i = 0; i < p2.length; i+=1) {
1309       short l0 = (short)a1[i*2+0];
1310       short l1 = (short)a1[i*2+1];
1311       p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
1312     }
1313   }
1314   static void test_unpack2(byte[] a0, short[] p2) {
1315     if (p2.length*2 > a0.length) return;
1316     for (int i = 0; i < p2.length; i+=1) {
1317       short l = p2[i];
1318       a0[i*2+0] = (byte)(l & 0xFF);
1319       a0[i*2+1] = (byte)(l >> 8);
1320     }
1321   }
1322   static void test_pack2_swap(short[] p2, byte[] a1) {
1323     if (p2.length*2 > a1.length) return;
1324     for (int i = 0; i < p2.length; i+=1) {
1325       short l0 = (short)a1[i*2+0];
1326       short l1 = (short)a1[i*2+1];
1327       p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
1328     }
1329   }
1330   static void test_unpack2_swap(byte[] a0, short[] p2) {
1331     if (p2.length*2 > a0.length) return;
1332     for (int i = 0; i < p2.length; i+=1) {
1333       short l = p2[i];
1334       a0[i*2+0] = (byte)(l >> 8);
1335       a0[i*2+1] = (byte)(l & 0xFF);
1336     }
1337   }
1338 
1339   static void test_pack4(int[] p4, byte[] a1) {
1340     if (p4.length*4 > a1.length) return;
1341     for (int i = 0; i < p4.length; i+=1) {
1342       int l0 = (int)a1[i*4+0];
1343       int l1 = (int)a1[i*4+1];
1344       int l2 = (int)a1[i*4+2];
1345       int l3 = (int)a1[i*4+3];
1346       p4[i] = (l0 & 0xFF) |
1347              ((l1 & 0xFF) <<  8) |
1348              ((l2 & 0xFF) << 16) |
1349              ((l3 & 0xFF) << 24);
1350     }
1351   }
1352   static void test_unpack4(byte[] a0, int[] p4) {
1353     if (p4.length*4 > a0.length) return;
1354     for (int i = 0; i < p4.length; i+=1) {
1355       int l = p4[i];
1356       a0[i*4+0] = (byte)(l & 0xFF);
1357       a0[i*4+1] = (byte)(l >>  8);
1358       a0[i*4+2] = (byte)(l >> 16);
1359       a0[i*4+3] = (byte)(l >> 24);
1360     }
1361   }
1362   static void test_pack4_swap(int[] p4, byte[] a1) {
1363     if (p4.length*4 > a1.length) return;
1364     for (int i = 0; i < p4.length; i+=1) {
1365       int l0 = (int)a1[i*4+0];
1366       int l1 = (int)a1[i*4+1];
1367       int l2 = (int)a1[i*4+2];
1368       int l3 = (int)a1[i*4+3];
1369       p4[i] = (l3 & 0xFF) |
1370              ((l2 & 0xFF) <<  8) |
1371              ((l1 & 0xFF) << 16) |
1372              ((l0 & 0xFF) << 24);
1373     }
1374   }
1375   static void test_unpack4_swap(byte[] a0, int[] p4) {
1376     if (p4.length*4 > a0.length) return;
1377     for (int i = 0; i < p4.length; i+=1) {
1378       int l = p4[i];
1379       a0[i*4+0] = (byte)(l >> 24);
1380       a0[i*4+1] = (byte)(l >> 16);
1381       a0[i*4+2] = (byte)(l >>  8);
1382       a0[i*4+3] = (byte)(l & 0xFF);
1383     }
1384   }
1385 
1386   static void test_pack8(long[] p8, byte[] a1) {
1387     if (p8.length*8 > a1.length) return;
1388     for (int i = 0; i < p8.length; i+=1) {
1389       long l0 = (long)a1[i*8+0];
1390       long l1 = (long)a1[i*8+1];
1391       long l2 = (long)a1[i*8+2];
1392       long l3 = (long)a1[i*8+3];
1393       long l4 = (long)a1[i*8+4];
1394       long l5 = (long)a1[i*8+5];
1395       long l6 = (long)a1[i*8+6];
1396       long l7 = (long)a1[i*8+7];
1397       p8[i] = (l0 & 0xFFl) |
1398              ((l1 & 0xFFl) <<  8) |
1399              ((l2 & 0xFFl) << 16) |
1400              ((l3 & 0xFFl) << 24) |
1401              ((l4 & 0xFFl) << 32) |
1402              ((l5 & 0xFFl) << 40) |
1403              ((l6 & 0xFFl) << 48) |
1404              ((l7 & 0xFFl) << 56);
1405     }
1406   }
1407   static void test_unpack8(byte[] a0, long[] p8) {
1408     if (p8.length*8 > a0.length) return;
1409     for (int i = 0; i < p8.length; i+=1) {
1410       long l = p8[i];
1411       a0[i*8+0] = (byte)(l & 0xFFl);
1412       a0[i*8+1] = (byte)(l >>  8);
1413       a0[i*8+2] = (byte)(l >> 16);
1414       a0[i*8+3] = (byte)(l >> 24);
1415       a0[i*8+4] = (byte)(l >> 32);
1416       a0[i*8+5] = (byte)(l >> 40);
1417       a0[i*8+6] = (byte)(l >> 48);
1418       a0[i*8+7] = (byte)(l >> 56);
1419     }
1420   }
1421   static void test_pack8_swap(long[] p8, byte[] a1) {
1422     if (p8.length*8 > a1.length) return;
1423     for (int i = 0; i < p8.length; i+=1) {
1424       long l0 = (long)a1[i*8+0];
1425       long l1 = (long)a1[i*8+1];
1426       long l2 = (long)a1[i*8+2];
1427       long l3 = (long)a1[i*8+3];
1428       long l4 = (long)a1[i*8+4];
1429       long l5 = (long)a1[i*8+5];
1430       long l6 = (long)a1[i*8+6];
1431       long l7 = (long)a1[i*8+7];
1432       p8[i] = (l7 & 0xFFl) |
1433              ((l6 & 0xFFl) <<  8) |
1434              ((l5 & 0xFFl) << 16) |
1435              ((l4 & 0xFFl) << 24) |
1436              ((l3 & 0xFFl) << 32) |
1437              ((l2 & 0xFFl) << 40) |
1438              ((l1 & 0xFFl) << 48) |
1439              ((l0 & 0xFFl) << 56);
1440     }
1441   }
1442   static void test_unpack8_swap(byte[] a0, long[] p8) {
1443     if (p8.length*8 > a0.length) return;
1444     for (int i = 0; i < p8.length; i+=1) {
1445       long l = p8[i];
1446       a0[i*8+0] = (byte)(l >> 56);
1447       a0[i*8+1] = (byte)(l >> 48);
1448       a0[i*8+2] = (byte)(l >> 40);
1449       a0[i*8+3] = (byte)(l >> 32);
1450       a0[i*8+4] = (byte)(l >> 24);
1451       a0[i*8+5] = (byte)(l >> 16);
1452       a0[i*8+6] = (byte)(l >>  8);
1453       a0[i*8+7] = (byte)(l & 0xFFl);
1454     }
1455   }
1456 
1457   static int verify(String text, int i, byte elem, byte val) {
1458     if (elem != val) {
1459       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1460       return 1;
1461     }
1462     return 0;
1463   }
1464 
1465   static int verify(String text, int i, short elem, short val) {
1466     if (elem != val) {
1467       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1468       return 1;
1469     }
1470     return 0;
1471   }
1472 
1473   static int verify(String text, int i, int elem, int val) {
1474     if (elem != val) {
1475       System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
1476       return 1;
1477     }
1478     return 0;
1479   }
1480 
1481   static int verify(String text, int i, long elem, long val) {
1482     if (elem != val) {
1483       System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
1484       return 1;
1485     }
1486     return 0;
1487   }
1488 }