1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 /**
  26  * @test
  27  * @bug 6340864
  28  * @summary Implement vectorization optimizations in hotspot-server
  29  *
  30  * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect
  31  */
  32 
  33 public class TestByteVect {
  34   private static final int ARRLEN = 997;     // length of the byte arrays a0..a4 in test(); packed arrays use ARRLEN/2, /4 and /8
  35   private static final int ITERS  = 11000;   // iteration count of the warmup loop and of each timing loop in test()
  36   private static final int ADD_INIT = 63;    // base of the a1 fill pattern (byte)(ADD_INIT+i); also passed as the addend to the *_add shift tests
  37   private static final int BIT_MASK = 0xB7;  // mask operand for the and/or/xor tests and the *_and shift tests; a4 is filled with (byte)BIT_MASK
  38   private static final int VALUE = 3;        // operand for the add/sub/mul/div tests and shift count for the shift tests; negated for the *_n variants
  39   private static final int SHIFT = 8;        // shift count passed to the *_o shift tests (negated for *_on); presumably chosen as the bit width of a byte -- confirm
  40 
  41   public static void main(String args[]) {
  42     System.out.println("Testing Byte vectors");
  43     int errn = test();
  44     if (errn > 0) {
  45       System.err.println("FAILED: " + errn + " errors");
  46       System.exit(97);
  47     }
  48     System.out.println("PASSED");
  49   }
  50 
  51   static int test() {
  52     byte[] a0 = new byte[ARRLEN];
  53     byte[] a1 = new byte[ARRLEN];
  54     byte[] a2 = new byte[ARRLEN];
  55     byte[] a3 = new byte[ARRLEN];
  56     byte[] a4 = new byte[ARRLEN];
  57     short[] p2 = new short[ARRLEN/2];
  58       int[] p4 = new   int[ARRLEN/4];
  59      long[] p8 = new  long[ARRLEN/8];
  60     // Initialize
  61     int gold_sum = 0;
  62     for (int i=0; i<ARRLEN; i++) {
  63       byte val = (byte)(ADD_INIT+i);
  64       gold_sum += val;
  65       a1[i] = val;
  66       a2[i] = (byte)VALUE;
  67       a3[i] = (byte)-VALUE;
  68       a4[i] = (byte)BIT_MASK;
  69     }
  70     System.out.println("Warmup");
  71     for (int i=0; i<ITERS; i++) {
  72       test_sum(a1);
  73       test_addc(a0, a1);
  74       test_addv(a0, a1, (byte)VALUE);
  75       test_adda(a0, a1, a2);
  76       test_subc(a0, a1);
  77       test_subv(a0, a1, (byte)VALUE);
  78       test_suba(a0, a1, a2);
  79 
  80       test_mulc(a0, a1);
  81       test_mulv(a0, a1, (byte)VALUE);
  82       test_mula(a0, a1, a2);
  83       test_divc(a0, a1);
  84       test_divv(a0, a1, (byte)VALUE);
  85       test_diva(a0, a1, a2);
  86       test_mulc_n(a0, a1);
  87       test_mulv(a0, a1, (byte)-VALUE);
  88       test_mula(a0, a1, a3);
  89       test_divc_n(a0, a1);
  90       test_divv(a0, a1, (byte)-VALUE);
  91       test_diva(a0, a1, a3);
  92 
  93       test_andc(a0, a1);
  94       test_andv(a0, a1, (byte)BIT_MASK);
  95       test_anda(a0, a1, a4);
  96       test_orc(a0, a1);
  97       test_orv(a0, a1, (byte)BIT_MASK);
  98       test_ora(a0, a1, a4);
  99       test_xorc(a0, a1);
 100       test_xorv(a0, a1, (byte)BIT_MASK);
 101       test_xora(a0, a1, a4);
 102 
 103       test_sllc(a0, a1);
 104       test_sllv(a0, a1, VALUE);
 105       test_srlc(a0, a1);
 106       test_srlv(a0, a1, VALUE);
 107       test_srac(a0, a1);
 108       test_srav(a0, a1, VALUE);
 109 
 110       test_sllc_n(a0, a1);
 111       test_sllv(a0, a1, -VALUE);
 112       test_srlc_n(a0, a1);
 113       test_srlv(a0, a1, -VALUE);
 114       test_srac_n(a0, a1);
 115       test_srav(a0, a1, -VALUE);
 116 
 117       test_sllc_o(a0, a1);
 118       test_sllv(a0, a1, SHIFT);
 119       test_srlc_o(a0, a1);
 120       test_srlv(a0, a1, SHIFT);
 121       test_srac_o(a0, a1);
 122       test_srav(a0, a1, SHIFT);
 123 
 124       test_sllc_on(a0, a1);
 125       test_sllv(a0, a1, -SHIFT);
 126       test_srlc_on(a0, a1);
 127       test_srlv(a0, a1, -SHIFT);
 128       test_srac_on(a0, a1);
 129       test_srav(a0, a1, -SHIFT);
 130 
 131       test_sllc_add(a0, a1);
 132       test_sllv_add(a0, a1, ADD_INIT);
 133       test_srlc_add(a0, a1);
 134       test_srlv_add(a0, a1, ADD_INIT);
 135       test_srac_add(a0, a1);
 136       test_srav_add(a0, a1, ADD_INIT);
 137 
 138       test_sllc_and(a0, a1);
 139       test_sllv_and(a0, a1, BIT_MASK);
 140       test_srlc_and(a0, a1);
 141       test_srlv_and(a0, a1, BIT_MASK);
 142       test_srac_and(a0, a1);
 143       test_srav_and(a0, a1, BIT_MASK);
 144 
 145       test_pack2(p2, a1);
 146       test_unpack2(a0, p2);
 147       test_pack2_swap(p2, a1);
 148       test_unpack2_swap(a0, p2);
 149       test_pack4(p4, a1);
 150       test_unpack4(a0, p4);
 151       test_pack4_swap(p4, a1);
 152       test_unpack4_swap(a0, p4);
 153       test_pack8(p8, a1);
 154       test_unpack8(a0, p8);
 155       test_pack8_swap(p8, a1);
 156       test_unpack8_swap(a0, p8);
 157     }
 158     // Test and verify results
 159     System.out.println("Verification");
 160     int errn = 0;
 161     {
 162       int sum = test_sum(a1);
 163       if (sum != gold_sum) {
 164         System.err.println("test_sum:  " + sum + " != " + gold_sum);
 165         errn++;
 166       }
 167 
 168       test_addc(a0, a1);
 169       for (int i=0; i<ARRLEN; i++) {
 170         errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 171       }
 172       test_addv(a0, a1, (byte)VALUE);
 173       for (int i=0; i<ARRLEN; i++) {
 174         errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 175       }
 176       test_adda(a0, a1, a2);
 177       for (int i=0; i<ARRLEN; i++) {
 178         errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 179       }
 180 
 181       test_subc(a0, a1);
 182       for (int i=0; i<ARRLEN; i++) {
 183         errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 184       }
 185       test_subv(a0, a1, (byte)VALUE);
 186       for (int i=0; i<ARRLEN; i++) {
 187         errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 188       }
 189       test_suba(a0, a1, a2);
 190       for (int i=0; i<ARRLEN; i++) {
 191         errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 192       }
 193 
 194       test_mulc(a0, a1);
 195       for (int i=0; i<ARRLEN; i++) {
 196         errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 197       }
 198       test_mulv(a0, a1, (byte)VALUE);
 199       for (int i=0; i<ARRLEN; i++) {
 200         errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 201       }
 202       test_mula(a0, a1, a2);
 203       for (int i=0; i<ARRLEN; i++) {
 204         errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 205       }
 206 
 207       test_divc(a0, a1);
 208       for (int i=0; i<ARRLEN; i++) {
 209         errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 210       }
 211       test_divv(a0, a1, (byte)VALUE);
 212       for (int i=0; i<ARRLEN; i++) {
 213         errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 214       }
 215       test_diva(a0, a1, a2);
 216       for (int i=0; i<ARRLEN; i++) {
 217         errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 218       }
 219 
 220       test_mulc_n(a0, a1);
 221       for (int i=0; i<ARRLEN; i++) {
 222         errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 223       }
 224       test_mulv(a0, a1, (byte)-VALUE);
 225       for (int i=0; i<ARRLEN; i++) {
 226         errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 227       }
 228       test_mula(a0, a1, a3);
 229       for (int i=0; i<ARRLEN; i++) {
 230         errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 231       }
 232 
 233       test_divc_n(a0, a1);
 234       for (int i=0; i<ARRLEN; i++) {
 235         errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 236       }
 237       test_divv(a0, a1, (byte)-VALUE);
 238       for (int i=0; i<ARRLEN; i++) {
 239         errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 240       }
 241       test_diva(a0, a1, a3);
 242       for (int i=0; i<ARRLEN; i++) {
 243         errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 244       }
 245 
 246       test_andc(a0, a1);
 247       for (int i=0; i<ARRLEN; i++) {
 248         errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 249       }
 250       test_andv(a0, a1, (byte)BIT_MASK);
 251       for (int i=0; i<ARRLEN; i++) {
 252         errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 253       }
 254       test_anda(a0, a1, a4);
 255       for (int i=0; i<ARRLEN; i++) {
 256         errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 257       }
 258 
 259       test_orc(a0, a1);
 260       for (int i=0; i<ARRLEN; i++) {
 261         errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 262       }
 263       test_orv(a0, a1, (byte)BIT_MASK);
 264       for (int i=0; i<ARRLEN; i++) {
 265         errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 266       }
 267       test_ora(a0, a1, a4);
 268       for (int i=0; i<ARRLEN; i++) {
 269         errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 270       }
 271 
 272       test_xorc(a0, a1);
 273       for (int i=0; i<ARRLEN; i++) {
 274         errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 275       }
 276       test_xorv(a0, a1, (byte)BIT_MASK);
 277       for (int i=0; i<ARRLEN; i++) {
 278         errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 279       }
 280       test_xora(a0, a1, a4);
 281       for (int i=0; i<ARRLEN; i++) {
 282         errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 283       }
 284 
 285       test_sllc(a0, a1);
 286       for (int i=0; i<ARRLEN; i++) {
 287         errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 288       }
 289       test_sllv(a0, a1, VALUE);
 290       for (int i=0; i<ARRLEN; i++) {
 291         errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 292       }
 293 
 294       test_srlc(a0, a1);
 295       for (int i=0; i<ARRLEN; i++) {
 296         errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 297       }
 298       test_srlv(a0, a1, VALUE);
 299       for (int i=0; i<ARRLEN; i++) {
 300         errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 301       }
 302 
 303       test_srac(a0, a1);
 304       for (int i=0; i<ARRLEN; i++) {
 305         errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 306       }
 307       test_srav(a0, a1, VALUE);
 308       for (int i=0; i<ARRLEN; i++) {
 309         errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 310       }
 311 
 312       test_sllc_n(a0, a1);
 313       for (int i=0; i<ARRLEN; i++) {
 314         errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 315       }
 316       test_sllv(a0, a1, -VALUE);
 317       for (int i=0; i<ARRLEN; i++) {
 318         errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 319       }
 320 
 321       test_srlc_n(a0, a1);
 322       for (int i=0; i<ARRLEN; i++) {
 323         errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 324       }
 325       test_srlv(a0, a1, -VALUE);
 326       for (int i=0; i<ARRLEN; i++) {
 327         errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 328       }
 329 
 330       test_srac_n(a0, a1);
 331       for (int i=0; i<ARRLEN; i++) {
 332         errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 333       }
 334       test_srav(a0, a1, -VALUE);
 335       for (int i=0; i<ARRLEN; i++) {
 336         errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 337       }
 338 
 339       test_sllc_o(a0, a1);
 340       for (int i=0; i<ARRLEN; i++) {
 341         errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 342       }
 343       test_sllv(a0, a1, SHIFT);
 344       for (int i=0; i<ARRLEN; i++) {
 345         errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 346       }
 347 
 348       test_srlc_o(a0, a1);
 349       for (int i=0; i<ARRLEN; i++) {
 350         errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 351       }
 352       test_srlv(a0, a1, SHIFT);
 353       for (int i=0; i<ARRLEN; i++) {
 354         errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 355       }
 356 
 357       test_srac_o(a0, a1);
 358       for (int i=0; i<ARRLEN; i++) {
 359         errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 360       }
 361       test_srav(a0, a1, SHIFT);
 362       for (int i=0; i<ARRLEN; i++) {
 363         errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 364       }
 365 
 366       test_sllc_on(a0, a1);
 367       for (int i=0; i<ARRLEN; i++) {
 368         errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 369       }
 370       test_sllv(a0, a1, -SHIFT);
 371       for (int i=0; i<ARRLEN; i++) {
 372         errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 373       }
 374 
 375       test_srlc_on(a0, a1);
 376       for (int i=0; i<ARRLEN; i++) {
 377         errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 378       }
 379       test_srlv(a0, a1, -SHIFT);
 380       for (int i=0; i<ARRLEN; i++) {
 381         errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 382       }
 383 
 384       test_srac_on(a0, a1);
 385       for (int i=0; i<ARRLEN; i++) {
 386         errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 387       }
 388       test_srav(a0, a1, -SHIFT);
 389       for (int i=0; i<ARRLEN; i++) {
 390         errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 391       }
 392 
 393       test_sllc_add(a0, a1);
 394       for (int i=0; i<ARRLEN; i++) {
 395         errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 396       }
 397       test_sllv_add(a0, a1, ADD_INIT);
 398       for (int i=0; i<ARRLEN; i++) {
 399         errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 400       }
 401 
 402       test_srlc_add(a0, a1);
 403       for (int i=0; i<ARRLEN; i++) {
 404         errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 405       }
 406       test_srlv_add(a0, a1, ADD_INIT);
 407       for (int i=0; i<ARRLEN; i++) {
 408         errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 409       }
 410 
 411       test_srac_add(a0, a1);
 412       for (int i=0; i<ARRLEN; i++) {
 413         errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 414       }
 415       test_srav_add(a0, a1, ADD_INIT);
 416       for (int i=0; i<ARRLEN; i++) {
 417         errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 418       }
 419 
 420       test_sllc_and(a0, a1);
 421       for (int i=0; i<ARRLEN; i++) {
 422         errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 423       }
 424       test_sllv_and(a0, a1, BIT_MASK);
 425       for (int i=0; i<ARRLEN; i++) {
 426         errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 427       }
 428 
 429       test_srlc_and(a0, a1);
 430       for (int i=0; i<ARRLEN; i++) {
 431         errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 432       }
 433       test_srlv_and(a0, a1, BIT_MASK);
 434       for (int i=0; i<ARRLEN; i++) {
 435         errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 436       }
 437 
 438       test_srac_and(a0, a1);
 439       for (int i=0; i<ARRLEN; i++) {
 440         errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 441       }
 442       test_srav_and(a0, a1, BIT_MASK);
 443       for (int i=0; i<ARRLEN; i++) {
 444         errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 445       }
 446 
 447       test_pack2(p2, a1);
 448       for (int i=0; i<ARRLEN/2; i++) {
 449         errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
 450       }
 451       for (int i=0; i<ARRLEN; i++) {
 452         a0[i] = -1;
 453       }
 454       test_unpack2(a0, p2);
 455       for (int i=0; i<(ARRLEN&(-2)); i++) {
 456         errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
 457       }
 458 
 459       test_pack2_swap(p2, a1);
 460       for (int i=0; i<ARRLEN/2; i++) {
 461         errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
 462       }
 463       for (int i=0; i<ARRLEN; i++) {
 464         a0[i] = -1;
 465       }
 466       test_unpack2_swap(a0, p2);
 467       for (int i=0; i<(ARRLEN&(-2)); i++) {
 468         errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 469       }
 470 
 471       test_pack4(p4, a1);
 472       for (int i=0; i<ARRLEN/4; i++) {
 473         errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
 474                                                  (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
 475                                                  (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
 476                                                  (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
 477       }
 478       for (int i=0; i<ARRLEN; i++) {
 479         a0[i] = -1;
 480       }
 481       test_unpack4(a0, p4);
 482       for (int i=0; i<(ARRLEN&(-4)); i++) {
 483         errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
 484       }
 485 
 486       test_pack4_swap(p4, a1);
 487       for (int i=0; i<ARRLEN/4; i++) {
 488         errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
 489                                                       (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
 490                                                       (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
 491                                                       (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
 492       }
 493       for (int i=0; i<ARRLEN; i++) {
 494         a0[i] = -1;
 495       }
 496       test_unpack4_swap(a0, p4);
 497       for (int i=0; i<(ARRLEN&(-4)); i++) {
 498         errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 499       }
 500 
 501       test_pack8(p8, a1);
 502       for (int i=0; i<ARRLEN/8; i++) {
 503         errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
 504                                                  (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
 505                                                  (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
 506                                                  (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
 507                                                  (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
 508                                                  (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
 509                                                  (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
 510                                                  (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
 511       }
 512       for (int i=0; i<ARRLEN; i++) {
 513         a0[i] = -1;
 514       }
 515       test_unpack8(a0, p8);
 516       for (int i=0; i<(ARRLEN&(-8)); i++) {
 517         errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
 518       }
 519 
 520       test_pack8_swap(p8, a1);
 521       for (int i=0; i<ARRLEN/8; i++) {
 522         errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
 523                                                       (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
 524                                                       (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
 525                                                       (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
 526                                                       (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
 527                                                       (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
 528                                                       (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
 529                                                       (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
 530       }
 531       for (int i=0; i<ARRLEN; i++) {
 532         a0[i] = -1;
 533       }
 534       test_unpack8_swap(a0, p8);
 535       for (int i=0; i<(ARRLEN&(-8)); i++) {
 536         errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 537       }
 538 
 539     }
 540 
 541     if (errn > 0)
 542       return errn;
 543 
 544     System.out.println("Time");
 545     long start, end;
 546 
 547     start = System.currentTimeMillis();
 548     for (int i=0; i<ITERS; i++) {
 549       test_sum(a1);
 550     }
 551     end = System.currentTimeMillis();
 552     System.out.println("test_sum: " + (end - start));
 553 
 554     start = System.currentTimeMillis();
 555     for (int i=0; i<ITERS; i++) {
 556       test_addc(a0, a1);
 557     }
 558     end = System.currentTimeMillis();
 559     System.out.println("test_addc: " + (end - start));
 560     start = System.currentTimeMillis();
 561     for (int i=0; i<ITERS; i++) {
 562       test_addv(a0, a1, (byte)VALUE);
 563     }
 564     end = System.currentTimeMillis();
 565     System.out.println("test_addv: " + (end - start));
 566     start = System.currentTimeMillis();
 567     for (int i=0; i<ITERS; i++) {
 568       test_adda(a0, a1, a2);
 569     }
 570     end = System.currentTimeMillis();
 571     System.out.println("test_adda: " + (end - start));
 572 
 573     start = System.currentTimeMillis();
 574     for (int i=0; i<ITERS; i++) {
 575       test_subc(a0, a1);
 576     }
 577     end = System.currentTimeMillis();
 578     System.out.println("test_subc: " + (end - start));
 579     start = System.currentTimeMillis();
 580     for (int i=0; i<ITERS; i++) {
 581       test_subv(a0, a1, (byte)VALUE);
 582     }
 583     end = System.currentTimeMillis();
 584     System.out.println("test_subv: " + (end - start));
 585     start = System.currentTimeMillis();
 586     for (int i=0; i<ITERS; i++) {
 587       test_suba(a0, a1, a2);
 588     }
 589     end = System.currentTimeMillis();
 590     System.out.println("test_suba: " + (end - start));
 591 
 592     start = System.currentTimeMillis();
 593     for (int i=0; i<ITERS; i++) {
 594       test_mulc(a0, a1);
 595     }
 596     end = System.currentTimeMillis();
 597     System.out.println("test_mulc: " + (end - start));
 598     start = System.currentTimeMillis();
 599     for (int i=0; i<ITERS; i++) {
 600       test_mulv(a0, a1, (byte)VALUE);
 601     }
 602     end = System.currentTimeMillis();
 603     System.out.println("test_mulv: " + (end - start));
 604     start = System.currentTimeMillis();
 605     for (int i=0; i<ITERS; i++) {
 606       test_mula(a0, a1, a2);
 607     }
 608     end = System.currentTimeMillis();
 609     System.out.println("test_mula: " + (end - start));
 610 
 611     start = System.currentTimeMillis();
 612     for (int i=0; i<ITERS; i++) {
 613       test_divc(a0, a1);
 614     }
 615     end = System.currentTimeMillis();
 616     System.out.println("test_divc: " + (end - start));
 617     start = System.currentTimeMillis();
 618     for (int i=0; i<ITERS; i++) {
 619       test_divv(a0, a1, (byte)VALUE);
 620     }
 621     end = System.currentTimeMillis();
 622     System.out.println("test_divv: " + (end - start));
 623     start = System.currentTimeMillis();
 624     for (int i=0; i<ITERS; i++) {
 625       test_diva(a0, a1, a2);
 626     }
 627     end = System.currentTimeMillis();
 628     System.out.println("test_diva: " + (end - start));
 629 
 630     start = System.currentTimeMillis();
 631     for (int i=0; i<ITERS; i++) {
 632       test_mulc_n(a0, a1);
 633     }
 634     end = System.currentTimeMillis();
 635     System.out.println("test_mulc_n: " + (end - start));
 636     start = System.currentTimeMillis();
 637     for (int i=0; i<ITERS; i++) {
 638       test_mulv(a0, a1, (byte)-VALUE);
 639     }
 640     end = System.currentTimeMillis();
 641     System.out.println("test_mulv_n: " + (end - start));
 642     start = System.currentTimeMillis();
 643     for (int i=0; i<ITERS; i++) {
 644       test_mula(a0, a1, a3);
 645     }
 646     end = System.currentTimeMillis();
 647     System.out.println("test_mula_n: " + (end - start));
 648 
 649     start = System.currentTimeMillis();
 650     for (int i=0; i<ITERS; i++) {
 651       test_divc_n(a0, a1);
 652     }
 653     end = System.currentTimeMillis();
 654     System.out.println("test_divc_n: " + (end - start));
 655     start = System.currentTimeMillis();
 656     for (int i=0; i<ITERS; i++) {
 657       test_divv(a0, a1, (byte)-VALUE);
 658     }
 659     end = System.currentTimeMillis();
 660     System.out.println("test_divv_n: " + (end - start));
 661     start = System.currentTimeMillis();
 662     for (int i=0; i<ITERS; i++) {
 663       test_diva(a0, a1, a3);
 664     }
 665     end = System.currentTimeMillis();
 666     System.out.println("test_diva_n: " + (end - start));
 667 
 668     start = System.currentTimeMillis();
 669     for (int i=0; i<ITERS; i++) {
 670       test_andc(a0, a1);
 671     }
 672     end = System.currentTimeMillis();
 673     System.out.println("test_andc: " + (end - start));
 674     start = System.currentTimeMillis();
 675     for (int i=0; i<ITERS; i++) {
 676       test_andv(a0, a1, (byte)BIT_MASK);
 677     }
 678     end = System.currentTimeMillis();
 679     System.out.println("test_andv: " + (end - start));
 680     start = System.currentTimeMillis();
 681     for (int i=0; i<ITERS; i++) {
 682       test_anda(a0, a1, a4);
 683     }
 684     end = System.currentTimeMillis();
 685     System.out.println("test_anda: " + (end - start));
 686 
 687     start = System.currentTimeMillis();
 688     for (int i=0; i<ITERS; i++) {
 689       test_orc(a0, a1);
 690     }
 691     end = System.currentTimeMillis();
 692     System.out.println("test_orc: " + (end - start));
 693     start = System.currentTimeMillis();
 694     for (int i=0; i<ITERS; i++) {
 695       test_orv(a0, a1, (byte)BIT_MASK);
 696     }
 697     end = System.currentTimeMillis();
 698     System.out.println("test_orv: " + (end - start));
 699     start = System.currentTimeMillis();
 700     for (int i=0; i<ITERS; i++) {
 701       test_ora(a0, a1, a4);
 702     }
 703     end = System.currentTimeMillis();
 704     System.out.println("test_ora: " + (end - start));
 705 
 706     start = System.currentTimeMillis();
 707     for (int i=0; i<ITERS; i++) {
 708       test_xorc(a0, a1);
 709     }
 710     end = System.currentTimeMillis();
 711     System.out.println("test_xorc: " + (end - start));
 712     start = System.currentTimeMillis();
 713     for (int i=0; i<ITERS; i++) {
 714       test_xorv(a0, a1, (byte)BIT_MASK);
 715     }
 716     end = System.currentTimeMillis();
 717     System.out.println("test_xorv: " + (end - start));
 718     start = System.currentTimeMillis();
 719     for (int i=0; i<ITERS; i++) {
 720       test_xora(a0, a1, a4);
 721     }
 722     end = System.currentTimeMillis();
 723     System.out.println("test_xora: " + (end - start));
 724 
 725     start = System.currentTimeMillis();
 726     for (int i=0; i<ITERS; i++) {
 727       test_sllc(a0, a1);
 728     }
 729     end = System.currentTimeMillis();
 730     System.out.println("test_sllc: " + (end - start));
 731     start = System.currentTimeMillis();
 732     for (int i=0; i<ITERS; i++) {
 733       test_sllv(a0, a1, VALUE);
 734     }
 735     end = System.currentTimeMillis();
 736     System.out.println("test_sllv: " + (end - start));
 737 
 738     start = System.currentTimeMillis();
 739     for (int i=0; i<ITERS; i++) {
 740       test_srlc(a0, a1);
 741     }
 742     end = System.currentTimeMillis();
 743     System.out.println("test_srlc: " + (end - start));
 744     start = System.currentTimeMillis();
 745     for (int i=0; i<ITERS; i++) {
 746       test_srlv(a0, a1, VALUE);
 747     }
 748     end = System.currentTimeMillis();
 749     System.out.println("test_srlv: " + (end - start));
 750 
 751     start = System.currentTimeMillis();
 752     for (int i=0; i<ITERS; i++) {
 753       test_srac(a0, a1);
 754     }
 755     end = System.currentTimeMillis();
 756     System.out.println("test_srac: " + (end - start));
 757     start = System.currentTimeMillis();
 758     for (int i=0; i<ITERS; i++) {
 759       test_srav(a0, a1, VALUE);
 760     }
 761     end = System.currentTimeMillis();
 762     System.out.println("test_srav: " + (end - start));
 763 
 764     start = System.currentTimeMillis();
 765     for (int i=0; i<ITERS; i++) {
 766       test_sllc_n(a0, a1);
 767     }
 768     end = System.currentTimeMillis();
 769     System.out.println("test_sllc_n: " + (end - start));
 770     start = System.currentTimeMillis();
 771     for (int i=0; i<ITERS; i++) {
 772       test_sllv(a0, a1, -VALUE);
 773     }
 774     end = System.currentTimeMillis();
 775     System.out.println("test_sllv_n: " + (end - start));
 776 
 777     start = System.currentTimeMillis();
 778     for (int i=0; i<ITERS; i++) {
 779       test_srlc_n(a0, a1);
 780     }
 781     end = System.currentTimeMillis();
 782     System.out.println("test_srlc_n: " + (end - start));
 783     start = System.currentTimeMillis();
 784     for (int i=0; i<ITERS; i++) {
 785       test_srlv(a0, a1, -VALUE);
 786     }
 787     end = System.currentTimeMillis();
 788     System.out.println("test_srlv_n: " + (end - start));
 789 
 790     start = System.currentTimeMillis();
 791     for (int i=0; i<ITERS; i++) {
 792       test_srac_n(a0, a1);
 793     }
 794     end = System.currentTimeMillis();
 795     System.out.println("test_srac_n: " + (end - start));
 796     start = System.currentTimeMillis();
 797     for (int i=0; i<ITERS; i++) {
 798       test_srav(a0, a1, -VALUE);
 799     }
 800     end = System.currentTimeMillis();
 801     System.out.println("test_srav_n: " + (end - start));
 802 
 803     start = System.currentTimeMillis();
 804     for (int i=0; i<ITERS; i++) {
 805       test_sllc_o(a0, a1);
 806     }
 807     end = System.currentTimeMillis();
 808     System.out.println("test_sllc_o: " + (end - start));
 809     start = System.currentTimeMillis();
 810     for (int i=0; i<ITERS; i++) {
 811       test_sllv(a0, a1, SHIFT);
 812     }
 813     end = System.currentTimeMillis();
 814     System.out.println("test_sllv_o: " + (end - start));
 815 
 816     start = System.currentTimeMillis();
 817     for (int i=0; i<ITERS; i++) {
 818       test_srlc_o(a0, a1);
 819     }
 820     end = System.currentTimeMillis();
 821     System.out.println("test_srlc_o: " + (end - start));
 822     start = System.currentTimeMillis();
 823     for (int i=0; i<ITERS; i++) {
 824       test_srlv(a0, a1, SHIFT);
 825     }
 826     end = System.currentTimeMillis();
 827     System.out.println("test_srlv_o: " + (end - start));
 828 
 829     start = System.currentTimeMillis();
 830     for (int i=0; i<ITERS; i++) {
 831       test_srac_o(a0, a1);
 832     }
 833     end = System.currentTimeMillis();
 834     System.out.println("test_srac_o: " + (end - start));
 835     start = System.currentTimeMillis();
 836     for (int i=0; i<ITERS; i++) {
 837       test_srav(a0, a1, SHIFT);
 838     }
 839     end = System.currentTimeMillis();
 840     System.out.println("test_srav_o: " + (end - start));
 841 
 842     start = System.currentTimeMillis();
 843     for (int i=0; i<ITERS; i++) {
 844       test_sllc_on(a0, a1);
 845     }
 846     end = System.currentTimeMillis();
 847     System.out.println("test_sllc_on: " + (end - start));
 848     start = System.currentTimeMillis();
 849     for (int i=0; i<ITERS; i++) {
 850       test_sllv(a0, a1, -SHIFT);
 851     }
 852     end = System.currentTimeMillis();
 853     System.out.println("test_sllv_on: " + (end - start));
 854 
 855     start = System.currentTimeMillis();
 856     for (int i=0; i<ITERS; i++) {
 857       test_srlc_on(a0, a1);
 858     }
 859     end = System.currentTimeMillis();
 860     System.out.println("test_srlc_on: " + (end - start));
 861     start = System.currentTimeMillis();
 862     for (int i=0; i<ITERS; i++) {
 863       test_srlv(a0, a1, -SHIFT);
 864     }
 865     end = System.currentTimeMillis();
 866     System.out.println("test_srlv_on: " + (end - start));
 867 
 868     start = System.currentTimeMillis();
 869     for (int i=0; i<ITERS; i++) {
 870       test_srac_on(a0, a1);
 871     }
 872     end = System.currentTimeMillis();
 873     System.out.println("test_srac_on: " + (end - start));
 874     start = System.currentTimeMillis();
 875     for (int i=0; i<ITERS; i++) {
 876       test_srav(a0, a1, -SHIFT);
 877     }
 878     end = System.currentTimeMillis();
 879     System.out.println("test_srav_on: " + (end - start));
 880 
 881     start = System.currentTimeMillis();
 882     for (int i=0; i<ITERS; i++) {
 883       test_sllc_add(a0, a1);
 884     }
 885     end = System.currentTimeMillis();
 886     System.out.println("test_sllc_add: " + (end - start));
 887     start = System.currentTimeMillis();
 888     for (int i=0; i<ITERS; i++) {
 889       test_sllv_add(a0, a1, ADD_INIT);
 890     }
 891     end = System.currentTimeMillis();
 892     System.out.println("test_sllv_add: " + (end - start));
 893 
 894     start = System.currentTimeMillis();
 895     for (int i=0; i<ITERS; i++) {
 896       test_srlc_add(a0, a1);
 897     }
 898     end = System.currentTimeMillis();
 899     System.out.println("test_srlc_add: " + (end - start));
 900     start = System.currentTimeMillis();
 901     for (int i=0; i<ITERS; i++) {
 902       test_srlv_add(a0, a1, ADD_INIT);
 903     }
 904     end = System.currentTimeMillis();
 905     System.out.println("test_srlv_add: " + (end - start));
 906 
 907     start = System.currentTimeMillis();
 908     for (int i=0; i<ITERS; i++) {
 909       test_srac_add(a0, a1);
 910     }
 911     end = System.currentTimeMillis();
 912     System.out.println("test_srac_add: " + (end - start));
 913     start = System.currentTimeMillis();
 914     for (int i=0; i<ITERS; i++) {
 915       test_srav_add(a0, a1, ADD_INIT);
 916     }
 917     end = System.currentTimeMillis();
 918     System.out.println("test_srav_add: " + (end - start));
 919 
 920     start = System.currentTimeMillis();
 921     for (int i=0; i<ITERS; i++) {
 922       test_sllc_and(a0, a1);
 923     }
 924     end = System.currentTimeMillis();
 925     System.out.println("test_sllc_and: " + (end - start));
 926     start = System.currentTimeMillis();
 927     for (int i=0; i<ITERS; i++) {
 928       test_sllv_and(a0, a1, BIT_MASK);
 929     }
 930     end = System.currentTimeMillis();
 931     System.out.println("test_sllv_and: " + (end - start));
 932 
 933     start = System.currentTimeMillis();
 934     for (int i=0; i<ITERS; i++) {
 935       test_srlc_and(a0, a1);
 936     }
 937     end = System.currentTimeMillis();
 938     System.out.println("test_srlc_and: " + (end - start));
 939     start = System.currentTimeMillis();
 940     for (int i=0; i<ITERS; i++) {
 941       test_srlv_and(a0, a1, BIT_MASK);
 942     }
 943     end = System.currentTimeMillis();
 944     System.out.println("test_srlv_and: " + (end - start));
 945 
 946     start = System.currentTimeMillis();
 947     for (int i=0; i<ITERS; i++) {
 948       test_srac_and(a0, a1);
 949     }
 950     end = System.currentTimeMillis();
 951     System.out.println("test_srac_and: " + (end - start));
 952     start = System.currentTimeMillis();
 953     for (int i=0; i<ITERS; i++) {
 954       test_srav_and(a0, a1, BIT_MASK);
 955     }
 956     end = System.currentTimeMillis();
 957     System.out.println("test_srav_and: " + (end - start));
 958 
 959     start = System.currentTimeMillis();
 960     for (int i=0; i<ITERS; i++) {
 961       test_pack2(p2, a1);
 962     }
 963     end = System.currentTimeMillis();
 964     System.out.println("test_pack2: " + (end - start));
 965     start = System.currentTimeMillis();
 966     for (int i=0; i<ITERS; i++) {
 967       test_unpack2(a0, p2);
 968     }
 969     end = System.currentTimeMillis();
 970     System.out.println("test_unpack2: " + (end - start));
 971     start = System.currentTimeMillis();
 972     for (int i=0; i<ITERS; i++) {
 973       test_pack2_swap(p2, a1);
 974     }
 975     end = System.currentTimeMillis();
 976     System.out.println("test_pack2_swap: " + (end - start));
 977     start = System.currentTimeMillis();
 978     for (int i=0; i<ITERS; i++) {
 979       test_unpack2_swap(a0, p2);
 980     }
 981     end = System.currentTimeMillis();
 982     System.out.println("test_unpack2_swap: " + (end - start));
 983 
 984     start = System.currentTimeMillis();
 985     for (int i=0; i<ITERS; i++) {
 986       test_pack4(p4, a1);
 987     }
 988     end = System.currentTimeMillis();
 989     System.out.println("test_pack4: " + (end - start));
 990     start = System.currentTimeMillis();
 991     for (int i=0; i<ITERS; i++) {
 992       test_unpack4(a0, p4);
 993     }
 994     end = System.currentTimeMillis();
 995     System.out.println("test_unpack4: " + (end - start));
 996     start = System.currentTimeMillis();
 997     for (int i=0; i<ITERS; i++) {
 998       test_pack4_swap(p4, a1);
 999     }
1000     end = System.currentTimeMillis();
1001     System.out.println("test_pack4_swap: " + (end - start));
1002     start = System.currentTimeMillis();
1003     for (int i=0; i<ITERS; i++) {
1004       test_unpack4_swap(a0, p4);
1005     }
1006     end = System.currentTimeMillis();
1007     System.out.println("test_unpack4_swap: " + (end - start));
1008 
1009     start = System.currentTimeMillis();
1010     for (int i=0; i<ITERS; i++) {
1011       test_pack8(p8, a1);
1012     }
1013     end = System.currentTimeMillis();
1014     System.out.println("test_pack8: " + (end - start));
1015     start = System.currentTimeMillis();
1016     for (int i=0; i<ITERS; i++) {
1017       test_unpack8(a0, p8);
1018     }
1019     end = System.currentTimeMillis();
1020     System.out.println("test_unpack8: " + (end - start));
1021     start = System.currentTimeMillis();
1022     for (int i=0; i<ITERS; i++) {
1023       test_pack8_swap(p8, a1);
1024     }
1025     end = System.currentTimeMillis();
1026     System.out.println("test_pack8_swap: " + (end - start));
1027     start = System.currentTimeMillis();
1028     for (int i=0; i<ITERS; i++) {
1029       test_unpack8_swap(a0, p8);
1030     }
1031     end = System.currentTimeMillis();
1032     System.out.println("test_unpack8_swap: " + (end - start));
1033 
1034     return errn;
1035   }
1036 
1037   static int test_sum(byte[] a1) {
1038     int sum = 0;
1039     for (int i = 0; i < a1.length; i+=1) {
1040       sum += a1[i];
1041     }
1042     return sum;
1043   }
1044 
1045   static void test_addc(byte[] a0, byte[] a1) {
1046     for (int i = 0; i < a0.length; i+=1) {
1047       a0[i] = (byte)(a1[i]+VALUE);
1048     }
1049   }
1050   static void test_addv(byte[] a0, byte[] a1, byte b) {
1051     for (int i = 0; i < a0.length; i+=1) {
1052       a0[i] = (byte)(a1[i]+b);
1053     }
1054   }
1055   static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
1056     for (int i = 0; i < a0.length; i+=1) {
1057       a0[i] = (byte)(a1[i]+a2[i]);
1058     }
1059   }
1060 
1061   static void test_subc(byte[] a0, byte[] a1) {
1062     for (int i = 0; i < a0.length; i+=1) {
1063       a0[i] = (byte)(a1[i]-VALUE);
1064     }
1065   }
1066   static void test_subv(byte[] a0, byte[] a1, byte b) {
1067     for (int i = 0; i < a0.length; i+=1) {
1068       a0[i] = (byte)(a1[i]-b);
1069     }
1070   }
1071   static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
1072     for (int i = 0; i < a0.length; i+=1) {
1073       a0[i] = (byte)(a1[i]-a2[i]);
1074     }
1075   }
1076 
1077   static void test_mulc(byte[] a0, byte[] a1) {
1078     for (int i = 0; i < a0.length; i+=1) {
1079       a0[i] = (byte)(a1[i]*VALUE);
1080     }
1081   }
1082   static void test_mulc_n(byte[] a0, byte[] a1) {
1083     for (int i = 0; i < a0.length; i+=1) {
1084       a0[i] = (byte)(a1[i]*(-VALUE));
1085     }
1086   }
1087   static void test_mulv(byte[] a0, byte[] a1, byte b) {
1088     for (int i = 0; i < a0.length; i+=1) {
1089       a0[i] = (byte)(a1[i]*b);
1090     }
1091   }
1092   static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
1093     for (int i = 0; i < a0.length; i+=1) {
1094       a0[i] = (byte)(a1[i]*a2[i]);
1095     }
1096   }
1097 
1098   static void test_divc(byte[] a0, byte[] a1) {
1099     for (int i = 0; i < a0.length; i+=1) {
1100       a0[i] = (byte)(a1[i]/VALUE);
1101     }
1102   }
1103   static void test_divc_n(byte[] a0, byte[] a1) {
1104     for (int i = 0; i < a0.length; i+=1) {
1105       a0[i] = (byte)(a1[i]/(-VALUE));
1106     }
1107   }
1108   static void test_divv(byte[] a0, byte[] a1, byte b) {
1109     for (int i = 0; i < a0.length; i+=1) {
1110       a0[i] = (byte)(a1[i]/b);
1111     }
1112   }
1113   static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
1114     for (int i = 0; i < a0.length; i+=1) {
1115       a0[i] = (byte)(a1[i]/a2[i]);
1116     }
1117   }
1118 
1119   static void test_andc(byte[] a0, byte[] a1) {
1120     for (int i = 0; i < a0.length; i+=1) {
1121       a0[i] = (byte)(a1[i]&BIT_MASK);
1122     }
1123   }
1124   static void test_andv(byte[] a0, byte[] a1, byte b) {
1125     for (int i = 0; i < a0.length; i+=1) {
1126       a0[i] = (byte)(a1[i]&b);
1127     }
1128   }
1129   static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
1130     for (int i = 0; i < a0.length; i+=1) {
1131       a0[i] = (byte)(a1[i]&a2[i]);
1132     }
1133   }
1134 
1135   static void test_orc(byte[] a0, byte[] a1) {
1136     for (int i = 0; i < a0.length; i+=1) {
1137       a0[i] = (byte)(a1[i]|BIT_MASK);
1138     }
1139   }
1140   static void test_orv(byte[] a0, byte[] a1, byte b) {
1141     for (int i = 0; i < a0.length; i+=1) {
1142       a0[i] = (byte)(a1[i]|b);
1143     }
1144   }
1145   static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
1146     for (int i = 0; i < a0.length; i+=1) {
1147       a0[i] = (byte)(a1[i]|a2[i]);
1148     }
1149   }
1150 
1151   static void test_xorc(byte[] a0, byte[] a1) {
1152     for (int i = 0; i < a0.length; i+=1) {
1153       a0[i] = (byte)(a1[i]^BIT_MASK);
1154     }
1155   }
1156   static void test_xorv(byte[] a0, byte[] a1, byte b) {
1157     for (int i = 0; i < a0.length; i+=1) {
1158       a0[i] = (byte)(a1[i]^b);
1159     }
1160   }
1161   static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
1162     for (int i = 0; i < a0.length; i+=1) {
1163       a0[i] = (byte)(a1[i]^a2[i]);
1164     }
1165   }
1166 
1167   static void test_sllc(byte[] a0, byte[] a1) {
1168     for (int i = 0; i < a0.length; i+=1) {
1169       a0[i] = (byte)(a1[i]<<VALUE);
1170     }
1171   }
1172   static void test_sllc_n(byte[] a0, byte[] a1) {
1173     for (int i = 0; i < a0.length; i+=1) {
1174       a0[i] = (byte)(a1[i]<<(-VALUE));
1175     }
1176   }
1177   static void test_sllc_o(byte[] a0, byte[] a1) {
1178     for (int i = 0; i < a0.length; i+=1) {
1179       a0[i] = (byte)(a1[i]<<SHIFT);
1180     }
1181   }
1182   static void test_sllc_on(byte[] a0, byte[] a1) {
1183     for (int i = 0; i < a0.length; i+=1) {
1184       a0[i] = (byte)(a1[i]<<(-SHIFT));
1185     }
1186   }
1187   static void test_sllv(byte[] a0, byte[] a1, int b) {
1188     for (int i = 0; i < a0.length; i+=1) {
1189       a0[i] = (byte)(a1[i]<<b);
1190     }
1191   }
1192   static void test_sllc_add(byte[] a0, byte[] a1) {
1193     for (int i = 0; i < a0.length; i+=1) {
1194       a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
1195     }
1196   }
1197   static void test_sllv_add(byte[] a0, byte[] a1, int b) {
1198     for (int i = 0; i < a0.length; i+=1) {
1199       a0[i] = (byte)((a1[i] + b)<<VALUE);
1200     }
1201   }
1202   static void test_sllc_and(byte[] a0, byte[] a1) {
1203     for (int i = 0; i < a0.length; i+=1) {
1204       a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
1205     }
1206   }
1207   static void test_sllv_and(byte[] a0, byte[] a1, int b) {
1208     for (int i = 0; i < a0.length; i+=1) {
1209       a0[i] = (byte)((a1[i] & b)<<VALUE);
1210     }
1211   }
1212 
1213   static void test_srlc(byte[] a0, byte[] a1) {
1214     for (int i = 0; i < a0.length; i+=1) {
1215       a0[i] = (byte)(a1[i]>>>VALUE);
1216     }
1217   }
1218   static void test_srlc_n(byte[] a0, byte[] a1) {
1219     for (int i = 0; i < a0.length; i+=1) {
1220       a0[i] = (byte)(a1[i]>>>(-VALUE));
1221     }
1222   }
1223   static void test_srlc_o(byte[] a0, byte[] a1) {
1224     for (int i = 0; i < a0.length; i+=1) {
1225       a0[i] = (byte)(a1[i]>>>SHIFT);
1226     }
1227   }
1228   static void test_srlc_on(byte[] a0, byte[] a1) {
1229     for (int i = 0; i < a0.length; i+=1) {
1230       a0[i] = (byte)(a1[i]>>>(-SHIFT));
1231     }
1232   }
1233   static void test_srlv(byte[] a0, byte[] a1, int b) {
1234     for (int i = 0; i < a0.length; i+=1) {
1235       a0[i] = (byte)(a1[i]>>>b);
1236     }
1237   }
1238   static void test_srlc_add(byte[] a0, byte[] a1) {
1239     for (int i = 0; i < a0.length; i+=1) {
1240       a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
1241     }
1242   }
1243   static void test_srlv_add(byte[] a0, byte[] a1, int b) {
1244     for (int i = 0; i < a0.length; i+=1) {
1245       a0[i] = (byte)((a1[i] + b)>>>VALUE);
1246     }
1247   }
1248   static void test_srlc_and(byte[] a0, byte[] a1) {
1249     for (int i = 0; i < a0.length; i+=1) {
1250       a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
1251     }
1252   }
1253   static void test_srlv_and(byte[] a0, byte[] a1, int b) {
1254     for (int i = 0; i < a0.length; i+=1) {
1255       a0[i] = (byte)((a1[i] & b)>>>VALUE);
1256     }
1257   }
1258 
1259   static void test_srac(byte[] a0, byte[] a1) {
1260     for (int i = 0; i < a0.length; i+=1) {
1261       a0[i] = (byte)(a1[i]>>VALUE);
1262     }
1263   }
1264   static void test_srac_n(byte[] a0, byte[] a1) {
1265     for (int i = 0; i < a0.length; i+=1) {
1266       a0[i] = (byte)(a1[i]>>(-VALUE));
1267     }
1268   }
1269   static void test_srac_o(byte[] a0, byte[] a1) {
1270     for (int i = 0; i < a0.length; i+=1) {
1271       a0[i] = (byte)(a1[i]>>SHIFT);
1272     }
1273   }
1274   static void test_srac_on(byte[] a0, byte[] a1) {
1275     for (int i = 0; i < a0.length; i+=1) {
1276       a0[i] = (byte)(a1[i]>>(-SHIFT));
1277     }
1278   }
1279   static void test_srav(byte[] a0, byte[] a1, int b) {
1280     for (int i = 0; i < a0.length; i+=1) {
1281       a0[i] = (byte)(a1[i]>>b);
1282     }
1283   }
1284   static void test_srac_add(byte[] a0, byte[] a1) {
1285     for (int i = 0; i < a0.length; i+=1) {
1286       a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
1287     }
1288   }
1289   static void test_srav_add(byte[] a0, byte[] a1, int b) {
1290     for (int i = 0; i < a0.length; i+=1) {
1291       a0[i] = (byte)((a1[i] + b)>>VALUE);
1292     }
1293   }
1294   static void test_srac_and(byte[] a0, byte[] a1) {
1295     for (int i = 0; i < a0.length; i+=1) {
1296       a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
1297     }
1298   }
1299   static void test_srav_and(byte[] a0, byte[] a1, int b) {
1300     for (int i = 0; i < a0.length; i+=1) {
1301       a0[i] = (byte)((a1[i] & b)>>VALUE);
1302     }
1303   }
1304 
1305   static void test_pack2(short[] p2, byte[] a1) {
1306     if (p2.length*2 > a1.length) return;
1307     for (int i = 0; i < p2.length; i+=1) {
1308       short l0 = (short)a1[i*2+0];
1309       short l1 = (short)a1[i*2+1];
1310       p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
1311     }
1312   }
1313   static void test_unpack2(byte[] a0, short[] p2) {
1314     if (p2.length*2 > a0.length) return;
1315     for (int i = 0; i < p2.length; i+=1) {
1316       short l = p2[i];
1317       a0[i*2+0] = (byte)(l & 0xFF);
1318       a0[i*2+1] = (byte)(l >> 8);
1319     }
1320   }
1321   static void test_pack2_swap(short[] p2, byte[] a1) {
1322     if (p2.length*2 > a1.length) return;
1323     for (int i = 0; i < p2.length; i+=1) {
1324       short l0 = (short)a1[i*2+0];
1325       short l1 = (short)a1[i*2+1];
1326       p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
1327     }
1328   }
1329   static void test_unpack2_swap(byte[] a0, short[] p2) {
1330     if (p2.length*2 > a0.length) return;
1331     for (int i = 0; i < p2.length; i+=1) {
1332       short l = p2[i];
1333       a0[i*2+0] = (byte)(l >> 8);
1334       a0[i*2+1] = (byte)(l & 0xFF);
1335     }
1336   }
1337 
1338   static void test_pack4(int[] p4, byte[] a1) {
1339     if (p4.length*4 > a1.length) return;
1340     for (int i = 0; i < p4.length; i+=1) {
1341       int l0 = (int)a1[i*4+0];
1342       int l1 = (int)a1[i*4+1];
1343       int l2 = (int)a1[i*4+2];
1344       int l3 = (int)a1[i*4+3];
1345       p4[i] = (l0 & 0xFF) |
1346              ((l1 & 0xFF) <<  8) |
1347              ((l2 & 0xFF) << 16) |
1348              ((l3 & 0xFF) << 24);
1349     }
1350   }
1351   static void test_unpack4(byte[] a0, int[] p4) {
1352     if (p4.length*4 > a0.length) return;
1353     for (int i = 0; i < p4.length; i+=1) {
1354       int l = p4[i];
1355       a0[i*4+0] = (byte)(l & 0xFF);
1356       a0[i*4+1] = (byte)(l >>  8);
1357       a0[i*4+2] = (byte)(l >> 16);
1358       a0[i*4+3] = (byte)(l >> 24);
1359     }
1360   }
1361   static void test_pack4_swap(int[] p4, byte[] a1) {
1362     if (p4.length*4 > a1.length) return;
1363     for (int i = 0; i < p4.length; i+=1) {
1364       int l0 = (int)a1[i*4+0];
1365       int l1 = (int)a1[i*4+1];
1366       int l2 = (int)a1[i*4+2];
1367       int l3 = (int)a1[i*4+3];
1368       p4[i] = (l3 & 0xFF) |
1369              ((l2 & 0xFF) <<  8) |
1370              ((l1 & 0xFF) << 16) |
1371              ((l0 & 0xFF) << 24);
1372     }
1373   }
1374   static void test_unpack4_swap(byte[] a0, int[] p4) {
1375     if (p4.length*4 > a0.length) return;
1376     for (int i = 0; i < p4.length; i+=1) {
1377       int l = p4[i];
1378       a0[i*4+0] = (byte)(l >> 24);
1379       a0[i*4+1] = (byte)(l >> 16);
1380       a0[i*4+2] = (byte)(l >>  8);
1381       a0[i*4+3] = (byte)(l & 0xFF);
1382     }
1383   }
1384 
1385   static void test_pack8(long[] p8, byte[] a1) {
1386     if (p8.length*8 > a1.length) return;
1387     for (int i = 0; i < p8.length; i+=1) {
1388       long l0 = (long)a1[i*8+0];
1389       long l1 = (long)a1[i*8+1];
1390       long l2 = (long)a1[i*8+2];
1391       long l3 = (long)a1[i*8+3];
1392       long l4 = (long)a1[i*8+4];
1393       long l5 = (long)a1[i*8+5];
1394       long l6 = (long)a1[i*8+6];
1395       long l7 = (long)a1[i*8+7];
1396       p8[i] = (l0 & 0xFFl) |
1397              ((l1 & 0xFFl) <<  8) |
1398              ((l2 & 0xFFl) << 16) |
1399              ((l3 & 0xFFl) << 24) |
1400              ((l4 & 0xFFl) << 32) |
1401              ((l5 & 0xFFl) << 40) |
1402              ((l6 & 0xFFl) << 48) |
1403              ((l7 & 0xFFl) << 56);
1404     }
1405   }
1406   static void test_unpack8(byte[] a0, long[] p8) {
1407     if (p8.length*8 > a0.length) return;
1408     for (int i = 0; i < p8.length; i+=1) {
1409       long l = p8[i];
1410       a0[i*8+0] = (byte)(l & 0xFFl);
1411       a0[i*8+1] = (byte)(l >>  8);
1412       a0[i*8+2] = (byte)(l >> 16);
1413       a0[i*8+3] = (byte)(l >> 24);
1414       a0[i*8+4] = (byte)(l >> 32);
1415       a0[i*8+5] = (byte)(l >> 40);
1416       a0[i*8+6] = (byte)(l >> 48);
1417       a0[i*8+7] = (byte)(l >> 56);
1418     }
1419   }
1420   static void test_pack8_swap(long[] p8, byte[] a1) {
1421     if (p8.length*8 > a1.length) return;
1422     for (int i = 0; i < p8.length; i+=1) {
1423       long l0 = (long)a1[i*8+0];
1424       long l1 = (long)a1[i*8+1];
1425       long l2 = (long)a1[i*8+2];
1426       long l3 = (long)a1[i*8+3];
1427       long l4 = (long)a1[i*8+4];
1428       long l5 = (long)a1[i*8+5];
1429       long l6 = (long)a1[i*8+6];
1430       long l7 = (long)a1[i*8+7];
1431       p8[i] = (l7 & 0xFFl) |
1432              ((l6 & 0xFFl) <<  8) |
1433              ((l5 & 0xFFl) << 16) |
1434              ((l4 & 0xFFl) << 24) |
1435              ((l3 & 0xFFl) << 32) |
1436              ((l2 & 0xFFl) << 40) |
1437              ((l1 & 0xFFl) << 48) |
1438              ((l0 & 0xFFl) << 56);
1439     }
1440   }
1441   static void test_unpack8_swap(byte[] a0, long[] p8) {
1442     if (p8.length*8 > a0.length) return;
1443     for (int i = 0; i < p8.length; i+=1) {
1444       long l = p8[i];
1445       a0[i*8+0] = (byte)(l >> 56);
1446       a0[i*8+1] = (byte)(l >> 48);
1447       a0[i*8+2] = (byte)(l >> 40);
1448       a0[i*8+3] = (byte)(l >> 32);
1449       a0[i*8+4] = (byte)(l >> 24);
1450       a0[i*8+5] = (byte)(l >> 16);
1451       a0[i*8+6] = (byte)(l >>  8);
1452       a0[i*8+7] = (byte)(l & 0xFFl);
1453     }
1454   }
1455 
1456   static int verify(String text, int i, byte elem, byte val) {
1457     if (elem != val) {
1458       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1459       return 1;
1460     }
1461     return 0;
1462   }
1463 
1464   static int verify(String text, int i, short elem, short val) {
1465     if (elem != val) {
1466       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1467       return 1;
1468     }
1469     return 0;
1470   }
1471 
1472   static int verify(String text, int i, int elem, int val) {
1473     if (elem != val) {
1474       System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
1475       return 1;
1476     }
1477     return 0;
1478   }
1479 
1480   static int verify(String text, int i, long elem, long val) {
1481     if (elem != val) {
1482       System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
1483       return 1;
1484     }
1485     return 0;
1486   }
1487 }