1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 /**
  26  * @test
  27  * @bug 6340864
  28  * @summary Implement vectorization optimizations in hotspot-server
  29  *
  30  * @run main/othervm/timeout=400 -Xbatch -Xmx64m compiler.c2.cr6340864.TestByteVect
  31  */
  32 
  33 package compiler.c2.cr6340864;
  34 
  35 public class TestByteVect {
  36   private static final int ARRLEN = 997;   // length of each byte[] in test(); packed short/int/long arrays use ARRLEN/2, /4, /8
  37   private static final int ITERS  = 11000; // how many times each kernel is invoked in the warmup loop and in each timing loop
  38   private static final int ADD_INIT = 63;  // input data is a1[i] = (byte)(ADD_INIT+i); also passed as the addend to the *_add shift tests
  39   private static final int BIT_MASK = 0xB7; // mask operand of the and/or/xor tests and the *_and shift tests; fills a4 as (byte)BIT_MASK
  40   private static final int VALUE = 3;      // operand of the add/sub/mul/div tests and shift count of the plain shift tests; negated for the *_n variants
  41   private static final int SHIFT = 8;      // shift count of the *_o shift tests; negated for the *_on variants
  42 
  43   public static void main(String args[]) {
  44     System.out.println("Testing Byte vectors");
  45     int errn = test();
  46     if (errn > 0) {
  47       System.err.println("FAILED: " + errn + " errors");
  48       System.exit(97);
  49     }
  50     System.out.println("PASSED");
  51   }
  52 
  53   static int test() {
  54     byte[] a0 = new byte[ARRLEN];
  55     byte[] a1 = new byte[ARRLEN];
  56     byte[] a2 = new byte[ARRLEN];
  57     byte[] a3 = new byte[ARRLEN];
  58     byte[] a4 = new byte[ARRLEN];
  59     short[] p2 = new short[ARRLEN/2];
  60       int[] p4 = new   int[ARRLEN/4];
  61      long[] p8 = new  long[ARRLEN/8];
  62     // Initialize
  63     int gold_sum = 0;
  64     for (int i=0; i<ARRLEN; i++) {
  65       byte val = (byte)(ADD_INIT+i);
  66       gold_sum += val;
  67       a1[i] = val;
  68       a2[i] = (byte)VALUE;
  69       a3[i] = (byte)-VALUE;
  70       a4[i] = (byte)BIT_MASK;
  71     }
  72     System.out.println("Warmup");
  73     for (int i=0; i<ITERS; i++) {
  74       test_sum(a1);
  75       test_addc(a0, a1);
  76       test_addv(a0, a1, (byte)VALUE);
  77       test_adda(a0, a1, a2);
  78       test_subc(a0, a1);
  79       test_subv(a0, a1, (byte)VALUE);
  80       test_suba(a0, a1, a2);
  81 
  82       test_mulc(a0, a1);
  83       test_mulv(a0, a1, (byte)VALUE);
  84       test_mula(a0, a1, a2);
  85       test_divc(a0, a1);
  86       test_divv(a0, a1, (byte)VALUE);
  87       test_diva(a0, a1, a2);
  88       test_mulc_n(a0, a1);
  89       test_mulv(a0, a1, (byte)-VALUE);
  90       test_mula(a0, a1, a3);
  91       test_divc_n(a0, a1);
  92       test_divv(a0, a1, (byte)-VALUE);
  93       test_diva(a0, a1, a3);
  94 
  95       test_andc(a0, a1);
  96       test_andv(a0, a1, (byte)BIT_MASK);
  97       test_anda(a0, a1, a4);
  98       test_orc(a0, a1);
  99       test_orv(a0, a1, (byte)BIT_MASK);
 100       test_ora(a0, a1, a4);
 101       test_xorc(a0, a1);
 102       test_xorv(a0, a1, (byte)BIT_MASK);
 103       test_xora(a0, a1, a4);
 104 
 105       test_sllc(a0, a1);
 106       test_sllv(a0, a1, VALUE);
 107       test_srlc(a0, a1);
 108       test_srlv(a0, a1, VALUE);
 109       test_srac(a0, a1);
 110       test_srav(a0, a1, VALUE);
 111 
 112       test_sllc_n(a0, a1);
 113       test_sllv(a0, a1, -VALUE);
 114       test_srlc_n(a0, a1);
 115       test_srlv(a0, a1, -VALUE);
 116       test_srac_n(a0, a1);
 117       test_srav(a0, a1, -VALUE);
 118 
 119       test_sllc_o(a0, a1);
 120       test_sllv(a0, a1, SHIFT);
 121       test_srlc_o(a0, a1);
 122       test_srlv(a0, a1, SHIFT);
 123       test_srac_o(a0, a1);
 124       test_srav(a0, a1, SHIFT);
 125 
 126       test_sllc_on(a0, a1);
 127       test_sllv(a0, a1, -SHIFT);
 128       test_srlc_on(a0, a1);
 129       test_srlv(a0, a1, -SHIFT);
 130       test_srac_on(a0, a1);
 131       test_srav(a0, a1, -SHIFT);
 132 
 133       test_sllc_add(a0, a1);
 134       test_sllv_add(a0, a1, ADD_INIT);
 135       test_srlc_add(a0, a1);
 136       test_srlv_add(a0, a1, ADD_INIT);
 137       test_srac_add(a0, a1);
 138       test_srav_add(a0, a1, ADD_INIT);
 139 
 140       test_sllc_and(a0, a1);
 141       test_sllv_and(a0, a1, BIT_MASK);
 142       test_srlc_and(a0, a1);
 143       test_srlv_and(a0, a1, BIT_MASK);
 144       test_srac_and(a0, a1);
 145       test_srav_and(a0, a1, BIT_MASK);
 146 
 147       test_pack2(p2, a1);
 148       test_unpack2(a0, p2);
 149       test_pack2_swap(p2, a1);
 150       test_unpack2_swap(a0, p2);
 151       test_pack4(p4, a1);
 152       test_unpack4(a0, p4);
 153       test_pack4_swap(p4, a1);
 154       test_unpack4_swap(a0, p4);
 155       test_pack8(p8, a1);
 156       test_unpack8(a0, p8);
 157       test_pack8_swap(p8, a1);
 158       test_unpack8_swap(a0, p8);
 159     }
 160     // Test and verify results
 161     System.out.println("Verification");
 162     int errn = 0;
 163     {
 164       int sum = test_sum(a1);
 165       if (sum != gold_sum) {
 166         System.err.println("test_sum:  " + sum + " != " + gold_sum);
 167         errn++;
 168       }
 169 
 170       test_addc(a0, a1);
 171       for (int i=0; i<ARRLEN; i++) {
 172         errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 173       }
 174       test_addv(a0, a1, (byte)VALUE);
 175       for (int i=0; i<ARRLEN; i++) {
 176         errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 177       }
 178       test_adda(a0, a1, a2);
 179       for (int i=0; i<ARRLEN; i++) {
 180         errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
 181       }
 182 
 183       test_subc(a0, a1);
 184       for (int i=0; i<ARRLEN; i++) {
 185         errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 186       }
 187       test_subv(a0, a1, (byte)VALUE);
 188       for (int i=0; i<ARRLEN; i++) {
 189         errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 190       }
 191       test_suba(a0, a1, a2);
 192       for (int i=0; i<ARRLEN; i++) {
 193         errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
 194       }
 195 
 196       test_mulc(a0, a1);
 197       for (int i=0; i<ARRLEN; i++) {
 198         errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 199       }
 200       test_mulv(a0, a1, (byte)VALUE);
 201       for (int i=0; i<ARRLEN; i++) {
 202         errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 203       }
 204       test_mula(a0, a1, a2);
 205       for (int i=0; i<ARRLEN; i++) {
 206         errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
 207       }
 208 
 209       test_divc(a0, a1);
 210       for (int i=0; i<ARRLEN; i++) {
 211         errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 212       }
 213       test_divv(a0, a1, (byte)VALUE);
 214       for (int i=0; i<ARRLEN; i++) {
 215         errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 216       }
 217       test_diva(a0, a1, a2);
 218       for (int i=0; i<ARRLEN; i++) {
 219         errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
 220       }
 221 
 222       test_mulc_n(a0, a1);
 223       for (int i=0; i<ARRLEN; i++) {
 224         errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 225       }
 226       test_mulv(a0, a1, (byte)-VALUE);
 227       for (int i=0; i<ARRLEN; i++) {
 228         errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 229       }
 230       test_mula(a0, a1, a3);
 231       for (int i=0; i<ARRLEN; i++) {
 232         errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
 233       }
 234 
 235       test_divc_n(a0, a1);
 236       for (int i=0; i<ARRLEN; i++) {
 237         errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 238       }
 239       test_divv(a0, a1, (byte)-VALUE);
 240       for (int i=0; i<ARRLEN; i++) {
 241         errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 242       }
 243       test_diva(a0, a1, a3);
 244       for (int i=0; i<ARRLEN; i++) {
 245         errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
 246       }
 247 
 248       test_andc(a0, a1);
 249       for (int i=0; i<ARRLEN; i++) {
 250         errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 251       }
 252       test_andv(a0, a1, (byte)BIT_MASK);
 253       for (int i=0; i<ARRLEN; i++) {
 254         errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 255       }
 256       test_anda(a0, a1, a4);
 257       for (int i=0; i<ARRLEN; i++) {
 258         errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
 259       }
 260 
 261       test_orc(a0, a1);
 262       for (int i=0; i<ARRLEN; i++) {
 263         errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 264       }
 265       test_orv(a0, a1, (byte)BIT_MASK);
 266       for (int i=0; i<ARRLEN; i++) {
 267         errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 268       }
 269       test_ora(a0, a1, a4);
 270       for (int i=0; i<ARRLEN; i++) {
 271         errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
 272       }
 273 
 274       test_xorc(a0, a1);
 275       for (int i=0; i<ARRLEN; i++) {
 276         errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 277       }
 278       test_xorv(a0, a1, (byte)BIT_MASK);
 279       for (int i=0; i<ARRLEN; i++) {
 280         errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 281       }
 282       test_xora(a0, a1, a4);
 283       for (int i=0; i<ARRLEN; i++) {
 284         errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
 285       }
 286 
 287       test_sllc(a0, a1);
 288       for (int i=0; i<ARRLEN; i++) {
 289         errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 290       }
 291       test_sllv(a0, a1, VALUE);
 292       for (int i=0; i<ARRLEN; i++) {
 293         errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
 294       }
 295 
 296       test_srlc(a0, a1);
 297       for (int i=0; i<ARRLEN; i++) {
 298         errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 299       }
 300       test_srlv(a0, a1, VALUE);
 301       for (int i=0; i<ARRLEN; i++) {
 302         errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
 303       }
 304 
 305       test_srac(a0, a1);
 306       for (int i=0; i<ARRLEN; i++) {
 307         errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 308       }
 309       test_srav(a0, a1, VALUE);
 310       for (int i=0; i<ARRLEN; i++) {
 311         errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
 312       }
 313 
 314       test_sllc_n(a0, a1);
 315       for (int i=0; i<ARRLEN; i++) {
 316         errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 317       }
 318       test_sllv(a0, a1, -VALUE);
 319       for (int i=0; i<ARRLEN; i++) {
 320         errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
 321       }
 322 
 323       test_srlc_n(a0, a1);
 324       for (int i=0; i<ARRLEN; i++) {
 325         errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 326       }
 327       test_srlv(a0, a1, -VALUE);
 328       for (int i=0; i<ARRLEN; i++) {
 329         errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
 330       }
 331 
 332       test_srac_n(a0, a1);
 333       for (int i=0; i<ARRLEN; i++) {
 334         errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 335       }
 336       test_srav(a0, a1, -VALUE);
 337       for (int i=0; i<ARRLEN; i++) {
 338         errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
 339       }
 340 
 341       test_sllc_o(a0, a1);
 342       for (int i=0; i<ARRLEN; i++) {
 343         errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 344       }
 345       test_sllv(a0, a1, SHIFT);
 346       for (int i=0; i<ARRLEN; i++) {
 347         errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
 348       }
 349 
 350       test_srlc_o(a0, a1);
 351       for (int i=0; i<ARRLEN; i++) {
 352         errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 353       }
 354       test_srlv(a0, a1, SHIFT);
 355       for (int i=0; i<ARRLEN; i++) {
 356         errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
 357       }
 358 
 359       test_srac_o(a0, a1);
 360       for (int i=0; i<ARRLEN; i++) {
 361         errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 362       }
 363       test_srav(a0, a1, SHIFT);
 364       for (int i=0; i<ARRLEN; i++) {
 365         errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
 366       }
 367 
 368       test_sllc_on(a0, a1);
 369       for (int i=0; i<ARRLEN; i++) {
 370         errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 371       }
 372       test_sllv(a0, a1, -SHIFT);
 373       for (int i=0; i<ARRLEN; i++) {
 374         errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
 375       }
 376 
 377       test_srlc_on(a0, a1);
 378       for (int i=0; i<ARRLEN; i++) {
 379         errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 380       }
 381       test_srlv(a0, a1, -SHIFT);
 382       for (int i=0; i<ARRLEN; i++) {
 383         errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
 384       }
 385 
 386       test_srac_on(a0, a1);
 387       for (int i=0; i<ARRLEN; i++) {
 388         errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 389       }
 390       test_srav(a0, a1, -SHIFT);
 391       for (int i=0; i<ARRLEN; i++) {
 392         errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
 393       }
 394 
 395       test_sllc_add(a0, a1);
 396       for (int i=0; i<ARRLEN; i++) {
 397         errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 398       }
 399       test_sllv_add(a0, a1, ADD_INIT);
 400       for (int i=0; i<ARRLEN; i++) {
 401         errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
 402       }
 403 
 404       test_srlc_add(a0, a1);
 405       for (int i=0; i<ARRLEN; i++) {
 406         errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 407       }
 408       test_srlv_add(a0, a1, ADD_INIT);
 409       for (int i=0; i<ARRLEN; i++) {
 410         errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
 411       }
 412 
 413       test_srac_add(a0, a1);
 414       for (int i=0; i<ARRLEN; i++) {
 415         errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 416       }
 417       test_srav_add(a0, a1, ADD_INIT);
 418       for (int i=0; i<ARRLEN; i++) {
 419         errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
 420       }
 421 
 422       test_sllc_and(a0, a1);
 423       for (int i=0; i<ARRLEN; i++) {
 424         errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 425       }
 426       test_sllv_and(a0, a1, BIT_MASK);
 427       for (int i=0; i<ARRLEN; i++) {
 428         errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
 429       }
 430 
 431       test_srlc_and(a0, a1);
 432       for (int i=0; i<ARRLEN; i++) {
 433         errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 434       }
 435       test_srlv_and(a0, a1, BIT_MASK);
 436       for (int i=0; i<ARRLEN; i++) {
 437         errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
 438       }
 439 
 440       test_srac_and(a0, a1);
 441       for (int i=0; i<ARRLEN; i++) {
 442         errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 443       }
 444       test_srav_and(a0, a1, BIT_MASK);
 445       for (int i=0; i<ARRLEN; i++) {
 446         errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
 447       }
 448 
 449       test_pack2(p2, a1);
 450       for (int i=0; i<ARRLEN/2; i++) {
 451         errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
 452       }
 453       for (int i=0; i<ARRLEN; i++) {
 454         a0[i] = -1;
 455       }
 456       test_unpack2(a0, p2);
 457       for (int i=0; i<(ARRLEN&(-2)); i++) {
 458         errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
 459       }
 460 
 461       test_pack2_swap(p2, a1);
 462       for (int i=0; i<ARRLEN/2; i++) {
 463         errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
 464       }
 465       for (int i=0; i<ARRLEN; i++) {
 466         a0[i] = -1;
 467       }
 468       test_unpack2_swap(a0, p2);
 469       for (int i=0; i<(ARRLEN&(-2)); i++) {
 470         errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 471       }
 472 
 473       test_pack4(p4, a1);
 474       for (int i=0; i<ARRLEN/4; i++) {
 475         errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
 476                                                  (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
 477                                                  (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
 478                                                  (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
 479       }
 480       for (int i=0; i<ARRLEN; i++) {
 481         a0[i] = -1;
 482       }
 483       test_unpack4(a0, p4);
 484       for (int i=0; i<(ARRLEN&(-4)); i++) {
 485         errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
 486       }
 487 
 488       test_pack4_swap(p4, a1);
 489       for (int i=0; i<ARRLEN/4; i++) {
 490         errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
 491                                                       (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
 492                                                       (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
 493                                                       (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
 494       }
 495       for (int i=0; i<ARRLEN; i++) {
 496         a0[i] = -1;
 497       }
 498       test_unpack4_swap(a0, p4);
 499       for (int i=0; i<(ARRLEN&(-4)); i++) {
 500         errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 501       }
 502 
 503       test_pack8(p8, a1);
 504       for (int i=0; i<ARRLEN/8; i++) {
 505         errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
 506                                                  (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
 507                                                  (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
 508                                                  (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
 509                                                  (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
 510                                                  (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
 511                                                  (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
 512                                                  (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
 513       }
 514       for (int i=0; i<ARRLEN; i++) {
 515         a0[i] = -1;
 516       }
 517       test_unpack8(a0, p8);
 518       for (int i=0; i<(ARRLEN&(-8)); i++) {
 519         errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
 520       }
 521 
 522       test_pack8_swap(p8, a1);
 523       for (int i=0; i<ARRLEN/8; i++) {
 524         errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
 525                                                       (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
 526                                                       (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
 527                                                       (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
 528                                                       (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
 529                                                       (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
 530                                                       (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
 531                                                       (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
 532       }
 533       for (int i=0; i<ARRLEN; i++) {
 534         a0[i] = -1;
 535       }
 536       test_unpack8_swap(a0, p8);
 537       for (int i=0; i<(ARRLEN&(-8)); i++) {
 538         errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
 539       }
 540 
 541     }
 542 
 543     if (errn > 0)
 544       return errn;
 545 
 546     System.out.println("Time");
 547     long start, end;
 548 
 549     start = System.currentTimeMillis();
 550     for (int i=0; i<ITERS; i++) {
 551       test_sum(a1);
 552     }
 553     end = System.currentTimeMillis();
 554     System.out.println("test_sum: " + (end - start));
 555 
 556     start = System.currentTimeMillis();
 557     for (int i=0; i<ITERS; i++) {
 558       test_addc(a0, a1);
 559     }
 560     end = System.currentTimeMillis();
 561     System.out.println("test_addc: " + (end - start));
 562     start = System.currentTimeMillis();
 563     for (int i=0; i<ITERS; i++) {
 564       test_addv(a0, a1, (byte)VALUE);
 565     }
 566     end = System.currentTimeMillis();
 567     System.out.println("test_addv: " + (end - start));
 568     start = System.currentTimeMillis();
 569     for (int i=0; i<ITERS; i++) {
 570       test_adda(a0, a1, a2);
 571     }
 572     end = System.currentTimeMillis();
 573     System.out.println("test_adda: " + (end - start));
 574 
 575     start = System.currentTimeMillis();
 576     for (int i=0; i<ITERS; i++) {
 577       test_subc(a0, a1);
 578     }
 579     end = System.currentTimeMillis();
 580     System.out.println("test_subc: " + (end - start));
 581     start = System.currentTimeMillis();
 582     for (int i=0; i<ITERS; i++) {
 583       test_subv(a0, a1, (byte)VALUE);
 584     }
 585     end = System.currentTimeMillis();
 586     System.out.println("test_subv: " + (end - start));
 587     start = System.currentTimeMillis();
 588     for (int i=0; i<ITERS; i++) {
 589       test_suba(a0, a1, a2);
 590     }
 591     end = System.currentTimeMillis();
 592     System.out.println("test_suba: " + (end - start));
 593 
 594     start = System.currentTimeMillis();
 595     for (int i=0; i<ITERS; i++) {
 596       test_mulc(a0, a1);
 597     }
 598     end = System.currentTimeMillis();
 599     System.out.println("test_mulc: " + (end - start));
 600     start = System.currentTimeMillis();
 601     for (int i=0; i<ITERS; i++) {
 602       test_mulv(a0, a1, (byte)VALUE);
 603     }
 604     end = System.currentTimeMillis();
 605     System.out.println("test_mulv: " + (end - start));
 606     start = System.currentTimeMillis();
 607     for (int i=0; i<ITERS; i++) {
 608       test_mula(a0, a1, a2);
 609     }
 610     end = System.currentTimeMillis();
 611     System.out.println("test_mula: " + (end - start));
 612 
 613     start = System.currentTimeMillis();
 614     for (int i=0; i<ITERS; i++) {
 615       test_divc(a0, a1);
 616     }
 617     end = System.currentTimeMillis();
 618     System.out.println("test_divc: " + (end - start));
 619     start = System.currentTimeMillis();
 620     for (int i=0; i<ITERS; i++) {
 621       test_divv(a0, a1, (byte)VALUE);
 622     }
 623     end = System.currentTimeMillis();
 624     System.out.println("test_divv: " + (end - start));
 625     start = System.currentTimeMillis();
 626     for (int i=0; i<ITERS; i++) {
 627       test_diva(a0, a1, a2);
 628     }
 629     end = System.currentTimeMillis();
 630     System.out.println("test_diva: " + (end - start));
 631 
 632     start = System.currentTimeMillis();
 633     for (int i=0; i<ITERS; i++) {
 634       test_mulc_n(a0, a1);
 635     }
 636     end = System.currentTimeMillis();
 637     System.out.println("test_mulc_n: " + (end - start));
 638     start = System.currentTimeMillis();
 639     for (int i=0; i<ITERS; i++) {
 640       test_mulv(a0, a1, (byte)-VALUE);
 641     }
 642     end = System.currentTimeMillis();
 643     System.out.println("test_mulv_n: " + (end - start));
 644     start = System.currentTimeMillis();
 645     for (int i=0; i<ITERS; i++) {
 646       test_mula(a0, a1, a3);
 647     }
 648     end = System.currentTimeMillis();
 649     System.out.println("test_mula_n: " + (end - start));
 650 
 651     start = System.currentTimeMillis();
 652     for (int i=0; i<ITERS; i++) {
 653       test_divc_n(a0, a1);
 654     }
 655     end = System.currentTimeMillis();
 656     System.out.println("test_divc_n: " + (end - start));
 657     start = System.currentTimeMillis();
 658     for (int i=0; i<ITERS; i++) {
 659       test_divv(a0, a1, (byte)-VALUE);
 660     }
 661     end = System.currentTimeMillis();
 662     System.out.println("test_divv_n: " + (end - start));
 663     start = System.currentTimeMillis();
 664     for (int i=0; i<ITERS; i++) {
 665       test_diva(a0, a1, a3);
 666     }
 667     end = System.currentTimeMillis();
 668     System.out.println("test_diva_n: " + (end - start));
 669 
 670     start = System.currentTimeMillis();
 671     for (int i=0; i<ITERS; i++) {
 672       test_andc(a0, a1);
 673     }
 674     end = System.currentTimeMillis();
 675     System.out.println("test_andc: " + (end - start));
 676     start = System.currentTimeMillis();
 677     for (int i=0; i<ITERS; i++) {
 678       test_andv(a0, a1, (byte)BIT_MASK);
 679     }
 680     end = System.currentTimeMillis();
 681     System.out.println("test_andv: " + (end - start));
 682     start = System.currentTimeMillis();
 683     for (int i=0; i<ITERS; i++) {
 684       test_anda(a0, a1, a4);
 685     }
 686     end = System.currentTimeMillis();
 687     System.out.println("test_anda: " + (end - start));
 688 
 689     start = System.currentTimeMillis();
 690     for (int i=0; i<ITERS; i++) {
 691       test_orc(a0, a1);
 692     }
 693     end = System.currentTimeMillis();
 694     System.out.println("test_orc: " + (end - start));
 695     start = System.currentTimeMillis();
 696     for (int i=0; i<ITERS; i++) {
 697       test_orv(a0, a1, (byte)BIT_MASK);
 698     }
 699     end = System.currentTimeMillis();
 700     System.out.println("test_orv: " + (end - start));
 701     start = System.currentTimeMillis();
 702     for (int i=0; i<ITERS; i++) {
 703       test_ora(a0, a1, a4);
 704     }
 705     end = System.currentTimeMillis();
 706     System.out.println("test_ora: " + (end - start));
 707 
 708     start = System.currentTimeMillis();
 709     for (int i=0; i<ITERS; i++) {
 710       test_xorc(a0, a1);
 711     }
 712     end = System.currentTimeMillis();
 713     System.out.println("test_xorc: " + (end - start));
 714     start = System.currentTimeMillis();
 715     for (int i=0; i<ITERS; i++) {
 716       test_xorv(a0, a1, (byte)BIT_MASK);
 717     }
 718     end = System.currentTimeMillis();
 719     System.out.println("test_xorv: " + (end - start));
 720     start = System.currentTimeMillis();
 721     for (int i=0; i<ITERS; i++) {
 722       test_xora(a0, a1, a4);
 723     }
 724     end = System.currentTimeMillis();
 725     System.out.println("test_xora: " + (end - start));
 726 
 727     start = System.currentTimeMillis();
 728     for (int i=0; i<ITERS; i++) {
 729       test_sllc(a0, a1);
 730     }
 731     end = System.currentTimeMillis();
 732     System.out.println("test_sllc: " + (end - start));
 733     start = System.currentTimeMillis();
 734     for (int i=0; i<ITERS; i++) {
 735       test_sllv(a0, a1, VALUE);
 736     }
 737     end = System.currentTimeMillis();
 738     System.out.println("test_sllv: " + (end - start));
 739 
 740     start = System.currentTimeMillis();
 741     for (int i=0; i<ITERS; i++) {
 742       test_srlc(a0, a1);
 743     }
 744     end = System.currentTimeMillis();
 745     System.out.println("test_srlc: " + (end - start));
 746     start = System.currentTimeMillis();
 747     for (int i=0; i<ITERS; i++) {
 748       test_srlv(a0, a1, VALUE);
 749     }
 750     end = System.currentTimeMillis();
 751     System.out.println("test_srlv: " + (end - start));
 752 
 753     start = System.currentTimeMillis();
 754     for (int i=0; i<ITERS; i++) {
 755       test_srac(a0, a1);
 756     }
 757     end = System.currentTimeMillis();
 758     System.out.println("test_srac: " + (end - start));
 759     start = System.currentTimeMillis();
 760     for (int i=0; i<ITERS; i++) {
 761       test_srav(a0, a1, VALUE);
 762     }
 763     end = System.currentTimeMillis();
 764     System.out.println("test_srav: " + (end - start));
 765 
 766     start = System.currentTimeMillis();
 767     for (int i=0; i<ITERS; i++) {
 768       test_sllc_n(a0, a1);
 769     }
 770     end = System.currentTimeMillis();
 771     System.out.println("test_sllc_n: " + (end - start));
 772     start = System.currentTimeMillis();
 773     for (int i=0; i<ITERS; i++) {
 774       test_sllv(a0, a1, -VALUE);
 775     }
 776     end = System.currentTimeMillis();
 777     System.out.println("test_sllv_n: " + (end - start));
 778 
 779     start = System.currentTimeMillis();
 780     for (int i=0; i<ITERS; i++) {
 781       test_srlc_n(a0, a1);
 782     }
 783     end = System.currentTimeMillis();
 784     System.out.println("test_srlc_n: " + (end - start));
 785     start = System.currentTimeMillis();
 786     for (int i=0; i<ITERS; i++) {
 787       test_srlv(a0, a1, -VALUE);
 788     }
 789     end = System.currentTimeMillis();
 790     System.out.println("test_srlv_n: " + (end - start));
 791 
 792     start = System.currentTimeMillis();
 793     for (int i=0; i<ITERS; i++) {
 794       test_srac_n(a0, a1);
 795     }
 796     end = System.currentTimeMillis();
 797     System.out.println("test_srac_n: " + (end - start));
 798     start = System.currentTimeMillis();
 799     for (int i=0; i<ITERS; i++) {
 800       test_srav(a0, a1, -VALUE);
 801     }
 802     end = System.currentTimeMillis();
 803     System.out.println("test_srav_n: " + (end - start));
 804 
 805     start = System.currentTimeMillis();
 806     for (int i=0; i<ITERS; i++) {
 807       test_sllc_o(a0, a1);
 808     }
 809     end = System.currentTimeMillis();
 810     System.out.println("test_sllc_o: " + (end - start));
 811     start = System.currentTimeMillis();
 812     for (int i=0; i<ITERS; i++) {
 813       test_sllv(a0, a1, SHIFT);
 814     }
 815     end = System.currentTimeMillis();
 816     System.out.println("test_sllv_o: " + (end - start));
 817 
 818     start = System.currentTimeMillis();
 819     for (int i=0; i<ITERS; i++) {
 820       test_srlc_o(a0, a1);
 821     }
 822     end = System.currentTimeMillis();
 823     System.out.println("test_srlc_o: " + (end - start));
 824     start = System.currentTimeMillis();
 825     for (int i=0; i<ITERS; i++) {
 826       test_srlv(a0, a1, SHIFT);
 827     }
 828     end = System.currentTimeMillis();
 829     System.out.println("test_srlv_o: " + (end - start));
 830 
 831     start = System.currentTimeMillis();
 832     for (int i=0; i<ITERS; i++) {
 833       test_srac_o(a0, a1);
 834     }
 835     end = System.currentTimeMillis();
 836     System.out.println("test_srac_o: " + (end - start));
 837     start = System.currentTimeMillis();
 838     for (int i=0; i<ITERS; i++) {
 839       test_srav(a0, a1, SHIFT);
 840     }
 841     end = System.currentTimeMillis();
 842     System.out.println("test_srav_o: " + (end - start));
 843 
 844     start = System.currentTimeMillis();
 845     for (int i=0; i<ITERS; i++) {
 846       test_sllc_on(a0, a1);
 847     }
 848     end = System.currentTimeMillis();
 849     System.out.println("test_sllc_on: " + (end - start));
 850     start = System.currentTimeMillis();
 851     for (int i=0; i<ITERS; i++) {
 852       test_sllv(a0, a1, -SHIFT);
 853     }
 854     end = System.currentTimeMillis();
 855     System.out.println("test_sllv_on: " + (end - start));
 856 
 857     start = System.currentTimeMillis();
 858     for (int i=0; i<ITERS; i++) {
 859       test_srlc_on(a0, a1);
 860     }
 861     end = System.currentTimeMillis();
 862     System.out.println("test_srlc_on: " + (end - start));
 863     start = System.currentTimeMillis();
 864     for (int i=0; i<ITERS; i++) {
 865       test_srlv(a0, a1, -SHIFT);
 866     }
 867     end = System.currentTimeMillis();
 868     System.out.println("test_srlv_on: " + (end - start));
 869 
 870     start = System.currentTimeMillis();
 871     for (int i=0; i<ITERS; i++) {
 872       test_srac_on(a0, a1);
 873     }
 874     end = System.currentTimeMillis();
 875     System.out.println("test_srac_on: " + (end - start));
 876     start = System.currentTimeMillis();
 877     for (int i=0; i<ITERS; i++) {
 878       test_srav(a0, a1, -SHIFT);
 879     }
 880     end = System.currentTimeMillis();
 881     System.out.println("test_srav_on: " + (end - start));
 882 
 883     start = System.currentTimeMillis();
 884     for (int i=0; i<ITERS; i++) {
 885       test_sllc_add(a0, a1);
 886     }
 887     end = System.currentTimeMillis();
 888     System.out.println("test_sllc_add: " + (end - start));
 889     start = System.currentTimeMillis();
 890     for (int i=0; i<ITERS; i++) {
 891       test_sllv_add(a0, a1, ADD_INIT);
 892     }
 893     end = System.currentTimeMillis();
 894     System.out.println("test_sllv_add: " + (end - start));
 895 
 896     start = System.currentTimeMillis();
 897     for (int i=0; i<ITERS; i++) {
 898       test_srlc_add(a0, a1);
 899     }
 900     end = System.currentTimeMillis();
 901     System.out.println("test_srlc_add: " + (end - start));
 902     start = System.currentTimeMillis();
 903     for (int i=0; i<ITERS; i++) {
 904       test_srlv_add(a0, a1, ADD_INIT);
 905     }
 906     end = System.currentTimeMillis();
 907     System.out.println("test_srlv_add: " + (end - start));
 908 
 909     start = System.currentTimeMillis();
 910     for (int i=0; i<ITERS; i++) {
 911       test_srac_add(a0, a1);
 912     }
 913     end = System.currentTimeMillis();
 914     System.out.println("test_srac_add: " + (end - start));
 915     start = System.currentTimeMillis();
 916     for (int i=0; i<ITERS; i++) {
 917       test_srav_add(a0, a1, ADD_INIT);
 918     }
 919     end = System.currentTimeMillis();
 920     System.out.println("test_srav_add: " + (end - start));
 921 
 922     start = System.currentTimeMillis();
 923     for (int i=0; i<ITERS; i++) {
 924       test_sllc_and(a0, a1);
 925     }
 926     end = System.currentTimeMillis();
 927     System.out.println("test_sllc_and: " + (end - start));
 928     start = System.currentTimeMillis();
 929     for (int i=0; i<ITERS; i++) {
 930       test_sllv_and(a0, a1, BIT_MASK);
 931     }
 932     end = System.currentTimeMillis();
 933     System.out.println("test_sllv_and: " + (end - start));
 934 
 935     start = System.currentTimeMillis();
 936     for (int i=0; i<ITERS; i++) {
 937       test_srlc_and(a0, a1);
 938     }
 939     end = System.currentTimeMillis();
 940     System.out.println("test_srlc_and: " + (end - start));
 941     start = System.currentTimeMillis();
 942     for (int i=0; i<ITERS; i++) {
 943       test_srlv_and(a0, a1, BIT_MASK);
 944     }
 945     end = System.currentTimeMillis();
 946     System.out.println("test_srlv_and: " + (end - start));
 947 
 948     start = System.currentTimeMillis();
 949     for (int i=0; i<ITERS; i++) {
 950       test_srac_and(a0, a1);
 951     }
 952     end = System.currentTimeMillis();
 953     System.out.println("test_srac_and: " + (end - start));
 954     start = System.currentTimeMillis();
 955     for (int i=0; i<ITERS; i++) {
 956       test_srav_and(a0, a1, BIT_MASK);
 957     }
 958     end = System.currentTimeMillis();
 959     System.out.println("test_srav_and: " + (end - start));
 960 
 961     start = System.currentTimeMillis();
 962     for (int i=0; i<ITERS; i++) {
 963       test_pack2(p2, a1);
 964     }
 965     end = System.currentTimeMillis();
 966     System.out.println("test_pack2: " + (end - start));
 967     start = System.currentTimeMillis();
 968     for (int i=0; i<ITERS; i++) {
 969       test_unpack2(a0, p2);
 970     }
 971     end = System.currentTimeMillis();
 972     System.out.println("test_unpack2: " + (end - start));
 973     start = System.currentTimeMillis();
 974     for (int i=0; i<ITERS; i++) {
 975       test_pack2_swap(p2, a1);
 976     }
 977     end = System.currentTimeMillis();
 978     System.out.println("test_pack2_swap: " + (end - start));
 979     start = System.currentTimeMillis();
 980     for (int i=0; i<ITERS; i++) {
 981       test_unpack2_swap(a0, p2);
 982     }
 983     end = System.currentTimeMillis();
 984     System.out.println("test_unpack2_swap: " + (end - start));
 985 
 986     start = System.currentTimeMillis();
 987     for (int i=0; i<ITERS; i++) {
 988       test_pack4(p4, a1);
 989     }
 990     end = System.currentTimeMillis();
 991     System.out.println("test_pack4: " + (end - start));
 992     start = System.currentTimeMillis();
 993     for (int i=0; i<ITERS; i++) {
 994       test_unpack4(a0, p4);
 995     }
 996     end = System.currentTimeMillis();
 997     System.out.println("test_unpack4: " + (end - start));
 998     start = System.currentTimeMillis();
 999     for (int i=0; i<ITERS; i++) {
1000       test_pack4_swap(p4, a1);
1001     }
1002     end = System.currentTimeMillis();
1003     System.out.println("test_pack4_swap: " + (end - start));
1004     start = System.currentTimeMillis();
1005     for (int i=0; i<ITERS; i++) {
1006       test_unpack4_swap(a0, p4);
1007     }
1008     end = System.currentTimeMillis();
1009     System.out.println("test_unpack4_swap: " + (end - start));
1010 
1011     start = System.currentTimeMillis();
1012     for (int i=0; i<ITERS; i++) {
1013       test_pack8(p8, a1);
1014     }
1015     end = System.currentTimeMillis();
1016     System.out.println("test_pack8: " + (end - start));
1017     start = System.currentTimeMillis();
1018     for (int i=0; i<ITERS; i++) {
1019       test_unpack8(a0, p8);
1020     }
1021     end = System.currentTimeMillis();
1022     System.out.println("test_unpack8: " + (end - start));
1023     start = System.currentTimeMillis();
1024     for (int i=0; i<ITERS; i++) {
1025       test_pack8_swap(p8, a1);
1026     }
1027     end = System.currentTimeMillis();
1028     System.out.println("test_pack8_swap: " + (end - start));
1029     start = System.currentTimeMillis();
1030     for (int i=0; i<ITERS; i++) {
1031       test_unpack8_swap(a0, p8);
1032     }
1033     end = System.currentTimeMillis();
1034     System.out.println("test_unpack8_swap: " + (end - start));
1035 
1036     return errn;
1037   }
1038 
1039   static int test_sum(byte[] a1) {
1040     int sum = 0;
1041     for (int i = 0; i < a1.length; i+=1) {
1042       sum += a1[i];
1043     }
1044     return sum;
1045   }
1046 
1047   static void test_addc(byte[] a0, byte[] a1) {
1048     for (int i = 0; i < a0.length; i+=1) {
1049       a0[i] = (byte)(a1[i]+VALUE);
1050     }
1051   }
1052   static void test_addv(byte[] a0, byte[] a1, byte b) {
1053     for (int i = 0; i < a0.length; i+=1) {
1054       a0[i] = (byte)(a1[i]+b);
1055     }
1056   }
1057   static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
1058     for (int i = 0; i < a0.length; i+=1) {
1059       a0[i] = (byte)(a1[i]+a2[i]);
1060     }
1061   }
1062 
1063   static void test_subc(byte[] a0, byte[] a1) {
1064     for (int i = 0; i < a0.length; i+=1) {
1065       a0[i] = (byte)(a1[i]-VALUE);
1066     }
1067   }
1068   static void test_subv(byte[] a0, byte[] a1, byte b) {
1069     for (int i = 0; i < a0.length; i+=1) {
1070       a0[i] = (byte)(a1[i]-b);
1071     }
1072   }
1073   static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
1074     for (int i = 0; i < a0.length; i+=1) {
1075       a0[i] = (byte)(a1[i]-a2[i]);
1076     }
1077   }
1078 
1079   static void test_mulc(byte[] a0, byte[] a1) {
1080     for (int i = 0; i < a0.length; i+=1) {
1081       a0[i] = (byte)(a1[i]*VALUE);
1082     }
1083   }
1084   static void test_mulc_n(byte[] a0, byte[] a1) {
1085     for (int i = 0; i < a0.length; i+=1) {
1086       a0[i] = (byte)(a1[i]*(-VALUE));
1087     }
1088   }
1089   static void test_mulv(byte[] a0, byte[] a1, byte b) {
1090     for (int i = 0; i < a0.length; i+=1) {
1091       a0[i] = (byte)(a1[i]*b);
1092     }
1093   }
1094   static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
1095     for (int i = 0; i < a0.length; i+=1) {
1096       a0[i] = (byte)(a1[i]*a2[i]);
1097     }
1098   }
1099 
1100   static void test_divc(byte[] a0, byte[] a1) {
1101     for (int i = 0; i < a0.length; i+=1) {
1102       a0[i] = (byte)(a1[i]/VALUE);
1103     }
1104   }
1105   static void test_divc_n(byte[] a0, byte[] a1) {
1106     for (int i = 0; i < a0.length; i+=1) {
1107       a0[i] = (byte)(a1[i]/(-VALUE));
1108     }
1109   }
1110   static void test_divv(byte[] a0, byte[] a1, byte b) {
1111     for (int i = 0; i < a0.length; i+=1) {
1112       a0[i] = (byte)(a1[i]/b);
1113     }
1114   }
1115   static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
1116     for (int i = 0; i < a0.length; i+=1) {
1117       a0[i] = (byte)(a1[i]/a2[i]);
1118     }
1119   }
1120 
1121   static void test_andc(byte[] a0, byte[] a1) {
1122     for (int i = 0; i < a0.length; i+=1) {
1123       a0[i] = (byte)(a1[i]&BIT_MASK);
1124     }
1125   }
1126   static void test_andv(byte[] a0, byte[] a1, byte b) {
1127     for (int i = 0; i < a0.length; i+=1) {
1128       a0[i] = (byte)(a1[i]&b);
1129     }
1130   }
1131   static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
1132     for (int i = 0; i < a0.length; i+=1) {
1133       a0[i] = (byte)(a1[i]&a2[i]);
1134     }
1135   }
1136 
1137   static void test_orc(byte[] a0, byte[] a1) {
1138     for (int i = 0; i < a0.length; i+=1) {
1139       a0[i] = (byte)(a1[i]|BIT_MASK);
1140     }
1141   }
1142   static void test_orv(byte[] a0, byte[] a1, byte b) {
1143     for (int i = 0; i < a0.length; i+=1) {
1144       a0[i] = (byte)(a1[i]|b);
1145     }
1146   }
1147   static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
1148     for (int i = 0; i < a0.length; i+=1) {
1149       a0[i] = (byte)(a1[i]|a2[i]);
1150     }
1151   }
1152 
1153   static void test_xorc(byte[] a0, byte[] a1) {
1154     for (int i = 0; i < a0.length; i+=1) {
1155       a0[i] = (byte)(a1[i]^BIT_MASK);
1156     }
1157   }
1158   static void test_xorv(byte[] a0, byte[] a1, byte b) {
1159     for (int i = 0; i < a0.length; i+=1) {
1160       a0[i] = (byte)(a1[i]^b);
1161     }
1162   }
1163   static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
1164     for (int i = 0; i < a0.length; i+=1) {
1165       a0[i] = (byte)(a1[i]^a2[i]);
1166     }
1167   }
1168 
1169   static void test_sllc(byte[] a0, byte[] a1) {
1170     for (int i = 0; i < a0.length; i+=1) {
1171       a0[i] = (byte)(a1[i]<<VALUE);
1172     }
1173   }
1174   static void test_sllc_n(byte[] a0, byte[] a1) {
1175     for (int i = 0; i < a0.length; i+=1) {
1176       a0[i] = (byte)(a1[i]<<(-VALUE));
1177     }
1178   }
1179   static void test_sllc_o(byte[] a0, byte[] a1) {
1180     for (int i = 0; i < a0.length; i+=1) {
1181       a0[i] = (byte)(a1[i]<<SHIFT);
1182     }
1183   }
1184   static void test_sllc_on(byte[] a0, byte[] a1) {
1185     for (int i = 0; i < a0.length; i+=1) {
1186       a0[i] = (byte)(a1[i]<<(-SHIFT));
1187     }
1188   }
1189   static void test_sllv(byte[] a0, byte[] a1, int b) {
1190     for (int i = 0; i < a0.length; i+=1) {
1191       a0[i] = (byte)(a1[i]<<b);
1192     }
1193   }
1194   static void test_sllc_add(byte[] a0, byte[] a1) {
1195     for (int i = 0; i < a0.length; i+=1) {
1196       a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
1197     }
1198   }
1199   static void test_sllv_add(byte[] a0, byte[] a1, int b) {
1200     for (int i = 0; i < a0.length; i+=1) {
1201       a0[i] = (byte)((a1[i] + b)<<VALUE);
1202     }
1203   }
1204   static void test_sllc_and(byte[] a0, byte[] a1) {
1205     for (int i = 0; i < a0.length; i+=1) {
1206       a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
1207     }
1208   }
1209   static void test_sllv_and(byte[] a0, byte[] a1, int b) {
1210     for (int i = 0; i < a0.length; i+=1) {
1211       a0[i] = (byte)((a1[i] & b)<<VALUE);
1212     }
1213   }
1214 
1215   static void test_srlc(byte[] a0, byte[] a1) {
1216     for (int i = 0; i < a0.length; i+=1) {
1217       a0[i] = (byte)(a1[i]>>>VALUE);
1218     }
1219   }
1220   static void test_srlc_n(byte[] a0, byte[] a1) {
1221     for (int i = 0; i < a0.length; i+=1) {
1222       a0[i] = (byte)(a1[i]>>>(-VALUE));
1223     }
1224   }
1225   static void test_srlc_o(byte[] a0, byte[] a1) {
1226     for (int i = 0; i < a0.length; i+=1) {
1227       a0[i] = (byte)(a1[i]>>>SHIFT);
1228     }
1229   }
1230   static void test_srlc_on(byte[] a0, byte[] a1) {
1231     for (int i = 0; i < a0.length; i+=1) {
1232       a0[i] = (byte)(a1[i]>>>(-SHIFT));
1233     }
1234   }
1235   static void test_srlv(byte[] a0, byte[] a1, int b) {
1236     for (int i = 0; i < a0.length; i+=1) {
1237       a0[i] = (byte)(a1[i]>>>b);
1238     }
1239   }
1240   static void test_srlc_add(byte[] a0, byte[] a1) {
1241     for (int i = 0; i < a0.length; i+=1) {
1242       a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
1243     }
1244   }
1245   static void test_srlv_add(byte[] a0, byte[] a1, int b) {
1246     for (int i = 0; i < a0.length; i+=1) {
1247       a0[i] = (byte)((a1[i] + b)>>>VALUE);
1248     }
1249   }
1250   static void test_srlc_and(byte[] a0, byte[] a1) {
1251     for (int i = 0; i < a0.length; i+=1) {
1252       a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
1253     }
1254   }
1255   static void test_srlv_and(byte[] a0, byte[] a1, int b) {
1256     for (int i = 0; i < a0.length; i+=1) {
1257       a0[i] = (byte)((a1[i] & b)>>>VALUE);
1258     }
1259   }
1260 
1261   static void test_srac(byte[] a0, byte[] a1) {
1262     for (int i = 0; i < a0.length; i+=1) {
1263       a0[i] = (byte)(a1[i]>>VALUE);
1264     }
1265   }
1266   static void test_srac_n(byte[] a0, byte[] a1) {
1267     for (int i = 0; i < a0.length; i+=1) {
1268       a0[i] = (byte)(a1[i]>>(-VALUE));
1269     }
1270   }
1271   static void test_srac_o(byte[] a0, byte[] a1) {
1272     for (int i = 0; i < a0.length; i+=1) {
1273       a0[i] = (byte)(a1[i]>>SHIFT);
1274     }
1275   }
1276   static void test_srac_on(byte[] a0, byte[] a1) {
1277     for (int i = 0; i < a0.length; i+=1) {
1278       a0[i] = (byte)(a1[i]>>(-SHIFT));
1279     }
1280   }
1281   static void test_srav(byte[] a0, byte[] a1, int b) {
1282     for (int i = 0; i < a0.length; i+=1) {
1283       a0[i] = (byte)(a1[i]>>b);
1284     }
1285   }
1286   static void test_srac_add(byte[] a0, byte[] a1) {
1287     for (int i = 0; i < a0.length; i+=1) {
1288       a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
1289     }
1290   }
1291   static void test_srav_add(byte[] a0, byte[] a1, int b) {
1292     for (int i = 0; i < a0.length; i+=1) {
1293       a0[i] = (byte)((a1[i] + b)>>VALUE);
1294     }
1295   }
1296   static void test_srac_and(byte[] a0, byte[] a1) {
1297     for (int i = 0; i < a0.length; i+=1) {
1298       a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
1299     }
1300   }
1301   static void test_srav_and(byte[] a0, byte[] a1, int b) {
1302     for (int i = 0; i < a0.length; i+=1) {
1303       a0[i] = (byte)((a1[i] & b)>>VALUE);
1304     }
1305   }
1306 
1307   static void test_pack2(short[] p2, byte[] a1) {
1308     if (p2.length*2 > a1.length) return;
1309     for (int i = 0; i < p2.length; i+=1) {
1310       short l0 = (short)a1[i*2+0];
1311       short l1 = (short)a1[i*2+1];
1312       p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
1313     }
1314   }
1315   static void test_unpack2(byte[] a0, short[] p2) {
1316     if (p2.length*2 > a0.length) return;
1317     for (int i = 0; i < p2.length; i+=1) {
1318       short l = p2[i];
1319       a0[i*2+0] = (byte)(l & 0xFF);
1320       a0[i*2+1] = (byte)(l >> 8);
1321     }
1322   }
1323   static void test_pack2_swap(short[] p2, byte[] a1) {
1324     if (p2.length*2 > a1.length) return;
1325     for (int i = 0; i < p2.length; i+=1) {
1326       short l0 = (short)a1[i*2+0];
1327       short l1 = (short)a1[i*2+1];
1328       p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
1329     }
1330   }
1331   static void test_unpack2_swap(byte[] a0, short[] p2) {
1332     if (p2.length*2 > a0.length) return;
1333     for (int i = 0; i < p2.length; i+=1) {
1334       short l = p2[i];
1335       a0[i*2+0] = (byte)(l >> 8);
1336       a0[i*2+1] = (byte)(l & 0xFF);
1337     }
1338   }
1339 
1340   static void test_pack4(int[] p4, byte[] a1) {
1341     if (p4.length*4 > a1.length) return;
1342     for (int i = 0; i < p4.length; i+=1) {
1343       int l0 = (int)a1[i*4+0];
1344       int l1 = (int)a1[i*4+1];
1345       int l2 = (int)a1[i*4+2];
1346       int l3 = (int)a1[i*4+3];
1347       p4[i] = (l0 & 0xFF) |
1348              ((l1 & 0xFF) <<  8) |
1349              ((l2 & 0xFF) << 16) |
1350              ((l3 & 0xFF) << 24);
1351     }
1352   }
1353   static void test_unpack4(byte[] a0, int[] p4) {
1354     if (p4.length*4 > a0.length) return;
1355     for (int i = 0; i < p4.length; i+=1) {
1356       int l = p4[i];
1357       a0[i*4+0] = (byte)(l & 0xFF);
1358       a0[i*4+1] = (byte)(l >>  8);
1359       a0[i*4+2] = (byte)(l >> 16);
1360       a0[i*4+3] = (byte)(l >> 24);
1361     }
1362   }
1363   static void test_pack4_swap(int[] p4, byte[] a1) {
1364     if (p4.length*4 > a1.length) return;
1365     for (int i = 0; i < p4.length; i+=1) {
1366       int l0 = (int)a1[i*4+0];
1367       int l1 = (int)a1[i*4+1];
1368       int l2 = (int)a1[i*4+2];
1369       int l3 = (int)a1[i*4+3];
1370       p4[i] = (l3 & 0xFF) |
1371              ((l2 & 0xFF) <<  8) |
1372              ((l1 & 0xFF) << 16) |
1373              ((l0 & 0xFF) << 24);
1374     }
1375   }
1376   static void test_unpack4_swap(byte[] a0, int[] p4) {
1377     if (p4.length*4 > a0.length) return;
1378     for (int i = 0; i < p4.length; i+=1) {
1379       int l = p4[i];
1380       a0[i*4+0] = (byte)(l >> 24);
1381       a0[i*4+1] = (byte)(l >> 16);
1382       a0[i*4+2] = (byte)(l >>  8);
1383       a0[i*4+3] = (byte)(l & 0xFF);
1384     }
1385   }
1386 
1387   static void test_pack8(long[] p8, byte[] a1) {
1388     if (p8.length*8 > a1.length) return;
1389     for (int i = 0; i < p8.length; i+=1) {
1390       long l0 = (long)a1[i*8+0];
1391       long l1 = (long)a1[i*8+1];
1392       long l2 = (long)a1[i*8+2];
1393       long l3 = (long)a1[i*8+3];
1394       long l4 = (long)a1[i*8+4];
1395       long l5 = (long)a1[i*8+5];
1396       long l6 = (long)a1[i*8+6];
1397       long l7 = (long)a1[i*8+7];
1398       p8[i] = (l0 & 0xFFl) |
1399              ((l1 & 0xFFl) <<  8) |
1400              ((l2 & 0xFFl) << 16) |
1401              ((l3 & 0xFFl) << 24) |
1402              ((l4 & 0xFFl) << 32) |
1403              ((l5 & 0xFFl) << 40) |
1404              ((l6 & 0xFFl) << 48) |
1405              ((l7 & 0xFFl) << 56);
1406     }
1407   }
1408   static void test_unpack8(byte[] a0, long[] p8) {
1409     if (p8.length*8 > a0.length) return;
1410     for (int i = 0; i < p8.length; i+=1) {
1411       long l = p8[i];
1412       a0[i*8+0] = (byte)(l & 0xFFl);
1413       a0[i*8+1] = (byte)(l >>  8);
1414       a0[i*8+2] = (byte)(l >> 16);
1415       a0[i*8+3] = (byte)(l >> 24);
1416       a0[i*8+4] = (byte)(l >> 32);
1417       a0[i*8+5] = (byte)(l >> 40);
1418       a0[i*8+6] = (byte)(l >> 48);
1419       a0[i*8+7] = (byte)(l >> 56);
1420     }
1421   }
1422   static void test_pack8_swap(long[] p8, byte[] a1) {
1423     if (p8.length*8 > a1.length) return;
1424     for (int i = 0; i < p8.length; i+=1) {
1425       long l0 = (long)a1[i*8+0];
1426       long l1 = (long)a1[i*8+1];
1427       long l2 = (long)a1[i*8+2];
1428       long l3 = (long)a1[i*8+3];
1429       long l4 = (long)a1[i*8+4];
1430       long l5 = (long)a1[i*8+5];
1431       long l6 = (long)a1[i*8+6];
1432       long l7 = (long)a1[i*8+7];
1433       p8[i] = (l7 & 0xFFl) |
1434              ((l6 & 0xFFl) <<  8) |
1435              ((l5 & 0xFFl) << 16) |
1436              ((l4 & 0xFFl) << 24) |
1437              ((l3 & 0xFFl) << 32) |
1438              ((l2 & 0xFFl) << 40) |
1439              ((l1 & 0xFFl) << 48) |
1440              ((l0 & 0xFFl) << 56);
1441     }
1442   }
1443   static void test_unpack8_swap(byte[] a0, long[] p8) {
1444     if (p8.length*8 > a0.length) return;
1445     for (int i = 0; i < p8.length; i+=1) {
1446       long l = p8[i];
1447       a0[i*8+0] = (byte)(l >> 56);
1448       a0[i*8+1] = (byte)(l >> 48);
1449       a0[i*8+2] = (byte)(l >> 40);
1450       a0[i*8+3] = (byte)(l >> 32);
1451       a0[i*8+4] = (byte)(l >> 24);
1452       a0[i*8+5] = (byte)(l >> 16);
1453       a0[i*8+6] = (byte)(l >>  8);
1454       a0[i*8+7] = (byte)(l & 0xFFl);
1455     }
1456   }
1457 
1458   static int verify(String text, int i, byte elem, byte val) {
1459     if (elem != val) {
1460       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1461       return 1;
1462     }
1463     return 0;
1464   }
1465 
1466   static int verify(String text, int i, short elem, short val) {
1467     if (elem != val) {
1468       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1469       return 1;
1470     }
1471     return 0;
1472   }
1473 
1474   static int verify(String text, int i, int elem, int val) {
1475     if (elem != val) {
1476       System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
1477       return 1;
1478     }
1479     return 0;
1480   }
1481 
1482   static int verify(String text, int i, long elem, long val) {
1483     if (elem != val) {
1484       System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
1485       return 1;
1486     }
1487     return 0;
1488   }
1489 }