/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

int AbstractAssembler::code_fill_byte() {
  return 0x00;                  // illegal instruction 0x00000000
}


// Patch instruction `inst' at offset `inst_pos' to refer to
// `dest_pos' and return the resulting instruction.  We should have
// pcs, not offsets, but since all is relative, it will work out fine.
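// The li()/bd() field encoders are reused here: passing -1 yields a field
// with all bits set, which serves as the mask for the displacement bits of
// the I-form (b) or B-form (bc) instruction; the old displacement is cleared
// with the mask and the newly encoded displacement is or'ed in.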
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
  int m = 0; // mask for displacement field
  int v = 0; // new value for displacement field

  switch (inv_op_ppc(inst)) {
  case b_op:  m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
  case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
  default: ShouldNotReachHere();
  }
  return (inst & ~m) | v;
}

// Return the offset, relative to _code_begin, of the destination of
// the branch inst at offset pos.
int Assembler::branch_destination(int inst, int pos) {
  int r = 0;
  switch (inv_op_ppc(inst)) {
    case b_op:  r = bxx_destination_offset(inst, pos); break;
    case bc_op: r = inv_bd_field(inst, pos); break;
    default: ShouldNotReachHere();
  }
  return r;
}

// Low-level andi-one-instruction-macro.
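// Masks of the shapes 2^n-1, 2^n and -2^n are encoded as a single
// rotate-and-clear instruction (clrldi, rlwinm, clrrdi), which also leaves
// CR0 untouched; only the general case falls back to the record form andi_.
// E.g. andi(a, s, 0xff) emits clrldi(a, s, 56), keeping just the low 8 bits.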
void Assembler::andi(Register a, Register s, const long ui16) {
  if (is_power_of_2_long(((jlong) ui16)+1)) {
    // pow2minus1
    clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
  } else if (is_power_of_2_long((jlong) ui16)) {
    // pow2
    rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
  } else if (is_power_of_2_long((jlong)-ui16)) {
    // negpow2
    clrrdi(a, s, log2_long((jlong)-ui16));
  } else {
    assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
    andi_(a, s, ui16);
  }
}

// RegisterOrConstant version.
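// The load wrappers below all follow the same pattern: a constant with no
// base register is materialized via load_const_optimized and its low 16 bits
// are folded into the D-form displacement; a constant that fits in simm16 is
// used directly as a displacement off s1; otherwise the constant is loaded
// into d and an X-form (indexed) access is emitted. A register operand simply
// selects between the D-form (zero displacement) and the X-form.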
void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::ld(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::ld(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::ldx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::ld(d, 0, roc.as_register());
    else
      Assembler::ldx(d, roc.as_register(), s1);
  }
}

void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lwa(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lwa(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lwax(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lwa(d, 0, roc.as_register());
    else
      Assembler::lwax(d, roc.as_register(), s1);
  }
}

void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lwz(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lwz(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lwzx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lwz(d, 0, roc.as_register());
    else
      Assembler::lwzx(d, roc.as_register(), s1);
  }
}

void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lha(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lha(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lhax(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lha(d, 0, roc.as_register());
    else
      Assembler::lhax(d, roc.as_register(), s1);
  }
}

void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lhz(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lhz(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lhzx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lhz(d, 0, roc.as_register());
    else
      Assembler::lhzx(d, roc.as_register(), s1);
  }
}

void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lbz(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lbz(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lbzx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lbz(d, 0, roc.as_register());
    else
      Assembler::lbzx(d, roc.as_register(), s1);
  }
}

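// Store versions. Unlike the loads above, the value register d must be
// preserved, so a separate tmp register is required whenever a constant
// offset has to be materialized (see the guarantees below).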
void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::std(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::std(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::stdx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::std(d, 0, roc.as_register());
    else
      Assembler::stdx(d, roc.as_register(), s1);
  }
}

void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::stw(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::stw(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::stwx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::stw(d, 0, roc.as_register());
    else
      Assembler::stwx(d, roc.as_register(), s1);
  }
}

void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::sth(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::sth(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::sthx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::sth(d, 0, roc.as_register());
    else
      Assembler::sthx(d, roc.as_register(), s1);
  }
}

void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::stb(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::stb(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::stbx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::stb(d, 0, roc.as_register());
    else
      Assembler::stbx(d, roc.as_register(), s1);
  }
}

void Assembler::add(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    intptr_t c = roc.as_constant();
    assert(is_simm(c, 16), "too big");
    addi(d, s1, (int)c);
  }
  else add(d, roc.as_register(), s1);
}

void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    intptr_t c = roc.as_constant();
    assert(is_simm(-c, 16), "too big");
    addi(d, s1, (int)-c);
  }
  else subf(d, roc.as_register(), s1);
}

void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    intptr_t c = roc.as_constant();
    assert(is_simm(c, 16), "too big");
    cmpdi(d, s1, (int)c);
  }
  else cmpd(d, roc.as_register(), s1);
}

// Load a 64 bit constant. Patchable.
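// The instruction sequence is fixed (independent of the value), so the four
// 16-bit pieces can later be patched in place. Without a tmp register,
// e.g. x = 0x1122334455667788 is emitted as
//   lis(d, 0x1122); ori(d, d, 0x3344); sldi(d, d, 32);
//   oris(d, d, 0x5566); ori(d, d, 0x7788);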
void Assembler::load_const(Register d, long x, Register tmp) {
  // 64-bit value: x = xa xb xc xd
  int xa = (x >> 48) & 0xffff;
  int xb = (x >> 32) & 0xffff;
  int xc = (x >> 16) & 0xffff;
  int xd = (x >>  0) & 0xffff;
  if (tmp == noreg) {
    Assembler::lis( d, (int)(short)xa);
    Assembler::ori( d, d, (unsigned int)xb);
    Assembler::sldi(d, d, 32);
    Assembler::oris(d, d, (unsigned int)xc);
    Assembler::ori( d, d, (unsigned int)xd);
  } else {
    // exploit instruction level parallelism if we have a tmp register
    assert_different_registers(d, tmp);
    Assembler::lis(tmp, (int)(short)xa);
    Assembler::lis(d, (int)(short)xc);
    Assembler::ori(tmp, tmp, (unsigned int)xb);
    Assembler::ori(d, d, (unsigned int)xd);
    Assembler::insrdi(d, tmp, 32, 0);
  }
}

// Load a 64 bit constant, optimized, not identifiable.
// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
// 16 bit immediate offset.
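// "Not identifiable" means the emitted sequence depends on the value, so it
// cannot be located and patched afterwards (use load_const above for that).
// With return_simm16_rest=true the lowest 16-bit chunk may be left out of the
// materialized value and returned instead, so the caller can fold it into a
// D-form displacement, as in Assembler::ld(d, simm16_rest, d) above.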
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
  // Avoid accidentally trying to use R0 for indexed addressing.
  assert_different_registers(d, tmp);

  short xa, xb, xc, xd; // Four 16-bit chunks of const.
  long rem = x;         // Remaining part of const.

  xd = rem & 0xFFFF;    // Lowest 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.
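  // Example: for x = 0x12348000 the low chunk xd = (short)0x8000 = -0x8000
  // is sign extended when used later (li/addi/D-form displacement), so the
  // remaining upper part is incremented by one to compensate:
  // (0x1234 + 1) << 16 - 0x8000 = 0x12348000.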

  if (rem == 0) { // opt 1: simm16
    li(d, xd);
    return 0;
  }

  int retval = 0;
  if (return_simm16_rest) {
    retval = xd;
    x = rem << 16;
    xd = 0;
  }

  if (d == R0) { // Can't use addi.
    if (is_simm(x, 32)) { // opt 2: simm32
      lis(d, x >> 16);
      if (xd) ori(d, d, (unsigned short)xd);
    } else {
      // 64-bit value: x = xa xb xc xd
      xa = (x >> 48) & 0xffff;
      xb = (x >> 32) & 0xffff;
      xc = (x >> 16) & 0xffff;
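      // xa must be loaded only if it is not just the sign extension of xb.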
      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
      if (tmp == noreg || (xc == 0 && xd == 0)) {
        if (xa_loaded) {
          lis(d, xa);
          if (xb) { ori(d, d, (unsigned short)xb); }
        } else {
          li(d, xb);
        }
        sldi(d, d, 32);
        if (xc) { oris(d, d, (unsigned short)xc); }
        if (xd) { ori( d, d, (unsigned short)xd); }
      } else {
        // Exploit instruction level parallelism if we have a tmp register.
        bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
        if (xa_loaded) {
          lis(tmp, xa);
        }
        if (xc_loaded) {
          lis(d, xc);
        }
        if (xa_loaded) {
          if (xb) { ori(tmp, tmp, (unsigned short)xb); }
        } else {
          li(tmp, xb);
        }
        if (xc_loaded) {
          if (xd) { ori(d, d, (unsigned short)xd); }
        } else {
          li(d, xd);
        }
        insrdi(d, tmp, 32, 0);
      }
    }
    return retval;
  }

  xc = rem & 0xFFFF; // Next 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.

  if (rem == 0) { // opt 2: simm32
    lis(d, xc);
  } else { // High 32 bits needed.

    if (tmp != noreg  && (int)x != 0) { // opt 3: We have a temp reg.
      // No carry propagation between xc and higher chunks here (use logical instructions).
      xa = (x >> 48) & 0xffff;
      xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
      bool return_xd = false;

      if (xa_loaded) { lis(tmp, xa); }
      if (xc) { lis(d, xc); }
      if (xa_loaded) {
        if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
      } else {
        li(tmp, xb);
      }
      if (xc) {
        if (xd) { addi(d, d, xd); }
      } else {
        li(d, xd);
      }
      insrdi(d, tmp, 32, 0);
      return retval;
    }

    xb = rem & 0xFFFF; // Next 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.

    xa = rem & 0xFFFF; // Highest 16-bit chunk.

    // opt 4: avoid adding 0
    if (xa) { // Highest 16-bit needed?
      lis(d, xa);
      if (xb) { addi(d, d, xb); }
    } else {
      li(d, xb);
    }
    sldi(d, d, 32);
    if (xc) { addis(d, d, xc); }
  }

  if (xd) { addi(d, d, xd); }
  return retval;
}

// We emit only one addition to s to optimize latency.
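// Small constants are handled with a single mr/addi/addis on s; larger ones
// are first materialized via load_const_optimized into a scratch register
// (d itself if it aliases neither s nor tmp) so that only the final add
// depends on s. With return_simm16_rest=true the low 16 bits may be returned
// to the caller instead of being applied here.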
int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
  assert(s != R0 && s != tmp, "unsupported");
  long rem = x;

  // Case 1: Can use mr or addi.
  short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xd >> 15);
  if (rem == 0) {
    if (xd == 0) {
      if (d != s) { mr(d, s); }
      return 0;
    }
    if (return_simm16_rest && (d == s)) {
      return xd;
    }
    addi(d, s, xd);
    return 0;
  }

  // Case 2: Can use addis.
  if (xd == 0) {
    short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
    if (rem == 0) {
      addis(d, s, xc);
      return 0;
    }
  }

  // Other cases: load & add.
  Register tmp1 = tmp,
           tmp2 = noreg;
  if ((d != tmp) && (d != s)) {
    // Can use d.
    tmp1 = d;
    tmp2 = tmp;
  }
  int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
  add(d, tmp1, s);
  return simm16_rest;
}

#ifndef PRODUCT
// Test of ppc assembler.
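// Emits one instance of each instruction group (fixed-point, rotate/shift,
// load/store, branch and floating-point forms as listed in the "PPC 1"
// section comments below) and then prints a disassembly of the generated
// code. Only available in non-PRODUCT builds.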
void Assembler::test_asm() {
  // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
  addi(   R0,  R1,  10);
  addis(  R5,  R2,  11);
  addic_( R3,  R31, 42);
  subfic( R21, R12, 2112);
  add(    R3,  R2,  R1);
  add_(   R11, R22, R30);
  subf(   R7,  R6,  R5);
  subf_(  R8,  R9,  R4);
  addc(   R11, R12, R13);
  addc_(  R14, R14, R14);
  subfc(  R15, R16, R17);
  subfc_( R18, R20, R19);
  adde(   R20, R22, R24);
  adde_(  R29, R27, R26);
  subfe(  R28, R1,  R0);
  subfe_( R21, R11, R29);
  neg(    R21, R22);
  neg_(   R13, R23);
  mulli(  R0,  R11, -31);
  mulld(  R1,  R18, R21);
  mulld_( R2,  R17, R22);
  mullw(  R3,  R16, R23);
  mullw_( R4,  R15, R24);
  divd(   R5,  R14, R25);
  divd_(  R6,  R13, R26);
  divw(   R7,  R12, R27);
  divw_(  R8,  R11, R28);

  li(     R3, -4711);

  // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
  cmpi(   CCR7,  0, R27, 4711);
  cmp(    CCR0, 1, R14, R11);
  cmpli(  CCR5,  1, R17, 45);
  cmpl(   CCR3, 0, R9,  R10);

  cmpwi(  CCR7,  R27, 4711);
  cmpw(   CCR0, R14, R11);
  cmplwi( CCR5,  R17, 45);
  cmplw(  CCR3, R9,  R10);

  cmpdi(  CCR7,  R27, 4711);
  cmpd(   CCR0, R14, R11);
  cmpldi( CCR5,  R17, 45);
  cmpld(  CCR3, R9,  R10);

  // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
  andi_(  R4,  R5,  0xff);
  andis_( R12, R13, 0x7b51);
  ori(    R1,  R4,  13);
  oris(   R3,  R5,  177);
  xori(   R7,  R6,  51);
  xoris(  R29, R0,  1);
  andr(   R17, R21, R16);
  and_(   R3,  R5,  R15);
  orr(    R2,  R1,  R9);
  or_(    R17, R15, R11);
  xorr(   R19, R18, R10);
  xor_(   R31, R21, R11);
  nand(   R5,  R7,  R3);
  nand_(  R3,  R1,  R0);
  nor(    R2,  R3,  R5);
  nor_(   R3,  R6,  R8);
  andc(   R25, R12, R11);
  andc_(  R24, R22, R21);
  orc(    R20, R10, R12);
  orc_(   R22, R2,  R13);

  nop();

  // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
  sld(    R5,  R6,  R8);
  sld_(   R3,  R5,  R9);
  slw(    R2,  R1,  R10);
  slw_(   R6,  R26, R16);
  srd(    R16, R24, R8);
  srd_(   R21, R14, R7);
  srw(    R22, R25, R29);
  srw_(   R5,  R18, R17);
  srad(   R7,  R11, R0);
  srad_(  R9,  R13, R1);
  sraw(   R7,  R15, R2);
  sraw_(  R4,  R17, R3);
  sldi(   R3,  R18, 63);
  sldi_(  R2,  R20, 30);
  slwi(   R1,  R21, 30);
  slwi_(  R7,  R23, 8);
  srdi(   R0,  R19, 2);
  srdi_(  R12, R24, 5);
  srwi(   R13, R27, 6);
  srwi_(  R14, R29, 7);
  sradi(  R15, R30, 9);
  sradi_( R16, R31, 19);
  srawi(  R17, R31, 15);
  srawi_( R18, R31, 12);

  clrrdi( R3, R30, 5);
  clrldi( R9, R10, 11);

  rldicr( R19, R20, 13, 15);
  rldicr_(R20, R20, 16, 14);
  rldicl( R21, R21, 30, 33);
  rldicl_(R22, R1,  20, 25);
  rlwinm( R23, R2,  25, 10, 11);
  rlwinm_(R24, R3,  12, 13, 14);

  // PPC 1, section 3.3.2 Fixed-Point Load Instructions
  lwzx(   R3,  R5, R7);
  lwz(    R11,  0, R1);
  lwzu(   R31, -4, R11);

  lwax(   R3,  R5, R7);
  lwa(    R31, -4, R11);
  lhzx(   R3,  R5, R7);
  lhz(    R31, -4, R11);
  lhzu(   R31, -4, R11);

  lhax(   R3,  R5, R7);
  lha(    R31, -4, R11);
  lhau(   R11,  0, R1);

  lbzx(   R3,  R5, R7);
  lbz(    R31, -4, R11);
  lbzu(   R11,  0, R1);

  ld(     R31, -4, R11);
  ldx(    R3,  R5, R7);
  ldu(    R31, -4, R11);

  // PPC 1, section 3.3.3 Fixed-Point Store Instructions
  stwx(   R3,  R5, R7);
  stw(    R31, -4, R11);
  stwu(   R11,  0, R1);

  sthx(   R3,  R5, R7);
  sth(    R31, -4, R11);
  sthu(   R31, -4, R11);

  stbx(   R3,  R5, R7);
  stb(    R31, -4, R11);
  stbu(   R31, -4, R11);

  std(    R31, -4, R11);
  stdx(   R3,  R5, R7);
  stdu(   R31, -4, R11);

  // PPC 1, section 3.3.13 Move To/From System Register Instructions
  mtlr(   R3);
  mflr(   R3);
  mtctr(  R3);
  mfctr(  R3);
  mtcrf(  0xff, R15);
  mtcr(   R15);
  mtcrf(  0x03, R15);
  mtcr(   R15);
  mfcr(   R15);

  // PPC 1, section 2.4.1 Branch Instructions
  Label lbl1, lbl2, lbl3;
  bind(lbl1);

  b(pc());
  b(pc() - 8);
  b(lbl1);
  b(lbl2);
  b(lbl3);

  bl(pc() - 8);
  bl(lbl1);
  bl(lbl2);

  bcl(4, 10, pc() - 8);
  bcl(4, 10, lbl1);
  bcl(4, 10, lbl2);

  bclr( 4, 6, 0);
  bclrl(4, 6, 0);

  bind(lbl2);

  bcctr( 4, 6, 0);
  bcctrl(4, 6, 0);

  blt(CCR0, lbl2);
  bgt(CCR1, lbl2);
  beq(CCR2, lbl2);
  bso(CCR3, lbl2);
  bge(CCR4, lbl2);
  ble(CCR5, lbl2);
  bne(CCR6, lbl2);
  bns(CCR7, lbl2);

  bltl(CCR0, lbl2);
  bgtl(CCR1, lbl2);
  beql(CCR2, lbl2);
  bsol(CCR3, lbl2);
  bgel(CCR4, lbl2);
  blel(CCR5, lbl2);
  bnel(CCR6, lbl2);
  bnsl(CCR7, lbl2);
  blr();

  sync();
  icbi( R1, R2);
  dcbst(R2, R3);

  // FLOATING POINT instructions ppc.
  // PPC 1, section 4.6.2 Floating-Point Load Instructions
  lfs( F1, -11, R3);
  lfsu(F2, 123, R4);
  lfsx(F3, R5,  R6);
  lfd( F4, 456, R7);
  lfdu(F5, 789, R8);
  lfdx(F6, R10, R11);

  // PPC 1, section 4.6.3 Floating-Point Store Instructions
  stfs(  F7,  876, R12);
  stfsu( F8,  543, R13);
  stfsx( F9,  R14, R15);
  stfd(  F10, 210, R16);
  stfdu( F11, 111, R17);
  stfdx( F12, R18, R19);

  // PPC 1, section 4.6.4 Floating-Point Move Instructions
  fmr(   F13, F14);
  fmr_(  F14, F15);
  fneg(  F16, F17);
  fneg_( F18, F19);
  fabs(  F20, F21);
  fabs_( F22, F23);
  fnabs( F24, F25);
  fnabs_(F26, F27);

  // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic
  // Instructions
  fadd(  F28, F29, F30);
  fadd_( F31, F0,  F1);
  fadds( F2,  F3,  F4);
  fadds_(F5,  F6,  F7);
  fsub(  F8,  F9,  F10);
  fsub_( F11, F12, F13);
  fsubs( F14, F15, F16);
  fsubs_(F17, F18, F19);
  fmul(  F20, F21, F22);
  fmul_( F23, F24, F25);
  fmuls( F26, F27, F28);
  fmuls_(F29, F30, F31);
  fdiv(  F0,  F1,  F2);
  fdiv_( F3,  F4,  F5);
  fdivs( F6,  F7,  F8);
  fdivs_(F9,  F10, F11);

  // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion
  // Instructions
  frsp(  F12, F13);
  fctid( F14, F15);
  fctidz(F16, F17);
  fctiw( F18, F19);
  fctiwz(F20, F21);
  fcfid( F22, F23);

  // PPC 1, section 4.6.7 Floating-Point Compare Instructions
  fcmpu( CCR7, F24, F25);

  tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", p2i(code()->insts_begin()), p2i(code()->insts_end()));
  code()->decode();
}

#endif // !PRODUCT