/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012, 2015 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

int AbstractAssembler::code_fill_byte() {
  return 0x00;                  // illegal instruction 0x00000000
}

void Assembler::print_instruction(int inst) {
  Unimplemented();
}

// Patch instruction `inst' at offset `inst_pos' to refer to
// `dest_pos' and return the resulting instruction.  We should have
// pcs, not offsets, but since all is relative, it will work out fine.
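// Only the branch displacement field is rewritten: the LI field of an
// unconditional branch (b) or the BD field of a conditional branch (bc);
// all other instruction bits are preserved.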
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
  int m = 0; // mask for displacement field
  int v = 0; // new value for displacement field

  switch (inv_op_ppc(inst)) {
  case b_op:  m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
  case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
  default: ShouldNotReachHere();
  }
  return inst & ~m | v;
}

// Return the offset, relative to _code_begin, of the destination of
// the branch inst at offset pos.
int Assembler::branch_destination(int inst, int pos) {
  int r = 0;
  switch (inv_op_ppc(inst)) {
    case b_op:  r = bxx_destination_offset(inst, pos); break;
    case bc_op: r = inv_bd_field(inst, pos); break;
    default: ShouldNotReachHere();
  }
  return r;
}

// Low-level one-instruction andi macro.
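// The special-case masks (2^n-1, 2^n, -2^n) are encoded with rotate/clear
// instructions, which do not set CR0 and are not limited to a 16-bit mask;
// only the general case falls back to the record form andi_, which requires
// a 16-bit unsigned immediate.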
void Assembler::andi(Register a, Register s, const long ui16) {
  if (is_power_of_2_long(((jlong) ui16)+1)) {
    // pow2minus1
    clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
  } else if (is_power_of_2_long((jlong) ui16)) {
    // pow2
    rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
  } else if (is_power_of_2_long((jlong)-ui16)) {
    // negpow2
    clrrdi(a, s, log2_long((jlong)-ui16));
  } else {
    assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
    andi_(a, s, ui16);
  }
}

// RegisterOrConstant version.
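// A constant operand is folded into the 16-bit signed displacement when it
// fits; otherwise it is first materialized in a register (the destination
// register for loads, the tmp register for stores) and the indexed x-form
// instruction is used.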
void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::ld(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::ld(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::ldx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::ld(d, 0, roc.as_register());
    else
      Assembler::ldx(d, roc.as_register(), s1);
  }
}

void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lwa(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lwa(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lwax(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lwa(d, 0, roc.as_register());
    else
      Assembler::lwax(d, roc.as_register(), s1);
  }
}

void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lwz(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lwz(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lwzx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lwz(d, 0, roc.as_register());
    else
      Assembler::lwzx(d, roc.as_register(), s1);
  }
}

void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lha(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lha(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lhax(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lha(d, 0, roc.as_register());
    else
      Assembler::lhax(d, roc.as_register(), s1);
  }
}

void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lhz(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lhz(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lhzx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lhz(d, 0, roc.as_register());
    else
      Assembler::lhzx(d, roc.as_register(), s1);
  }
}

void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
      Assembler::lbz(d, simm16_rest, d);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::lbz(d, roc.as_constant(), s1);
    } else {
      load_const_optimized(d, roc.as_constant());
      Assembler::lbzx(d, d, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::lbz(d, 0, roc.as_register());
    else
      Assembler::lbzx(d, roc.as_register(), s1);
  }
}

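// The store variants need a tmp register to materialize constants that do not
// fit into 16 bits, because the value register d must not be clobbered.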
void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::std(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::std(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::stdx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::std(d, 0, roc.as_register());
    else
      Assembler::stdx(d, roc.as_register(), s1);
  }
}

void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::stw(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::stw(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::stwx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::stw(d, 0, roc.as_register());
    else
      Assembler::stwx(d, roc.as_register(), s1);
  }
}

void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::sth(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::sth(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::sthx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::sth(d, 0, roc.as_register());
    else
      Assembler::sthx(d, roc.as_register(), s1);
  }
}

void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
  if (roc.is_constant()) {
    if (s1 == noreg) {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
      Assembler::stb(d, simm16_rest, tmp);
    } else if (is_simm(roc.as_constant(), 16)) {
      Assembler::stb(d, roc.as_constant(), s1);
    } else {
      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
      load_const_optimized(tmp, roc.as_constant());
      Assembler::stbx(d, tmp, s1);
    }
  } else {
    if (s1 == noreg)
      Assembler::stb(d, 0, roc.as_register());
    else
      Assembler::stbx(d, roc.as_register(), s1);
  }
}

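// Arithmetic and compare variants: a constant operand must fit into a signed
// 16-bit immediate.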
void Assembler::add(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    intptr_t c = roc.as_constant();
    assert(is_simm(c, 16), "too big");
    addi(d, s1, (int)c);
  }
  else add(d, roc.as_register(), s1);
}

void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    intptr_t c = roc.as_constant();
    assert(is_simm(-c, 16), "too big");
    addi(d, s1, (int)-c);
  }
  else subf(d, roc.as_register(), s1);
}

void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) {
  if (roc.is_constant()) {
    intptr_t c = roc.as_constant();
    assert(is_simm(c, 16), "too big");
    cmpdi(d, s1, (int)c);
  }
  else cmpd(d, roc.as_register(), s1);
}

// Load a 64 bit constant. Patchable.
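// Always emits a fixed-length sequence of five instructions regardless of the
// value, so the constant can later be located and patched in place.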
void Assembler::load_const(Register d, long x, Register tmp) {
  // 64-bit value: x = xa xb xc xd
  int xa = (x >> 48) & 0xffff;
  int xb = (x >> 32) & 0xffff;
  int xc = (x >> 16) & 0xffff;
  int xd = (x >>  0) & 0xffff;
  if (tmp == noreg) {
    Assembler::lis( d, (int)(short)xa);
    Assembler::ori( d, d, (unsigned int)xb);
    Assembler::sldi(d, d, 32);
    Assembler::oris(d, d, (unsigned int)xc);
    Assembler::ori( d, d, (unsigned int)xd);
  } else {
    // exploit instruction level parallelism if we have a tmp register
    assert_different_registers(d, tmp);
    Assembler::lis(tmp, (int)(short)xa);
    Assembler::lis(d, (int)(short)xc);
    Assembler::ori(tmp, tmp, (unsigned int)xb);
    Assembler::ori(d, d, (unsigned int)xd);
    Assembler::insrdi(d, tmp, 32, 0);
  }
}

// Load a 64-bit constant, optimized, not identifiable: the emitted sequence
// depends on the value, so it cannot later be recognized or patched as a
// constant load. Tmp can be used to increase ILP. Set return_simm16_rest = true
// to have the low 16 bits returned as an immediate offset for a subsequent
// memory instruction instead of being materialized.
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
  // Avoid accidentally trying to use R0 for indexed addressing.
  assert_different_registers(d, tmp);

  short xa, xb, xc, xd; // Four 16-bit chunks of const.
  long rem = x;         // Remaining part of const.

  xd = rem & 0xFFFF;    // Lowest 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.

  if (rem == 0) { // opt 1: simm16
    li(d, xd);
    return 0;
  }

  int retval = 0;
  if (return_simm16_rest) {
    retval = xd;
    x = rem << 16;
    xd = 0;
  }

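  // d == R0 needs special handling: addi/addis interpret rA == R0 as the
  // constant zero rather than the register, so only li/lis and the logical
  // instructions (ori/oris) can be used to build the value.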
  if (d == R0) { // Can't use addi.
    if (is_simm(x, 32)) { // opt 2: simm32
      lis(d, x >> 16);
      if (xd) ori(d, d, (unsigned short)xd);
    } else {
      // 64-bit value: x = xa xb xc xd
      xa = (x >> 48) & 0xffff;
      xb = (x >> 32) & 0xffff;
      xc = (x >> 16) & 0xffff;
      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
      if (tmp == noreg || (xc == 0 && xd == 0)) {
        if (xa_loaded) {
          lis(d, xa);
          if (xb) { ori(d, d, (unsigned short)xb); }
        } else {
          li(d, xb);
        }
        sldi(d, d, 32);
        if (xc) { oris(d, d, (unsigned short)xc); }
        if (xd) { ori( d, d, (unsigned short)xd); }
      } else {
        // Exploit instruction level parallelism if we have a tmp register.
        bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
        if (xa_loaded) {
          lis(tmp, xa);
        }
        if (xc_loaded) {
          lis(d, xc);
        }
        if (xa_loaded) {
          if (xb) { ori(tmp, tmp, (unsigned short)xb); }
        } else {
          li(tmp, xb);
        }
        if (xc_loaded) {
          if (xd) { ori(d, d, (unsigned short)xd); }
        } else {
          li(d, xd);
        }
        insrdi(d, tmp, 32, 0);
      }
    }
    return retval;
  }

  xc = rem & 0xFFFF; // Next 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.

  if (rem == 0) { // opt 2: simm32
    lis(d, xc);
  } else { // High 32 bits needed.

    if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
      // No carry propagation between xc and higher chunks here (use logical instructions).
      xa = (x >> 48) & 0xffff;
      xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
      bool return_xd = false;

      if (xa_loaded) { lis(tmp, xa); }
      if (xc) { lis(d, xc); }
      if (xa_loaded) {
        if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
      } else {
        li(tmp, xb);
      }
      if (xc) {
        if (xd) { addi(d, d, xd); }
      } else {
        li(d, xd);
      }
      insrdi(d, tmp, 32, 0);
      return retval;
    }

    xb = rem & 0xFFFF; // Next 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.

    xa = rem & 0xFFFF; // Highest 16-bit chunk.

    // opt 4: avoid adding 0
    if (xa) { // Highest 16-bit needed?
      lis(d, xa);
      if (xb) { addi(d, d, xb); }
    } else {
      li(d, xb);
    }
    sldi(d, d, 32);
    if (xc) { addis(d, d, xc); }
  }

  if (xd) { addi(d, d, xd); }
  return retval;
}

// Add a 64-bit constant to s, placing the result in d. Only one instruction
// that depends on s is emitted, to keep the latency of the s dependency chain low.
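// With return_simm16_rest, the low 16 bits of x may be returned instead of
// being added; the caller can then fold them into a displacement.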
int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
  assert(s != R0 && s != tmp, "unsupported");
  long rem = x;

  // Case 1: Can use mr or addi.
  short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xd >> 15);
  if (rem == 0) {
    if (xd == 0) {
      if (d != s) { mr(d, s); }
      return 0;
    }
    if (return_simm16_rest) {
      return xd;
    }
    addi(d, s, xd);
    return 0;
  }

  // Case 2: Can use addis.
  if (xd == 0) {
    short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensate for the sign of xc (xd is 0 here).
    if (rem == 0) {
      addis(d, s, xc);
      return 0;
    }
  }

  // Other cases: load & add.
  Register tmp1 = tmp,
           tmp2 = noreg;
  if ((d != tmp) && (d != s)) {
    // Can use d.
    tmp1 = d;
    tmp2 = tmp;
  }
  int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
  add(d, tmp1, s);
  return simm16_rest;
}

#ifndef PRODUCT
// Test of ppc assembler.
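// Emits one instance of (nearly) every encoder and prints the disassembly of
// the generated code; the emitted code is only inspected, never executed.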
void Assembler::test_asm() {
  // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
  addi(   R0,  R1,  10);
  addis(  R5,  R2,  11);
  addic_( R3,  R31, 42);
  subfic( R21, R12, 2112);
  add(    R3,  R2,  R1);
  add_(   R11, R22, R30);
  subf(   R7,  R6,  R5);
  subf_(  R8,  R9,  R4);
  addc(   R11, R12, R13);
  addc_(  R14, R14, R14);
  subfc(  R15, R16, R17);
  subfc_( R18, R20, R19);
  adde(   R20, R22, R24);
  adde_(  R29, R27, R26);
  subfe(  R28, R1,  R0);
  subfe_( R21, R11, R29);
  neg(    R21, R22);
  neg_(   R13, R23);
  mulli(  R0,  R11, -31);
  mulld(  R1,  R18, R21);
  mulld_( R2,  R17, R22);
  mullw(  R3,  R16, R23);
  mullw_( R4,  R15, R24);
  divd(   R5,  R14, R25);
  divd_(  R6,  R13, R26);
  divw(   R7,  R12, R27);
  divw_(  R8,  R11, R28);

  li(     R3, -4711);

  // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
  cmpi(   CCR7,  0, R27, 4711);
  cmp(    CCR0,  1, R14, R11);
  cmpli(  CCR5,  1, R17, 45);
  cmpl(   CCR3,  0, R9,  R10);

  cmpwi(  CCR7,  R27, 4711);
  cmpw(   CCR0,  R14, R11);
  cmplwi( CCR5,  R17, 45);
  cmplw(  CCR3,  R9,  R10);

  cmpdi(  CCR7,  R27, 4711);
  cmpd(   CCR0,  R14, R11);
  cmpldi( CCR5,  R17, 45);
  cmpld(  CCR3,  R9,  R10);

  // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
  andi_(  R4,  R5,  0xff);
  andis_( R12, R13, 0x7b51);
  ori(    R1,  R4,  13);
  oris(   R3,  R5,  177);
  xori(   R7,  R6,  51);
  xoris(  R29, R0,  1);
  andr(   R17, R21, R16);
  and_(   R3,  R5,  R15);
  orr(    R2,  R1,  R9);
  or_(    R17, R15, R11);
  xorr(   R19, R18, R10);
  xor_(   R31, R21, R11);
  nand(   R5,  R7,  R3);
  nand_(  R3,  R1,  R0);
  nor(    R2,  R3,  R5);
  nor_(   R3,  R6,  R8);
  andc(   R25, R12, R11);
  andc_(  R24, R22, R21);
  orc(    R20, R10, R12);
  orc_(   R22, R2,  R13);

  nop();

  // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
  sld(    R5,  R6,  R8);
  sld_(   R3,  R5,  R9);
  slw(    R2,  R1,  R10);
  slw_(   R6,  R26, R16);
  srd(    R16, R24, R8);
  srd_(   R21, R14, R7);
  srw(    R22, R25, R29);
  srw_(   R5,  R18, R17);
  srad(   R7,  R11, R0);
  srad_(  R9,  R13, R1);
  sraw(   R7,  R15, R2);
  sraw_(  R4,  R17, R3);
  sldi(   R3,  R18, 63);
  sldi_(  R2,  R20, 30);
  slwi(   R1,  R21, 30);
  slwi_(  R7,  R23, 8);
  srdi(   R0,  R19, 2);
  srdi_(  R12, R24, 5);
  srwi(   R13, R27, 6);
  srwi_(  R14, R29, 7);
  sradi(  R15, R30, 9);
  sradi_( R16, R31, 19);
  srawi(  R17, R31, 15);
  srawi_( R18, R31, 12);

  clrrdi( R3, R30, 5);
  clrldi( R9, R10, 11);

  rldicr( R19, R20, 13, 15);
  rldicr_(R20, R20, 16, 14);
  rldicl( R21, R21, 30, 33);
  rldicl_(R22, R1,  20, 25);
  rlwinm( R23, R2,  25, 10, 11);
  rlwinm_(R24, R3,  12, 13, 14);

  // PPC 1, section 3.3.2 Fixed-Point Load Instructions
  lwzx(   R3,  R5, R7);
  lwz(    R11,  0, R1);
  lwzu(   R31, -4, R11);

  lwax(   R3,  R5, R7);
  lwa(    R31, -4, R11);
  lhzx(   R3,  R5, R7);
  lhz(    R31, -4, R11);
  lhzu(   R31, -4, R11);

  lhax(   R3,  R5, R7);
  lha(    R31, -4, R11);
  lhau(   R11,  0, R1);

  lbzx(   R3,  R5, R7);
  lbz(    R31, -4, R11);
  lbzu(   R11,  0, R1);

  ld(     R31, -4, R11);
  ldx(    R3,  R5, R7);
  ldu(    R31, -4, R11);

  // PPC 1, section 3.3.3 Fixed-Point Store Instructions
  stwx(   R3,  R5, R7);
  stw(    R31, -4, R11);
  stwu(   R11,  0, R1);

  sthx(   R3,  R5, R7);
  sth(    R31, -4, R11);
  sthu(   R31, -4, R11);

  stbx(   R3,  R5, R7);
  stb(    R31, -4, R11);
  stbu(   R31, -4, R11);

  std(    R31, -4, R11);
  stdx(   R3,  R5, R7);
  stdu(   R31, -4, R11);

  // PPC 1, section 3.3.13 Move To/From System Register Instructions
  mtlr(   R3);
  mflr(   R3);
  mtctr(  R3);
  mfctr(  R3);
  mtcrf(  0xff, R15);
  mtcr(   R15);
  mtcrf(  0x03, R15);
  mtcr(   R15);
  mfcr(   R15);

  // PPC 1, section 2.4.1 Branch Instructions
  Label lbl1, lbl2, lbl3;
  bind(lbl1);

  b(pc());
  b(pc() - 8);
  b(lbl1);
  b(lbl2);
  b(lbl3);

  bl(pc() - 8);
  bl(lbl1);
  bl(lbl2);

  bcl(4, 10, pc() - 8);
  bcl(4, 10, lbl1);
  bcl(4, 10, lbl2);

  bclr( 4, 6, 0);
  bclrl(4, 6, 0);

  bind(lbl2);

  bcctr( 4, 6, 0);
  bcctrl(4, 6, 0);

  blt(CCR0, lbl2);
  bgt(CCR1, lbl2);
  beq(CCR2, lbl2);
  bso(CCR3, lbl2);
  bge(CCR4, lbl2);
  ble(CCR5, lbl2);
  bne(CCR6, lbl2);
  bns(CCR7, lbl2);

  bltl(CCR0, lbl2);
  bgtl(CCR1, lbl2);
  beql(CCR2, lbl2);
  bsol(CCR3, lbl2);
  bgel(CCR4, lbl2);
  blel(CCR5, lbl2);
  bnel(CCR6, lbl2);
  bnsl(CCR7, lbl2);
  blr();

  sync();
  icbi( R1, R2);
  dcbst(R2, R3);

  // FLOATING POINT instructions ppc.
  // PPC 1, section 4.6.2 Floating-Point Load Instructions
  lfs( F1, -11, R3);
  lfsu(F2, 123, R4);
  lfsx(F3, R5,  R6);
  lfd( F4, 456, R7);
  lfdu(F5, 789, R8);
  lfdx(F6, R10, R11);

  // PPC 1, section 4.6.3 Floating-Point Store Instructions
  stfs(  F7,  876, R12);
  stfsu( F8,  543, R13);
  stfsx( F9,  R14, R15);
  stfd(  F10, 210, R16);
  stfdu( F11, 111, R17);
  stfdx( F12, R18, R19);

  // PPC 1, section 4.6.4 Floating-Point Move Instructions
  fmr(   F13, F14);
  fmr_(  F14, F15);
  fneg(  F16, F17);
  fneg_( F18, F19);
  fabs(  F20, F21);
  fabs_( F22, F23);
  fnabs( F24, F25);
  fnabs_(F26, F27);

  // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic
  // Instructions
  fadd(  F28, F29, F30);
  fadd_( F31, F0,  F1);
  fadds( F2,  F3,  F4);
  fadds_(F5,  F6,  F7);
  fsub(  F8,  F9,  F10);
  fsub_( F11, F12, F13);
  fsubs( F14, F15, F16);
  fsubs_(F17, F18, F19);
  fmul(  F20, F21, F22);
  fmul_( F23, F24, F25);
  fmuls( F26, F27, F28);
  fmuls_(F29, F30, F31);
  fdiv(  F0,  F1,  F2);
  fdiv_( F3,  F4,  F5);
  fdivs( F6,  F7,  F8);
  fdivs_(F9,  F10, F11);

  // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion
  // Instructions
  frsp(  F12, F13);
  fctid( F14, F15);
  fctidz(F16, F17);
  fctiw( F18, F19);
  fctiwz(F20, F21);
  fcfid( F22, F23);

  // PPC 1, section 4.6.7 Floating-Point Compare Instructions
  fcmpu( CCR7, F24, F25);

  tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", p2i(code()->insts_begin()), p2i(code()->insts_end()));
  code()->decode();
}

#endif // !PRODUCT