1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * Copyright 2012, 2014 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
68 case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
69 case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
70 default: ShouldNotReachHere();
71 }
72 return inst & ~m | v;
73 }
74
75 // Return the offset, relative to _code_begin, of the destination of
76 // the branch inst at offset pos.
77 int Assembler::branch_destination(int inst, int pos) {
78 int r = 0;
79 switch (inv_op_ppc(inst)) {
80 case b_op: r = bxx_destination_offset(inst, pos); break;
81 case bc_op: r = inv_bd_field(inst, pos); break;
82 default: ShouldNotReachHere();
83 }
84 return r;
85 }
86
87 // Low-level andi-one-instruction-macro.
88 void Assembler::andi(Register a, Register s, const int ui16) {
89 assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
90 if (is_power_of_2_long(((jlong) ui16)+1)) {
91 // pow2minus1
92 clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
93 } else if (is_power_of_2_long((jlong) ui16)) {
94 // pow2
95 rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
96 } else if (is_power_of_2_long((jlong)-ui16)) {
97 // negpow2
98 clrrdi(a, s, log2_long((jlong)-ui16));
99 } else {
100 andi_(a, s, ui16);
101 }
102 }
103
104 // RegisterOrConstant version.
105 void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
106 if (roc.is_constant()) {
107 if (s1 == noreg) {
108 int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
109 Assembler::ld(d, simm16_rest, d);
110 } else if (is_simm(roc.as_constant(), 16)) {
111 Assembler::ld(d, roc.as_constant(), s1);
112 } else {
113 load_const_optimized(d, roc.as_constant());
114 Assembler::ldx(d, d, s1);
115 }
116 } else {
117 if (s1 == noreg)
118 Assembler::ld(d, 0, roc.as_register());
119 else
339 Assembler::ori( d, d, (unsigned int)xb);
340 Assembler::sldi(d, d, 32);
341 Assembler::oris(d, d, (unsigned int)xc);
342 Assembler::ori( d, d, (unsigned int)xd);
343 } else {
344 // exploit instruction level parallelism if we have a tmp register
345 assert_different_registers(d, tmp);
346 Assembler::lis(tmp, (int)(short)xa);
347 Assembler::lis(d, (int)(short)xc);
348 Assembler::ori(tmp, tmp, (unsigned int)xb);
349 Assembler::ori(d, d, (unsigned int)xd);
350 Assembler::insrdi(d, tmp, 32, 0);
351 }
352 }
353
// Load a 64-bit constant, optimized, not identifiable (the emitted
// instruction sequence depends on the value).
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
// 16-bit immediate offset to be folded into a subsequent instruction.
// Decomposes x into four 16-bit chunks (xa:xb:xc:xd, high to low) and emits
// the shortest li/lis/addi/addis/sldi/ori/insrdi sequence that materializes
// x in d.  If return_simm16_rest is true, the low 16 bits may be omitted
// from the emitted sequence and returned instead, so the caller can fold
// them into the displacement field of a following d-form instruction.
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
  // Avoid accidentally trying to use R0 for indexed addressing.
  assert(d != R0, "R0 not allowed");
  assert_different_registers(d, tmp);

  short xa, xb, xc, xd; // Four 16-bit chunks of const.
  long rem = x; // Remaining part of const.

  xd = rem & 0xFFFF; // Lowest 16-bit chunk.
  // li/addi sign-extend the low chunk; if its sign bit is set, add 1 to the
  // remaining upper part to compensate for the implicit -0x10000.
  rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.

  if (rem == 0) { // opt 1: simm16
    li(d, xd);
    return 0;
  }

  xc = rem & 0xFFFF; // Next 16-bit chunk.
  rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.

  if (rem == 0) { // opt 2: simm32
    lis(d, xc);
  } else { // High 32 bits needed.

    if (tmp != noreg) { // opt 3: We have a temp reg.
      // Build the high word in tmp and the low word in d independently
      // (instruction-level parallelism), then merge them with insrdi.
      // No carry propagation between xc and higher chunks here (use logical instructions).
      xa = (x >> 48) & 0xffff;
      xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
      // li(tmp, xb) sign-extends; an explicit lis of xa is only needed when
      // xa is non-zero or that sign extension would be wrong (xb negative).
      bool load_xa = (xa != 0) || (xb < 0);
      bool return_xd = false;

      if (load_xa) { lis(tmp, xa); }
      if (xc) { lis(d, xc); }
      if (load_xa) {
        if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
      } else {
        li(tmp, xb); // non-negative
      }
      if (xc) {
        if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
        else if (xd) { addi(d, d, xd); }
      } else {
        li(d, xd);
      }
      insrdi(d, tmp, 32, 0); // Insert high word (tmp) into bits 0..31 of d.
      return return_xd ? xd : 0; // non-negative
    }

    // No temp register: build the value serially in d, highest chunks first.
    xb = rem & 0xFFFF; // Next 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.

    xa = rem & 0xFFFF; // Highest 16-bit chunk.

    // opt 4: avoid adding 0
    if (xa) { // Highest 16-bit needed?
      lis(d, xa);
      if (xb) { addi(d, d, xb); }
    } else {
      li(d, xb);
    }
    sldi(d, d, 32);
    if (xc) { addis(d, d, xc); }
  }

  // opt 5: Return offset to be inserted into following instruction.
  if (return_simm16_rest) return xd;

  if (xd) { addi(d, d, xd); }
  return 0;
}
426
427 #ifndef PRODUCT
428 // Test of ppc assembler.
429 void Assembler::test_asm() {
430 // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
431 addi( R0, R1, 10);
432 addis( R5, R2, 11);
433 addic_( R3, R31, 42);
434 subfic( R21, R12, 2112);
435 add( R3, R2, R1);
436 add_( R11, R22, R30);
437 subf( R7, R6, R5);
438 subf_( R8, R9, R4);
439 addc( R11, R12, R13);
440 addc_( R14, R14, R14);
441 subfc( R15, R16, R17);
442 subfc_( R18, R20, R19);
443 adde( R20, R22, R24);
444 adde_( R29, R27, R26);
|
1 /*
2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3 * Copyright 2012, 2015 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
68 case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
69 case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
70 default: ShouldNotReachHere();
71 }
72 return inst & ~m | v;
73 }
74
75 // Return the offset, relative to _code_begin, of the destination of
76 // the branch inst at offset pos.
77 int Assembler::branch_destination(int inst, int pos) {
78 int r = 0;
79 switch (inv_op_ppc(inst)) {
80 case b_op: r = bxx_destination_offset(inst, pos); break;
81 case bc_op: r = inv_bd_field(inst, pos); break;
82 default: ShouldNotReachHere();
83 }
84 return r;
85 }
86
87 // Low-level andi-one-instruction-macro.
88 void Assembler::andi(Register a, Register s, const long ui16) {
89 if (is_power_of_2_long(((jlong) ui16)+1)) {
90 // pow2minus1
91 clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
92 } else if (is_power_of_2_long((jlong) ui16)) {
93 // pow2
94 rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
95 } else if (is_power_of_2_long((jlong)-ui16)) {
96 // negpow2
97 clrrdi(a, s, log2_long((jlong)-ui16));
98 } else {
99 assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
100 andi_(a, s, ui16);
101 }
102 }
103
104 // RegisterOrConstant version.
105 void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
106 if (roc.is_constant()) {
107 if (s1 == noreg) {
108 int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
109 Assembler::ld(d, simm16_rest, d);
110 } else if (is_simm(roc.as_constant(), 16)) {
111 Assembler::ld(d, roc.as_constant(), s1);
112 } else {
113 load_const_optimized(d, roc.as_constant());
114 Assembler::ldx(d, d, s1);
115 }
116 } else {
117 if (s1 == noreg)
118 Assembler::ld(d, 0, roc.as_register());
119 else
339 Assembler::ori( d, d, (unsigned int)xb);
340 Assembler::sldi(d, d, 32);
341 Assembler::oris(d, d, (unsigned int)xc);
342 Assembler::ori( d, d, (unsigned int)xd);
343 } else {
344 // exploit instruction level parallelism if we have a tmp register
345 assert_different_registers(d, tmp);
346 Assembler::lis(tmp, (int)(short)xa);
347 Assembler::lis(d, (int)(short)xc);
348 Assembler::ori(tmp, tmp, (unsigned int)xb);
349 Assembler::ori(d, d, (unsigned int)xd);
350 Assembler::insrdi(d, tmp, 32, 0);
351 }
352 }
353
// Load a 64-bit constant, optimized, not identifiable (the emitted
// instruction sequence depends on the value).
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
// 16-bit immediate offset to be folded into a subsequent instruction.
357 int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
358 // Avoid accidentally trying to use R0 for indexed addressing.
359 assert_different_registers(d, tmp);
360
361 short xa, xb, xc, xd; // Four 16-bit chunks of const.
362 long rem = x; // Remaining part of const.
363
364 xd = rem & 0xFFFF; // Lowest 16-bit chunk.
365 rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.
366
367 if (rem == 0) { // opt 1: simm16
368 li(d, xd);
369 return 0;
370 }
371
372 int retval = 0;
373 if (return_simm16_rest) {
374 retval = xd;
375 x = rem << 16;
376 xd = 0;
377 }
378
379 if (d == R0) { // Can't use addi.
380 if (is_simm(x, 32)) { // opt 2: simm32
381 lis(d, x >> 16);
382 if (xd) ori(d, d, (unsigned short)xd);
383 } else {
384 // 64-bit value: x = xa xb xc xd
385 xa = (x >> 48) & 0xffff;
386 xb = (x >> 32) & 0xffff;
387 xc = (x >> 16) & 0xffff;
388 bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
389 if (tmp == noreg || (xc == 0 && xd == 0)) {
390 if (xa_loaded) {
391 lis(d, xa);
392 if (xb) { ori(d, d, (unsigned short)xb); }
393 } else {
394 li(d, xb);
395 }
396 sldi(d, d, 32);
397 if (xc) { oris(d, d, (unsigned short)xc); }
398 if (xd) { ori( d, d, (unsigned short)xd); }
399 } else {
400 // Exploit instruction level parallelism if we have a tmp register.
401 bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
402 if (xa_loaded) {
403 lis(tmp, xa);
404 }
405 if (xc_loaded) {
406 lis(d, xc);
407 }
408 if (xa_loaded) {
409 if (xb) { ori(tmp, tmp, (unsigned short)xb); }
410 } else {
411 li(tmp, xb);
412 }
413 if (xc_loaded) {
414 if (xd) { ori(d, d, (unsigned short)xd); }
415 } else {
416 li(d, xd);
417 }
418 insrdi(d, tmp, 32, 0);
419 }
420 }
421 return retval;
422 }
423
424 xc = rem & 0xFFFF; // Next 16-bit chunk.
425 rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
426
427 if (rem == 0) { // opt 2: simm32
428 lis(d, xc);
429 } else { // High 32 bits needed.
430
431 if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
432 // No carry propagation between xc and higher chunks here (use logical instructions).
433 xa = (x >> 48) & 0xffff;
434 xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
435 bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
436 bool return_xd = false;
437
438 if (xa_loaded) { lis(tmp, xa); }
439 if (xc) { lis(d, xc); }
440 if (xa_loaded) {
441 if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
442 } else {
443 li(tmp, xb);
444 }
445 if (xc) {
446 if (xd) { addi(d, d, xd); }
447 } else {
448 li(d, xd);
449 }
450 insrdi(d, tmp, 32, 0);
451 return retval;
452 }
453
454 xb = rem & 0xFFFF; // Next 16-bit chunk.
455 rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
456
457 xa = rem & 0xFFFF; // Highest 16-bit chunk.
458
459 // opt 4: avoid adding 0
460 if (xa) { // Highest 16-bit needed?
461 lis(d, xa);
462 if (xb) { addi(d, d, xb); }
463 } else {
464 li(d, xb);
465 }
466 sldi(d, d, 32);
467 if (xc) { addis(d, d, xc); }
468 }
469
470 if (xd) { addi(d, d, xd); }
471 return retval;
472 }
473
474 // We emit only one addition to s to optimize latency.
475 int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
476 assert(s != R0 && s != tmp, "unsupported");
477 long rem = x;
478
479 // Case 1: Can use mr or addi.
480 short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
481 rem = (rem >> 16) + ((unsigned short)xd >> 15);
482 if (rem == 0) {
483 if (xd == 0) {
484 if (d != s) { mr(d, s); }
485 return 0;
486 }
487 if (return_simm16_rest) {
488 return xd;
489 }
490 addi(d, s, xd);
491 return 0;
492 }
493
494 // Case 2: Can use addis.
495 if (xd == 0) {
496 short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
497 rem = (rem >> 16) + ((unsigned short)xd >> 15);
498 if (rem == 0) {
499 addis(d, s, xc);
500 return 0;
501 }
502 }
503
504 // Other cases: load & add.
505 Register tmp1 = tmp,
506 tmp2 = noreg;
507 if ((d != tmp) && (d != s)) {
508 // Can use d.
509 tmp1 = d;
510 tmp2 = tmp;
511 }
512 int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
513 add(d, tmp1, s);
514 return simm16_rest;
515 }
516
517 #ifndef PRODUCT
518 // Test of ppc assembler.
519 void Assembler::test_asm() {
520 // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
521 addi( R0, R1, 10);
522 addis( R5, R2, 11);
523 addic_( R3, R31, 42);
524 subfic( R21, R12, 2112);
525 add( R3, R2, R1);
526 add_( R11, R22, R30);
527 subf( R7, R6, R5);
528 subf_( R8, R9, R4);
529 addc( R11, R12, R13);
530 addc_( R14, R14, R14);
531 subfc( R15, R16, R17);
532 subfc_( R18, R20, R19);
533 adde( R20, R22, R24);
534 adde_( R29, R27, R26);
|