< prev index next >
src/cpu/ppc/vm/assembler_ppc.cpp
Print this page
rev 8109 : 8077838: Recent developments for ppc.
Reviewed-by: kvn
*** 1,8 ****
/*
! * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
! * Copyright 2012, 2014 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
--- 1,8 ----
/*
! * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
! * Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*** 83,104 ****
}
return r;
}
// Low-level andi-one-instruction-macro.
// Makes exactly one emitter call for mask ui16, source s and target a:
// clrldi, rlwinm or clrrdi when the mask matches one of the shapes tested
// below, the plain andi_ otherwise.
! void Assembler::andi(Register a, Register s, const int ui16) {
// Checked unconditionally, i.e. before any of the special shapes is tried.
! assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
if (is_power_of_2_long(((jlong) ui16)+1)) {
// pow2minus1: ui16+1 passed the power-of-2 test; clrldi gets 64-log2(ui16+1).
clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
} else if (is_power_of_2_long((jlong) ui16)) {
// pow2: rlwinm gets rotate count 0 and 31-log2(ui16) as both mask bounds.
rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
} else if (is_power_of_2_long((jlong)-ui16)) {
// negpow2: -ui16 passed the power-of-2 test; clrrdi gets log2(-ui16).
// NOTE(review): if the assert above limits ui16 to 0..0xFFFF, -ui16 is never
// positive and this branch looks unreachable - confirm.
clrrdi(a, s, log2_long((jlong)-ui16));
} else {
// No special shape matched: use the immediate form directly.
andi_(a, s, ui16);
}
}
// RegisterOrConstant version.
--- 83,104 ----
}
return r;
}
// Low-level andi-one-instruction-macro.
// Makes exactly one emitter call for mask ui16, source s and target a:
// clrldi, rlwinm or clrrdi when the mask matches one of the shapes tested
// below, the plain andi_ otherwise. Only the andi_ fallback asserts that the
// mask is a 16-bit unsigned immediate; masks taking one of the first three
// branches are not range-checked here.
! void Assembler::andi(Register a, Register s, const long ui16) {
if (is_power_of_2_long(((jlong) ui16)+1)) {
// pow2minus1: ui16+1 passed the power-of-2 test; clrldi gets 64-log2(ui16+1).
// NOTE(review): for ui16 == 0 this tests the value 1; if that counts as a
// power of 2 the count is 64-log2_long(1) - confirm clrldi accepts it.
clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
} else if (is_power_of_2_long((jlong) ui16)) {
// pow2: rlwinm gets rotate count 0 and 31-log2(ui16) as both mask bounds.
// NOTE(review): 31-log2(ui16) goes negative once log2(ui16) exceeds 31 -
// confirm callers never pass such a mask.
rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
} else if (is_power_of_2_long((jlong)-ui16)) {
// negpow2: -ui16 passed the power-of-2 test; clrrdi gets log2(-ui16).
clrrdi(a, s, log2_long((jlong)-ui16));
} else {
// No special shape matched: the mask must fit andi_'s immediate operand.
+ assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
andi_(a, s, ui16);
}
}
// RegisterOrConstant version.
*** 354,364 ****
// Load a 64 bit constant, optimized, not identifiable.
// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
// 16 bit immediate offset.
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
// Avoid accidentally trying to use R0 for indexed addressing.
- assert(d != R0, "R0 not allowed");
assert_different_registers(d, tmp);
short xa, xb, xc, xd; // Four 16-bit chunks of const.
long rem = x; // Remaining part of const.
--- 354,363 ----
*** 368,406 ****
if (rem == 0) { // opt 1: simm16
li(d, xd);
return 0;
}
xc = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
if (rem == 0) { // opt 2: simm32
lis(d, xc);
} else { // High 32 bits needed.
! if (tmp != noreg) { // opt 3: We have a temp reg.
// No carry propagation between xc and higher chunks here (use logical instructions).
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
! bool load_xa = (xa != 0) || (xb < 0);
bool return_xd = false;
! if (load_xa) { lis(tmp, xa); }
if (xc) { lis(d, xc); }
! if (load_xa) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
} else {
! li(tmp, xb); // non-negative
}
if (xc) {
! if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
! else if (xd) { addi(d, d, xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
! return return_xd ? xd : 0; // non-negative
}
xb = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
--- 367,456 ----
if (rem == 0) { // opt 1: simm16
li(d, xd);
return 0;
}
+ int retval = 0;
+ if (return_simm16_rest) {
+ retval = xd;
+ x = rem << 16;
+ xd = 0;
+ }
+
+ if (d == R0) { // Can't use addi.
+ if (is_simm(x, 32)) { // opt 2: simm32
+ lis(d, x >> 16);
+ if (xd) ori(d, d, (unsigned short)xd);
+ } else {
+ // 64-bit value: x = xa xb xc xd
+ xa = (x >> 48) & 0xffff;
+ xb = (x >> 32) & 0xffff;
+ xc = (x >> 16) & 0xffff;
+ bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
+ if (tmp == noreg || (xc == 0 && xd == 0)) {
+ if (xa_loaded) {
+ lis(d, xa);
+ if (xb) { ori(d, d, (unsigned short)xb); }
+ } else {
+ li(d, xb);
+ }
+ sldi(d, d, 32);
+ if (xc) { oris(d, d, (unsigned short)xc); }
+ if (xd) { ori( d, d, (unsigned short)xd); }
+ } else {
+ // Exploit instruction level parallelism if we have a tmp register.
+ bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
+ if (xa_loaded) {
+ lis(tmp, xa);
+ }
+ if (xc_loaded) {
+ lis(d, xc);
+ }
+ if (xa_loaded) {
+ if (xb) { ori(tmp, tmp, (unsigned short)xb); }
+ } else {
+ li(tmp, xb);
+ }
+ if (xc_loaded) {
+ if (xd) { ori(d, d, (unsigned short)xd); }
+ } else {
+ li(d, xd);
+ }
+ insrdi(d, tmp, 32, 0);
+ }
+ }
+ return retval;
+ }
+
xc = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
if (rem == 0) { // opt 2: simm32
lis(d, xc);
} else { // High 32 bits needed.
! if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
// No carry propagation between xc and higher chunks here (use logical instructions).
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
! bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
bool return_xd = false;
! if (xa_loaded) { lis(tmp, xa); }
if (xc) { lis(d, xc); }
! if (xa_loaded) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
} else {
! li(tmp, xb);
}
if (xc) {
! if (xd) { addi(d, d, xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
! return retval;
}
xb = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
*** 415,429 ****
}
sldi(d, d, 32);
if (xc) { addis(d, d, xc); }
}
- // opt 5: Return offset to be inserted into following instruction.
- if (return_simm16_rest) return xd;
-
if (xd) { addi(d, d, xd); }
return 0;
}
#ifndef PRODUCT
// Test of ppc assembler.
void Assembler::test_asm() {
--- 465,519 ----
}
sldi(d, d, 32);
if (xc) { addis(d, d, xc); }
}
if (xd) { addi(d, d, xd); }
+ return retval;
+ }
+
+ // We emit only one addition to s to optimize latency.
+ //
+ // Emits code that adds the constant x to s with the result in d.
+ // s must be neither R0 nor tmp (asserted). tmp serves as scratch register.
+ // If return_simm16_rest is true, a 16-bit signed part of x may be left out
+ // of the emitted code and returned instead, so that the caller can insert it
+ // into a following instruction; then d + return value == s + x.
+ // Returns 0 whenever d holds the complete sum.
+ int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
+ assert(s != R0 && s != tmp, "unsupported");
+ long rem = x;
+
+ // Case 1: Can use mr or addi.
+ short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
+ rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.
+ if (rem == 0) {
+ if (xd == 0) {
+ if (d != s) { mr(d, s); }
return 0;
+ }
+ // Handing xd back without emitting code is only correct if d already is s;
+ // otherwise d would never be written, so fall through to the addi.
+ if (return_simm16_rest && (d == s)) {
+ return xd;
+ }
+ addi(d, s, xd);
+ return 0;
+ }
+
+ // Case 2: Can use addis.
+ if (xd == 0) {
+ short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
+ // Compensation for sign extend of xc (xd is 0 here and carries nothing).
+ // Without it a chunk with bit 15 set, e.g. x == 0x80000000, would pass
+ // the rem == 0 test and addis would add the negative value of xc.
+ rem = (rem >> 16) + ((unsigned short)xc >> 15);
+ if (rem == 0) {
+ addis(d, s, xc);
+ return 0;
+ }
+ }
+
+ // Other cases: load & add.
+ // By default the constant is loaded into tmp without a scratch register.
+ Register tmp1 = tmp,
+ tmp2 = noreg;
+ if ((d != tmp) && (d != s)) {
+ // Can use d.
+ // Loading into d frees tmp to act as scratch register for the load.
+ tmp1 = d;
+ tmp2 = tmp;
+ }
+ // The load may hold back a 16-bit rest, which is passed on to our caller.
+ int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
+ add(d, tmp1, s);
+ return simm16_rest;
}
#ifndef PRODUCT
// Test of ppc assembler.
void Assembler::test_asm() {
< prev index next >