
src/cpu/ppc/vm/assembler_ppc.cpp

rev 8109 : 8077838: Recent developments for ppc.
Reviewed-by: kvn

*** 1,8 ****
  /*
!  * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
!  * Copyright 2012, 2014 SAP AG. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.
--- 1,8 ----
  /*
!  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
!  * Copyright 2012, 2015 SAP AG. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.
*** 83,104 ****
    }
    return r;
  }
  
  // Low-level andi-one-instruction-macro.
! void Assembler::andi(Register a, Register s, const int ui16) {
!   assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
    if (is_power_of_2_long(((jlong) ui16)+1)) {
      // pow2minus1
      clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
    } else if (is_power_of_2_long((jlong) ui16)) {
      // pow2
      rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
    } else if (is_power_of_2_long((jlong)-ui16)) {
      // negpow2
      clrrdi(a, s, log2_long((jlong)-ui16));
    } else {
      andi_(a, s, ui16);
    }
  }
  
  // RegisterOrConstant version.
--- 83,104 ----
    }
    return r;
  }
  
  // Low-level andi-one-instruction-macro.
! void Assembler::andi(Register a, Register s, const long ui16) {
    if (is_power_of_2_long(((jlong) ui16)+1)) {
      // pow2minus1
      clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
    } else if (is_power_of_2_long((jlong) ui16)) {
      // pow2
      rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
    } else if (is_power_of_2_long((jlong)-ui16)) {
      // negpow2
      clrrdi(a, s, log2_long((jlong)-ui16));
    } else {
+     assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
      andi_(a, s, ui16);
    }
  }
  
  // RegisterOrConstant version.
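
Background on this hunk: PPC has no non-record-form andi instruction; andi_ (andi.) always sets CR0 and takes only a 16-bit unsigned immediate, so the macro prefers rotate-and-mask instructions (clrldi/rlwinm/clrrdi) whenever the mask shape allows. Widening ui16 from int to long lets callers pass full-width masks such as a negative power of two, which is why the uimm16 assert now guards only the andi_ fallback. A minimal standalone sketch of the same classification, assuming 64-bit long; is_pow2 and andi_form are hypothetical names, not HotSpot code:

    #include <cstdio>

    static bool is_pow2(long v) { return v > 0 && (v & (v - 1)) == 0; }

    static const char* andi_form(long mask) {
      if (is_pow2(mask + 1)) return "clrldi";  // 0...01...1: clear high bits
      if (is_pow2(mask))     return "rlwinm";  // single bit: keep just that bit
      if (is_pow2(-mask))    return "clrrdi";  // 1...10...0: clear low bits
      return "andi_";                          // general case: needs uimm16
    }

    int main() {
      printf("0x00ff -> %s\n", andi_form(0xff));    // clrldi
      printf("0x0080 -> %s\n", andi_form(0x80));    // rlwinm
      printf("-0x100 -> %s\n", andi_form(-0x100));  // clrrdi
      printf("0x1234 -> %s\n", andi_form(0x1234));  // andi_
      return 0;
    }
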
*** 354,364 ****
  // Load a 64 bit constant, optimized, not identifiable.
  // Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
  // 16 bit immediate offset.
  int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
    // Avoid accidentally trying to use R0 for indexed addressing.
-   assert(d != R0, "R0 not allowed");
    assert_different_registers(d, tmp);
  
    short xa, xb, xc, xd; // Four 16-bit chunks of const.
    long rem = x;         // Remaining part of const.
  
--- 354,363 ----
*** 368,406 ****
    if (rem == 0) { // opt 1: simm16
      li(d, xd);
      return 0;
    }
  
    xc = rem & 0xFFFF; // Next 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
  
    if (rem == 0) { // opt 2: simm32
      lis(d, xc);
    } else { // High 32 bits needed.
  
!     if (tmp != noreg) { // opt 3: We have a temp reg.
        // No carry propagation between xc and higher chunks here (use logical instructions).
        xa = (x >> 48) & 0xffff;
        xb = (x >> 32) & 0xffff;
        // No sign compensation, we use lis+ori or li to allow usage of R0.
!       bool load_xa = (xa != 0) || (xb < 0);
        bool return_xd = false;
  
!       if (load_xa) { lis(tmp, xa); }
        if (xc) { lis(d, xc); }
!       if (load_xa) {
          if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
        } else {
!         li(tmp, xb); // non-negative
        }
        if (xc) {
!         if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
!         else if (xd) { addi(d, d, xd); }
        } else {
          li(d, xd);
        }
        insrdi(d, tmp, 32, 0);
!       return return_xd ? xd : 0; // non-negative
      }
  
      xb = rem & 0xFFFF; // Next 16-bit chunk.
      rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
--- 367,456 ----
    if (rem == 0) { // opt 1: simm16
      li(d, xd);
      return 0;
    }
  
+   int retval = 0;
+   if (return_simm16_rest) {
+     retval = xd;
+     x = rem << 16;
+     xd = 0;
+   }
+ 
+   if (d == R0) { // Can't use addi.
+     if (is_simm(x, 32)) { // opt 2: simm32
+       lis(d, x >> 16);
+       if (xd) ori(d, d, (unsigned short)xd);
+     } else {
+       // 64-bit value: x = xa xb xc xd
+       xa = (x >> 48) & 0xffff;
+       xb = (x >> 32) & 0xffff;
+       xc = (x >> 16) & 0xffff;
+       bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
+       if (tmp == noreg || (xc == 0 && xd == 0)) {
+         if (xa_loaded) {
+           lis(d, xa);
+           if (xb) { ori(d, d, (unsigned short)xb); }
+         } else {
+           li(d, xb);
+         }
+         sldi(d, d, 32);
+         if (xc) { oris(d, d, (unsigned short)xc); }
+         if (xd) { ori( d, d, (unsigned short)xd); }
+       } else {
+         // Exploit instruction level parallelism if we have a tmp register.
+         bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
+         if (xa_loaded) {
+           lis(tmp, xa);
+         }
+         if (xc_loaded) {
+           lis(d, xc);
+         }
+         if (xa_loaded) {
+           if (xb) { ori(tmp, tmp, (unsigned short)xb); }
+         } else {
+           li(tmp, xb);
+         }
+         if (xc_loaded) {
+           if (xd) { ori(d, d, (unsigned short)xd); }
+         } else {
+           li(d, xd);
+         }
+         insrdi(d, tmp, 32, 0);
+       }
+     }
+     return retval;
+   }
+ 
    xc = rem & 0xFFFF; // Next 16-bit chunk.
    rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
  
    if (rem == 0) { // opt 2: simm32
      lis(d, xc);
    } else { // High 32 bits needed.
  
!     if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg.
        // No carry propagation between xc and higher chunks here (use logical instructions).
        xa = (x >> 48) & 0xffff;
        xb = (x >> 32) & 0xffff;
        // No sign compensation, we use lis+ori or li to allow usage of R0.
!       bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
        bool return_xd = false;
  
!       if (xa_loaded) { lis(tmp, xa); }
        if (xc) { lis(d, xc); }
!       if (xa_loaded) {
          if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
        } else {
!         li(tmp, xb);
        }
        if (xc) {
!         if (xd) { addi(d, d, xd); }
        } else {
          li(d, xd);
        }
        insrdi(d, tmp, 32, 0);
!       return retval;
      }
  
      xb = rem & 0xFFFF; // Next 16-bit chunk.
      rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
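
Background on this hunk: the chunking splits the constant into four signed 16-bit pieces xa..xd. Because addi and addis sign-extend their immediates, whenever a chunk has its top bit set the code carries 1 into the remaining high part (the "Compensation for sign extend" lines). The new d == R0 path avoids addi entirely, since addi with R0 as the base operand means "plus the literal value 0" on PPC, and uses only li/lis/ori/oris instead. A minimal standalone sketch that replays the opt-4 sequence lis/addi/sldi/addis/addi on a plain long to show the chunks add back up; the constant is an arbitrary example, not HotSpot code:

    #include <cassert>
    #include <cstdio>

    int main() {
      long x = 0x1234fedc5678L; // example with a chunk whose sign bit is set (0xfedc)
      long rem = x;

      short xd = rem & 0xFFFF; rem = (rem >> 16) + ((unsigned short)xd >> 15);
      short xc = rem & 0xFFFF; rem = (rem >> 16) + ((unsigned short)xc >> 15);
      short xb = rem & 0xFFFF; rem = (rem >> 16) + ((unsigned short)xb >> 15);
      short xa = rem & 0xFFFF;

      // Replay the lis/addi/sldi/addis/addi sequence on a plain long. Each
      // immediate is sign-extended, which the compensation above accounts for.
      long d = (long)xa << 16; // lis   d, xa
      d += xb;                 // addi  d, d, xb
      d <<= 32;                // sldi  d, d, 32
      d += (long)xc << 16;     // addis d, d, xc
      d += xd;                 // addi  d, d, xd
      assert(d == x);          // chunks add back up to the original constant

      printf("xa=%04hx xb=%04hx xc=%04hx xd=%04hx\n", xa, xb, xc, xd);
      return 0;
    }
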
*** 415,429 ****
      }
      sldi(d, d, 32);
      if (xc) { addis(d, d, xc); }
    }
  
-   // opt 5: Return offset to be inserted into following instruction.
-   if (return_simm16_rest) return xd;
- 
    if (xd) { addi(d, d, xd); }
    return 0;
  }
  
  #ifndef PRODUCT
  // Test of ppc assembler.
  void Assembler::test_asm() {
--- 465,519 ----
      }
      sldi(d, d, 32);
      if (xc) { addis(d, d, xc); }
    }
  
    if (xd) { addi(d, d, xd); }
+   return retval;
+ }
+ 
+ // We emit only one addition to s to optimize latency.
+ int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
+   assert(s != R0 && s != tmp, "unsupported");
+   long rem = x;
+ 
+   // Case 1: Can use mr or addi.
+   short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
+   rem = (rem >> 16) + ((unsigned short)xd >> 15);
+   if (rem == 0) {
+     if (xd == 0) {
+       if (d != s) { mr(d, s); }
        return 0;
+     }
+     if (return_simm16_rest) {
+       return xd;
+     }
+     addi(d, s, xd);
+     return 0;
+   }
+ 
+   // Case 2: Can use addis.
+   if (xd == 0) {
+     short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
+     rem = (rem >> 16) + ((unsigned short)xc >> 15);
+     if (rem == 0) {
+       addis(d, s, xc);
+       return 0;
+     }
+   }
+ 
+   // Other cases: load & add.
+   Register tmp1 = tmp,
+            tmp2 = noreg;
+   if ((d != tmp) && (d != s)) {
+     // Can use d.
+     tmp1 = d;
+     tmp2 = tmp;
+   }
+   int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
+   add(d, tmp1, s);
+   return simm16_rest;
  }
  
  #ifndef PRODUCT
  // Test of ppc assembler.
  void Assembler::test_asm() {
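
Background on this hunk: add_const_optimized computes d = s + x while emitting at most one instruction that actually adds to s (hence the latency remark): nothing or mr when x == 0, a single addi when x fits in 16 signed bits, a single addis when the low 16 bits are zero and the rest fits, and otherwise load_const_optimized into a scratch register followed by one add. A standalone sketch of that case selection, assuming 64-bit long and ignoring return_simm16_rest; add_const_plan is a hypothetical name, not HotSpot code:

    #include <cstdio>

    static const char* add_const_plan(long x) {
      long rem = x;
      short xd = rem & 0xFFFF;
      rem = (rem >> 16) + ((unsigned short)xd >> 15);
      if (rem == 0) return xd ? "addi d, s, xd" : "mr d, s (or nothing)";
      if (xd == 0) {
        short xc = rem & 0xFFFF;
        rem = (rem >> 16) + ((unsigned short)xc >> 15);
        if (rem == 0) return "addis d, s, xc";
      }
      return "load_const_optimized(tmp, x); add d, tmp, s";
    }

    int main() {
      printf("%ld: %s\n", 0L,          add_const_plan(0));           // mr/nothing
      printf("%ld: %s\n", -4096L,      add_const_plan(-4096));       // addi
      printf("%ld: %s\n", 0x30000L,    add_const_plan(0x30000));     // addis
      printf("%ld: %s\n", 0x12345678L, add_const_plan(0x12345678L)); // load & add
      return 0;
    }
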